En este documento estimaremos los modelos con variables exógenas vistos en clase.

Primero, cargamos los paquetes necesarios.

library(readxl)
library(dynlm)
library(ggplot2)
library(tseries)
library(tsDyn)
library(strucchange)
library(fUnitRoots)
library(forecast)
library(tidyverse)
library(zoo)
library(lubridate)

Cargamos los datos que vamos a utilizar:

data <- read_excel("dataUK.xlsx")

int <- data[,2]
ecosent <- data[,3]

int <- ts(int, start = c(2004,1), frequency = 12)
ecosent <- ts(ecosent, start = c(2004,1), frequency = 12)
date <- as.yearmon(data$date,"%Y%B")

plot(int, type="l", col="red", ylab="Interest Rate", xlab="")

Pruebas de raíces unitarias

unitrootTest(int)
## 
## Title:
##  Augmented Dickey-Fuller Test
## 
## Test Results:
##   PARAMETER:
##     Lag Order: 1
##   STATISTIC:
##     DF: -1.1104
##   P VALUE:
##     t: 0.2416 
##     n: 0.4523 
## 
## Description:
##  Sun May 01 17:57:01 2022 by user: sanbo
kpss.test(int)
## Warning in kpss.test(int): p-value smaller than printed p-value
## 
##  KPSS Test for Level Stationarity
## 
## data:  int
## KPSS Level = 2.4382, Truncation lag parameter = 4, p-value = 0.01

Modelo estacional

Dummies

d_month <- month(date)
dlm <- lm(int ~ as.factor(d_month))
summary(dlm)
## 
## Call:
## lm(formula = int ~ as.factor(d_month))
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.8650 -1.0855 -0.4971  1.4218  2.4650 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           2.440000   0.365193   6.681 4.21e-10 ***
## as.factor(d_month)2  -0.007857   0.516461  -0.015    0.988    
## as.factor(d_month)3  -0.030714   0.516461  -0.059    0.953    
## as.factor(d_month)4  -0.037143   0.516461  -0.072    0.943    
## as.factor(d_month)5  -0.047143   0.516461  -0.091    0.927    
## as.factor(d_month)6  -0.020714   0.516461  -0.040    0.968    
## as.factor(d_month)7  -0.002857   0.516461  -0.006    0.996    
## as.factor(d_month)8   0.015000   0.516461   0.029    0.977    
## as.factor(d_month)9   0.167692   0.526299   0.319    0.750    
## as.factor(d_month)10  0.177692   0.526299   0.338    0.736    
## as.factor(d_month)11  0.131538   0.526299   0.250    0.803    
## as.factor(d_month)12  0.042308   0.526299   0.080    0.936    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.366 on 152 degrees of freedom
## Multiple R-squared:  0.00334,    Adjusted R-squared:  -0.06879 
## F-statistic: 0.0463 on 11 and 152 DF,  p-value: 1

SARIMA

intseas <- arima(int, order=c(1,1,0), seasonal = list(order = c(1,0,0)))
intseas
## 
## Call:
## arima(x = int, order = c(1, 1, 0), seasonal = list(order = c(1, 0, 0)))
## 
## Coefficients:
##          ar1    sar1
##       0.6527  0.0313
## s.e.  0.0587  0.0789
## 
## sigma^2 estimated as 0.01096:  log likelihood = 136.31,  aic = -266.62

Analisis de Intervención

Quiebre

pq_int <- breakpoints(int ~ 1, breaks=1)
summary(pq_int)
## 
##   Optimal (m+1)-segment partition: 
## 
## Call:
## breakpoints.formula(formula = int ~ 1, breaks = 1)
## 
## Breakpoints at observation number:
##           
## m = 1   59
## 
## Corresponding to breakdates:
##                 
## m = 1   2008(11)
## 
## Fit:
##                  
## m   0      1     
## RSS 284.75  36.88
## BIC 566.10 241.10
ic_int <- confint(pq_int) 

plot(int,xlab="years",ylab="Interest Rate")
title("Punto de quiebre e intervalo de confianza")
lines(pq_int)
lines(ic_int)

Variable Escalón

escalon <- rep(0,164)
escalon[60:164] <- 1

arescalon <- arima(int, order=c(1,0,0), 
                   seasonal = list(order = c(1,0,0)),
                    xreg = escalon)

Estimación con modelos distintos antes y despues del quiebre

tar <- setar(int,thVar = escalon, th=0.5, mL=1, mH=1)
tar
## 
## Non linear autoregressive model
## 
## SETAR model ( 2 regimes)
## Coefficients:
## Low regime:
##   const.L    phiL.1 
## 0.2066565 0.9483165 
## 
## High regime:
##    const.H     phiH.1 
## 0.02164635 0.97338589 
## 
## Threshold:
## -Variable: external-Value: 0.5 (fixed)
## Proportion of points in low regime: 36.2%     High regime: 63.8%

Separamos la muestra en dos para ver si es estacionaria

int1 <- window(int,start=c(2004,1),end=c(2008,10))
int2 <- window(int, start=c(2008,11))

unitrootTest(int1)
## 
## Title:
##  Augmented Dickey-Fuller Test
## 
## Test Results:
##   PARAMETER:
##     Lag Order: 1
##   STATISTIC:
##     DF: 1.0149
##   P VALUE:
##     t: 0.9165 
##     n: 0.9115 
## 
## Description:
##  Sun May 01 17:57:02 2022 by user: sanbo
kpss.test(int1)
## Warning in kpss.test(int1): p-value smaller than printed p-value
## 
##  KPSS Test for Level Stationarity
## 
## data:  int1
## KPSS Level = 1.0911, Truncation lag parameter = 3, p-value = 0.01
unitrootTest(int2)
## 
## Title:
##  Augmented Dickey-Fuller Test
## 
## Test Results:
##   PARAMETER:
##     Lag Order: 1
##   STATISTIC:
##     DF: -0.6582
##   P VALUE:
##     t: 0.4298 
##     n: 0.5345 
## 
## Description:
##  Sun May 01 17:57:02 2022 by user: sanbo
kpss.test(int2)
## Warning in kpss.test(int2): p-value smaller than printed p-value
## 
##  KPSS Test for Level Stationarity
## 
## data:  int2
## KPSS Level = 1.09, Truncation lag parameter = 4, p-value = 0.01

Vemos si es necesario incluir más quiebres

pq_int2 <- breakpoints(int ~ 1)
summary(pq_int2)
## 
##   Optimal (m+1)-segment partition: 
## 
## Call:
## breakpoints.formula(formula = int ~ 1)
## 
## Breakpoints at observation number:
##                         
## m = 1      59           
## m = 2      59        133
## m = 3   35 59        133
## m = 4   35 59 87 123    
## m = 5   35 59 87 116 140
## 
## Corresponding to breakdates:
##                                                  
## m = 1            2008(11)                        
## m = 2            2008(11)                 2015(1)
## m = 3   2006(11) 2008(11)                 2015(1)
## m = 4   2006(11) 2008(11) 2011(3) 2014(3)        
## m = 5   2006(11) 2008(11) 2011(3) 2013(8) 2015(8)
## 
## Fit:
##                                                    
## m   0       1       2       3       4       5      
## RSS 284.754  36.882  22.833  15.482  12.046   9.988
## BIC 566.100 241.101 172.658 119.143  88.186  67.651

Calcula 5 quiebres, pero no tienen sentido economico, por lo cual parece indicar que el problema es de no estacionariedad.

Diferencio la serie

dint <- diff(int)

plot(dint, type="l",col="red")

modelamos la serie diferenciada con un impulso

impulso <- diff(escalon)

arimpulso <- arima(dint, order=c(1,0,0), 
                   xreg = impulso)
arimpulso
## 
## Call:
## arima(x = dint, order = c(1, 0, 0), xreg = impulso)
## 
## Coefficients:
##          ar1  intercept  impulso
##       0.6074    -0.0107  -0.5720
## s.e.  0.0656     0.0179   0.0826
## 
## sigma^2 estimated as 0.008239:  log likelihood = 159.59,  aic = -311.19

Variable Exógena estocastica

ecosent2 <- (ecosent-mean(ecosent))/stdev(ecosent)

fecha <- seq(as.Date("2004/1/1"),by="month",length.out=164)

base <- data.frame(cbind(fecha,int,ecosent2))
base$fecha <- as.Date(base$fecha)

base <- base %>%
        mutate(dint = c(NA,diff(int)))

ggplot(base) +
  geom_line((aes(x=fecha, y=dint, colour="Interes"))) +
  geom_line((aes(x=fecha, y=ecosent2,colour="Ecosent")))+
  scale_colour_manual("",
                      breaks = c("Interes","Ecosent"),
                      values = c("red","blue"))
## Warning: Removed 1 row(s) containing missing values (geom_path).

Primer modelo con la tasa de interes en Niveles

adl <- dynlm(int ~ L(int,1) + ecosent + L(ecosent,1))
summary(adl)
## 
## Time series regression with "ts" data:
## Start = 2004(2), End = 2017(8)
## 
## Call:
## dynlm(formula = int ~ L(int, 1) + ecosent + L(ecosent, 1))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.99962 -0.04683  0.00189  0.07351  0.24796 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   -0.480161   0.097843  -4.907 2.27e-06 ***
## L(int, 1)      1.002099   0.007885 127.094  < 2e-16 ***
## ecosent        0.006274   0.003440   1.824   0.0701 .  
## L(ecosent, 1) -0.001726   0.003446  -0.501   0.6172    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1302 on 159 degrees of freedom
## Multiple R-squared:  0.9905, Adjusted R-squared:  0.9903 
## F-statistic:  5541 on 3 and 159 DF,  p-value: < 2.2e-16
ecosent3 <-  ts(ecosent[2:164], start = c(2004,2), frequency = 12)

adldiff <- dynlm(dint ~ L(dint,1) + L(ecosent3,1))
summary(adldiff)
## 
## Time series regression with "ts" data:
## Start = 2004(3), End = 2017(8)
## 
## Call:
## dynlm(formula = dint ~ L(dint, 1) + L(ecosent3, 1))
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.78298 -0.03038  0.00434  0.04142  0.21564 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -0.1567098  0.0824126  -1.902   0.0590 .  
## L(dint, 1)      0.6106012  0.0637695   9.575   <2e-16 ***
## L(ecosent3, 1)  0.0014909  0.0008075   1.846   0.0667 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1048 on 159 degrees of freedom
## Multiple R-squared:  0.4395, Adjusted R-squared:  0.4325 
## F-statistic: 62.34 on 2 and 159 DF,  p-value: < 2.2e-16

Modelos con variables umbral estocastica

tar <- setar(dint,thDelay = 1, ML=1,MH=1,thVar = ecosent3,th=100)
tar
## 
## Non linear autoregressive model
## 
## SETAR model ( 2 regimes)
## Coefficients:
## Low regime:
##      const.L       phiL.1 
## -0.008333618  0.697620429 
## 
## High regime:
##       const.H        phiH.1 
## -0.0008402702  0.4101814115 
## 
## Threshold:
## -Variable: external-Value: 100 (fixed)
## Proportion of points in low regime: 42.24%    High regime: 57.76%
summary(tar)
## 
## Non linear autoregressive model
## 
## SETAR model ( 2 regimes)
## Coefficients:
## Low regime:
##      const.L       phiL.1 
## -0.008333618  0.697620429 
## 
## High regime:
##       const.H        phiH.1 
## -0.0008402702  0.4101814115 
## 
## Threshold:
## -Variable: external-Value: 100 (fixed)
## Proportion of points in low regime: 42.24%    High regime: 57.76% 
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.7719276 -0.0304470  0.0031457  0.0383336  0.2355013 
## 
## Fit:
## residuals variance = 0.0106,  AIC = -733, MAPE = 131.3%
## 
## Coefficient(s):
## 
##            Estimate  Std. Error  t value  Pr(>|t|)    
## const.L -0.00833362  0.01303121  -0.6395  0.523410    
## phiL.1   0.69762043  0.06653145  10.4856 < 2.2e-16 ***
## const.H -0.00084027  0.01087961  -0.0772  0.938535    
## phiH.1   0.41018141  0.14259220   2.8766  0.004572 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Threshold
## Variable: external
## Value: 100 (fixed)
sel <- selectSETAR(dint,thDelay = 1,mL=1,mH=1,nthresh=1,
                   criterion = "BIC",thVar = ecosent3)
## Using only first 162 elements of thVar
## Searching on 89 possible threshold values within regimes with sufficient ( 15% ) number of observations
## Searching on  89  combinations of thresholds ( 89 ), thDelay ( 1 ), mL ( 1 ) and MM ( 2 )

sel
## Results of the grid search for 1 threshold
##    thDelay mL mH   th       BIC
## 1        1  1  1 91.3 -724.5250
## 2        1  1  1 91.4 -723.8406
## 3        1  1  1 91.5 -723.3230
## 4        1  1  1 92.0 -723.0310
## 5        1  1  1 92.2 -722.1431
## 6        1  1  1 92.4 -721.9448
## 7        1  1  1 92.9 -721.9422
## 8        1  1  1 93.1 -721.9393
## 9        1  1  1 94.4 -721.6513
## 10       1  1  1 93.3 -721.6285
tar2 <- setar(dint,thDelay = 1, ML=1,MH=1,thVar = ecosent3,th=91.3)
summary(tar2)
## 
## Non linear autoregressive model
## 
## SETAR model ( 2 regimes)
## Coefficients:
## Low regime:
##     const.L      phiL.1 
## -0.03343551  0.72131638 
## 
## High regime:
##     const.H      phiH.1 
## 0.001304042 0.314487983 
## 
## Threshold:
## -Variable: external-Value: 91.3 (fixed)
## Proportion of points in low regime: 15.53%    High regime: 84.47% 
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.7342668 -0.0313040  0.0012755  0.0386960  0.2788491 
## 
## Fit:
## residuals variance = 0.01004,  AIC = -742, MAPE = 117.5%
## 
## Coefficient(s):
## 
##           Estimate  Std. Error  t value  Pr(>|t|)    
## const.L -0.0334355   0.0217746  -1.5355  0.126642    
## phiL.1   0.7213164   0.0700501  10.2972 < 2.2e-16 ***
## const.H  0.0013040   0.0087061   0.1498  0.881124    
## phiH.1   0.3144880   0.1184663   2.6547  0.008746 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Threshold
## Variable: external
## Value: 91.3 (fixed)
ecosent4 <-  (ecosent3 - mean(ecosent3))/stdev(ecosent3)


star <- lstar(dint,thDelay = 1,mL=1,mH=1,thVar = ecosent4)
## Performing grid search for starting values...
## Starting values fixed: gamma =  100 , th =  -0.009648241 ; SSE =  1.670445 
## Grid search selected lower/upper bound gamma (was:  1 100 ]). 
##                    Might try to widen bound with arg: 'starting.control=list(gammaInt=c(1,200))'
## Optimization algorithm converged
## Optimized values fixed for regime 2  : gamma =  100.0001 , th =  -0.009908859 ; SSE =  1.670442
summary(star)
## 
## Non linear autoregressive model
## 
## LSTAR model
## Coefficients:
## Low regime:
##     const.L      phiL.1 
## -0.01358579  0.53307519 
## 
## High regime:
##     const.H      phiH.1 
## 0.004075266 0.420645961 
## 
## Smoothing parameter: gamma = 100 
## 
## Threshold
## Variable: Z(t) = + (0) X(t) + (1) X(t-1)
## 
## Value: -0.009909 
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.6454877 -0.0261268  0.0097055  0.0389227  0.2296670 
## 
## Fit:
## residuals variance = 0.01025,  AIC = -735, MAPE = 180.5%
## 
## Coefficient(s):
##            Estimate  Std. Error  t value  Pr(>|z|)    
## const.L  -0.0135858   0.0148297  -0.9161  0.359602    
## phiL.1    0.5330752   0.0713521   7.4710 7.949e-14 ***
## const.H   0.0040753   0.0207956   0.1960  0.844635    
## phiH.1    0.4206460   0.1402887   2.9984  0.002714 ** 
## gamma   100.0000581  59.0560457   1.6933  0.090397 .  
## th       -0.0099089   0.0174938  -0.5664  0.571108    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Non-linearity test of full-order LSTAR model against full-order AR model
##  F = 2.3358 ; p-value = 0.10011 
## 
## Threshold 
## Variable: Z(t) = + (0) X(t) + (1) X(t-1)