En este documento estimaremos los modelos con variables exógenas vistos en clase.
Primero, cargamos los paquetes necesarios.
library(readxl)
library(dynlm)
library(ggplot2)
library(tseries)
library(tsDyn)
library(strucchange)
library(fUnitRoots)
library(forecast)
library(tidyverse)
library(zoo)
library(lubridate)
Cargamos los datos que vamos a utilizar:
data <- read_excel("dataUK.xlsx")
int <- data[,2]
ecosent <- data[,3]
int <- ts(int, start = c(2004,1), frequency = 12)
ecosent <- ts(ecosent, start = c(2004,1), frequency = 12)
date <- as.yearmon(data$date,"%Y%B")
plot(int, type="l", col="red", ylab="Interest Rate", xlab="")
unitrootTest(int)
##
## Title:
## Augmented Dickey-Fuller Test
##
## Test Results:
## PARAMETER:
## Lag Order: 1
## STATISTIC:
## DF: -1.1104
## P VALUE:
## t: 0.2416
## n: 0.4523
##
## Description:
## Sun May 01 17:57:01 2022 by user: sanbo
kpss.test(int)
## Warning in kpss.test(int): p-value smaller than printed p-value
##
## KPSS Test for Level Stationarity
##
## data: int
## KPSS Level = 2.4382, Truncation lag parameter = 4, p-value = 0.01
d_month <- month(date)
dlm <- lm(int ~ as.factor(d_month))
summary(dlm)
##
## Call:
## lm(formula = int ~ as.factor(d_month))
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.8650 -1.0855 -0.4971 1.4218 2.4650
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.440000 0.365193 6.681 4.21e-10 ***
## as.factor(d_month)2 -0.007857 0.516461 -0.015 0.988
## as.factor(d_month)3 -0.030714 0.516461 -0.059 0.953
## as.factor(d_month)4 -0.037143 0.516461 -0.072 0.943
## as.factor(d_month)5 -0.047143 0.516461 -0.091 0.927
## as.factor(d_month)6 -0.020714 0.516461 -0.040 0.968
## as.factor(d_month)7 -0.002857 0.516461 -0.006 0.996
## as.factor(d_month)8 0.015000 0.516461 0.029 0.977
## as.factor(d_month)9 0.167692 0.526299 0.319 0.750
## as.factor(d_month)10 0.177692 0.526299 0.338 0.736
## as.factor(d_month)11 0.131538 0.526299 0.250 0.803
## as.factor(d_month)12 0.042308 0.526299 0.080 0.936
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.366 on 152 degrees of freedom
## Multiple R-squared: 0.00334, Adjusted R-squared: -0.06879
## F-statistic: 0.0463 on 11 and 152 DF, p-value: 1
intseas <- arima(int, order=c(1,1,0), seasonal = list(order = c(1,0,0)))
intseas
##
## Call:
## arima(x = int, order = c(1, 1, 0), seasonal = list(order = c(1, 0, 0)))
##
## Coefficients:
## ar1 sar1
## 0.6527 0.0313
## s.e. 0.0587 0.0789
##
## sigma^2 estimated as 0.01096: log likelihood = 136.31, aic = -266.62
pq_int <- breakpoints(int ~ 1, breaks=1)
summary(pq_int)
##
## Optimal (m+1)-segment partition:
##
## Call:
## breakpoints.formula(formula = int ~ 1, breaks = 1)
##
## Breakpoints at observation number:
##
## m = 1 59
##
## Corresponding to breakdates:
##
## m = 1 2008(11)
##
## Fit:
##
## m 0 1
## RSS 284.75 36.88
## BIC 566.10 241.10
ic_int <- confint(pq_int)
plot(int,xlab="years",ylab="Interest Rate")
title("Punto de quiebre e intervalo de confianza")
lines(pq_int)
lines(ic_int)
escalon <- rep(0,164)
escalon[60:164] <- 1
arescalon <- arima(int, order=c(1,0,0),
seasonal = list(order = c(1,0,0)),
xreg = escalon)
Estimación con modelos distintos antes y despues del quiebre
tar <- setar(int,thVar = escalon, th=0.5, mL=1, mH=1)
tar
##
## Non linear autoregressive model
##
## SETAR model ( 2 regimes)
## Coefficients:
## Low regime:
## const.L phiL.1
## 0.2066565 0.9483165
##
## High regime:
## const.H phiH.1
## 0.02164635 0.97338589
##
## Threshold:
## -Variable: external-Value: 0.5 (fixed)
## Proportion of points in low regime: 36.2% High regime: 63.8%
Separamos la muestra en dos para ver si es estacionaria
int1 <- window(int,start=c(2004,1),end=c(2008,10))
int2 <- window(int, start=c(2008,11))
unitrootTest(int1)
##
## Title:
## Augmented Dickey-Fuller Test
##
## Test Results:
## PARAMETER:
## Lag Order: 1
## STATISTIC:
## DF: 1.0149
## P VALUE:
## t: 0.9165
## n: 0.9115
##
## Description:
## Sun May 01 17:57:02 2022 by user: sanbo
kpss.test(int1)
## Warning in kpss.test(int1): p-value smaller than printed p-value
##
## KPSS Test for Level Stationarity
##
## data: int1
## KPSS Level = 1.0911, Truncation lag parameter = 3, p-value = 0.01
unitrootTest(int2)
##
## Title:
## Augmented Dickey-Fuller Test
##
## Test Results:
## PARAMETER:
## Lag Order: 1
## STATISTIC:
## DF: -0.6582
## P VALUE:
## t: 0.4298
## n: 0.5345
##
## Description:
## Sun May 01 17:57:02 2022 by user: sanbo
kpss.test(int2)
## Warning in kpss.test(int2): p-value smaller than printed p-value
##
## KPSS Test for Level Stationarity
##
## data: int2
## KPSS Level = 1.09, Truncation lag parameter = 4, p-value = 0.01
Vemos si es necesario incluir más quiebres
pq_int2 <- breakpoints(int ~ 1)
summary(pq_int2)
##
## Optimal (m+1)-segment partition:
##
## Call:
## breakpoints.formula(formula = int ~ 1)
##
## Breakpoints at observation number:
##
## m = 1 59
## m = 2 59 133
## m = 3 35 59 133
## m = 4 35 59 87 123
## m = 5 35 59 87 116 140
##
## Corresponding to breakdates:
##
## m = 1 2008(11)
## m = 2 2008(11) 2015(1)
## m = 3 2006(11) 2008(11) 2015(1)
## m = 4 2006(11) 2008(11) 2011(3) 2014(3)
## m = 5 2006(11) 2008(11) 2011(3) 2013(8) 2015(8)
##
## Fit:
##
## m 0 1 2 3 4 5
## RSS 284.754 36.882 22.833 15.482 12.046 9.988
## BIC 566.100 241.101 172.658 119.143 88.186 67.651
Calcula 5 quiebres, pero no tienen sentido economico, por lo cual parece indicar que el problema es de no estacionariedad.
Diferencio la serie
dint <- diff(int)
plot(dint, type="l",col="red")
modelamos la serie diferenciada con un impulso
impulso <- diff(escalon)
arimpulso <- arima(dint, order=c(1,0,0),
xreg = impulso)
arimpulso
##
## Call:
## arima(x = dint, order = c(1, 0, 0), xreg = impulso)
##
## Coefficients:
## ar1 intercept impulso
## 0.6074 -0.0107 -0.5720
## s.e. 0.0656 0.0179 0.0826
##
## sigma^2 estimated as 0.008239: log likelihood = 159.59, aic = -311.19
ecosent2 <- (ecosent-mean(ecosent))/stdev(ecosent)
fecha <- seq(as.Date("2004/1/1"),by="month",length.out=164)
base <- data.frame(cbind(fecha,int,ecosent2))
base$fecha <- as.Date(base$fecha)
base <- base %>%
mutate(dint = c(NA,diff(int)))
ggplot(base) +
geom_line((aes(x=fecha, y=dint, colour="Interes"))) +
geom_line((aes(x=fecha, y=ecosent2,colour="Ecosent")))+
scale_colour_manual("",
breaks = c("Interes","Ecosent"),
values = c("red","blue"))
## Warning: Removed 1 row(s) containing missing values (geom_path).
Primer modelo con la tasa de interes en Niveles
adl <- dynlm(int ~ L(int,1) + ecosent + L(ecosent,1))
summary(adl)
##
## Time series regression with "ts" data:
## Start = 2004(2), End = 2017(8)
##
## Call:
## dynlm(formula = int ~ L(int, 1) + ecosent + L(ecosent, 1))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.99962 -0.04683 0.00189 0.07351 0.24796
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.480161 0.097843 -4.907 2.27e-06 ***
## L(int, 1) 1.002099 0.007885 127.094 < 2e-16 ***
## ecosent 0.006274 0.003440 1.824 0.0701 .
## L(ecosent, 1) -0.001726 0.003446 -0.501 0.6172
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1302 on 159 degrees of freedom
## Multiple R-squared: 0.9905, Adjusted R-squared: 0.9903
## F-statistic: 5541 on 3 and 159 DF, p-value: < 2.2e-16
ecosent3 <- ts(ecosent[2:164], start = c(2004,2), frequency = 12)
adldiff <- dynlm(dint ~ L(dint,1) + L(ecosent3,1))
summary(adldiff)
##
## Time series regression with "ts" data:
## Start = 2004(3), End = 2017(8)
##
## Call:
## dynlm(formula = dint ~ L(dint, 1) + L(ecosent3, 1))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.78298 -0.03038 0.00434 0.04142 0.21564
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.1567098 0.0824126 -1.902 0.0590 .
## L(dint, 1) 0.6106012 0.0637695 9.575 <2e-16 ***
## L(ecosent3, 1) 0.0014909 0.0008075 1.846 0.0667 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1048 on 159 degrees of freedom
## Multiple R-squared: 0.4395, Adjusted R-squared: 0.4325
## F-statistic: 62.34 on 2 and 159 DF, p-value: < 2.2e-16
Modelos con variables umbral estocastica
tar <- setar(dint,thDelay = 1, ML=1,MH=1,thVar = ecosent3,th=100)
tar
##
## Non linear autoregressive model
##
## SETAR model ( 2 regimes)
## Coefficients:
## Low regime:
## const.L phiL.1
## -0.008333618 0.697620429
##
## High regime:
## const.H phiH.1
## -0.0008402702 0.4101814115
##
## Threshold:
## -Variable: external-Value: 100 (fixed)
## Proportion of points in low regime: 42.24% High regime: 57.76%
summary(tar)
##
## Non linear autoregressive model
##
## SETAR model ( 2 regimes)
## Coefficients:
## Low regime:
## const.L phiL.1
## -0.008333618 0.697620429
##
## High regime:
## const.H phiH.1
## -0.0008402702 0.4101814115
##
## Threshold:
## -Variable: external-Value: 100 (fixed)
## Proportion of points in low regime: 42.24% High regime: 57.76%
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.7719276 -0.0304470 0.0031457 0.0383336 0.2355013
##
## Fit:
## residuals variance = 0.0106, AIC = -733, MAPE = 131.3%
##
## Coefficient(s):
##
## Estimate Std. Error t value Pr(>|t|)
## const.L -0.00833362 0.01303121 -0.6395 0.523410
## phiL.1 0.69762043 0.06653145 10.4856 < 2.2e-16 ***
## const.H -0.00084027 0.01087961 -0.0772 0.938535
## phiH.1 0.41018141 0.14259220 2.8766 0.004572 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Threshold
## Variable: external
## Value: 100 (fixed)
sel <- selectSETAR(dint,thDelay = 1,mL=1,mH=1,nthresh=1,
criterion = "BIC",thVar = ecosent3)
## Using only first 162 elements of thVar
## Searching on 89 possible threshold values within regimes with sufficient ( 15% ) number of observations
## Searching on 89 combinations of thresholds ( 89 ), thDelay ( 1 ), mL ( 1 ) and MM ( 2 )
sel
## Results of the grid search for 1 threshold
## thDelay mL mH th BIC
## 1 1 1 1 91.3 -724.5250
## 2 1 1 1 91.4 -723.8406
## 3 1 1 1 91.5 -723.3230
## 4 1 1 1 92.0 -723.0310
## 5 1 1 1 92.2 -722.1431
## 6 1 1 1 92.4 -721.9448
## 7 1 1 1 92.9 -721.9422
## 8 1 1 1 93.1 -721.9393
## 9 1 1 1 94.4 -721.6513
## 10 1 1 1 93.3 -721.6285
tar2 <- setar(dint,thDelay = 1, ML=1,MH=1,thVar = ecosent3,th=91.3)
summary(tar2)
##
## Non linear autoregressive model
##
## SETAR model ( 2 regimes)
## Coefficients:
## Low regime:
## const.L phiL.1
## -0.03343551 0.72131638
##
## High regime:
## const.H phiH.1
## 0.001304042 0.314487983
##
## Threshold:
## -Variable: external-Value: 91.3 (fixed)
## Proportion of points in low regime: 15.53% High regime: 84.47%
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.7342668 -0.0313040 0.0012755 0.0386960 0.2788491
##
## Fit:
## residuals variance = 0.01004, AIC = -742, MAPE = 117.5%
##
## Coefficient(s):
##
## Estimate Std. Error t value Pr(>|t|)
## const.L -0.0334355 0.0217746 -1.5355 0.126642
## phiL.1 0.7213164 0.0700501 10.2972 < 2.2e-16 ***
## const.H 0.0013040 0.0087061 0.1498 0.881124
## phiH.1 0.3144880 0.1184663 2.6547 0.008746 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Threshold
## Variable: external
## Value: 91.3 (fixed)
ecosent4 <- (ecosent3 - mean(ecosent3))/stdev(ecosent3)
star <- lstar(dint,thDelay = 1,mL=1,mH=1,thVar = ecosent4)
## Performing grid search for starting values...
## Starting values fixed: gamma = 100 , th = -0.009648241 ; SSE = 1.670445
## Grid search selected lower/upper bound gamma (was: 1 100 ]).
## Might try to widen bound with arg: 'starting.control=list(gammaInt=c(1,200))'
## Optimization algorithm converged
## Optimized values fixed for regime 2 : gamma = 100.0001 , th = -0.009908859 ; SSE = 1.670442
summary(star)
##
## Non linear autoregressive model
##
## LSTAR model
## Coefficients:
## Low regime:
## const.L phiL.1
## -0.01358579 0.53307519
##
## High regime:
## const.H phiH.1
## 0.004075266 0.420645961
##
## Smoothing parameter: gamma = 100
##
## Threshold
## Variable: Z(t) = + (0) X(t) + (1) X(t-1)
##
## Value: -0.009909
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.6454877 -0.0261268 0.0097055 0.0389227 0.2296670
##
## Fit:
## residuals variance = 0.01025, AIC = -735, MAPE = 180.5%
##
## Coefficient(s):
## Estimate Std. Error t value Pr(>|z|)
## const.L -0.0135858 0.0148297 -0.9161 0.359602
## phiL.1 0.5330752 0.0713521 7.4710 7.949e-14 ***
## const.H 0.0040753 0.0207956 0.1960 0.844635
## phiH.1 0.4206460 0.1402887 2.9984 0.002714 **
## gamma 100.0000581 59.0560457 1.6933 0.090397 .
## th -0.0099089 0.0174938 -0.5664 0.571108
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Non-linearity test of full-order LSTAR model against full-order AR model
## F = 2.3358 ; p-value = 0.10011
##
## Threshold
## Variable: Z(t) = + (0) X(t) + (1) X(t-1)