Este informe documenta el análisis de series de tiempo aplicado a datos mensuales de precipitación en Texcoco (2002–2017), con énfasis en la modelación ARIMA y la identificación de eventos de intervención para mejorar la capacidad predictiva.
url="https://raw.githubusercontent.com/Zairpv/Time-series-modeling---Climate/main/Data/Precipitacion.csv"
Precipit <- read.csv(url, header = TRUE)
precipitacion <- as.numeric(t(as.matrix(Precipit[, -1])))
serie_precipitacion <- ts(precipitacion, start = c(1960, 1), frequency = 12)
Periodo_2002_2017 <- window(serie_precipitacion, start = c(2002, 1), end = c(2017, 12))
plot(serie_precipitacion, main = "Serie Temporal: Precipitación ",
ylab = "Precipitación (mm)", xlab = "Años")
plot(Periodo_2002_2017,
main = "Precipitación (2002–2017)",
ylab = "Precipitación (mm)",
xlab = "Años")
descomposición <- decompose(Periodo_2002_2017)
plot(descomposición)
par(mfrow = c(2, 1), mar = c(5, 4, 3, 1), cex = 0.6)
acf(Periodo_2002_2017, main = "Autocorrelación (ACF)")
pacf(Periodo_2002_2017, main = "Autocorrelación Parcial (PACF)")
dev.off()
## null device
## 1
Periodo_2002_2017_log <- log(Periodo_2002_2017 + 1)
par(mfrow = c(1, 2), mar = c(5, 4, 3, 1), cex = 0.6)
hist(Periodo_2002_2017, breaks = 20, col = "lightgrey", freq = FALSE,
main = "Serie sin transformar", xlab = "Precipitación (mm)")
lines(density(Periodo_2002_2017), col = "red", lwd = 2)
hist(Periodo_2002_2017_log, breaks = 20, col = "lightgrey", freq = FALSE,
main = "Serie transformada: log(1 + x)", xlab = "log(1 + precip)")
lines(density(Periodo_2002_2017_log), col = "red", lwd = 2)
dev.off()
## null device
## 1
(media_log <- mean(Periodo_2002_2017_log))
## [1] 3.36619
(mediana_log <- median(Periodo_2002_2017_log))
## [1] 3.688879
(asimetria_log <- skewness(Periodo_2002_2017_log))
## [1] -0.4291356
(curtosis_log <- kurtosis(Periodo_2002_2017_log))
## [1] 1.870175
(jb_test_log <- jarque.bera.test(Periodo_2002_2017_log))
##
## Jarque Bera Test
##
## data: Periodo_2002_2017_log
## X-squared = 16.105, df = 2, p-value = 0.0003183
descomposición_log <- decompose(Periodo_2002_2017_log)
plot(descomposición_log)
par(mfrow = c(2, 1), mar = c(5, 4, 3, 1), cex = 0.6)
acf(Periodo_2002_2017_log, main = "ACF: log(1 + precipitación)")
pacf(Periodo_2002_2017_log, main = "PACF: log(1 + precipitación)")
dev.off()
## null device
## 1
auto.arima(Periodo_2002_2017_log)
## Series: Periodo_2002_2017_log
## ARIMA(1,0,0)(1,1,0)[12]
##
## Coefficients:
## ar1 sar1
## 0.1489 -0.5169
## s.e. 0.0739 0.0622
##
## sigma^2 = 0.5174: log likelihood = -196.97
## AIC=399.93 AICc=400.07 BIC=409.51
ari.m1 <- Arima(Periodo_2002_2017_log,
order = c(1, 0, 0),
seasonal = list(order = c(1, 1, 0), period = 12),
method = "ML")
summary(ari.m1)
## Series: Periodo_2002_2017_log
## ARIMA(1,0,0)(1,1,0)[12]
##
## Coefficients:
## ar1 sar1
## 0.1489 -0.5169
## s.e. 0.0739 0.0622
##
## sigma^2 = 0.5174: log likelihood = -196.97
## AIC=399.93 AICc=400.07 BIC=409.51
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.02926178 0.6925524 0.505594 -Inf Inf 0.7742092 -0.007002272
checkresiduals(ari.m1)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,0,0)(1,1,0)[12]
## Q* = 43.54, df = 22, p-value = 0.004047
##
## Model df: 2. Total lags used: 24
Box.test(residuals(ari.m1), lag = 24, fitdf = 2, type = "Ljung")
##
## Box-Ljung test
##
## data: residuals(ari.m1)
## X-squared = 43.54, df = 22, p-value = 0.004047
ArchTest(residuals(ari.m1), lags = 12)
##
## ARCH LM-test; Null hypothesis: no ARCH effects
##
## data: residuals(ari.m1)
## Chi-squared = 23.766, df = 12, p-value = 0.02189
resid_arima1 <- residuals(ari.m1)
pars_arima1 <- coefs2poly(ari.m1)
valores_atipicos_M1 <- locate.outliers(resid_arima1, pars_arima1)
valores_atipicos_M1
## type ind coefhat tstat
## 2 AO 62 1.775249 3.757843
## 3 TC 35 -1.394234 -3.680229
## 4 TC 59 1.446964 3.819415
## 5 TC 99 -1.669627 -4.407158
## 6 TC 122 1.531918 4.043660
time(Periodo_2002_2017_log)[c(35, 59, 62, 99, 122)]
## [1] 2004.833 2006.833 2007.083 2010.167 2012.083
intervenciones_M1 <- matrix(0, nrow = length(Periodo_2002_2017_log), ncol = 5)
colnames(intervenciones_M1) <- c("AO_62", "TC_35", "TC_59", "TC_99", "TC_122")
intervenciones_M1[62, 1] <- 1
intervenciones_M1[35, 2] <- 1
intervenciones_M1[59, 3] <- 1
intervenciones_M1[99, 4] <- 1
intervenciones_M1[122, 5] <- 1
tiempo <- seq(as.Date("2002-01-01"), by = "month", length.out = 192)
outliers_indices <- c(35, 59, 62, 99, 122)
plot(tiempo, Periodo_2002_2017_log, type = "l", col = "black", lwd = 2,
main = "Precipitaciones log-transformadas (2002–2017)",
xlab = "Tiempo", ylab = "Log(Precipitación)")
points(tiempo[outliers_indices], Periodo_2002_2017_log[outliers_indices],
col = "red", pch = 19, cex = 1.3)
legend("topright", legend = "Outliers", col = "red", pch = 19)
arim1_interv <- Arima(Periodo_2002_2017_log,
order = c(1, 0, 0),
seasonal = list(order = c(1, 1, 0), period = 12),
xreg = intervenciones_M1,
method = "ML")
summary(arim1_interv)
## Series: Periodo_2002_2017_log
## Regression with ARIMA(1,0,0)(1,1,0)[12] errors
##
## Coefficients:
## ar1 sar1 AO_62 TC_35 TC_59 TC_99 TC_122
## 0.1472 -0.5137 1.7777 -1.0018 1.3624 -1.6124 1.3803
## s.e. 0.0755 0.0642 0.5231 0.5554 0.5546 0.5240 0.5327
##
## sigma^2 = 0.4329: log likelihood = -178.33
## AIC=372.67 AICc=373.51 BIC=398.21
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.02929598 0.6245398 0.4559964 -Inf Inf 0.6982609 -0.004267495
ajustado <- fitted(arim1_interv)
Serie_original <- exp(Periodo_2002_2017_log) - 1
ajustado_original <- exp(ajustado) - 1
df_original <- data.frame(
Fecha = tiempo,
Observado = Serie_original,
Ajustado = ajustado_original
)
ggplot(df_original, aes(x = Fecha)) +
geom_line(aes(y = Observado, color = "Observado"), size = 1) +
geom_line(aes(y = Ajustado, color = "Ajustado"), size = 0.8, alpha = 0.7) +
scale_color_manual(values = c("Observado" = "black", "Ajustado" = "blue")) +
labs(title = "Precipitación mensual (escala original)",
y = "Precipitación (mm)", x = "Tiempo", color = "") +
theme_minimal() +
scale_x_date(date_breaks = "1 year", date_labels = "%Y",
limits = as.Date(c("2002-01-01", "2017-12-01"))) +
theme(legend.position = "top")
pred_simple <- forecast::forecast(ari.m1, h = 12)
pred_simple_mean <- exp(pred_simple$mean) - 1
pred_simple_lower <- exp(pred_simple$lower[,2]) - 1
pred_simple_upper <- exp(pred_simple$upper[,2]) - 1
tiempo_pred_simple <- seq(max(tiempo) + 1, by = "month", length.out = 12)
df_pred_simple <- data.frame(
Fecha = c(tiempo, tiempo_pred_simple),
Observado = c(Serie_original, rep(NA, 12)),
Ajustado = c(ajustado_original, pred_simple_mean),
Lower = c(rep(NA, length(tiempo)), pred_simple_lower),
Upper = c(rep(NA, length(tiempo)), pred_simple_upper)
)
PREDICCIONMODELOARIMA_SIMPLE <- ggplot(df_pred_simple, aes(x = Fecha)) +
geom_line(aes(y = Observado), color = "black", size = 1) +
geom_line(aes(y = Ajustado), color = "blue", size = 1, linetype = "dashed") +
geom_ribbon(aes(ymin = Lower, ymax = Upper), fill = "blue", alpha = 0.2) +
scale_y_continuous(limits = c(0,600),breaks = seq(0,600,100)) +
scale_x_date(date_breaks = "1 year", date_labels = "%Y",
limits = as.Date(c("2002-01-01", "2018-12-01"))) +
labs(title = "Predicción: ARIMA (1,0,0)(1,1,0)[12]",
subtitle = "12 meses con intervalo de confianza 95%",
x = "Tiempo", y = "Precipitación (mm)") +
theme_minimal()
PREDICCIONMODELOARIMA_SIMPLE
xreg_futuro <- matrix(0, nrow = 12, ncol = 5)
colnames(xreg_futuro) <- colnames(intervenciones_M1)
prediccion <- forecast(arim1_interv, xreg = xreg_futuro, h = 12)
pred_original <- exp(prediccion$mean) - 1
lower_original <- exp(prediccion$lower[,2]) - 1
upper_original <- exp(prediccion$upper[,2]) - 1
tiempo_pred <- seq(max(tiempo) + 1, by = "month", length.out = 12)
df_pred <- data.frame(
Fecha = c(tiempo, tiempo_pred),
Observado = c(Serie_original, rep(NA, 12)),
Ajustado = c(ajustado_original, pred_original),
Lower = c(rep(NA, length(tiempo)), lower_original),
Upper = c(rep(NA, length(tiempo)), upper_original)
)
PREDICCIONMODELOARIMA_INTERVENCION <- ggplot(df_pred, aes(x = Fecha)) +
geom_line(aes(y = Observado), color = "black", size = 1) +
geom_line(aes(y = Ajustado), color = "blue", size = 1, linetype = "dashed") +
geom_ribbon(aes(ymin = Lower, ymax = Upper), fill = "blue", alpha = 0.2) +
geom_point(data = df_pred[outliers_indices, ],
aes(x = Fecha, y = Observado), color = "red", size = 2) +
labs(title = "Predicción: ARIMA (1,0,0)(1,1,0)[12] + Intervención",
subtitle = "12 meses con intervalo de confianza 95%",
x = "Tiempo", y = "Precipitación (mm)") +
scale_x_date(date_breaks = "1 year", date_labels = "%Y",
limits = as.Date(c("2002-01-01", "2018-12-01"))) +
scale_y_continuous(limits = c(0,600),breaks = seq(0,600,100)) +
theme_minimal()
PREDICCIONMODELOARIMA_INTERVENCION
ggarrange(PREDICCIONMODELOARIMA_SIMPLE,
PREDICCIONMODELOARIMA_INTERVENCION,
ncol = 1, nrow = 2)