1 1. Introducción

Este informe documenta el análisis de series de tiempo aplicado a datos mensuales de precipitación en Texcoco (2002–2017), con énfasis en la modelación ARIMA y la identificación de eventos de intervención para mejorar la capacidad predictiva.

2 2. Librerías y configuración

3 3. Carga de datos

url="https://raw.githubusercontent.com/Zairpv/Time-series-modeling---Climate/main/Data/Precipitacion.csv"
Precipit <- read.csv(url, header = TRUE)
precipitacion <- as.numeric(t(as.matrix(Precipit[, -1])))
serie_precipitacion <- ts(precipitacion, start = c(1960, 1), frequency = 12)
Periodo_2002_2017 <- window(serie_precipitacion, start = c(2002, 1), end = c(2017, 12))

4 4. Exploración de la serie de tiempo

plot(serie_precipitacion, main = "Serie Temporal: Precipitación ",
     ylab = "Precipitación (mm)", xlab = "Años")

plot(Periodo_2002_2017,
     main = "Precipitación (2002–2017)",
     ylab = "Precipitación (mm)",
     xlab = "Años")

descomposición <- decompose(Periodo_2002_2017)
plot(descomposición)

par(mfrow = c(2, 1), mar = c(5, 4, 3, 1), cex = 0.6)
acf(Periodo_2002_2017, main = "Autocorrelación (ACF)")
pacf(Periodo_2002_2017, main = "Autocorrelación Parcial (PACF)")

dev.off()
## null device 
##           1

5 5. Transformación logarítmica y análisis estadístico

Periodo_2002_2017_log <- log(Periodo_2002_2017 + 1)

par(mfrow = c(1, 2), mar = c(5, 4, 3, 1), cex = 0.6)
hist(Periodo_2002_2017, breaks = 20, col = "lightgrey", freq = FALSE,
     main = "Serie sin transformar", xlab = "Precipitación (mm)")
lines(density(Periodo_2002_2017), col = "red", lwd = 2)

hist(Periodo_2002_2017_log, breaks = 20, col = "lightgrey", freq = FALSE,
     main = "Serie transformada: log(1 + x)", xlab = "log(1 + precip)")
lines(density(Periodo_2002_2017_log), col = "red", lwd = 2)

dev.off()
## null device 
##           1
(media_log <- mean(Periodo_2002_2017_log))
## [1] 3.36619
(mediana_log <- median(Periodo_2002_2017_log))
## [1] 3.688879
(asimetria_log <- skewness(Periodo_2002_2017_log))
## [1] -0.4291356
(curtosis_log <- kurtosis(Periodo_2002_2017_log))
## [1] 1.870175
(jb_test_log <- jarque.bera.test(Periodo_2002_2017_log))
## 
##  Jarque Bera Test
## 
## data:  Periodo_2002_2017_log
## X-squared = 16.105, df = 2, p-value = 0.0003183
descomposición_log <- decompose(Periodo_2002_2017_log)
plot(descomposición_log)

par(mfrow = c(2, 1), mar = c(5, 4, 3, 1), cex = 0.6)
acf(Periodo_2002_2017_log, main = "ACF: log(1 + precipitación)")
pacf(Periodo_2002_2017_log, main = "PACF: log(1 + precipitación)")

dev.off()
## null device 
##           1

6 6. Modelado ARIMA

auto.arima(Periodo_2002_2017_log)
## Series: Periodo_2002_2017_log 
## ARIMA(1,0,0)(1,1,0)[12] 
## 
## Coefficients:
##          ar1     sar1
##       0.1489  -0.5169
## s.e.  0.0739   0.0622
## 
## sigma^2 = 0.5174:  log likelihood = -196.97
## AIC=399.93   AICc=400.07   BIC=409.51
ari.m1 <- Arima(Periodo_2002_2017_log,
                order = c(1, 0, 0),
                seasonal = list(order = c(1, 1, 0), period = 12),
                method = "ML")
summary(ari.m1)
## Series: Periodo_2002_2017_log 
## ARIMA(1,0,0)(1,1,0)[12] 
## 
## Coefficients:
##          ar1     sar1
##       0.1489  -0.5169
## s.e.  0.0739   0.0622
## 
## sigma^2 = 0.5174:  log likelihood = -196.97
## AIC=399.93   AICc=400.07   BIC=409.51
## 
## Training set error measures:
##                       ME      RMSE      MAE  MPE MAPE      MASE         ACF1
## Training set -0.02926178 0.6925524 0.505594 -Inf  Inf 0.7742092 -0.007002272
checkresiduals(ari.m1)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,0,0)(1,1,0)[12]
## Q* = 43.54, df = 22, p-value = 0.004047
## 
## Model df: 2.   Total lags used: 24
Box.test(residuals(ari.m1), lag = 24, fitdf = 2, type = "Ljung")
## 
##  Box-Ljung test
## 
## data:  residuals(ari.m1)
## X-squared = 43.54, df = 22, p-value = 0.004047
ArchTest(residuals(ari.m1), lags = 12)
## 
##  ARCH LM-test; Null hypothesis: no ARCH effects
## 
## data:  residuals(ari.m1)
## Chi-squared = 23.766, df = 12, p-value = 0.02189

7 7. Identificación de intervenciones y modelo ajustado

resid_arima1 <- residuals(ari.m1)
pars_arima1 <- coefs2poly(ari.m1)
valores_atipicos_M1 <- locate.outliers(resid_arima1, pars_arima1)
valores_atipicos_M1
##   type ind   coefhat     tstat
## 2   AO  62  1.775249  3.757843
## 3   TC  35 -1.394234 -3.680229
## 4   TC  59  1.446964  3.819415
## 5   TC  99 -1.669627 -4.407158
## 6   TC 122  1.531918  4.043660
time(Periodo_2002_2017_log)[c(35, 59, 62, 99, 122)]
## [1] 2004.833 2006.833 2007.083 2010.167 2012.083
intervenciones_M1 <- matrix(0, nrow = length(Periodo_2002_2017_log), ncol = 5)
colnames(intervenciones_M1) <- c("AO_62", "TC_35", "TC_59", "TC_99", "TC_122")
intervenciones_M1[62, 1] <- 1
intervenciones_M1[35, 2] <- 1
intervenciones_M1[59, 3] <- 1
intervenciones_M1[99, 4] <- 1
intervenciones_M1[122, 5] <- 1

tiempo <- seq(as.Date("2002-01-01"), by = "month", length.out = 192)
outliers_indices <- c(35, 59, 62, 99, 122)

plot(tiempo, Periodo_2002_2017_log, type = "l", col = "black", lwd = 2,
     main = "Precipitaciones log-transformadas (2002–2017)",
     xlab = "Tiempo", ylab = "Log(Precipitación)")
points(tiempo[outliers_indices], Periodo_2002_2017_log[outliers_indices],
       col = "red", pch = 19, cex = 1.3)
legend("topright", legend = "Outliers", col = "red", pch = 19)

arim1_interv <- Arima(Periodo_2002_2017_log,
                      order = c(1, 0, 0),
                      seasonal = list(order = c(1, 1, 0), period = 12),
                      xreg = intervenciones_M1,
                      method = "ML")
summary(arim1_interv)
## Series: Periodo_2002_2017_log 
## Regression with ARIMA(1,0,0)(1,1,0)[12] errors 
## 
## Coefficients:
##          ar1     sar1   AO_62    TC_35   TC_59    TC_99  TC_122
##       0.1472  -0.5137  1.7777  -1.0018  1.3624  -1.6124  1.3803
## s.e.  0.0755   0.0642  0.5231   0.5554  0.5546   0.5240  0.5327
## 
## sigma^2 = 0.4329:  log likelihood = -178.33
## AIC=372.67   AICc=373.51   BIC=398.21
## 
## Training set error measures:
##                       ME      RMSE       MAE  MPE MAPE      MASE         ACF1
## Training set -0.02929598 0.6245398 0.4559964 -Inf  Inf 0.6982609 -0.004267495
ajustado <- fitted(arim1_interv)
Serie_original <- exp(Periodo_2002_2017_log) - 1
ajustado_original <- exp(ajustado) - 1

df_original <- data.frame(
  Fecha = tiempo,
  Observado = Serie_original,
  Ajustado = ajustado_original
)

ggplot(df_original, aes(x = Fecha)) +
  geom_line(aes(y = Observado, color = "Observado"), size = 1) +
  geom_line(aes(y = Ajustado, color = "Ajustado"), size = 0.8, alpha = 0.7) +
  scale_color_manual(values = c("Observado" = "black", "Ajustado" = "blue")) +
  labs(title = "Precipitación mensual (escala original)",
       y = "Precipitación (mm)", x = "Tiempo", color = "") +
  theme_minimal() +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y",
               limits = as.Date(c("2002-01-01", "2017-12-01"))) +
  theme(legend.position = "top")

8 8. Predicciones (12 meses)

pred_simple <- forecast::forecast(ari.m1, h = 12)
pred_simple_mean <- exp(pred_simple$mean) - 1
pred_simple_lower <- exp(pred_simple$lower[,2]) - 1
pred_simple_upper <- exp(pred_simple$upper[,2]) - 1
tiempo_pred_simple <- seq(max(tiempo) + 1, by = "month", length.out = 12)

df_pred_simple <- data.frame(
  Fecha = c(tiempo, tiempo_pred_simple),
  Observado = c(Serie_original, rep(NA, 12)),
  Ajustado = c(ajustado_original, pred_simple_mean),
  Lower = c(rep(NA, length(tiempo)), pred_simple_lower),
  Upper = c(rep(NA, length(tiempo)), pred_simple_upper)
)

PREDICCIONMODELOARIMA_SIMPLE <- ggplot(df_pred_simple, aes(x = Fecha)) +
  geom_line(aes(y = Observado), color = "black", size = 1) +
  geom_line(aes(y = Ajustado), color = "blue", size = 1, linetype = "dashed") +
  geom_ribbon(aes(ymin = Lower, ymax = Upper), fill = "blue", alpha = 0.2) +
  scale_y_continuous(limits = c(0,600),breaks = seq(0,600,100)) +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y",
               limits = as.Date(c("2002-01-01", "2018-12-01"))) +
  labs(title = "Predicción: ARIMA (1,0,0)(1,1,0)[12]",
       subtitle = "12 meses con intervalo de confianza 95%",
       x = "Tiempo", y = "Precipitación (mm)") +
  theme_minimal()

PREDICCIONMODELOARIMA_SIMPLE

xreg_futuro <- matrix(0, nrow = 12, ncol = 5)
colnames(xreg_futuro) <- colnames(intervenciones_M1)
prediccion <- forecast(arim1_interv, xreg = xreg_futuro, h = 12)

pred_original <- exp(prediccion$mean) - 1
lower_original <- exp(prediccion$lower[,2]) - 1
upper_original <- exp(prediccion$upper[,2]) - 1
tiempo_pred <- seq(max(tiempo) + 1, by = "month", length.out = 12)

df_pred <- data.frame(
  Fecha = c(tiempo, tiempo_pred),
  Observado = c(Serie_original, rep(NA, 12)),
  Ajustado = c(ajustado_original, pred_original),
  Lower = c(rep(NA, length(tiempo)), lower_original),
  Upper = c(rep(NA, length(tiempo)), upper_original)
)

PREDICCIONMODELOARIMA_INTERVENCION <- ggplot(df_pred, aes(x = Fecha)) +
  geom_line(aes(y = Observado), color = "black", size = 1) +
  geom_line(aes(y = Ajustado), color = "blue", size = 1, linetype = "dashed") +
  geom_ribbon(aes(ymin = Lower, ymax = Upper), fill = "blue", alpha = 0.2) +
  geom_point(data = df_pred[outliers_indices, ],
             aes(x = Fecha, y = Observado), color = "red", size = 2) +
  labs(title = "Predicción: ARIMA (1,0,0)(1,1,0)[12] + Intervención",
       subtitle = "12 meses con intervalo de confianza 95%",
       x = "Tiempo", y = "Precipitación (mm)") +
  scale_x_date(date_breaks = "1 year", date_labels = "%Y",
               limits = as.Date(c("2002-01-01", "2018-12-01"))) +
  scale_y_continuous(limits = c(0,600),breaks = seq(0,600,100)) +
  theme_minimal()

PREDICCIONMODELOARIMA_INTERVENCION

ggarrange(PREDICCIONMODELOARIMA_SIMPLE,
          PREDICCIONMODELOARIMA_INTERVENCION,
          ncol = 1, nrow = 2)