Extrapolative Methods

Naive model

library(forecast)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(zoo)
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
WFJ <- read.csv("WFJ_sales.csv")
str(WFJ)
## 'data.frame':    26 obs. of  2 variables:
##  $ Week     : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ WFJ_Sales: num  23056 24817 24300 23242 22862 ...
WFJ.ts <- ts(WFJ$WFJ_Sales, start = 1,frequency = 1)
WFJ.ts.forecast <- ts(WFJ.ts, start = 2, frequency = 1)

After we made a forecast, we can draw a time series plot of both the actual values and the forecast values.

We can use plot() function to draw a time series plot of the actual values. Then we can use lines() function to add a new line on the existing time series plot.

At the end, we can use legend function to the existing plot. For legend() function, the first argument is the position you want to put the legend on. Then second argument is the description you want for each item. We can sepecify the color of the items. Here for we are using drawing tiem series plots with line, I specify the type of the line in legend as the default (which is 1).

plot(WFJ.ts, main = "WFJ Sales Constant Forecast", ylab = "Sales", xlab = "Time/Week")
lines(WFJ.ts.forecast, col = "red")
legend("topright", c("Actual","Naive model"), col = c("black","red"), lty=1)

You can also use autoplot() to draw multiple time series objects in one single plot. First, we need to draw a time series plot using autoplot() function. Then at the end of the R sentence of autplot(), add a plus sign “+”, then you can click enter go to the next line.

At the next line, we can use autolayer() function to add a layer to the existing plot. If we want to add another layer, we can also add a plus sign “+” at the end of autolayer() sentence, then add another layer on the next line.

To add legend, we can specify series = “The name you want” on each layer, just as the example below. Then R will automatically assign a color to each series and create a legend.

autoplot(WFJ.ts,ylab="Sales", main = "U.S. WFJ Sales",xlab = "Time/Year",series =  "Actual Value") +
  autolayer(WFJ.ts.forecast, col = 2, series = "Naive model") 

Locally Constant Forecasts

WFJ Sales Example

mean(WFJ.ts)
## [1] 30102.24
WFJ.sale.constant <- ts(rep(mean(WFJ.ts), 25), start = 2, frequency = 1)

autoplot(WFJ.ts, main = "WFJ Sales Constant Forecast", ylab = "Sales", xlab = "Time/Week", series = "Actual Value") +
  autolayer(WFJ.sale.constant, col = 2, series = "Locally Constant Forecasts")

U.S. GDP Example

gdp <- read.csv("GDP_change_2.csv")
library(forecast)
str(gdp)
## 'data.frame':    54 obs. of  2 variables:
##  $ observation_date: chr  "1963-01-01" "1964-01-01" "1965-01-01" "1966-01-01" ...
##  $ GDP.Change      : num  5.5 7.4 8.4 9.6 5.7 9.4 8.2 5.5 8.5 9.8 ...
gdp.ts <- ts(gdp$GDP.Change, start = 1963, frequency  = 1)

gdp.constant.ts <- ts(rep(mean(gdp.ts), 53), start =1964, frequency = 1)
autoplot(gdp.ts, main = "U.S. GDP Changes", ylab = "GDP Change/percent", series = "Actual Values") +
  autolayer(gdp.constant.ts, col = 2,series = "Locally Constant Forecasts" )

K-Term Moving Averae 1-Step Ahead Forecast

Before we start: How use R to run a loop.

There are two ways to write a loop: while and for loop. Loop is very useful to do iterative and duplicated computing.

For example: calculate 1+1/2+1/3+…+1/100.

  • For Loop
summing <- 0
for(i in 1:100){
  summing <- summing + 1/i
}
summing
## [1] 5.187378
  • While Loop
i <- 1
x <- 0
while(i <= 100){
  x <- x + 1/i
  i <- i + 1
}
x
## [1] 5.187378

WFJ Sales (MA, K = 3 and 7)

In this example, we want moving average order K as 3 and 7. It means we’re going to use last 3 weeks and last 7 weeks moving average values to generate our forecast respectively.

We can create a data.frame object to store the predictions.

dim(WFJ)
## [1] 26  2
n <- dim(WFJ)[1]
WFJ_forecast <- matrix(NA, nrow = n, ncol = 2)
colnames(WFJ_forecast) <- c("MA_3", "MA_7")
rownames(WFJ_forecast) <- time(WFJ.ts)
WFJ_forecast <- as.data.frame(WFJ_forecast)
WFJ_forecast
##    MA_3 MA_7
## 1    NA   NA
## 2    NA   NA
## 3    NA   NA
## 4    NA   NA
## 5    NA   NA
## 6    NA   NA
## 7    NA   NA
## 8    NA   NA
## 9    NA   NA
## 10   NA   NA
## 11   NA   NA
## 12   NA   NA
## 13   NA   NA
## 14   NA   NA
## 15   NA   NA
## 16   NA   NA
## 17   NA   NA
## 18   NA   NA
## 19   NA   NA
## 20   NA   NA
## 21   NA   NA
## 22   NA   NA
## 23   NA   NA
## 24   NA   NA
## 25   NA   NA
## 26   NA   NA
## For MA 3, the first week we can predict is week 4 (3+1)
for(i in (3 + 1):n){
  sale_last_3 <- WFJ.ts[(i - 3) : (i - 1)] # We extract the last 3 weeks of week i.
  WFJ_forecast$MA_3[i] <- mean(sale_last_3) # We use the mean of last 3 weeks as the forecast
}

## For MA 7, the first week we can predict is week 8 (7+1)
for(i in (7 + 1):n){
  sale_last_7 <- WFJ.ts[(i - 7) : (i - 1)] # We extract the last 7 weeks of week i.
  WFJ_forecast$MA_7[i] <- mean(sale_last_7) # We use the mean of last 7 weeks as the forecast
}

WFJ_forecast
##        MA_3     MA_7
## 1        NA       NA
## 2        NA       NA
## 3        NA       NA
## 4  24057.67       NA
## 5  24119.78       NA
## 6  23468.11       NA
## 7  22989.15       NA
## 8  23038.67 23504.50
## 9  22907.72 23420.68
## 10 22700.62 23052.69
## 11 23025.79 23062.26
## 12 25354.82 23949.99
## 13 28372.20 25154.45
## 14 33154.51 27418.85
## 15 35252.10 29184.31
## 16 38077.05 31655.59
## 17 35978.81 33109.50
## 18 36563.33 34986.12
## 19 33798.53 35274.32
## 20 34867.31 35893.12
## 21 33419.43 35099.66
## 22 34363.32 34893.42
## 23 33698.48 34016.59
## 24 33611.98 34085.30
## 25 33652.15 33645.77
## 26 33701.12 33974.84

Then we can make a time series plot for the moving average forecasts and the actual values.

WFJ_MA_3.ts <- ts(WFJ_forecast$MA_3, start = 1, frequency =1)
WFJ_MA_7.ts <- ts(WFJ_forecast$MA_7, start = 1, frequency =1)


plot(WFJ.ts,ylab="Weekly Sales", main = "WFJ Sales & MA_3 MA_7 Prediction",xlab = "Time/Week")
lines(WFJ_MA_3.ts, col=2)
lines(WFJ_MA_7.ts, col=3)
legend("bottomright", c("Actual",colnames(WFJ_forecast)),col = c("black",2,3),lty=1)

autoplot(WFJ.ts,ylab="Weekly Sales", main = "WFJ Sales & MA_3 MA_7 Prediction",xlab = "Time/Week", series = "Actual Values") +
  autolayer(WFJ_MA_3.ts, col = 2, series = "MA 3") +
  autolayer(WFJ_MA_7.ts, col = 3, series = "MA 7")
## Warning: Removed 3 rows containing missing values (`geom_line()`).
## Warning: Removed 7 rows containing missing values (`geom_line()`).

U.S. GDP Example (MA 5 and MA 8)

We can create a data.frame to store the results.

dim(gdp)
## [1] 54  2
n <- dim(gdp)[1]
gdp_forecast <- matrix(NA, nrow = n, ncol = 2)
colnames(gdp_forecast) <- c("MA_5", "MA_8")
rownames(gdp_forecast) <- time(gdp.ts)
gdp_forecast <- as.data.frame(gdp_forecast)
gdp_forecast
##      MA_5 MA_8
## 1963   NA   NA
## 1964   NA   NA
## 1965   NA   NA
## 1966   NA   NA
## 1967   NA   NA
## 1968   NA   NA
## 1969   NA   NA
## 1970   NA   NA
## 1971   NA   NA
## 1972   NA   NA
## 1973   NA   NA
## 1974   NA   NA
## 1975   NA   NA
## 1976   NA   NA
## 1977   NA   NA
## 1978   NA   NA
## 1979   NA   NA
## 1980   NA   NA
## 1981   NA   NA
## 1982   NA   NA
## 1983   NA   NA
## 1984   NA   NA
## 1985   NA   NA
## 1986   NA   NA
## 1987   NA   NA
## 1988   NA   NA
## 1989   NA   NA
## 1990   NA   NA
## 1991   NA   NA
## 1992   NA   NA
## 1993   NA   NA
## 1994   NA   NA
## 1995   NA   NA
## 1996   NA   NA
## 1997   NA   NA
## 1998   NA   NA
## 1999   NA   NA
## 2000   NA   NA
## 2001   NA   NA
## 2002   NA   NA
## 2003   NA   NA
## 2004   NA   NA
## 2005   NA   NA
## 2006   NA   NA
## 2007   NA   NA
## 2008   NA   NA
## 2009   NA   NA
## 2010   NA   NA
## 2011   NA   NA
## 2012   NA   NA
## 2013   NA   NA
## 2014   NA   NA
## 2015   NA   NA
## 2016   NA   NA
## For MA 5, the first year we can predict is the 6th year (5+1)
for(i in (5 + 1):n){
  gdp_last_5 <- gdp.ts[(i - 5) : (i - 1)]
  gdp_forecast$MA_5[i] <- mean(gdp_last_5)
}

## For MA 8, the first year we can predict is the 9th year (8+1)
for(i in (8 + 1):n){
  gdp_last_8 <- gdp.ts[(i - 8) : (i - 1)]
  gdp_forecast$MA_8[i] <- mean(gdp_last_8)
}
GDP_MA_5.ts <- ts(gdp_forecast$MA_5, start = 1963, frequency =1)
GDP_MA_8.ts <- ts(gdp_forecast$MA_8, start = 1963, frequency =1)

plot(gdp.ts,ylab="GDP Changes", main = "U.S. GDP Changes & MA_5 MA_8 Forecast",xlab = "Time/Year")
lines(GDP_MA_5.ts, col=2)
lines(GDP_MA_8.ts, col = 3)
legend("topright", c("Actual",colnames(gdp_forecast)), col = c("black",2,3), lty=1)

autoplot(gdp.ts,ylab="GDP Changes", main = "U.S. GDP Changes & MA_5 MA_8 Forecast",xlab = "Time/Year", series = "Actual") +
  autolayer(GDP_MA_5.ts, col = 2, series = "MA 5") +
  autolayer(GDP_MA_8.ts, col = 3, series = "MA_8")
## Warning: Removed 5 rows containing missing values (`geom_line()`).
## Warning: Removed 8 rows containing missing values (`geom_line()`).

Performance

We can use RMSE to evalute the performance of predictions based on moving average. Here because we have some NA values in our prediction, so we when calculating mean(), we specify na.rm = T to exclude those NA values.

sqrt(mean((gdp$GDP.Change - GDP_MA_5.ts)^2,na.rm = T))
## [1] 2.165916
sqrt(mean((gdp$GDP.Change - GDP_MA_8.ts)^2,na.rm = T))
## [1] 2.202939
sqrt(mean((WFJ.ts - WFJ_MA_3.ts)^2,na.rm = T))
## [1] 3535.299
sqrt(mean((WFJ.ts - WFJ_MA_7.ts)^2,na.rm = T))
## [1] 5173.082