library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
WFJ <- read.csv("WFJ_sales.csv")
str(WFJ)
## 'data.frame': 26 obs. of 2 variables:
## $ Week : int 1 2 3 4 5 6 7 8 9 10 ...
## $ WFJ_Sales: num 23056 24817 24300 23242 22862 ...
WFJ.ts <- ts(WFJ$WFJ_Sales, start = 1,frequency = 1)
WFJ.ts.forecast <- ts(WFJ.ts, start = 2, frequency = 1)
After we made a forecast, we can draw a time series plot of both the actual values and the forecast values.
We can use plot() function to draw a time series plot of the actual values. Then we can use lines() function to add a new line on the existing time series plot.
At the end, we can use legend function to the existing plot. For legend() function, the first argument is the position you want to put the legend on. Then second argument is the description you want for each item. We can sepecify the color of the items. Here for we are using drawing tiem series plots with line, I specify the type of the line in legend as the default (which is 1).
plot(WFJ.ts, main = "WFJ Sales Constant Forecast", ylab = "Sales", xlab = "Time/Week")
lines(WFJ.ts.forecast, col = "red")
legend("topright", c("Actual","Naive model"), col = c("black","red"), lty=1)
You can also use autoplot() to draw multiple time series objects in one single plot. First, we need to draw a time series plot using autoplot() function. Then at the end of the R sentence of autplot(), add a plus sign “+”, then you can click enter go to the next line.
At the next line, we can use autolayer() function to add a layer to the existing plot. If we want to add another layer, we can also add a plus sign “+” at the end of autolayer() sentence, then add another layer on the next line.
To add legend, we can specify series = “The name you want” on each layer, just as the example below. Then R will automatically assign a color to each series and create a legend.
autoplot(WFJ.ts,ylab="Sales", main = "U.S. WFJ Sales",xlab = "Time/Year",series = "Actual Value") +
autolayer(WFJ.ts.forecast, col = 2, series = "Naive model")
mean(WFJ.ts)
## [1] 30102.24
WFJ.sale.constant <- ts(rep(mean(WFJ.ts), 25), start = 2, frequency = 1)
autoplot(WFJ.ts, main = "WFJ Sales Constant Forecast", ylab = "Sales", xlab = "Time/Week", series = "Actual Value") +
autolayer(WFJ.sale.constant, col = 2, series = "Locally Constant Forecasts")
gdp <- read.csv("GDP_change_2.csv")
library(forecast)
str(gdp)
## 'data.frame': 54 obs. of 2 variables:
## $ observation_date: chr "1963-01-01" "1964-01-01" "1965-01-01" "1966-01-01" ...
## $ GDP.Change : num 5.5 7.4 8.4 9.6 5.7 9.4 8.2 5.5 8.5 9.8 ...
gdp.ts <- ts(gdp$GDP.Change, start = 1963, frequency = 1)
gdp.constant.ts <- ts(rep(mean(gdp.ts), 53), start =1964, frequency = 1)
autoplot(gdp.ts, main = "U.S. GDP Changes", ylab = "GDP Change/percent", series = "Actual Values") +
autolayer(gdp.constant.ts, col = 2,series = "Locally Constant Forecasts" )
There are two ways to write a loop: while and for loop. Loop is very useful to do iterative and duplicated computing.
For example: calculate 1+1/2+1/3+…+1/100.
summing <- 0
for(i in 1:100){
summing <- summing + 1/i
}
summing
## [1] 5.187378
i <- 1
x <- 0
while(i <= 100){
x <- x + 1/i
i <- i + 1
}
x
## [1] 5.187378
In this example, we want moving average order K as 3 and 7. It means we’re going to use last 3 weeks and last 7 weeks moving average values to generate our forecast respectively.
We can create a data.frame object to store the predictions.
dim(WFJ)
## [1] 26 2
n <- dim(WFJ)[1]
WFJ_forecast <- matrix(NA, nrow = n, ncol = 2)
colnames(WFJ_forecast) <- c("MA_3", "MA_7")
rownames(WFJ_forecast) <- time(WFJ.ts)
WFJ_forecast <- as.data.frame(WFJ_forecast)
WFJ_forecast
## MA_3 MA_7
## 1 NA NA
## 2 NA NA
## 3 NA NA
## 4 NA NA
## 5 NA NA
## 6 NA NA
## 7 NA NA
## 8 NA NA
## 9 NA NA
## 10 NA NA
## 11 NA NA
## 12 NA NA
## 13 NA NA
## 14 NA NA
## 15 NA NA
## 16 NA NA
## 17 NA NA
## 18 NA NA
## 19 NA NA
## 20 NA NA
## 21 NA NA
## 22 NA NA
## 23 NA NA
## 24 NA NA
## 25 NA NA
## 26 NA NA
## For MA 3, the first week we can predict is week 4 (3+1)
for(i in (3 + 1):n){
sale_last_3 <- WFJ.ts[(i - 3) : (i - 1)] # We extract the last 3 weeks of week i.
WFJ_forecast$MA_3[i] <- mean(sale_last_3) # We use the mean of last 3 weeks as the forecast
}
## For MA 7, the first week we can predict is week 8 (7+1)
for(i in (7 + 1):n){
sale_last_7 <- WFJ.ts[(i - 7) : (i - 1)] # We extract the last 7 weeks of week i.
WFJ_forecast$MA_7[i] <- mean(sale_last_7) # We use the mean of last 7 weeks as the forecast
}
WFJ_forecast
## MA_3 MA_7
## 1 NA NA
## 2 NA NA
## 3 NA NA
## 4 24057.67 NA
## 5 24119.78 NA
## 6 23468.11 NA
## 7 22989.15 NA
## 8 23038.67 23504.50
## 9 22907.72 23420.68
## 10 22700.62 23052.69
## 11 23025.79 23062.26
## 12 25354.82 23949.99
## 13 28372.20 25154.45
## 14 33154.51 27418.85
## 15 35252.10 29184.31
## 16 38077.05 31655.59
## 17 35978.81 33109.50
## 18 36563.33 34986.12
## 19 33798.53 35274.32
## 20 34867.31 35893.12
## 21 33419.43 35099.66
## 22 34363.32 34893.42
## 23 33698.48 34016.59
## 24 33611.98 34085.30
## 25 33652.15 33645.77
## 26 33701.12 33974.84
Then we can make a time series plot for the moving average forecasts and the actual values.
WFJ_MA_3.ts <- ts(WFJ_forecast$MA_3, start = 1, frequency =1)
WFJ_MA_7.ts <- ts(WFJ_forecast$MA_7, start = 1, frequency =1)
plot(WFJ.ts,ylab="Weekly Sales", main = "WFJ Sales & MA_3 MA_7 Prediction",xlab = "Time/Week")
lines(WFJ_MA_3.ts, col=2)
lines(WFJ_MA_7.ts, col=3)
legend("bottomright", c("Actual",colnames(WFJ_forecast)),col = c("black",2,3),lty=1)
autoplot(WFJ.ts,ylab="Weekly Sales", main = "WFJ Sales & MA_3 MA_7 Prediction",xlab = "Time/Week", series = "Actual Values") +
autolayer(WFJ_MA_3.ts, col = 2, series = "MA 3") +
autolayer(WFJ_MA_7.ts, col = 3, series = "MA 7")
## Warning: Removed 3 rows containing missing values (`geom_line()`).
## Warning: Removed 7 rows containing missing values (`geom_line()`).
We can create a data.frame to store the results.
dim(gdp)
## [1] 54 2
n <- dim(gdp)[1]
gdp_forecast <- matrix(NA, nrow = n, ncol = 2)
colnames(gdp_forecast) <- c("MA_5", "MA_8")
rownames(gdp_forecast) <- time(gdp.ts)
gdp_forecast <- as.data.frame(gdp_forecast)
gdp_forecast
## MA_5 MA_8
## 1963 NA NA
## 1964 NA NA
## 1965 NA NA
## 1966 NA NA
## 1967 NA NA
## 1968 NA NA
## 1969 NA NA
## 1970 NA NA
## 1971 NA NA
## 1972 NA NA
## 1973 NA NA
## 1974 NA NA
## 1975 NA NA
## 1976 NA NA
## 1977 NA NA
## 1978 NA NA
## 1979 NA NA
## 1980 NA NA
## 1981 NA NA
## 1982 NA NA
## 1983 NA NA
## 1984 NA NA
## 1985 NA NA
## 1986 NA NA
## 1987 NA NA
## 1988 NA NA
## 1989 NA NA
## 1990 NA NA
## 1991 NA NA
## 1992 NA NA
## 1993 NA NA
## 1994 NA NA
## 1995 NA NA
## 1996 NA NA
## 1997 NA NA
## 1998 NA NA
## 1999 NA NA
## 2000 NA NA
## 2001 NA NA
## 2002 NA NA
## 2003 NA NA
## 2004 NA NA
## 2005 NA NA
## 2006 NA NA
## 2007 NA NA
## 2008 NA NA
## 2009 NA NA
## 2010 NA NA
## 2011 NA NA
## 2012 NA NA
## 2013 NA NA
## 2014 NA NA
## 2015 NA NA
## 2016 NA NA
## For MA 5, the first year we can predict is the 6th year (5+1)
for(i in (5 + 1):n){
gdp_last_5 <- gdp.ts[(i - 5) : (i - 1)]
gdp_forecast$MA_5[i] <- mean(gdp_last_5)
}
## For MA 8, the first year we can predict is the 9th year (8+1)
for(i in (8 + 1):n){
gdp_last_8 <- gdp.ts[(i - 8) : (i - 1)]
gdp_forecast$MA_8[i] <- mean(gdp_last_8)
}
GDP_MA_5.ts <- ts(gdp_forecast$MA_5, start = 1963, frequency =1)
GDP_MA_8.ts <- ts(gdp_forecast$MA_8, start = 1963, frequency =1)
plot(gdp.ts,ylab="GDP Changes", main = "U.S. GDP Changes & MA_5 MA_8 Forecast",xlab = "Time/Year")
lines(GDP_MA_5.ts, col=2)
lines(GDP_MA_8.ts, col = 3)
legend("topright", c("Actual",colnames(gdp_forecast)), col = c("black",2,3), lty=1)
autoplot(gdp.ts,ylab="GDP Changes", main = "U.S. GDP Changes & MA_5 MA_8 Forecast",xlab = "Time/Year", series = "Actual") +
autolayer(GDP_MA_5.ts, col = 2, series = "MA 5") +
autolayer(GDP_MA_8.ts, col = 3, series = "MA_8")
## Warning: Removed 5 rows containing missing values (`geom_line()`).
## Warning: Removed 8 rows containing missing values (`geom_line()`).
We can use RMSE to evalute the performance of predictions based on moving average. Here because we have some NA values in our prediction, so we when calculating mean(), we specify na.rm = T to exclude those NA values.
sqrt(mean((gdp$GDP.Change - GDP_MA_5.ts)^2,na.rm = T))
## [1] 2.165916
sqrt(mean((gdp$GDP.Change - GDP_MA_8.ts)^2,na.rm = T))
## [1] 2.202939
sqrt(mean((WFJ.ts - WFJ_MA_3.ts)^2,na.rm = T))
## [1] 3535.299
sqrt(mean((WFJ.ts - WFJ_MA_7.ts)^2,na.rm = T))
## [1] 5173.082