-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathNat Gas Forecast.py
126 lines (101 loc) · 5.72 KB
/
Nat Gas Forecast.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.graphics.tsaplots import plot_acf
from sktime.forecasting.base import ForecastingHorizon
from sktime.forecasting.fbprophet import Prophet
natgas=pd.read_csv("C:/Users/alich/Desktop/Python/Finance/Forage JPMorgan Job Simulations/Quantitative Research/Nat_Gas.csv"
, parse_dates=["Dates"])
natgas.index=natgas["Dates"] #Define the index for the natgas dataframe to be the date column
natgas=natgas.drop(columns=["Dates"]) #drop the date column, the dates are encoded in the index
newindex=pd.date_range(natgas.index[0],natgas.index[-1]) #define a new range of dates to be every day
#from the first date to the last date provided
natgas=natgas.reindex(newindex, fill_value=np.nan)#Set the new index for natgas to be the range just created,
#and fill all of the missing price entries with NaN
natgas=natgas.interpolate("cubic")#Replace the NaN entries with interpolated prices between the provided entries
trend=natgas.rolling(356).mean()#There's a roughly yearly seasonality to the data; calculate the trend according to this
decomp=sm.tsa.seasonal_decompose(natgas, model="additive")
plot_acf(decomp.resid) #0 autocorrelation between the residuals after decomposing additively
plt.title("Residuals Autocorrelation - Additive")
plt.grid()
plt.show()
decomp=sm.tsa.seasonal_decompose(natgas, model="multiplicative")
plot_acf(decomp.resid) #0 autocorrelation between the residuals after decomposing multiplicatively
plt.title("Residuals Autocorrelation - Multiplicative")
plt.grid()
plt.show()
#Since there's no autocorrelation regardless of whether we decompose additively or multiplicatively,
#it doesn't matter which we choose. We'll choose to run Prophet in seasonality_mode = "multiplicative".
plt.figure(figsize=(10,6))#Plot the price data
plt.plot(natgas)
plt.xlabel("Time")
plt.ylabel("Price")
plt.title("Natural Gas Price History")
plt.grid()
plt.show()
plt.figure(figsize=(10,6))#Plot the trend
plt.plot(trend)
plt.title("Trend in Natural Gas Prices")
plt.xlabel("Time")
plt.ylabel("Price")
plt.grid()
plt.show()
#At this point, we have an estimated price for any point in the range of the provided data. We have provided more
#granularity to the client. However, in case the client wants an estimate of future prices, we are now going to
#perform some time series forecasting to forecast the prices 1 year into the future, past the final date provided.
forecaster=Prophet(changepoint_prior_scale=0.0025, seasonality_mode="multiplicative")#Initialise the Prophet model for time series forecasting
horizon=365#Forecast 365 days into the future
y_train=natgas[:-horizon] #Let the training data be everything from the beginning of natgas until 365 days from the end
y_test=natgas.tail(horizon)#Let the testing data be the last 365 days of prices.
forecaster.fit(y_train)#Fit our initialised model to the training data
fh=ForecastingHorizon(y_test.index, is_relative=False)#Define our forecasting horizon to be the days in the testing index;
#namely we predict the last 365 days of data. This is to compare the
#predictions to the known results.
#y_pred=forecaster.predict(fh)#Predict on our forecasting horizon
#ci=forecaster.predict_interval(fh,coverage=0.5)#Give a confidence interval for the prediction
#y_true=natgas.tail(horizon)#The true values are the last 365 days of the natgas dataframe
#plt.figure(figsize=(10,6))
#plt.plot(natgas.tail(horizon*3), label="Actual", color="black")
#plt.gca().fill_between(ci.index, ci.iloc[:,0], ci.iloc[:,1], color="b", alpha=0.1)
#plt.plot(y_pred, label="Predicted")
#plt.title("Natural Gas Prices - Test")
#plt.ylim(bottom=9.5)
#plt.grid()
#plt.legend()
#plt.show()
model=Prophet(changepoint_prior_scale=0.0025, seasonality_mode="multiplicative")
model.fit(natgas)
last_date=natgas.index.max()
future=ForecastingHorizon(pd.date_range(str(last_date), periods=horizon, freq="D"), is_relative=False)
y_pred=model.predict(future)
ci=model.predict_interval(future, coverage=0.5).astype("float")
plt.figure(figsize=(10,6))
plt.plot(natgas, label="Actual", color="black")
plt.gca().fill_between(ci.index, ci.iloc[:,0], ci.iloc[:,1], color="b", alpha=0.09)
plt.plot(y_pred, label="Predicted")
plt.title("Natural Gas Prices - Forecast (with confidence intervals)")
plt.xlabel("Time")
plt.ylabel("Price")
plt.ylim(bottom=9.5)
plt.grid()
plt.legend()
plt.show()
forecasthistory=pd.concat([natgas, y_pred[1:]], axis=0) #first entry of y_pred is the prediction for 2024-09-30, which is an
#already-existing data point. Therefore, concatenate from the 2nd entry.
forecasthistory.loc["2024-10-01":"2024-10-20"]=np.nan #Take out some days and interpolate to smooth the sudden jump
#between the end of the collected data and the start of the estimated data.
forecasthistory=forecasthistory.interpolate("cubic")
forecasthistory.to_csv("Forecast.csv", index=True)
plt.figure(figsize=(10,6))#Plot the forecasted data with the transition between available & forecasted data smoothed
plt.plot(forecasthistory[:-horizon], color = "black", label="History")
plt.plot(forecasthistory[-horizon:], color = "royalblue", label="Forecast")
plt.title("Natural Gas Prices - Forecast (Smoothed)")
plt.xlabel("Time")
plt.ylabel("Price")
plt.ylim(bottom=9.5)
plt.grid()
plt.legend()
plt.show()
date_input=input("Enter Date (YYYY-MM-DD):")
print("Estimated Price:", float(forecasthistory.loc[f"{date_input}"]))