df['num_col'].autocorr()  # lag-1 autocorrelation of the series (pass lag= to check other lags)
# Plot ACF and PACF graph
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
plot_acf(df['num_col'], lags=20, alpha=0.05)   # alpha = 1 - confidence level (0.05 -> 95% bands)
plot_pacf(df['num_col'], lags=20, alpha=0.05)
from statsmodels.tsa.stattools import acf
acf(df['num_col'])  # Numeric ACF values instead of a plot
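# Optional sketch: with alpha set, acf() also returns confidence intervals
# ('num_col' is the same placeholder column used above)
acf_vals, conf_int = acf(df['num_col'], nlags=20, alpha=0.05)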
# White noise
import numpy as np
noise = np.random.normal(loc=0, scale=1, size=500)
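# Sketch: white noise should show no significant autocorrelation at any lag,
# so its ACF bars should stay inside the confidence band (reuses plot_acf imported above)
plot_acf(noise, lags=20, alpha=0.05)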
# Augmented Dickey-Fuller test for a unit root (e.g. a random walk is non-stationary)
from statsmodels.tsa.stattools import adfuller
adfuller(df['num_col'])
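# Sketch of reading the ADF output: the returned tuple starts with the test statistic
# and the p-value ('num_col' is the placeholder column used above)
adf_stat, p_value = adfuller(df['num_col'])[:2]
print(f'ADF statistic: {adf_stat:.3f}, p-value: {p_value:.3f}')
# p-value < 0.05 -> reject the unit-root null, i.e. the series looks stationary
# A common fix for a non-stationary series is differencing: df['num_col'].diff().dropna()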
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import TimeSeriesSplit
import numpy as np
import matplotlib.pyplot as plt
df = YHOO # Assuming YHOO is your DataFrame containing stock data
num_lags = 3 # Adjust this according to your needs
for i in range(1, num_lags + 1):  # Create lagged features
    df[f'lag_{i}'] = df['close'].shift(i)
df.dropna(inplace=True) # Drop rows with NaN values resulting from the shift operation
X = df[[f'lag_{i}' for i in range(1, num_lags + 1)]]  # Features: the lagged columns only
y = df['close'] # Target variable
actual_values = []
predicted_values = []
tscv = TimeSeriesSplit(n_splits=5, max_train_size=30)  # rolling training window capped at 30 observations
model = LinearRegression()
# Train the model and make predictions
for train_index, test_index in tscv.split(X):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    actual_values.extend(y_test)       # extend() adds the individual values, not a nested list
    predicted_values.extend(y_pred)
result = pd.DataFrame({'Actual': actual_values, 'Predicted': predicted_values}, index=df.index[-len(predicted_values):])
result.plot()
plt.xlabel('Time')
plt.ylabel('Close Price')
plt.title('Actual vs Predicted Values')
plt.show()
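# Optional evaluation sketch using the actual/predicted lists built above
from sklearn.metrics import mean_absolute_error, mean_squared_error
mae = mean_absolute_error(actual_values, predicted_values)
rmse = np.sqrt(mean_squared_error(actual_values, predicted_values))
print(f'MAE: {mae:.3f}, RMSE: {rmse:.3f}')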