import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow import keras

# Import the Keras layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Dense, Dropout, LSTM, Activation

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.utils import shuffle

# Loading the data
data = pd.read_csv('files_input/infy/infy_2000 to 2008.csv')
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2496 entries, 0 to 2495
Data columns (total 15 columns):
 #   Column                  Non-Null Count  Dtype
---  ------                  --------------  -----
 0   Symbol                  2496 non-null   object
 1   Series                  2496 non-null   object
 2   Date                    2496 non-null   object
 3   Prev Close              2496 non-null   float64
 4   Open Price              2496 non-null   float64
 5   High Price              2496 non-null   float64
 6   Low Price               2496 non-null   float64
 7   Last Price              2496 non-null   float64
 8   Close Price             2496 non-null   float64
 9   Average Price           2496 non-null   float64
 10  Total Traded Quantity   2496 non-null   int64
 11  Turnover                2496 non-null   float64
 12  No. of Trades           2496 non-null   object
 13  Deliverable Qty         2496 non-null   object
 14  % Dly Qt to Traded Qty  2496 non-null   object
dtypes: float64(8), int64(1), object(6)
memory usage: 292.6+ KB

data.head()

# Keeping only the Open Price and Average Price columns
data = data[['Open Price', 'Average Price']]

# Scaling the Average Price values into the range 0 to 1
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_price = scaler.fit_transform(data.loc[:, 'Average Price'].values.reshape(-1, 1))

# Splitting the dataset 75:25 into training and test sets
train_size = int(data.shape[0] * 0.75)
train, test = scaled_price[0:train_size, :], scaled_price[train_size:data.shape[0], :]
print("Number of entries (training set, test set): " + str((len(train), len(test))))

Number of entries (training set, test set): (1872, 624)

# Turn the series into supervised samples: each window of `window_size`
# consecutive scaled prices is one input, and the next price is the target.
def create_dataset(scaled_price, window_size=1):
    data_X, data_Y = [], []
    for i in range(len(scaled_price) - window_size - 1):
        a = scaled_price[i:(i + window_size), 0]
        data_X.append(a)
        data_Y.append(scaled_price[i + window_size, 0])
    return (np.array(data_X), np.array(data_Y))

# Create training and test sets for one-step-ahead regression.
window_size = 3
train_X, train_Y = create_dataset(train, window_size)
test_X, test_Y = create_dataset(test, window_size)
print("Original training data shape:")
print(train_X.shape)

# Reshape the input data into the (samples, timesteps, features) form Keras expects.
train_X = np.reshape(train_X, (train_X.shape[0], 1, train_X.shape[1]))
test_X = np.reshape(test_X, (test_X.shape[0], 1, test_X.shape[1]))
print("New training data shape:")
print(train_X.shape)

Original training data shape:
(1868, 3)
New training data shape:
(1868, 1, 3)

Note that the windowing in create_dataset drops window_size + 1 rows from each split, which is why the 1,872 training entries yield 1,868 samples.

The LSTM architecture here consists of:
- One input layer.
- One LSTM layer with 4 units.
- One Dense layer to produce a single output.
- MSE as the loss function.

# Designing the LSTM model
model = Sequential()
model.add(LSTM(4, input_shape=(1, window_size)))
model.add(Dense(1))

2022-08-30 18:27:28.769044: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
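As a quick sanity check (not part of the original notebook), the parameter counts of this small network can be printed with model.summary(). The sketch below rebuilds the same architecture so it runs on its own; the expected counts in the comments are computed from the standard Keras LSTM parameter formula.

# Optional sanity check (a sketch, not from the original post): rebuild the same
# architecture and print its parameter counts.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

window_size = 3
check_model = Sequential()
check_model.add(LSTM(4, input_shape=(1, window_size)))
check_model.add(Dense(1))
check_model.summary()
# Expected: LSTM has 4 * ((3 + 4) * 4 + 4) = 128 parameters,
# Dense has 4 * 1 + 1 = 5, giving 133 trainable parameters in total.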
# Compiling the model
model.compile(loss="mean_squared_error", optimizer="adam")

# Training the model
model.fit(train_X, train_Y, epochs=10, batch_size=1)

Epoch 1/10
1868/1868 [==============================] - 8s 3ms/step - loss: 0.0053
Epoch 2/10
1868/1868 [==============================] - 5s 3ms/step - loss: 4.7545e-04
Epoch 3/10
1868/1868 [==============================] - 5s 3ms/step - loss: 4.2540e-04
Epoch 4/10
1868/1868 [==============================] - 5s 3ms/step - loss: 3.7605e-04
Epoch 5/10
1868/1868 [==============================] - 5s 3ms/step - loss: 3.4645e-04
Epoch 6/10
1868/1868 [==============================] - 5s 3ms/step - loss: 3.4557e-04
Epoch 7/10
1868/1868 [==============================] - 5s 3ms/step - loss: 3.2880e-04
Epoch 8/10
1868/1868 [==============================] - 5s 3ms/step - loss: 3.2757e-04
Epoch 9/10
1868/1868 [==============================] - 5s 3ms/step - loss: 3.0206e-04
Epoch 10/10
1868/1868 [==============================] - 5s 3ms/step - loss: 3.0305e-04
<keras.callbacks.History at 0x7fc9645e75b0>

# Undo the MinMax scaling on predictions and targets, then report RMSE
# on the original price scale.
def predict_and_score(model, X, Y):
    # Make predictions on the original scale of the data.
    predicted = scaler.inverse_transform(model.predict(X))
    # Prepare Y data to also be on the original scale for interpretability.
    orig_data = scaler.inverse_transform([Y])
    # Calculate RMSE.
    score = np.sqrt(mean_squared_error(orig_data[0], predicted[:, 0]))
    return (score, predicted)

rmse_train, train_predict = predict_and_score(model, train_X, train_Y)
rmse_test, test_predict = predict_and_score(model, test_X, test_Y)
print("Training data score: %.2f RMSE" % rmse_train)
print("Test data score: %.2f RMSE" % rmse_test)

59/59 [==============================] - 1s 2ms/step
20/20 [==============================] - 0s 2ms/step
Training data score: 248.61 RMSE
Test data score: 63.50 RMSE

# Plot the training-set predictions against the true series.
plt.figure(figsize=(15, 5))
plt.plot(scaler.inverse_transform(scaled_price), label="True value")
plt.plot(train_predict, label="Training set prediction")
plt.xlabel("Days")
plt.ylabel("Average Price")
plt.title("Comparison true vs. predicted training set")
plt.legend()
plt.show()

# Pad the test predictions with four dummy values so they line up with the
# window_size + 1 rows dropped by create_dataset.
test_predict_padded = np.concatenate(([[1900], [1900], [1900], [1900]], test_predict))
print("test_predict_padded.shape: ", test_predict_padded.shape)

test_predict_padded.shape:  (624, 1)

test_orig = data[['Average Price']].iloc[train_size:data.shape[0], :]
test_orig.reset_index(inplace=True, drop=True)
print("test_orig.shape: ", test_orig.shape)
print("test_predict.shape: ", test_predict.shape)

test_orig.shape:  (624, 1)
test_predict.shape:  (620, 1)

# Plot the test-set predictions against the actual test data points.
plt.figure(figsize=(15, 5))
plt.plot(test_predict_padded[0:200], label="Test set prediction")
plt.plot(test_orig[0:200], label="Test set actual data points")
plt.xlabel("Days")
plt.ylabel("Average Price")
plt.title("Comparison true vs. predicted on test set")
plt.legend()
plt.show()
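The four hard-coded 1900 values above exist only to shift the test predictions so they line up with test_orig on the x-axis. A less brittle variant (a sketch, not from the original post) pads with NaN instead, which matplotlib simply leaves as a gap, so no artificial price level appears in the plot:

# Alternative alignment sketch: pad with NaN instead of a fixed dummy price of 1900.
# Assumes test_predict and window_size are defined as in the cells above.
pad = np.full((window_size + 1, 1), np.nan)      # the 4 rows dropped by create_dataset
test_predict_padded = np.concatenate((pad, test_predict))
print("test_predict_padded.shape: ", test_predict_padded.shape)   # (624, 1), same as test_orig
# Plot exactly as above; matplotlib skips the NaN entries, leaving a gap
# at the start of the prediction curve instead of a fake price level.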
Tuesday, August 30, 2022
Prediction of Infy Stock Market Price using LSTM based model
Download Code and Data
Labels: Deep Learning