import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow import keras
# Import the Keras model and layer classes
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Dense, Dropout, LSTM, Activation
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.utils import shuffle
# Loading data
data = pd.read_csv('files_input/infy/infy_2000 to 2008.csv')
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2496 entries, 0 to 2495
Data columns (total 15 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Symbol 2496 non-null object
1 Series 2496 non-null object
2 Date 2496 non-null object
3 Prev Close 2496 non-null float64
4 Open Price 2496 non-null float64
5 High Price 2496 non-null float64
6 Low Price 2496 non-null float64
7 Last Price 2496 non-null float64
8 Close Price 2496 non-null float64
9 Average Price 2496 non-null float64
10 Total Traded Quantity 2496 non-null int64
11 Turnover 2496 non-null float64
12 No. of Trades 2496 non-null object
13 Deliverable Qty 2496 non-null object
14 % Dly Qt to Traded Qty 2496 non-null object
dtypes: float64(8), int64(1), object(6)
memory usage: 292.6+ KB
data.head()
# Keeping only the Open Price and Average Price columns
data = data[['Open Price', 'Average Price']]
# Scaling the values in the range of 0 to 1
scaler = MinMaxScaler(feature_range = (0, 1))
scaled_price = scaler.fit_transform(data.loc[:, 'Average Price'].values.reshape(-1, 1))
# Splitting dataset in the ratio of 75:25 for training and test
train_size = int(data.shape[0] * 0.75)
train, test = scaled_price[0:train_size, :], scaled_price[train_size:data.shape[0], :]
print("Number of entries (training set, test set): " + str((len(train), len(test))))
Number of entries (training set, test set): (1872, 624)
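Note that the scaler above was fit on the full series. A common alternative (a sketch only; the scaler_tr, train_scaled and test_scaled names are hypothetical and not used below) is to fit it on the training rows alone and only apply it to the test rows, so the test period does not influence the scaling:
# Sketch: fit the scaler on the training portion only to avoid look-ahead.
prices = data.loc[:, 'Average Price'].values.reshape(-1, 1)
scaler_tr = MinMaxScaler(feature_range=(0, 1))
train_scaled = scaler_tr.fit_transform(prices[:train_size])
test_scaled = scaler_tr.transform(prices[train_size:])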
def create_dataset(scaled_price, window_size=1):
    # Build supervised pairs: each X row is a window of consecutive scaled
    # prices, and the corresponding Y is the price immediately after it.
    data_X, data_Y = [], []
    for i in range(len(scaled_price) - window_size - 1):
        a = scaled_price[i:(i + window_size), 0]
        data_X.append(a)
        data_Y.append(scaled_price[i + window_size, 0])
    return np.array(data_X), np.array(data_Y)
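As a quick sanity check on the windowing (a toy illustration with made-up values, not part of the original data), create_dataset turns a series into overlapping input windows and next-step targets:
# Toy example: with window_size = 3, each row of X holds 3 consecutive values
# and Y holds the value that immediately follows that window.
toy = np.array([[0.1], [0.2], [0.3], [0.4], [0.5], [0.6]])
toy_X, toy_Y = create_dataset(toy, window_size=3)
print(toy_X)  # [[0.1 0.2 0.3]
              #  [0.2 0.3 0.4]]
print(toy_Y)  # [0.4 0.5]
# Note: the extra "- 1" in the loop also drops the last possible window.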
# Create test and training sets for one-step-ahead regression.
window_size = 3
train_X, train_Y = create_dataset(train, window_size)
test_X, test_Y = create_dataset(test, window_size)
print("Original training data shape:")
print(train_X.shape)
# Reshape the input data into appropriate form for Keras.
train_X = np.reshape(train_X, (train_X.shape[0], 1, train_X.shape[1]))
test_X = np.reshape(test_X, (test_X.shape[0], 1, test_X.shape[1]))
print("New training data shape:")
print(train_X.shape)
Original training data shape:
(1868, 3)
New training data shape:
(1868, 1, 3)
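This layout feeds the whole window to the LSTM as 3 features in a single timestep, matching the input_shape = (1, window_size) used below. Just as a sketch (the train_X_alt and test_X_alt names are hypothetical and not used later), the window could instead be unrolled over time, one feature per timestep:
# Alternative layout (sketch only): window_size timesteps with 1 feature each;
# the LSTM would then need input_shape = (window_size, 1).
train_X_alt = np.reshape(train_X, (train_X.shape[0], window_size, 1))
test_X_alt = np.reshape(test_X, (test_X.shape[0], window_size, 1))
print(train_X_alt.shape)  # (1868, 3, 1)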
The LSTM architecture here consists of:
One input layer.
One LSTM layer with 4 units.
One Dense layer producing a single output.
Mean squared error (MSE) as the loss function.
# Designing the LSTM model
model = Sequential()
model.add(LSTM(4, input_shape = (1, window_size)))
model.add(Dense(1))
# Compiling the model
model.compile(loss = "mean_squared_error", optimizer = "adam")
# Training the model
model.fit(train_X, train_Y, epochs=10, batch_size=1)
Epoch 1/10
1868/1868 [==============================] - 8s 3ms/step - loss: 0.0053
Epoch 2/10
1868/1868 [==============================] - 5s 3ms/step - loss: 4.7545e-04
Epoch 3/10
1868/1868 [==============================] - 5s 3ms/step - loss: 4.2540e-04
Epoch 4/10
1868/1868 [==============================] - 5s 3ms/step - loss: 3.7605e-04
Epoch 5/10
1868/1868 [==============================] - 5s 3ms/step - loss: 3.4645e-04
Epoch 6/10
1868/1868 [==============================] - 5s 3ms/step - loss: 3.4557e-04
Epoch 7/10
1868/1868 [==============================] - 5s 3ms/step - loss: 3.2880e-04
Epoch 8/10
1868/1868 [==============================] - 5s 3ms/step - loss: 3.2757e-04
Epoch 9/10
1868/1868 [==============================] - 5s 3ms/step - loss: 3.0206e-04
Epoch 10/10
1868/1868 [==============================] - 5s 3ms/step - loss: 3.0305e-04
<keras.callbacks.History at 0x7fc9645e75b0>
def predict_and_score(model, X, Y):
    # Make predictions on the original scale of the data.
    predicted = scaler.inverse_transform(model.predict(X))
    # Prepare Y data to also be on the original scale for interpretability.
    orig_data = scaler.inverse_transform([Y])
    # Calculate RMSE.
    score = np.sqrt(mean_squared_error(orig_data[0], predicted[:, 0]))
    return score, predicted
rmse_train, train_predict = predict_and_score(model, train_X, train_Y)
rmse_test, test_predict = predict_and_score(model, test_X, test_Y)
print("Training data score: %.2f RMSE" % rmse_train)
print("Test data score: %.2f RMSE" % rmse_test)
59/59 [==============================] - 1s 2ms/step
20/20 [==============================] - 0s 2ms/step
Training data score: 248.61 RMSE
Test data score: 63.50 RMSE
# Create the plot for predicted and the training data.
plt.figure(figsize = (15, 5))
plt.plot(scaler.inverse_transform(scaled_price), label = "True value")
plt.plot(train_predict, label = "Training set prediction")
plt.xlabel("Days")
plt.ylabel("Average Price")
plt.title("Comparison true vs. predicted training set")
plt.legend()
plt.show()
# Pad the first window_size + 1 rows (the samples dropped by create_dataset)
# so that the test predictions line up with test_orig for plotting.
test_predict_padded = np.concatenate(([[1900], [1900], [1900], [1900]], test_predict))
print("test_predict_padded.shape: ", test_predict_padded.shape)
test_predict_padded.shape: (624, 1)
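The four constant rows above simply stand in for the window_size + 1 samples that create_dataset drops from the start of the test set, so that the prediction array matches the length of test_orig. A sketch of an alternative (the pad and test_predict_nan names are hypothetical) is to pad with NaN, which matplotlib leaves blank instead of drawing a flat line at 1900:
# Alternative padding (sketch): NaN rows are simply not drawn by plt.plot.
pad = np.full((window_size + 1, 1), np.nan)
test_predict_nan = np.concatenate((pad, test_predict))
print(test_predict_nan.shape)  # (624, 1)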
test_orig = data[['Average Price']].iloc[train_size:data.shape[0], :]
test_orig.reset_index(inplace = True, drop=True)
print("test_orig.shape: ", test_orig.shape)
print("test_predict.shape: ", test_predict.shape)
test_orig.shape: (624, 1)
test_predict.shape: (620, 1)
# Create the plot for predicted and the test data.
plt.figure(figsize = (15, 5))
plt.plot(test_predict_padded[0:200], label = "Test set prediction")
plt.plot(test_orig[0:200], label = "Test set actual data points")
plt.xlabel("Days")
plt.ylabel("Average Price")
plt.title("Comparison true vs. predicted on test set")
plt.legend()
plt.show()