"""One-step-ahead prediction of the Infy 'Average Price' series with an LSTM.

Pipeline: load the daily price CSV, scale 'Average Price' to [0, 1], split
75:25 into train/test, turn each split into sliding windows of
``window_size`` past values, train a small LSTM on the windows, and report
the RMSE on the original price scale together with two comparison plots.

Comments marked "Recorded output" reproduce what the original notebook run
printed.  That run predates the sample-count fix in ``create_dataset``, so
the window counts shown there are one lower than the current code yields.
"""
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from sklearn.utils import shuffle
from tensorflow import keras
# import the Keras layers
from tensorflow.keras.layers import Activation, Dense, Dropout, Embedding, LSTM
from tensorflow.keras.models import Sequential

# Loading data
data = pd.read_csv('files_input/infy/infy_2000 to 2008.csv')
data.info()
# Recorded output:
#   <class 'pandas.core.frame.DataFrame'>
#   RangeIndex: 2496 entries, 0 to 2495
#   Data columns (total 15 columns):
#    #   Column                  Non-Null Count  Dtype
#   ---  ------                  --------------  -----
#    0   Symbol                  2496 non-null   object
#    1   Series                  2496 non-null   object
#    2   Date                    2496 non-null   object
#    3   Prev Close              2496 non-null   float64
#    4   Open Price              2496 non-null   float64
#    5   High Price              2496 non-null   float64
#    6   Low Price               2496 non-null   float64
#    7   Last Price              2496 non-null   float64
#    8   Close Price             2496 non-null   float64
#    9   Average Price           2496 non-null   float64
#    10  Total Traded Quantity   2496 non-null   int64
#    11  Turnover                2496 non-null   float64
#    12  No. of Trades           2496 non-null   object
#    13  Deliverable Qty         2496 non-null   object
#    14  % Dly Qt to Traded Qty  2496 non-null   object
#   dtypes: float64(8), int64(1), object(6)
#   memory usage: 292.6+ KB

# A bare `data.head()` only displays something as the last expression of a
# notebook cell; print it so the preview also shows when run as a script.
print(data.head())

# Selecting only the Open Price and Average Price columns
# (only 'Average Price' is used by the rest of the script).
data = data[['Open Price', 'Average Price']]

# Scaling the values in the range of 0 to 1
# NOTE(review): the scaler is fitted on the whole series before the split,
# so the test rows influence the min/max used to scale the training rows.
# Consider fitting on the training rows only.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_price = scaler.fit_transform(
    data.loc[:, 'Average Price'].values.reshape(-1, 1)
)

# Splitting dataset in the ratio of 75:25 for training and test
train_size = int(data.shape[0] * 0.75)
train = scaled_price[:train_size, :]
test = scaled_price[train_size:, :]
print(f"Number of entries (training set, test set): {(len(train), len(test))}")
# Recorded output:
#   Number of entries (training set, test set): (1872, 624)


def create_dataset(scaled_price, window_size=1):
    """Build sliding-window samples for one-step-ahead regression.

    Sample ``i`` consists of the ``window_size`` consecutive values
    ``scaled_price[i : i + window_size, 0]`` and the target
    ``scaled_price[i + window_size, 0]``.

    Args:
        scaled_price: 2-D array; only column 0 is read.
        window_size: Number of past values per sample (must be >= 1).

    Returns:
        Tuple ``(data_X, data_Y)`` where ``data_X`` has shape
        ``(n_samples, window_size)``, ``data_Y`` has shape ``(n_samples,)``
        and ``n_samples = max(len(scaled_price) - window_size, 0)``.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be at least 1")

    series = np.asarray(scaled_price)[:, 0]
    # Every start index whose target (i + window_size) is still inside the
    # series is valid.  The previous bound of `len - window_size - 1`
    # silently discarded the last available sample.
    n_samples = max(len(series) - window_size, 0)
    if n_samples == 0:
        # Keep the 2-D shape so the caller's reshape still works.
        return (
            np.empty((0, window_size), dtype=series.dtype),
            np.empty((0,), dtype=series.dtype),
        )

    data_X = np.array([series[i:i + window_size] for i in range(n_samples)])
    data_Y = np.array(series[window_size:])
    return data_X, data_Y


# Create test and training sets for one-step-ahead regression.
window_size = 3
train_X, train_Y = create_dataset(train, window_size)
test_X, test_Y = create_dataset(test, window_size)
print("Original training data shape:")
print(train_X.shape)

# Reshape the input data into appropriate form for Keras:
# (samples, 1, window_size), matching input_shape=(1, window_size) below.
train_X = np.reshape(train_X, (train_X.shape[0], 1, train_X.shape[1]))
test_X = np.reshape(test_X, (test_X.shape[0], 1, test_X.shape[1]))
print("New training data shape:")
print(train_X.shape)
# Recorded output (original run, one sample fewer than now):
#   Original training data shape:
#   (1868, 3)
#   New training data shape:
#   (1868, 1, 3)

# The LSTM architecture here consists of:
#   - One input layer.
#   - One LSTM layer of 4 blocks.
#   - One Dense layer to produce a single output.
#   - MSE as loss function.

# Designing the LSTM model
model = Sequential()
model.add(LSTM(4, input_shape=(1, window_size)))
model.add(Dense(1))
# Recorded output: TensorFlow logged its cpu_feature_guard notice here
# (binary optimized with oneDNN for SSE4.1 SSE4.2 AVX AVX2 FMA).

# Compiling the model
model.compile(loss="mean_squared_error", optimizer="adam")

# Training the model
model.fit(train_X, train_Y, epochs=10, batch_size=1)
# Recorded output (1868 steps per epoch, about 5 s per epoch):
#   Epoch  1/10 - loss: 0.0053
#   Epoch  2/10 - loss: 4.7545e-04
#   Epoch  3/10 - loss: 4.2540e-04
#   Epoch  4/10 - loss: 3.7605e-04
#   Epoch  5/10 - loss: 3.4645e-04
#   Epoch  6/10 - loss: 3.4557e-04
#   Epoch  7/10 - loss: 3.2880e-04
#   Epoch  8/10 - loss: 3.2757e-04
#   Epoch  9/10 - loss: 3.0206e-04
#   Epoch 10/10 - loss: 3.0305e-04


def predict_and_score(model, X, Y):
    """Predict with ``model`` and compute the RMSE on the original scale.

    Uses the module-level ``scaler`` to undo the [0, 1] scaling of both the
    predictions and the targets before scoring.

    Args:
        model: Fitted model exposing ``predict(X)``.
        X: Model inputs.
        Y: 1-D array of scaled targets aligned with ``X``.

    Returns:
        Tuple ``(score, predicted)``: the RMSE as a float and the
        predictions on the original price scale as an ``(n, 1)`` array.
    """
    # Make predictions on the original scale of the data.
    predicted = scaler.inverse_transform(model.predict(X))
    # Prepare Y data to also be on the original scale for interpretability.
    # The scaler was fitted on a single feature, so pass one column of n
    # rows rather than one row of n "features".
    orig_data = scaler.inverse_transform(np.asarray(Y).reshape(-1, 1))
    # Calculate RMSE.
    score = np.sqrt(mean_squared_error(orig_data[:, 0], predicted[:, 0]))
    return score, predicted


rmse_train, train_predict = predict_and_score(model, train_X, train_Y)
rmse_test, test_predict = predict_and_score(model, test_X, test_Y)
print(f"Training data score: {rmse_train:.2f} RMSE")
print(f"Test data score: {rmse_test:.2f} RMSE")
# Recorded output:
#   Training data score: 248.61 RMSE
#   Test data score: 63.50 RMSE

# Create the plot for the true series and the training-set predictions.
# Prediction j targets day j + window_size, so shift its x position to match.
train_days = np.arange(window_size, window_size + len(train_predict))
plt.figure(figsize=(15, 5))
plt.plot(scaler.inverse_transform(scaled_price), label="True value")
plt.plot(train_days, train_predict, label="Training set prediction")
plt.xlabel("Days")
plt.ylabel("Average Price")
plt.title("Comparison true vs. predicted training set")
plt.legend()
plt.show()

# The first `window_size` test days have no prediction.  Pad with NaN so the
# predictions line up with the actual series and no made-up values are drawn
# (the earlier four-element constant padding shifted the curve by one day).
test_predict_padded = np.concatenate(
    (np.full((window_size, 1), np.nan), test_predict)
)
print("test_predict_padded.shape: ", test_predict_padded.shape)

test_orig = data[['Average Price']].iloc[train_size:, :].reset_index(drop=True)
print("test_orig.shape: ", test_orig.shape)
print("test_predict.shape: ", test_predict.shape)
# Recorded output (original run, one prediction fewer than now):
#   test_predict_padded.shape:  (624, 1)
#   test_orig.shape:  (624, 1)
#   test_predict.shape:  (620, 1)

# Create the plot for the predictions and the actual test data.
plt.figure(figsize=(15, 5))
plt.plot(test_predict_padded[0:200], label="Test set prediction")
plt.plot(test_orig[0:200], label="Test set actual data points")
plt.xlabel("Days")
plt.ylabel("Average Price")
plt.title("Comparison true vs. predicted on test set")
plt.legend()
plt.show()
Tuesday, August 30, 2022
Prediction of Infy Stock Market Price Using an LSTM-Based Model
Download Code and Data
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment