import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow import keras

# Import the Keras layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Dense, Dropout, LSTM, Activation

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.utils import shuffle

# Loading the data
data = pd.read_csv('files_input/infy/infy_2000 to 2008.csv')
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2496 entries, 0 to 2495
Data columns (total 15 columns):
 #   Column                  Non-Null Count  Dtype
---  ------                  --------------  -----
 0   Symbol                  2496 non-null   object
 1   Series                  2496 non-null   object
 2   Date                    2496 non-null   object
 3   Prev Close              2496 non-null   float64
 4   Open Price              2496 non-null   float64
 5   High Price              2496 non-null   float64
 6   Low Price               2496 non-null   float64
 7   Last Price              2496 non-null   float64
 8   Close Price             2496 non-null   float64
 9   Average Price           2496 non-null   float64
 10  Total Traded Quantity   2496 non-null   int64
 11  Turnover                2496 non-null   float64
 12  No. of Trades           2496 non-null   object
 13  Deliverable Qty         2496 non-null   object
 14  % Dly Qt to Traded Qty  2496 non-null   object
dtypes: float64(8), int64(1), object(6)
memory usage: 292.6+ KB

data.head()

# Keeping only the Open Price and Average Price columns
data = data[['Open Price', 'Average Price']]

# Scaling the Average Price values into the range 0 to 1
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_price = scaler.fit_transform(data.loc[:, 'Average Price'].values.reshape(-1, 1))

# Splitting the dataset 75:25 into training and test sets
train_size = int(data.shape[0] * 0.75)
train, test = scaled_price[0:train_size, :], scaled_price[train_size:data.shape[0], :]
print("Number of entries (training set, test set): " + str((len(train), len(test))))

Number of entries (training set, test set): (1872, 624)

# Turn the series into supervised samples: each window of `window_size`
# consecutive scaled prices is one input, and the next price is the target.
def create_dataset(scaled_price, window_size=1):
    data_X, data_Y = [], []
    for i in range(len(scaled_price) - window_size - 1):
        a = scaled_price[i:(i + window_size), 0]
        data_X.append(a)
        data_Y.append(scaled_price[i + window_size, 0])
    return (np.array(data_X), np.array(data_Y))

# Create training and test sets for one-step-ahead regression.
window_size = 3
train_X, train_Y = create_dataset(train, window_size)
test_X, test_Y = create_dataset(test, window_size)
print("Original training data shape:")
print(train_X.shape)

# Reshape the input data into the (samples, timesteps, features) form Keras expects.
train_X = np.reshape(train_X, (train_X.shape[0], 1, train_X.shape[1]))
test_X = np.reshape(test_X, (test_X.shape[0], 1, test_X.shape[1]))
print("New training data shape:")
print(train_X.shape)

Original training data shape:
(1868, 3)
New training data shape:
(1868, 1, 3)

Note that the windowing in create_dataset drops window_size + 1 rows from each split, which is why the 1,872 training entries yield 1,868 samples.

The LSTM architecture here consists of:
- One input layer.
- One LSTM layer with 4 units.
- One Dense layer to produce a single output.
- MSE as the loss function.

# Designing the LSTM model
model = Sequential()
model.add(LSTM(4, input_shape=(1, window_size)))
model.add(Dense(1))

2022-08-30 18:27:28.769044: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
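As a quick sanity check (not part of the original notebook), the parameter counts of this small network can be printed with model.summary(). The sketch below rebuilds the same architecture so it runs on its own; the expected counts in the comments are computed from the standard Keras LSTM parameter formula.

# Optional sanity check (a sketch, not from the original post): rebuild the same
# architecture and print its parameter counts.
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

window_size = 3
check_model = Sequential()
check_model.add(LSTM(4, input_shape=(1, window_size)))
check_model.add(Dense(1))
check_model.summary()
# Expected: LSTM has 4 * ((3 + 4) * 4 + 4) = 128 parameters,
# Dense has 4 * 1 + 1 = 5, giving 133 trainable parameters in total.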
# Compiling the model
model.compile(loss="mean_squared_error", optimizer="adam")

# Training the model
model.fit(train_X, train_Y, epochs=10, batch_size=1)

Epoch 1/10
1868/1868 [==============================] - 8s 3ms/step - loss: 0.0053
Epoch 2/10
1868/1868 [==============================] - 5s 3ms/step - loss: 4.7545e-04
Epoch 3/10
1868/1868 [==============================] - 5s 3ms/step - loss: 4.2540e-04
Epoch 4/10
1868/1868 [==============================] - 5s 3ms/step - loss: 3.7605e-04
Epoch 5/10
1868/1868 [==============================] - 5s 3ms/step - loss: 3.4645e-04
Epoch 6/10
1868/1868 [==============================] - 5s 3ms/step - loss: 3.4557e-04
Epoch 7/10
1868/1868 [==============================] - 5s 3ms/step - loss: 3.2880e-04
Epoch 8/10
1868/1868 [==============================] - 5s 3ms/step - loss: 3.2757e-04
Epoch 9/10
1868/1868 [==============================] - 5s 3ms/step - loss: 3.0206e-04
Epoch 10/10
1868/1868 [==============================] - 5s 3ms/step - loss: 3.0305e-04
<keras.callbacks.History at 0x7fc9645e75b0>

# Undo the MinMax scaling on predictions and targets, then report RMSE
# on the original price scale.
def predict_and_score(model, X, Y):
    # Make predictions on the original scale of the data.
    predicted = scaler.inverse_transform(model.predict(X))
    # Prepare Y data to also be on the original scale for interpretability.
    orig_data = scaler.inverse_transform([Y])
    # Calculate RMSE.
    score = np.sqrt(mean_squared_error(orig_data[0], predicted[:, 0]))
    return (score, predicted)

rmse_train, train_predict = predict_and_score(model, train_X, train_Y)
rmse_test, test_predict = predict_and_score(model, test_X, test_Y)
print("Training data score: %.2f RMSE" % rmse_train)
print("Test data score: %.2f RMSE" % rmse_test)

59/59 [==============================] - 1s 2ms/step
20/20 [==============================] - 0s 2ms/step
Training data score: 248.61 RMSE
Test data score: 63.50 RMSE

# Plot the training-set predictions against the true series.
plt.figure(figsize=(15, 5))
plt.plot(scaler.inverse_transform(scaled_price), label="True value")
plt.plot(train_predict, label="Training set prediction")
plt.xlabel("Days")
plt.ylabel("Average Price")
plt.title("Comparison true vs. predicted training set")
plt.legend()
plt.show()

# Pad the test predictions with four dummy values so they line up with the
# window_size + 1 rows dropped by create_dataset.
test_predict_padded = np.concatenate(([[1900], [1900], [1900], [1900]], test_predict))
print("test_predict_padded.shape: ", test_predict_padded.shape)

test_predict_padded.shape:  (624, 1)

test_orig = data[['Average Price']].iloc[train_size:data.shape[0], :]
test_orig.reset_index(inplace=True, drop=True)
print("test_orig.shape: ", test_orig.shape)
print("test_predict.shape: ", test_predict.shape)

test_orig.shape:  (624, 1)
test_predict.shape:  (620, 1)

# Plot the test-set predictions against the actual test data points.
plt.figure(figsize=(15, 5))
plt.plot(test_predict_padded[0:200], label="Test set prediction")
plt.plot(test_orig[0:200], label="Test set actual data points")
plt.xlabel("Days")
plt.ylabel("Average Price")
plt.title("Comparison true vs. predicted on test set")
plt.legend()
plt.show()
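The four hard-coded 1900 values above exist only to shift the test predictions so they line up with test_orig on the x-axis. A less brittle variant (a sketch, not from the original post) pads with NaN instead, which matplotlib simply leaves as a gap, so no artificial price level appears in the plot:

# Alternative alignment sketch: pad with NaN instead of a fixed dummy price of 1900.
# Assumes test_predict and window_size are defined as in the cells above.
pad = np.full((window_size + 1, 1), np.nan)      # the 4 rows dropped by create_dataset
test_predict_padded = np.concatenate((pad, test_predict))
print("test_predict_padded.shape: ", test_predict_padded.shape)   # (624, 1), same as test_orig
# Plot exactly as above; matplotlib skips the NaN entries, leaving a gap
# at the start of the prediction curve instead of a fake price level.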
Tuesday, August 30, 2022
Prediction of Infy Stock Market Price using LSTM based model
Download Code and Data
Labels: Deep Learning