#importing required libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import os
from IPython import display
import warnings as wn
from datetime import datetime as dt
wn.filterwarnings('ignore')

#plt.style.available


#Getting the file-names from directory 

files_list=os.listdir("D:\python\Jupyter\Projects\TimeSeriesAnalysis") #Directory
files_list

['.ipynb_checkpoints',
 'AAPL_data.csv',
 'AMZN_data.csv',
 'GOOG_data.csv',
 'MSFT_data.csv',
 'TimeseriesForcastingOnStocks.ipynb',
 'Untitled.ipynb']


#Removing the unnecesary elements / filenames

files_list.pop(0) 
files_list.pop(-1)
files_list.pop(-1)

files_list

['AAPL_data.csv', 'AMZN_data.csv', 'GOOG_data.csv', 'MSFT_data.csv']


#for later analysis

#getting the datframes through indices of names in files_list list
apple=pd.read_csv(files_list[0]) 
amazon=pd.read_csv(files_list[1])
google=pd.read_csv(files_list[2])
microsoft=pd.read_csv(files_list[3])


#Replacing the company abbreviations to their official names.
apple['Name']=apple['Name'].apply(lambda x:x.replace('AAPL','apple'))
amazon['Name']=amazon['Name'].apply(lambda x:x.replace('AMZN','amazon'))
google['Name']=google['Name'].apply(lambda x:x.replace('GOOG','google'))
microsoft['Name']=microsoft['Name'].apply(lambda x:x.replace('MSFT','microsoft'))

#converting date variable / column to datetime
apple['date']=pd.to_datetime(apple['date'],format='%Y-%m-%d')
microsoft['date']=pd.to_datetime(microsoft['date'],format='%Y-%m-%d')
google['date']=pd.to_datetime(google['date'],format='%Y-%m-%d')
amazon['date']=pd.to_datetime(amazon['date'],format='%Y-%m-%d')

#setting the date column as index so that it be 
apple.set_index('date',inplace=True)
amazon.set_index('date',inplace=True)
microsoft.set_index('date',inplace=True)
google.set_index('date',inplace=True)


# Concatenating all datasets row-wise into a single dataframe
df=pd.concat([apple,amazon,microsoft,google]) 
#print(f"Dataset contains the stocks from date{df.index[0,0]})
df.head(10).style.background_gradient(cmap=sns.color_palette('bone_r',as_cmap=True),high=1,low=1.6)


df.index[0]

Timestamp('2013-02-08 00:00:00')


#datatypes of the values in each column/variable

df.dtypes

open      float64
high      float64
low       float64
close     float64
volume      int64
Name       object
dtype: object


#basic description of our data.

df.describe().style.background_gradient(cmap=sns.color_palette('bone_r',as_cmap=True),high=1,low=1.6)


# count of Rows of each company in our dataset

list1=df['Name'].value_counts()
list1

apple        1259
amazon       1259
microsoft    1259
google        975
Name: Name, dtype: int64


#list of mnc company names which we have in our dataset
mnc_list=list1.index


plt.style.use('seaborn-whitegrid')
plt.style.use("tableau-colorblind10")

plt.figure(figsize=(15,10))


colors=['cadetblue','seagreen','darkcyan','darkslategrey']
x=0
for i,company in enumerate(mnc_list,1):  #Running an iterator for our company's closing price plots
    plt.subplot(2,2,i) 
    df1=df[df['Name']==company]
    plt.plot(df1.index,df1['close'],color=colors[x])
    x=x+1
    plt.title(company+"'s closing price")
    plt.xlabel('Year')
    plt.ylabel('Price')
plt.xlabel('Year-Month')

plt.subplot(2,2,1) 
#plt.axhline(y=94,xmin=0.29,xmax=0.64 ,color = 'r',alpha=0.3)
#plt.axvline(x = 16935, ymin = 0, ymax = 0.3, color = 'r')
plt.axvline(x = dt(2015,5,22), color = 'red')
plt.axvspan(dt(2014,6,13), dt(2016,5,11),alpha=0.2, color='darkcyan')
plt.show()


colors=['cadetblue','seagreen','darkcyan','darkslategrey']
x=0
for company in mnc_list:  #Runnig an iterator for our company's closing price plots
        df1=df[df['Name']==company]
        fig=px.line(df1,x=df1.index,y='volume',title=f"{company}'s Trade Volume").update_traces(line_color=colors[x])
        x+=1
        fig.show()


display.Image("https://www.assetmacro.com/wp-content/uploads/2015/06/Rate-of-Return.png")
#https://www.assetmacro.com/financial-terms/rate-of-return/


df['PriceChange']=df['close']-df['open']
df['returns(%)']=(df['PriceChange']/df['close'])*100  
df.head(10).style.background_gradient(cmap=sns.color_palette('bone_r',as_cmap=True),high=1,low=1.6)


colors=['cadetblue','seagreen','darkcyan','darkslategrey']
x=0
for company in mnc_list:  #Runnig an iterator for our company's closing price plots
        df1=df[df['Name']==company]
        fig=px.line(df1,x=df1.index,y='returns(%)',title=f"{company}'s Returns").update_traces(line_color=colors[x])
        x+=1
        fig.show()


close=pd.DataFrame()
close['Apple']=apple['close']
close['Amazon']=amazon['close']
close['Microsoft']=microsoft['close']
close['Google']=google['close']
#close.head(10).style.background_gradient(cmap=sns.color_palette('bone_r',as_cmap=True),high=1,low=1.6)


palette = sns.color_palette('bone_r',as_cmap=True)
plt.figure(figsize=(16, 6))
sns.heatmap(close.corr(),annot=True,cmap=palette)
plt.show()


x=sns.PairGrid(data=close)
x.map_upper(sns.scatterplot,color='cadetblue')#cadetblue
x.map_lower(sns.kdeplot, color='cadetblue')#lightseagreen
x.map_diag(sns.kdeplot, lw=3, color='cadetblue')#plt.hist,bins=25,color='black'
x.fig.suptitle('correlation between monthly closing prices of companies',fontsize='x-large',y=1.04)
plt.show()


df1=pd.DataFrame()
df1['apple_returns(%)']=((apple['close']-apple['open'])/apple['close'])*100
df1['microsoft_returns(%)']=((microsoft['close']-microsoft['open'])/microsoft['close'])*100
df1['google_returns(%)']=((google['close']-google['open'])/google['close'])*100
df1['amazon_returns(%)']=((amazon['close']-amazon['open'])/amazon['close'])*100
df1.head(5).style.background_gradient(cmap=sns.color_palette('bone_r',as_cmap=True),high=1,low=1.6)


plt.figure(figsize=(16, 6))
sns.heatmap(df1.corr(),annot=True,cmap=palette)
plt.show()


x=sns.PairGrid(data=df1)
x.map_upper(sns.scatterplot,color='cadetblue')#cadetblue
x.map_lower(sns.kdeplot, color='cadetblue')#lightseagreen
x.map_diag(sns.kdeplot, lw=3, color='cadetblue')#plt.hist,bins=25,color='black'
x.fig.suptitle('correlation between returns(in%) of companies',fontsize='x-large',y=1.04)
plt.show()


df1['apple_returns(%)']

date
2013-02-08    0.206325
2013-02-11    0.714688
2013-02-12   -2.481344
2013-02-13   -0.042869
2013-02-14    0.443624
                ...   
2018-02-01    0.366551
2018-02-02   -3.426791
2018-02-05   -1.667838
2018-02-06    5.029749
2018-02-07   -2.222013
Name: apple_returns(%), Length: 1259, dtype: float64


plt.figure(figsize=(15,10))

colors=['cadetblue','seagreen','darkcyan','darkslategrey']

x=0

for i,company in enumerate(mnc_list,1):  #Running an iterator for our company's closing price plots
    plt.subplot(2,2,i) 
    sns.distplot(df1[company+'_returns(%)'],color=colors[x])
    x=x+1
    plt.title(company+"'s change")
    plt.xlabel('Year')
    plt.ylabel('Price')
plt.xlabel('Year-Month')
plt.show()


ap68=df1['apple_returns(%)'].std() # 68%of of data
ap95=df1['apple_returns(%)'].std()*2 #95% of enitre data
ap99=df1['apple_returns(%)'].std()*3


print(f" For Apple's returns,\n\n There is 68% of chance that our daily returns will fall under the range of -{ap68:.3f}% to {ap68:.3f}%, \n\n Similarly there is 95% chance that our daily returns will fall under  the range of -{ap95:.3f}% to {ap95:.3f}%\n\n and There is 99.7% chance that our daily returns will fall under the range of -{ap99:.3f}% to {ap99:.3f}%")

 For Apple's returns,

 There is 68% of chance that our daily returns will fall under the range of -1.187% to 1.187%, 

 Similarly there is 95% chance that our daily returns will fall under  the range of -2.374% to 2.374%

 and There is 99.7% chance that our daily returns will fall under the range of -3.561% to 3.561%


am68=df1['amazon_returns(%)'].std() # 68%of of data
am95=df1['amazon_returns(%)'].std()*2 #95% of enitre data
am99=df1['amazon_returns(%)'].std()*3

print(f" For Amazon's returns,\n\nThere is 68% of chance that our daily returns will fall under the range of -{am68:.3f}% to {am68:.3f}%, \n\nSimilarly there is 95% chance that our daily returns will fall under  the range of -{am95:.3f}% to {am95:.3f}%\n\nand There is 99.7% chance that our daily returns will fall under the range of -{am99:.3f}% to {am99:.3f}%")

 For Amazon's returns,

There is 68% of chance that our daily returns will fall under the range of -1.359% to 1.359%, 

Similarly there is 95% chance that our daily returns will fall under  the range of -2.717% to 2.717%

and There is 99.7% chance that our daily returns will fall under the range of -4.076% to 4.076%


msft68=df1['microsoft_returns(%)'].std() # 68%of of data
msft95=df1['microsoft_returns(%)'].std()*2 #95% of enitre data
msft99=df1['microsoft_returns(%)'].std()*3

print(f" For Microsoft's returns,\n\n There is 68% of chance that our daily returns will fall under the range of -{msft68:.3f}% to {msft68:.3f}%, \n\n Similarly there is 95% chance that our daily returns will fall under  the range of -{msft95:.3f}% to {msft95:.3f}%\n\n and There is 99.7% chance that our daily returns will fall under the range of -{msft99:.3f}% to {msft99:.3f}%")

 For Microsoft's returns,

 There is 68% of chance that our daily returns will fall under the range of -1.059% to 1.059%, 

 Similarly there is 95% chance that our daily returns will fall under  the range of -2.119% to 2.119%

 and There is 99.7% chance that our daily returns will fall under the range of -3.178% to 3.178%


goog68=df1['google_returns(%)'].std() # 68%of of data
goog95=df1['google_returns(%)'].std()*2 #95% of enitre data
goog99=df1['google_returns(%)'].std()*3

print(f" For Google's returns,\n\n There is 68% of chance that our daily returns will fall under the range of -{goog68:.3f}% to {goog68:.3f}%, \n\n Similarly there is 95% chance that our daily returns will fall under  the range of -{goog95:.3f}% to {goog95:.3f}%\n\n and There is 99.7% chance that our daily returns will fall under the range of -{goog99:.3f}% to {goog99:.3f}%")

 For Google's returns,

 There is 68% of chance that our daily returns will fall under the range of -1.093% to 1.093%, 

 Similarly there is 95% chance that our daily returns will fall under  the range of -2.185% to 2.185%

 and There is 99.7% chance that our daily returns will fall under the range of -3.278% to 3.278%


df1.describe().T.style.background_gradient(cmap=sns.color_palette('bone_r',as_cmap=True),high=1,low=1.6)

	open	high	low	close	volume	Name
date
2013-02-08 00:00:00	67.714200	68.401400	66.892800	67.854200	158168416	apple
2013-02-11 00:00:00	68.071400	69.277100	67.607100	68.561400	129029425	apple
2013-02-12 00:00:00	68.501400	68.911400	66.820500	66.842800	151829363	apple
2013-02-13 00:00:00	66.744200	67.662800	66.174200	66.715600	118721995	apple
2013-02-14 00:00:00	66.359900	67.377100	66.288500	66.655600	88809154	apple
2013-02-15 00:00:00	66.978500	67.165600	65.702800	65.737100	97924631	apple
2013-02-19 00:00:00	65.871400	66.104200	64.835600	65.712800	108854046	apple
2013-02-20 00:00:00	65.384200	65.384200	64.114200	64.121400	118891367	apple
2013-02-21 00:00:00	63.714200	64.167100	63.259900	63.722800	111596821	apple
2013-02-22 00:00:00	64.178500	64.514200	63.799900	64.401400	82583823	apple

	open	high	low	close	volume
count	4752.000000	4752.000000	4752.000000	4752.000000	4752.000000
mean	344.075960	346.906095	340.992481	344.100091	24652318.779882
std	329.660871	332.181676	326.876297	329.705792	29567243.626507
min	27.350000	27.600000	27.230000	27.370000	7932.000000
25%	67.031000	67.668125	66.528900	66.836000	2562695.750000
50%	153.740000	154.260000	152.685000	153.470000	17965286.000000
75%	587.527500	592.625000	582.050000	587.092500	36016327.500000
max	1477.390000	1498.000000	1450.040000	1450.890000	266833581.000000

	open	high	low	close	volume	Name	PriceChange	returns(%)
date
2013-02-08 00:00:00	67.714200	68.401400	66.892800	67.854200	158168416	apple	0.140000	0.206325
2013-02-11 00:00:00	68.071400	69.277100	67.607100	68.561400	129029425	apple	0.490000	0.714688
2013-02-12 00:00:00	68.501400	68.911400	66.820500	66.842800	151829363	apple	-1.658600	-2.481344
2013-02-13 00:00:00	66.744200	67.662800	66.174200	66.715600	118721995	apple	-0.028600	-0.042869
2013-02-14 00:00:00	66.359900	67.377100	66.288500	66.655600	88809154	apple	0.295700	0.443624
2013-02-15 00:00:00	66.978500	67.165600	65.702800	65.737100	97924631	apple	-1.241400	-1.888431
2013-02-19 00:00:00	65.871400	66.104200	64.835600	65.712800	108854046	apple	-0.158600	-0.241353
2013-02-20 00:00:00	65.384200	65.384200	64.114200	64.121400	118891367	apple	-1.262800	-1.969389
2013-02-21 00:00:00	63.714200	64.167100	63.259900	63.722800	111596821	apple	0.008600	0.013496
2013-02-22 00:00:00	64.178500	64.514200	63.799900	64.401400	82583823	apple	0.222900	0.346110

	count	mean	std	min	25%	50%	75%	max
apple_returns(%)	1259.000000	-0.000215	1.187138	-7.104299	-0.658021	0.042230	0.715427	8.000388
microsoft_returns(%)	1259.000000	0.076404	1.059260	-5.177618	-0.509241	0.061069	0.703264	4.861491
google_returns(%)	975.000000	-0.012495	1.092560	-5.952266	-0.551963	0.024951	0.672649	4.943550
amazon_returns(%)	1259.000000	-0.000398	1.358679	-9.363077	-0.738341	-0.002623	0.852568	5.640265

Data Preprocessing¶

Closing prices of stocks¶

Volume of stocks being traded each day.¶

Daily Stock Returns¶