""" credits: Karthik Rangappa todays output: correlation and basics stats """ import matplotlib.pyplot as plt import easy_algo as ea import pdb import pandas as pd def stats(column, name): print("stats for:", name) print("mean:", column.mean()) print("median:", column.median()) print("mode:", column.mode()[0]) print("standard deviation", column.std()) print("absolute deviation", column.mad(), "\n") hdfc_data = ea.jsonreader('HDFC', "E:\\data storage\\1 day\\2018\\") hdfc = pd.DataFrame(hdfc_data) hdfcbank_data = ea.jsonreader('HDFCBANK', "E:\\data storage\\1 day\\2018\\") hdfcbank = pd.DataFrame(hdfcbank_data) df = hdfc.merge(hdfcbank, on='date', how='left', suffixes=("_hdfc", "_hdfcbank") ) df['yesterday_hdfc'] = df['close_hdfc'].shift(1) df['closing_hdfc'] = df['close_hdfc'] -df['yesterday_hdfc'] df['yesterday_hdfcbank'] = df['close_hdfcbank'].shift(1) df['closing_hdfcbank'] = df['close_hdfcbank'] -df['yesterday_hdfcbank'] # rearrange the columns df = df[['date', 'close_hdfc', 'yesterday_hdfc', 'closing_hdfc', 'close_hdfcbank', 'yesterday_hdfcbank', 'closing_hdfcbank']] # calculate various aspects df['spread'] = df['closing_hdfc'] - df['closing_hdfcbank'] df['differential'] = df['close_hdfc'] - df['close_hdfcbank'] df['ratio'] = df['close_hdfc'] / df['close_hdfcbank'] # find percentage change df['hdfc_change'] = df['close_hdfc'].pct_change()*100 df['hdfcbank_change'] = df['close_hdfcbank'].pct_change()*100 correlation = df['hdfc_change'].corr(df['hdfcbank_change']) print("correlation value", correlation) # mean median mode stats(df['spread'], "spread") stats(df['differential'], "differential") stats(df['ratio'], "ratio") # density curve df['ratio'].plot.kde() pdb.set_trace()