In [None]:
import sys
import os
import sys
import os
import numpy as np
#!!!!!!!!!!!!!!!!!!!!!! IMPORTANT  IMPORTANT  IMPORTANT !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
# The folder where IBridgePy is installed must be on the Python import path.
# Otherwise, this error will show up: ModuleNotFoundError: No module named 'trader_factory'
# Either set the IBRIDGEPY_DIR environment variable to your installation folder,
# or edit the default path below.
# For example, Dr. Hui Liu installs IBridgePy at /Users/huil/Documents/Yellowstone
IBRIDGEPY_DIR = os.environ.get('IBRIDGEPY_DIR', '/Users/huil/Documents/Yellowstone')
if IBRIDGEPY_DIR not in sys.path:  # re-running this cell must not append the folder again
    sys.path.append(IBRIDGEPY_DIR)

In [None]:
import pandas as pd
from trader_factory import build_active_IBridgePy_plus, build_trader_for_backtest
from IBridgePy.IbridgepyTools import symbol
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt

In [None]:
# Build the backtest trader; it is used below to request historical price data.
t = build_trader_for_backtest()  # t is the IBridgePy_plus object

In [None]:
# Retrieve historical data of SPY, daily bar, and go back 8000 days.
hist = t.request_historical_data(symbol('SPY'), '1 day', '8000 D', dataProviderName='YahooFinance')

# Fallback in case Yahoo Finance is down: uncomment the two lines below to load
# the data from a local CSV file and convert its column names to lower case.
# hist = pd.read_csv(os.path.join('Input', 'SPY.csv'), header=0, index_col=0)
# hist.columns = map(str.lower, hist.columns)

In [None]:
# Show the last rows of the retrieved data as a quick sanity check.
hist.tail()

In [None]:
def add_yield(hist):
    """Add previous-close and daily price-change columns to ``hist``.

    The DataFrame is modified in place and the same object is returned.

    Columns added:
        close_yesterday: the 'close' column shifted down by one row.
        close_price_change_from_yesterday_to_today: fractional change of
            'close' relative to the previous row's close.
        close_price_change_from_today_to_tomorrow: the previous column
            shifted up by one row, i.e. the next row's fractional change.

    The first row has no previous close and the last row has no next-row
    change, so those cells are NaN.

    Args:
        hist: DataFrame with a 'close' column.

    Returns:
        The same DataFrame, with the three columns added.
    """
    previous_close = hist['close'].shift(1)
    hist['close_yesterday'] = previous_close

    change_to_today = (hist['close'] - previous_close) / previous_close
    hist['close_price_change_from_yesterday_to_today'] = change_to_today

    # Shifting up by one row lines up each row with the change that follows it.
    hist['close_price_change_from_today_to_tomorrow'] = change_to_today.shift(-1)
    return hist

# Add the price-change columns to the SPY data (add_yield also modifies hist in place).
hist = add_yield(hist)

In [None]:
hist.dropna(inplace=True)  # sklearn cannot handle NA, so rows containing NA have to be dropped. Two rows are dropped as a result.

In [None]:
# Show the last rows again, now including the price-change columns.
hist.tail()

In [None]:
# Use a machine learning package to build a linear regression model that predicts
# tomorrow's price change from today's price change.
x = hist[['close_price_change_from_yesterday_to_today']]
y = hist['close_price_change_from_today_to_tomorrow']
model = LinearRegression()
model.fit(x, y)
print(model.coef_)
print(model.intercept_)

In [None]:
# Compute the model's predicted next-day price change for every row.
# The predictions are kept in the separate variable `preds`; no column is added to hist.
preds = model.predict(hist.loc[:, ["close_price_change_from_yesterday_to_today"]])

In [None]:
# Draw a scatter plot of today's price change (x) against tomorrow's price change (y).
# The black line shows the model's predictions, to visualize the trend.
hist.plot.scatter(x = "close_price_change_from_yesterday_to_today", y = "close_price_change_from_today_to_tomorrow")
plt.plot(hist['close_price_change_from_yesterday_to_today'], preds, color="black")

In [None]:
# Summarize everything into a function to calculate linear regression coef
# Then, build a stock screener

def machine_learning(hist):
    """Regress the next row's price change on the current row's price change.

    The price-change columns are computed with add_yield on a copy of
    ``hist``, rows containing NA are dropped, and a LinearRegression model
    is fitted with 'close_price_change_from_yesterday_to_today' as the only
    feature and 'close_price_change_from_today_to_tomorrow' as the target.

    The DataFrame passed in is not modified.

    Args:
        hist: DataFrame with a 'close' column.

    Returns:
        The fitted model's ``coef_`` attribute.
    """
    # Copy first: add_yield adds columns in place, and the caller's data should stay as it was.
    data = add_yield(hist.copy()).dropna()  # sklearn cannot handle NA
    x = data[['close_price_change_from_yesterday_to_today']]
    y = data['close_price_change_from_today_to_tomorrow']
    model = LinearRegression()
    model.fit(x, y)
    return model.coef_

In [None]:
# Build a stock screener to check which stocks have a strong autocorrelation.
# For every ticker, download its daily history and store the regression coefficient in `ans`.
ans = {}
for ticker in ['SPY', 'QQQ', 'AAPL', 'GOOG', 'TSLA', 'BTC-USD']:  # You can add more tickers to try
    hist = t.request_historical_data(symbol(ticker), '1 day', '8000 D', dataProviderName='YahooFinance')
    coef = machine_learning(hist)
    ans[ticker] = coef[0]
ans
# Output recorded from an earlier run (it has no BTC-USD entry):
#{'SPY': -0.09470177988805199,
# 'QQQ': -0.06317144933170069,
# 'AAPL': -0.04901759160663959,
# 'GOOG': -0.021715123012021144,
# 'TSLA': 0.0026507635409099566}

In [None]:
def machine_learning_on_MACD(hist, drawPlot=False):
    """Regress the next row's price change on a moving-average spread indicator.

    The indicator, stored in a column named 'MACD', is the difference between
    the 10-row and 30-row rolling means of 'close', divided by 'close'.
    Rows containing NA are dropped before a LinearRegression model is fitted
    with 'MACD' as the only feature and
    'close_price_change_from_today_to_tomorrow' as the target.

    The DataFrame passed in is not modified.

    Args:
        hist: DataFrame with a 'close' column.
        drawPlot: when True, draw a scatter plot of the indicator against the
            target, with the model's predictions as a black line.

    Returns:
        The fitted model's ``coef_`` attribute.
    """
    # Work on a copy so the indicator columns and row drops stay out of the caller's data.
    data = hist.copy()

    # Add indicators of moving average and MACD.
    # min_periods=1 makes the rolling means defined from the first row onward.
    data['MA_10'] = data['close'].rolling(window=10, center=False, min_periods=1).mean()
    data['MA_30'] = data['close'].rolling(window=30, center=False, min_periods=1).mean()
    data['MACD'] = (data['MA_10'] - data['MA_30']) / data['close']

    # Build linear regression model
    data = add_yield(data).dropna()  # sklearn cannot handle NA
    x = data[['MACD']]
    y = data['close_price_change_from_today_to_tomorrow']
    model = LinearRegression()
    model.fit(x, y)

    if drawPlot:
        # Draw plot to visualize; the line is drawn on the Axes returned by the scatter plot.
        preds = model.predict(x)
        ax = data.plot.scatter(x="MACD", y="close_price_change_from_today_to_tomorrow")
        ax.plot(data['MACD'], preds, color="black")
    return model.coef_
# Run the MACD regression on SPY daily data and draw the plot.
hist_SPY = t.request_historical_data(symbol('SPY'), '1 day', '8000 D', dataProviderName='YahooFinance')
machine_learning_on_MACD(hist_SPY, drawPlot=True)

In [None]:
# Build a stock screener based on the MACD regression.
# For every ticker, download its daily history and store the regression coefficient in `ans`.
ans = {}
for ticker in ['SPY', 'QQQ', 'AAPL', 'GOOG', 'TSLA', 'BTC-USD']:
    hist = t.request_historical_data(symbol(ticker), '1 day', '8000 D', dataProviderName='YahooFinance')
    coef = machine_learning_on_MACD(hist)
    ans[ticker] = coef[0]
ans
# Output recorded from an earlier run (it has no BTC-USD entry):
#{'SPY': -0.013643619115926403,
# 'QQQ': -0.001030927719538247,
# 'AAPL': 0.003731356518772369,
# 'GOOG': 0.003867687194693938,
# 'TSLA': 0.00657561500065246}