import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
#import chart_studio.plotly.plotly as py
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
import os
import warnings
warnings.filterwarnings('ignore')
# Locations of the input CSV files.
data_dir = "./data/"
price_file = os.path.join(data_dir, "baidu-prices.csv")
sentiment_file = os.path.join(data_dir, "baidu-scores-history.csv")

# Price table, indexed by its parsed 'Date' column.
price_data = pd.read_csv(price_file, parse_dates=['Date'])
price_data = price_data.set_index('Date')

# Sentiment scores; every row gets the calendar date of its timestamp so the
# rows can be grouped per day.
sentiment_data = pd.read_csv(sentiment_file, parse_dates=['emeaTimestamp'])
sentiment_data = sentiment_data.assign(Date=sentiment_data.emeaTimestamp.dt.date)


def _relevance_weighted_daily_mean(frame, score_column):
    """Return the per-'Date' average of *score_column*, weighted by 'relevance'."""
    # Select the columns with a list: selecting with a tuple of names
    # (``groupby(...)['a', 'b']``) is rejected by pandas >= 2.0.
    return frame.groupby('Date')[[score_column, 'relevance']].apply(
        lambda day: np.average(day[score_column], weights=day['relevance'])
    )


pos = _relevance_weighted_daily_mean(sentiment_data, 'sentimentPositive')
neg = _relevance_weighted_daily_mean(sentiment_data, 'sentimentNegative')
neutral = _relevance_weighted_daily_mean(sentiment_data, 'sentimentNeutral')

# One row per date with the three aggregated scores.
sentiment_data = pd.concat([pos, neg, neutral], axis=1)
sentiment_data.columns = ['pos', 'neg', 'neutral']
# Component switches shared by every supported model.
_STATESPACE_COMMON_SPEC = {
    'irregular': True, 'level': True,
    'cycle': False, 'damped_cycle': False, 'stochastic_cycle': False,
}

# Per-model switches, keyed by the model_criterion string.
_STATESPACE_MODEL_SPECS = {
    "Local Level Model":
        {'stochastic_level': True, 'trend': False, 'stochastic_trend': False},
    "Deterministic Trend Model":
        {'stochastic_level': False, 'trend': True, 'stochastic_trend': False},
    "Local Level with Deterministic Trend Model":
        {'stochastic_level': True, 'trend': True, 'stochastic_trend': False},
    "Local Linear Trend Model":
        {'stochastic_level': True, 'trend': True, 'stochastic_trend': True},
    "Smooth Trend Model":
        {'stochastic_level': False, 'trend': True, 'stochastic_trend': True},
}


def fit_statespace_model(model_criterion, sentiment_type):
    """Fit an unobserved-components model to one column of ``sentiment_data``.

    Args:
        model_criterion: ``"Raw Data"`` or one of the keys of
            ``_STATESPACE_MODEL_SPECS``.
        sentiment_type: Name of the column of the module-level
            ``sentiment_data`` frame to model (e.g. ``"pos"``).

    Returns:
        For ``"Raw Data"``: ``{'sentiment_data': <single-column frame>}``.
        Otherwise a dict with ``'fit'`` (the fitted results), ``'summary'``
        (``fit.summary()``) and ``'data'`` (the selected column plus a
        ``<sentiment_type>f`` column holding ``fit.level['filtered']``).

    Raises:
        ValueError: If ``model_criterion`` is not a recognised name.
    """
    # Reject unknown names up front; previously they silently produced a
    # model with no components at all.
    if model_criterion != "Raw Data" and model_criterion not in _STATESPACE_MODEL_SPECS:
        known = ["Raw Data"] + list(_STATESPACE_MODEL_SPECS)
        raise ValueError(
            "Unknown model_criterion %r; expected one of %s" % (model_criterion, known)
        )

    data = sentiment_data[[sentiment_type]]
    if model_criterion == "Raw Data":
        return {'sentiment_data': data}

    model_metadata = {**_STATESPACE_COMMON_SPEC, **_STATESPACE_MODEL_SPECS[model_criterion]}
    model = sm.tsa.UnobservedComponents(data[sentiment_type], **model_metadata)
    fit = model.fit(method='powell', disp=False)
    summary = fit.summary()

    # Store the filtered level next to the raw series, e.g. 'pos' -> 'posf'.
    data = data.assign(**{sentiment_type + "f": fit.level['filtered']})
    return {'fit': fit, 'summary': summary, 'data': data}
# Fit the same model to each of the three daily sentiment series.
model_criterion = "Local Level Model"
model_fit_pos = fit_statespace_model(model_criterion, "pos")
model_fit_neg = fit_statespace_model(model_criterion, "neg")
model_fit_neutral = fit_statespace_model(model_criterion, "neutral")

# Raw and filtered columns of all three fits, side by side.
model_all = pd.concat(
    [model_fit_pos['data'], model_fit_neg['data'], model_fit_neutral['data']],
    axis=1,
)
# Filtered net sentiment: (positive - negative) scaled by (1 - neutral).
model_all = model_all.assign(
    netf=(model_all['posf'] - model_all['negf']) * (1. - model_all['neutralf'])
)

# how="right" keeps every row of price_data; gaps are then filled forward
# with the last available values.
sentiment_price_data = pd.merge(
    model_all, price_data, left_index=True, right_index=True, how="right"
)
sentiment_price_data = sentiment_price_data.ffill()
# Unfiltered net sentiment, computed with the same formula on the raw columns.
sentiment_price_data = sentiment_price_data.assign(
    net=(sentiment_price_data['pos'] - sentiment_price_data['neg'])
    * (1. - sentiment_price_data['neutral'])
)
# Raw positive sentiment (grey, left axis) against the filtered `posf`
# series (orange, right axis).
trace1 = go.Scatter(
    x=sentiment_price_data.index,
    y=sentiment_price_data.pos,
    mode="lines",
    name='Positive Sentiment',
    line=dict(color='rgb(216, 218, 217)'),
)
trace2 = go.Scatter(
    x=sentiment_price_data.index,
    y=sentiment_price_data.posf,
    mode="lines",
    name='Filtered Positive Sentiment',
    line=dict(color='rgb(255, 80, 0)'),
    yaxis='y2',
)
data = [trace1, trace2]
layout = go.Layout(
    title='Filtered Positive Sentiment',
    # Axis titles name the series actually drawn on each axis.
    yaxis=dict(title='Positive Sentiment'),
    yaxis2=dict(
        # title.font replaces the deprecated `titlefont` attribute.
        title=dict(
            text='Filtered Positive Sentiment',
            font=dict(color='rgb(255, 80, 0)'),
        ),
        tickfont=dict(color='rgb(255, 80, 0)'),
        overlaying='y',
        side='right',
    ),
    legend=go.layout.Legend(x=1.05, y=1),
)
fig = go.Figure(data=data, layout=layout)
iplot(fig, filename='filtered-pos-sentiment')
# Raw negative sentiment (grey, left axis) against the filtered `negf`
# series (orange, right axis).
trace1 = go.Scatter(
    x=sentiment_price_data.index,
    y=sentiment_price_data.neg,
    mode="lines",
    name='Negative Sentiment',
    line=dict(color='rgb(216, 218, 217)'),
)
trace2 = go.Scatter(
    x=sentiment_price_data.index,
    y=sentiment_price_data.negf,
    mode="lines",
    name='Filtered Negative Sentiment',
    line=dict(color='rgb(255, 80, 0)'),
    yaxis='y2',
)
data = [trace1, trace2]
layout = go.Layout(
    title='Filtered Negative Sentiment',
    # Axis titles name the series actually drawn on each axis.
    yaxis=dict(title='Negative Sentiment'),
    yaxis2=dict(
        # title.font replaces the deprecated `titlefont` attribute.
        title=dict(
            text='Filtered Negative Sentiment',
            font=dict(color='rgb(255, 80, 0)'),
        ),
        tickfont=dict(color='rgb(255, 80, 0)'),
        overlaying='y',
        side='right',
    ),
    legend=go.layout.Legend(x=1.05, y=1),
)
fig = go.Figure(data=data, layout=layout)
iplot(fig, filename='filtered-neg-sentiment')
# Raw neutral sentiment (grey, left axis) against the filtered `neutralf`
# series (orange, right axis).
trace1 = go.Scatter(
    x=sentiment_price_data.index,
    y=sentiment_price_data.neutral,
    mode="lines",
    name='Neutral Sentiment',
    line=dict(color='rgb(216, 218, 217)'),
)
trace2 = go.Scatter(
    x=sentiment_price_data.index,
    y=sentiment_price_data.neutralf,
    mode="lines",
    name='Filtered Neutral Sentiment',
    line=dict(color='rgb(255, 80, 0)'),
    yaxis='y2',
)
data = [trace1, trace2]
layout = go.Layout(
    title='Filtered Neutral Sentiment',
    # Axis titles name the series actually drawn on each axis.
    yaxis=dict(title='Neutral Sentiment'),
    yaxis2=dict(
        # title.font replaces the deprecated `titlefont` attribute.
        title=dict(
            text='Filtered Neutral Sentiment',
            font=dict(color='rgb(255, 80, 0)'),
        ),
        tickfont=dict(color='rgb(255, 80, 0)'),
        overlaying='y',
        side='right',
    ),
    legend=go.layout.Legend(x=1.05, y=1),
)
fig = go.Figure(data=data, layout=layout)
iplot(fig, filename='filtered-neutral-sentiment')
# BIDU price (blue, left axis) overlaid with the unfiltered net sentiment
# (grey, right axis).
trace1 = go.Scatter(
    x=sentiment_price_data.index,
    y=sentiment_price_data.BIDU,
    mode="lines",
    name='BIDU Price',
    line=dict(color='rgb(0, 30, 255)'),
)
trace2 = go.Scatter(
    x=sentiment_price_data.index,
    y=sentiment_price_data.net,
    mode="lines",
    name='Net Sentiment',
    line=dict(color='rgb(216, 218, 217)'),
    yaxis='y2',
)
data = [trace1, trace2]
layout = go.Layout(
    title='Sentiment Price Overlay',
    yaxis=dict(title='Price'),
    yaxis2=dict(
        # title.font replaces the deprecated `titlefont` attribute.
        title=dict(text='Net Sentiment', font=dict(color='rgb(255, 80, 0)')),
        tickfont=dict(color='rgb(255, 80, 0)'),
        overlaying='y',
        side='right',
    ),
    legend=go.layout.Legend(x=1.05, y=1),
)
fig = go.Figure(data=data, layout=layout)
iplot(fig, filename='price-raw-sentiment-overlay')
# BIDU price (blue, left axis) overlaid with the filtered net sentiment
# `netf` (orange, right axis).
trace1 = go.Scatter(
    x=sentiment_price_data.index,
    y=sentiment_price_data.BIDU,
    mode="lines",
    name='BIDU Price',
    line=dict(color='rgb(0, 30, 255)'),
)
trace2 = go.Scatter(
    x=sentiment_price_data.index,
    y=sentiment_price_data.netf,
    mode="lines",
    name='Filtered Net Sentiment',
    line=dict(color='rgb(255, 80, 0)'),
    yaxis='y2',
)
data = [trace1, trace2]
layout = go.Layout(
    title='Local Level Model',
    yaxis=dict(title='Price'),
    yaxis2=dict(
        # title.font replaces the deprecated `titlefont` attribute.
        title=dict(text='Net Sentiment', font=dict(color='rgb(255, 80, 0)')),
        tickfont=dict(color='rgb(255, 80, 0)'),
        overlaying='y',
        side='right',
    ),
    legend=go.layout.Legend(x=1.05, y=1),
)
fig = go.Figure(data=data, layout=layout)
iplot(fig, filename='local-level-model')
# Show the fit summary stored under 'summary' for the "pos" series.
print(model_fit_pos['summary'])
#                         Unobserved Components Results
# ==============================================================================
# Dep. Variable:                    pos   No. Observations:                  964
# Model:                    local level   Log Likelihood                 118.392
# Date:                Tue, 21 Mar 2023   AIC                           -232.784
# Time:                        16:40:17   BIC                           -223.044
# Sample:                             0   HQIC                          -229.075
#                                 - 964
# Covariance Type:                  opg
# ====================================================================================
#                        coef    std err          z      P>|z|      [0.025      0.975]
# ------------------------------------------------------------------------------------
# sigma2.irregular     0.0444      0.003     16.949      0.000       0.039       0.050
# sigma2.level      3.239e-05   1.93e-05      1.676      0.094   -5.48e-06    7.03e-05
# ===================================================================================
# Ljung-Box (L1) (Q):                 5.09   Jarque-Bera (JB):                  35.29
# Prob(Q):                            0.02   Prob(JB):                           0.00
# Heteroskedasticity (H):             1.12   Skew:                               0.25
# Prob(H) (two-sided):                0.30   Kurtosis:                           2.21
# ===================================================================================
# Warnings:
# [1] Covariance matrix calculated using the outer product of gradients (complex-step).
# Show the fit summary stored under 'summary' for the "neg" series.
print(model_fit_neg['summary'])
#                         Unobserved Components Results
# ==============================================================================
# Dep. Variable:                    neg   No. Observations:                  964
# Model:                    local level   Log Likelihood                  13.322
# Date:                Tue, 21 Mar 2023   AIC                            -22.644
# Time:                        16:40:17   BIC                            -12.904
# Sample:                             0   HQIC                           -18.936
#                                 - 964
# Covariance Type:                  opg
# ====================================================================================
#                        coef    std err          z      P>|z|      [0.025      0.975]
# ------------------------------------------------------------------------------------
# sigma2.irregular     0.0556      0.003     19.283      0.000       0.050       0.061
# sigma2.level      2.295e-05   1.66e-05      1.384      0.166   -9.55e-06    5.54e-05
# ===================================================================================
# Ljung-Box (L1) (Q):                10.89   Jarque-Bera (JB):                 127.87
# Prob(Q):                            0.00   Prob(JB):                           0.00
# Heteroskedasticity (H):             1.05   Skew:                               0.86
# Prob(H) (two-sided):                0.69   Kurtosis:                           2.54
# ===================================================================================
# Warnings:
# [1] Covariance matrix calculated using the outer product of gradients (complex-step).
# Show the fit summary stored under 'summary' for the "neutral" series.
print(model_fit_neutral['summary'])
#                         Unobserved Components Results
# ==============================================================================
# Dep. Variable:                neutral   No. Observations:                  964
# Model:                    local level   Log Likelihood                 273.387
# Date:                Tue, 21 Mar 2023   AIC                           -542.773
# Time:                        16:40:18   BIC                           -533.033
# Sample:                             0   HQIC                          -539.065
#                                 - 964
# Covariance Type:                  opg
# ====================================================================================
#                        coef    std err          z      P>|z|      [0.025      0.975]
# ------------------------------------------------------------------------------------
# sigma2.irregular     0.0327      0.001     23.127      0.000       0.030       0.035
# sigma2.level      4.603e-06   4.85e-06      0.950      0.342    -4.9e-06    1.41e-05
# ===================================================================================
# Ljung-Box (L1) (Q):                 7.02   Jarque-Bera (JB):                 133.64
# Prob(Q):                            0.01   Prob(JB):                           0.00
# Heteroskedasticity (H):             0.63   Skew:                               0.89
# Prob(H) (two-sided):                0.00   Kurtosis:                           3.37
# ===================================================================================
# Warnings:
# [1] Covariance matrix calculated using the outer product of gradients (complex-step).
# Fit the same model to each of the three daily sentiment series.
model_criterion = "Deterministic Trend Model"
model_fit_pos = fit_statespace_model(model_criterion, "pos")
model_fit_neg = fit_statespace_model(model_criterion, "neg")
model_fit_neutral = fit_statespace_model(model_criterion, "neutral")

# Raw and filtered columns of all three fits, side by side.
model_all = pd.concat(
    [model_fit_pos['data'], model_fit_neg['data'], model_fit_neutral['data']],
    axis=1,
)
# Filtered net sentiment: (positive - negative) scaled by (1 - neutral).
model_all = model_all.assign(
    netf=(model_all['posf'] - model_all['negf']) * (1. - model_all['neutralf'])
)

# how="right" keeps every row of price_data; gaps are then filled forward
# with the last available values.
sentiment_price_data = pd.merge(
    model_all, price_data, left_index=True, right_index=True, how="right"
)
sentiment_price_data = sentiment_price_data.ffill()
# Unfiltered net sentiment, computed with the same formula on the raw columns.
sentiment_price_data = sentiment_price_data.assign(
    net=(sentiment_price_data['pos'] - sentiment_price_data['neg'])
    * (1. - sentiment_price_data['neutral'])
)
# BIDU price (blue, left axis) overlaid with the filtered net sentiment
# `netf` (orange, right axis); the figure is titled after model_criterion.
trace1 = go.Scatter(
    x=sentiment_price_data.index,
    y=sentiment_price_data.BIDU,
    mode="lines",
    name='BIDU Price',
    line=dict(color='rgb(0, 30, 255)'),
)
trace2 = go.Scatter(
    x=sentiment_price_data.index,
    y=sentiment_price_data.netf,
    mode="lines",
    name='Filtered Net Sentiment',
    line=dict(color='rgb(255, 80, 0)'),
    yaxis='y2',
)
data = [trace1, trace2]
layout = go.Layout(
    title=model_criterion,
    yaxis=dict(title='Price'),
    yaxis2=dict(
        # title.font replaces the deprecated `titlefont` attribute.
        title=dict(text='Net Sentiment', font=dict(color='rgb(255, 80, 0)')),
        tickfont=dict(color='rgb(255, 80, 0)'),
        overlaying='y',
        side='right',
    ),
    legend=go.layout.Legend(x=1.05, y=1),
)
fig = go.Figure(data=data, layout=layout)
iplot(fig, filename=model_criterion)
# Fit the same model to each of the three daily sentiment series.
model_criterion = "Local Level with Deterministic Trend Model"
model_fit_pos = fit_statespace_model(model_criterion, "pos")
model_fit_neg = fit_statespace_model(model_criterion, "neg")
model_fit_neutral = fit_statespace_model(model_criterion, "neutral")

# Raw and filtered columns of all three fits, side by side.
model_all = pd.concat(
    [model_fit_pos['data'], model_fit_neg['data'], model_fit_neutral['data']],
    axis=1,
)
# Filtered net sentiment: (positive - negative) scaled by (1 - neutral).
model_all = model_all.assign(
    netf=(model_all['posf'] - model_all['negf']) * (1. - model_all['neutralf'])
)

# how="right" keeps every row of price_data; gaps are then filled forward
# with the last available values.
sentiment_price_data = pd.merge(
    model_all, price_data, left_index=True, right_index=True, how="right"
)
sentiment_price_data = sentiment_price_data.ffill()
# Unfiltered net sentiment, computed with the same formula on the raw columns.
sentiment_price_data = sentiment_price_data.assign(
    net=(sentiment_price_data['pos'] - sentiment_price_data['neg'])
    * (1. - sentiment_price_data['neutral'])
)
# BIDU price (blue, left axis) overlaid with the filtered net sentiment
# `netf` (orange, right axis); the figure is titled after model_criterion.
trace1 = go.Scatter(
    x=sentiment_price_data.index,
    y=sentiment_price_data.BIDU,
    mode="lines",
    name='BIDU Price',
    line=dict(color='rgb(0, 30, 255)'),
)
trace2 = go.Scatter(
    x=sentiment_price_data.index,
    y=sentiment_price_data.netf,
    mode="lines",
    name='Filtered Net Sentiment',
    line=dict(color='rgb(255, 80, 0)'),
    yaxis='y2',
)
data = [trace1, trace2]
layout = go.Layout(
    title=model_criterion,
    yaxis=dict(title='Price'),
    yaxis2=dict(
        # title.font replaces the deprecated `titlefont` attribute.
        title=dict(text='Net Sentiment', font=dict(color='rgb(255, 80, 0)')),
        tickfont=dict(color='rgb(255, 80, 0)'),
        overlaying='y',
        side='right',
    ),
    legend=go.layout.Legend(x=1.05, y=1),
)
fig = go.Figure(data=data, layout=layout)
iplot(fig, filename=model_criterion)
# Fit the same model to each of the three daily sentiment series.
model_criterion = "Local Linear Trend Model"
model_fit_pos = fit_statespace_model(model_criterion, "pos")
model_fit_neg = fit_statespace_model(model_criterion, "neg")
model_fit_neutral = fit_statespace_model(model_criterion, "neutral")

# Raw and filtered columns of all three fits, side by side.
model_all = pd.concat(
    [model_fit_pos['data'], model_fit_neg['data'], model_fit_neutral['data']],
    axis=1,
)
# Filtered net sentiment: (positive - negative) scaled by (1 - neutral).
model_all = model_all.assign(
    netf=(model_all['posf'] - model_all['negf']) * (1. - model_all['neutralf'])
)

# how="right" keeps every row of price_data; gaps are then filled forward
# with the last available values.
sentiment_price_data = pd.merge(
    model_all, price_data, left_index=True, right_index=True, how="right"
)
sentiment_price_data = sentiment_price_data.ffill()
# Unfiltered net sentiment, computed with the same formula on the raw columns.
sentiment_price_data = sentiment_price_data.assign(
    net=(sentiment_price_data['pos'] - sentiment_price_data['neg'])
    * (1. - sentiment_price_data['neutral'])
)
# BIDU price (blue, left axis) overlaid with the filtered net sentiment
# `netf` (orange, right axis); the figure is titled after model_criterion.
trace1 = go.Scatter(
    x=sentiment_price_data.index,
    y=sentiment_price_data.BIDU,
    mode="lines",
    name='BIDU Price',
    line=dict(color='rgb(0, 30, 255)'),
)
trace2 = go.Scatter(
    x=sentiment_price_data.index,
    y=sentiment_price_data.netf,
    mode="lines",
    name='Filtered Net Sentiment',
    line=dict(color='rgb(255, 80, 0)'),
    yaxis='y2',
)
data = [trace1, trace2]
layout = go.Layout(
    title=model_criterion,
    yaxis=dict(title='Price'),
    yaxis2=dict(
        # title.font replaces the deprecated `titlefont` attribute.
        title=dict(text='Net Sentiment', font=dict(color='rgb(255, 80, 0)')),
        tickfont=dict(color='rgb(255, 80, 0)'),
        overlaying='y',
        side='right',
    ),
    legend=go.layout.Legend(x=1.05, y=1),
)
fig = go.Figure(data=data, layout=layout)
iplot(fig, filename=model_criterion)
# Fit the same model to each of the three daily sentiment series.
model_criterion = "Smooth Trend Model"
model_fit_pos = fit_statespace_model(model_criterion, "pos")
model_fit_neg = fit_statespace_model(model_criterion, "neg")
model_fit_neutral = fit_statespace_model(model_criterion, "neutral")

# Raw and filtered columns of all three fits, side by side.
model_all = pd.concat(
    [model_fit_pos['data'], model_fit_neg['data'], model_fit_neutral['data']],
    axis=1,
)
# Filtered net sentiment: (positive - negative) scaled by (1 - neutral).
model_all = model_all.assign(
    netf=(model_all['posf'] - model_all['negf']) * (1. - model_all['neutralf'])
)

# how="right" keeps every row of price_data; gaps are then filled forward
# with the last available values.
sentiment_price_data = pd.merge(
    model_all, price_data, left_index=True, right_index=True, how="right"
)
sentiment_price_data = sentiment_price_data.ffill()
# Unfiltered net sentiment, computed with the same formula on the raw columns.
sentiment_price_data = sentiment_price_data.assign(
    net=(sentiment_price_data['pos'] - sentiment_price_data['neg'])
    * (1. - sentiment_price_data['neutral'])
)
# BIDU price (blue, left axis) overlaid with the filtered net sentiment
# `netf` (orange, right axis); the figure is titled after model_criterion.
trace1 = go.Scatter(
    x=sentiment_price_data.index,
    y=sentiment_price_data.BIDU,
    mode="lines",
    name='BIDU Price',
    line=dict(color='rgb(0, 30, 255)'),
)
trace2 = go.Scatter(
    x=sentiment_price_data.index,
    y=sentiment_price_data.netf,
    mode="lines",
    name='Filtered Net Sentiment',
    line=dict(color='rgb(255, 80, 0)'),
    yaxis='y2',
)
data = [trace1, trace2]
layout = go.Layout(
    title=model_criterion,
    yaxis=dict(title='Price'),
    yaxis2=dict(
        # title.font replaces the deprecated `titlefont` attribute.
        title=dict(text='Net Sentiment', font=dict(color='rgb(255, 80, 0)')),
        tickfont=dict(color='rgb(255, 80, 0)'),
        overlaying='y',
        side='right',
    ),
    legend=go.layout.Legend(x=1.05, y=1),
)
fig = go.Figure(data=data, layout=layout)
iplot(fig, filename=model_criterion)