import pandas as pd
import numpy as np
import statsmodels.api as sm
import matplotlib.pyplot as plt
#import chart_studio.plotly.plotly as py
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)
import os
import warnings
warnings.filterwarnings('ignore')

# Locations of the two raw inputs; both are expected under ./data/.
data_dir = "./data/"
price_file = os.path.join(data_dir, "baidu-prices.csv")
sentiment_file = os.path.join(data_dir, "baidu-scores-history.csv")

# Prices: parse the 'Date' column as timestamps and use it as the row index.
price_data = pd.read_csv(price_file, parse_dates=['Date']).set_index('Date')

# Sentiment scores: parse 'emeaTimestamp', then derive a calendar-date column
# named 'Date' from it so the records can be grouped per day further down.
sentiment_data = pd.read_csv(sentiment_file, parse_dates=['emeaTimestamp'])
sentiment_data = sentiment_data.assign(Date=sentiment_data['emeaTimestamp'].dt.date)


def _relevance_weighted_daily_mean(frame, score_column):
    """Return the per-'Date' average of *score_column*, weighted by 'relevance'.

    The result is a Series indexed by the distinct values of ``frame['Date']``.
    """
    # Select the columns with a list.  The tuple-style form
    # ``grouped['a', 'b']`` was deprecated in pandas 1.0 and raises in 2.0+.
    grouped = frame.groupby('Date')[[score_column, 'relevance']]
    return grouped.apply(
        lambda day: np.average(day[score_column], weights=day['relevance'])
    )


# One relevance-weighted daily series per sentiment class.
pos = _relevance_weighted_daily_mean(sentiment_data, 'sentimentPositive')
neg = _relevance_weighted_daily_mean(sentiment_data, 'sentimentNegative')
neutral = _relevance_weighted_daily_mean(sentiment_data, 'sentimentNeutral')

# From here on `sentiment_data` is the daily table with short column names.
sentiment_data = pd.concat([pos, neg, neutral], axis=1)
sentiment_data.columns = ['pos', 'neg', 'neutral']


def _component_spec(stochastic_level, trend, stochastic_trend):
    """Build the keyword switches for sm.tsa.UnobservedComponents.

    Every supported model has an irregular term and a level, and none has a
    cycle; only the three flags passed in differ between models.
    """
    return {'irregular': True, 'level': True, 'stochastic_level': stochastic_level,
            'trend': trend, 'stochastic_trend': stochastic_trend, 'cycle': False,
            'damped_cycle': False, 'stochastic_cycle': False}


# Model name -> component switches (stochastic_level, trend, stochastic_trend).
_MODEL_SPECS = {
    "Local Level Model": _component_spec(True, False, False),
    "Deterministic Trend Model": _component_spec(False, True, False),
    "Local Level with Deterministic Trend Model": _component_spec(True, True, False),
    "Local Linear Trend Model": _component_spec(True, True, True),
    "Smooth Trend Model": _component_spec(False, True, True),
}


def fit_statespace_model(model_criterion, sentiment_type):
    """Fit an unobserved-components model to one column of `sentiment_data`.

    Parameters
    ----------
    model_criterion : str
        Either "Raw Data" (no fitting) or one of the keys of `_MODEL_SPECS`.
    sentiment_type : str
        Name of the column of the module-level `sentiment_data` frame to use.

    Returns
    -------
    dict
        For "Raw Data": ``{'sentiment_data': frame, 'data': frame}`` where
        `frame` is the selected single-column frame.  'sentiment_data' is the
        original key; 'data' is provided as well so callers can read
        ``result['data']`` regardless of the criterion.
        Otherwise: ``{'fit': ..., 'summary': ..., 'data': frame}`` where
        `frame` carries an extra column ``sentiment_type + "f"`` holding
        ``fit.level['filtered']``.

    Raises
    ------
    ValueError
        If `model_criterion` is not a known name.  Unknown names previously
        reached UnobservedComponents with no component switches at all; they
        are now rejected up front with a message listing the valid options.
    """
    data = sentiment_data[[sentiment_type]]
    if model_criterion == "Raw Data":
        return {'sentiment_data': data, 'data': data}

    if model_criterion not in _MODEL_SPECS:
        known = ", ".join(["Raw Data"] + list(_MODEL_SPECS))
        raise ValueError(
            "Unknown model_criterion %r; expected one of: %s" % (model_criterion, known)
        )

    model = sm.tsa.UnobservedComponents(data[sentiment_type], **_MODEL_SPECS[model_criterion])
    fit = model.fit(method='powell', disp=False)
    summary = fit.summary()
    # Store the filtered level next to the raw series, e.g. 'pos' -> 'posf'.
    data = data.assign(**{sentiment_type + "f": fit.level['filtered']})
    return {'fit': fit, 'summary': summary, 'data': data}


# Fit the Local Level Model separately to each daily sentiment series.
model_criterion = "Local Level Model"
model_fit_pos = fit_statespace_model(model_criterion, "pos")
model_fit_neg = fit_statespace_model(model_criterion, "neg")
model_fit_neutral = fit_statespace_model(model_criterion, "neutral")

# Side-by-side raw and filtered columns: pos/posf, neg/negf, neutral/neutralf.
model_all = pd.concat(
    [model_fit_pos['data'], model_fit_neg['data'], model_fit_neutral['data']],
    axis=1,
)
# Filtered net sentiment, (posf - negf) * (1 - neutralf), computed on whole
# columns rather than with a row-by-row DataFrame.apply.
model_all = model_all.assign(
    netf=(model_all['posf'] - model_all['negf']) * (1. - model_all['neutralf'])
)


# Right join: keep every row of price_data and attach sentiment where available.
sentiment_price_data = pd.merge(model_all, price_data, left_index=True, right_index=True, how="right")
# Carry the last known values forward over rows left empty by the join.
# DataFrame.ffill() replaces the deprecated fillna(method='ffill') spelling.
sentiment_price_data = sentiment_price_data.ffill()
# Raw net sentiment, (pos - neg) * (1 - neutral), computed on whole columns
# rather than with a row-by-row DataFrame.apply.
sentiment_price_data = sentiment_price_data.assign(
    net=(sentiment_price_data['pos'] - sentiment_price_data['neg'])
    * (1. - sentiment_price_data['neutral'])
)


# Raw positive sentiment (grey, left axis) against its filtered level
# (orange, right axis).
trace1 = go.Scatter(
    x=sentiment_price_data.index,
    y=sentiment_price_data.pos,
    mode="lines",
    name='Positive Sentiment',
    line=dict(color='rgb(216, 218, 217)'),
)

trace2 = go.Scatter(
    x=sentiment_price_data.index,
    y=sentiment_price_data.posf,
    mode="lines",
    name='Filtered Positive Sentiment',
    line=dict(color='rgb(255, 80, 0)'),
    yaxis='y2',
)

data = [trace1, trace2]
layout = go.Layout(
    title='Filtered Positive Sentiment',
    # Axis titles name the plotted series (they previously read 'Price' and
    # 'Net Sentiment', copied from the price overlay charts).
    yaxis=dict(title='Positive Sentiment'),
    yaxis2=dict(
        # title.font replaces the deprecated 'titlefont' axis attribute.
        title=dict(text='Filtered Positive Sentiment', font=dict(color='rgb(255, 80, 0)')),
        tickfont=dict(color='rgb(255, 80, 0)'),
        overlaying='y',
        side='right',
    ),
    # Push the legend right of the plot so it does not cover the second axis.
    legend=go.layout.Legend(x=1.05, y=1),
)
fig = go.Figure(data=data, layout=layout)
iplot(fig, filename='filtered-pos-sentiment')


# Raw negative sentiment (grey, left axis) against its filtered level
# (orange, right axis).
trace1 = go.Scatter(
    x=sentiment_price_data.index,
    y=sentiment_price_data.neg,
    mode="lines",
    name='Negative Sentiment',
    line=dict(color='rgb(216, 218, 217)'),
)

trace2 = go.Scatter(
    x=sentiment_price_data.index,
    y=sentiment_price_data.negf,
    mode="lines",
    name='Filtered Negative Sentiment',
    line=dict(color='rgb(255, 80, 0)'),
    yaxis='y2',
)

data = [trace1, trace2]
layout = go.Layout(
    title='Filtered Negative Sentiment',
    # Axis titles name the plotted series (they previously read 'Price' and
    # 'Net Sentiment', copied from the price overlay charts).
    yaxis=dict(title='Negative Sentiment'),
    yaxis2=dict(
        # title.font replaces the deprecated 'titlefont' axis attribute.
        title=dict(text='Filtered Negative Sentiment', font=dict(color='rgb(255, 80, 0)')),
        tickfont=dict(color='rgb(255, 80, 0)'),
        overlaying='y',
        side='right',
    ),
    # Push the legend right of the plot so it does not cover the second axis.
    legend=go.layout.Legend(x=1.05, y=1),
)
fig = go.Figure(data=data, layout=layout)
iplot(fig, filename='filtered-neg-sentiment')


# Raw neutral sentiment (grey, left axis) against its filtered level
# (orange, right axis).
trace1 = go.Scatter(
    x=sentiment_price_data.index,
    y=sentiment_price_data.neutral,
    mode="lines",
    name='Neutral Sentiment',
    line=dict(color='rgb(216, 218, 217)'),
)

trace2 = go.Scatter(
    x=sentiment_price_data.index,
    y=sentiment_price_data.neutralf,
    mode="lines",
    name='Filtered Neutral Sentiment',
    line=dict(color='rgb(255, 80, 0)'),
    yaxis='y2',
)

data = [trace1, trace2]
layout = go.Layout(
    title='Filtered Neutral Sentiment',
    # Axis titles name the plotted series (they previously read 'Price' and
    # 'Net Sentiment', copied from the price overlay charts).
    yaxis=dict(title='Neutral Sentiment'),
    yaxis2=dict(
        # title.font replaces the deprecated 'titlefont' axis attribute.
        title=dict(text='Filtered Neutral Sentiment', font=dict(color='rgb(255, 80, 0)')),
        tickfont=dict(color='rgb(255, 80, 0)'),
        overlaying='y',
        side='right',
    ),
    # Push the legend right of the plot so it does not cover the second axis.
    legend=go.layout.Legend(x=1.05, y=1),
)
fig = go.Figure(data=data, layout=layout)
iplot(fig, filename='filtered-neutral-sentiment')


# BIDU price (blue, left axis) overlaid with the raw, unfiltered net
# sentiment (grey, right axis).
trace1 = go.Scatter(
    x=sentiment_price_data.index,
    y=sentiment_price_data.BIDU,
    mode="lines",
    name='BIDU Price',
    line=dict(color='rgb(0, 30, 255)'),
)

trace2 = go.Scatter(
    x=sentiment_price_data.index,
    y=sentiment_price_data.net,
    mode="lines",
    name='Net Sentiment',
    line=dict(color='rgb(216, 218, 217)'),
    yaxis='y2',
)

data = [trace1, trace2]
layout = go.Layout(
    title='Sentiment Price Overlay',
    yaxis=dict(title='Price'),
    yaxis2=dict(
        # title.font replaces the deprecated 'titlefont' axis attribute.
        title=dict(text='Net Sentiment', font=dict(color='rgb(255, 80, 0)')),
        tickfont=dict(color='rgb(255, 80, 0)'),
        overlaying='y',
        side='right',
    ),
    # Push the legend right of the plot so it does not cover the second axis.
    legend=go.layout.Legend(x=1.05, y=1),
)
fig = go.Figure(data=data, layout=layout)
iplot(fig, filename='price-raw-sentiment-overlay')


# BIDU price (blue, left axis) overlaid with the filtered net sentiment
# (orange, right axis).
trace1 = go.Scatter(
    x=sentiment_price_data.index,
    y=sentiment_price_data.BIDU,
    mode="lines",
    name='BIDU Price',
    line=dict(color='rgb(0, 30, 255)'),
)

trace2 = go.Scatter(
    x=sentiment_price_data.index,
    y=sentiment_price_data.netf,
    mode="lines",
    name='Filtered Net Sentiment',
    line=dict(color='rgb(255, 80, 0)'),
    yaxis='y2',
)

data = [trace1, trace2]
layout = go.Layout(
    title='Local Level Model',
    yaxis=dict(title='Price'),
    yaxis2=dict(
        # title.font replaces the deprecated 'titlefont' axis attribute.
        title=dict(text='Net Sentiment', font=dict(color='rgb(255, 80, 0)')),
        tickfont=dict(color='rgb(255, 80, 0)'),
        overlaying='y',
        side='right',
    ),
    # Push the legend right of the plot so it does not cover the second axis.
    legend=go.layout.Legend(x=1.05, y=1),
)
fig = go.Figure(data=data, layout=layout)
iplot(fig, filename='local-level-model')


# Fit diagnostics for the positive-sentiment series.
pos_summary = model_fit_pos['summary']
print(pos_summary)

                        Unobserved Components Results                         
==============================================================================
Dep. Variable:                    pos   No. Observations:                  964
Model:                    local level   Log Likelihood                 118.392
Date:                Tue, 21 Mar 2023   AIC                           -232.784
Time:                        16:40:17   BIC                           -223.044
Sample:                             0   HQIC                          -229.075
                                - 964                                         
Covariance Type:                  opg                                         
====================================================================================
                       coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------
sigma2.irregular     0.0444      0.003     16.949      0.000       0.039       0.050
sigma2.level      3.239e-05   1.93e-05      1.676      0.094   -5.48e-06    7.03e-05
===================================================================================
Ljung-Box (L1) (Q):                   5.09   Jarque-Bera (JB):                35.29
Prob(Q):                              0.02   Prob(JB):                         0.00
Heteroskedasticity (H):               1.12   Skew:                             0.25
Prob(H) (two-sided):                  0.30   Kurtosis:                         2.21
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).


# Fit diagnostics for the negative-sentiment series.
neg_summary = model_fit_neg['summary']
print(neg_summary)

                        Unobserved Components Results                         
==============================================================================
Dep. Variable:                    neg   No. Observations:                  964
Model:                    local level   Log Likelihood                  13.322
Date:                Tue, 21 Mar 2023   AIC                            -22.644
Time:                        16:40:17   BIC                            -12.904
Sample:                             0   HQIC                           -18.936
                                - 964                                         
Covariance Type:                  opg                                         
====================================================================================
                       coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------
sigma2.irregular     0.0556      0.003     19.283      0.000       0.050       0.061
sigma2.level      2.295e-05   1.66e-05      1.384      0.166   -9.55e-06    5.54e-05
===================================================================================
Ljung-Box (L1) (Q):                  10.89   Jarque-Bera (JB):               127.87
Prob(Q):                              0.00   Prob(JB):                         0.00
Heteroskedasticity (H):               1.05   Skew:                             0.86
Prob(H) (two-sided):                  0.69   Kurtosis:                         2.54
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).


# Fit diagnostics for the neutral-sentiment series.
neutral_summary = model_fit_neutral['summary']
print(neutral_summary)

                        Unobserved Components Results                         
==============================================================================
Dep. Variable:                neutral   No. Observations:                  964
Model:                    local level   Log Likelihood                 273.387
Date:                Tue, 21 Mar 2023   AIC                           -542.773
Time:                        16:40:18   BIC                           -533.033
Sample:                             0   HQIC                          -539.065
                                - 964                                         
Covariance Type:                  opg                                         
====================================================================================
                       coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------------
sigma2.irregular     0.0327      0.001     23.127      0.000       0.030       0.035
sigma2.level      4.603e-06   4.85e-06      0.950      0.342    -4.9e-06    1.41e-05
===================================================================================
Ljung-Box (L1) (Q):                   7.02   Jarque-Bera (JB):               133.64
Prob(Q):                              0.01   Prob(JB):                         0.00
Heteroskedasticity (H):               0.63   Skew:                             0.89
Prob(H) (two-sided):                  0.00   Kurtosis:                         3.37
===================================================================================

Warnings:
[1] Covariance matrix calculated using the outer product of gradients (complex-step).


# Refit all three sentiment series with the Deterministic Trend Model and
# rebuild the merged sentiment/price table used by the plot that follows.
model_criterion = "Deterministic Trend Model"
model_fit_pos = fit_statespace_model(model_criterion, "pos")
model_fit_neg = fit_statespace_model(model_criterion, "neg")
model_fit_neutral = fit_statespace_model(model_criterion, "neutral")
model_all = pd.concat(
    [model_fit_pos['data'], model_fit_neg['data'], model_fit_neutral['data']],
    axis=1,
)
# Filtered net sentiment, computed on whole columns instead of row-wise apply.
model_all = model_all.assign(
    netf=(model_all['posf'] - model_all['negf']) * (1. - model_all['neutralf'])
)
# Right join keeps every row of price_data.
sentiment_price_data = pd.merge(model_all, price_data, left_index=True, right_index=True, how="right")
# DataFrame.ffill() replaces the deprecated fillna(method='ffill') spelling.
sentiment_price_data = sentiment_price_data.ffill()
# Raw net sentiment, computed after forward-filling as before.
sentiment_price_data = sentiment_price_data.assign(
    net=(sentiment_price_data['pos'] - sentiment_price_data['neg'])
    * (1. - sentiment_price_data['neutral'])
)


# BIDU price (blue, left axis) overlaid with the filtered net sentiment
# (orange, right axis); title and filename follow the current model_criterion.
trace1 = go.Scatter(
    x=sentiment_price_data.index,
    y=sentiment_price_data.BIDU,
    mode="lines",
    name='BIDU Price',
    line=dict(color='rgb(0, 30, 255)'),
)

trace2 = go.Scatter(
    x=sentiment_price_data.index,
    y=sentiment_price_data.netf,
    mode="lines",
    name='Filtered Net Sentiment',
    line=dict(color='rgb(255, 80, 0)'),
    yaxis='y2',
)

data = [trace1, trace2]
layout = go.Layout(
    title=model_criterion,
    yaxis=dict(title='Price'),
    yaxis2=dict(
        # title.font replaces the deprecated 'titlefont' axis attribute.
        title=dict(text='Net Sentiment', font=dict(color='rgb(255, 80, 0)')),
        tickfont=dict(color='rgb(255, 80, 0)'),
        overlaying='y',
        side='right',
    ),
    # Push the legend right of the plot so it does not cover the second axis.
    legend=go.layout.Legend(x=1.05, y=1),
)
fig = go.Figure(data=data, layout=layout)
iplot(fig, filename=model_criterion)


# Refit all three sentiment series with the Local Level with Deterministic
# Trend Model and rebuild the merged sentiment/price table for the next plot.
model_criterion = "Local Level with Deterministic Trend Model"
model_fit_pos = fit_statespace_model(model_criterion, "pos")
model_fit_neg = fit_statespace_model(model_criterion, "neg")
model_fit_neutral = fit_statespace_model(model_criterion, "neutral")
model_all = pd.concat(
    [model_fit_pos['data'], model_fit_neg['data'], model_fit_neutral['data']],
    axis=1,
)
# Filtered net sentiment, computed on whole columns instead of row-wise apply.
model_all = model_all.assign(
    netf=(model_all['posf'] - model_all['negf']) * (1. - model_all['neutralf'])
)
# Right join keeps every row of price_data.
sentiment_price_data = pd.merge(model_all, price_data, left_index=True, right_index=True, how="right")
# DataFrame.ffill() replaces the deprecated fillna(method='ffill') spelling.
sentiment_price_data = sentiment_price_data.ffill()
# Raw net sentiment, computed after forward-filling as before.
sentiment_price_data = sentiment_price_data.assign(
    net=(sentiment_price_data['pos'] - sentiment_price_data['neg'])
    * (1. - sentiment_price_data['neutral'])
)


# BIDU price (blue, left axis) overlaid with the filtered net sentiment
# (orange, right axis); title and filename follow the current model_criterion.
trace1 = go.Scatter(
    x=sentiment_price_data.index,
    y=sentiment_price_data.BIDU,
    mode="lines",
    name='BIDU Price',
    line=dict(color='rgb(0, 30, 255)'),
)

trace2 = go.Scatter(
    x=sentiment_price_data.index,
    y=sentiment_price_data.netf,
    mode="lines",
    name='Filtered Net Sentiment',
    line=dict(color='rgb(255, 80, 0)'),
    yaxis='y2',
)

data = [trace1, trace2]
layout = go.Layout(
    title=model_criterion,
    yaxis=dict(title='Price'),
    yaxis2=dict(
        # title.font replaces the deprecated 'titlefont' axis attribute.
        title=dict(text='Net Sentiment', font=dict(color='rgb(255, 80, 0)')),
        tickfont=dict(color='rgb(255, 80, 0)'),
        overlaying='y',
        side='right',
    ),
    # Push the legend right of the plot so it does not cover the second axis.
    legend=go.layout.Legend(x=1.05, y=1),
)
fig = go.Figure(data=data, layout=layout)
iplot(fig, filename=model_criterion)


# Refit all three sentiment series with the Local Linear Trend Model and
# rebuild the merged sentiment/price table used by the plot that follows.
model_criterion = "Local Linear Trend Model"
model_fit_pos = fit_statespace_model(model_criterion, "pos")
model_fit_neg = fit_statespace_model(model_criterion, "neg")
model_fit_neutral = fit_statespace_model(model_criterion, "neutral")
model_all = pd.concat(
    [model_fit_pos['data'], model_fit_neg['data'], model_fit_neutral['data']],
    axis=1,
)
# Filtered net sentiment, computed on whole columns instead of row-wise apply.
model_all = model_all.assign(
    netf=(model_all['posf'] - model_all['negf']) * (1. - model_all['neutralf'])
)
# Right join keeps every row of price_data.
sentiment_price_data = pd.merge(model_all, price_data, left_index=True, right_index=True, how="right")
# DataFrame.ffill() replaces the deprecated fillna(method='ffill') spelling.
sentiment_price_data = sentiment_price_data.ffill()
# Raw net sentiment, computed after forward-filling as before.
sentiment_price_data = sentiment_price_data.assign(
    net=(sentiment_price_data['pos'] - sentiment_price_data['neg'])
    * (1. - sentiment_price_data['neutral'])
)


# BIDU price (blue, left axis) overlaid with the filtered net sentiment
# (orange, right axis); title and filename follow the current model_criterion.
trace1 = go.Scatter(
    x=sentiment_price_data.index,
    y=sentiment_price_data.BIDU,
    mode="lines",
    name='BIDU Price',
    line=dict(color='rgb(0, 30, 255)'),
)

trace2 = go.Scatter(
    x=sentiment_price_data.index,
    y=sentiment_price_data.netf,
    mode="lines",
    name='Filtered Net Sentiment',
    line=dict(color='rgb(255, 80, 0)'),
    yaxis='y2',
)

data = [trace1, trace2]
layout = go.Layout(
    title=model_criterion,
    yaxis=dict(title='Price'),
    yaxis2=dict(
        # title.font replaces the deprecated 'titlefont' axis attribute.
        title=dict(text='Net Sentiment', font=dict(color='rgb(255, 80, 0)')),
        tickfont=dict(color='rgb(255, 80, 0)'),
        overlaying='y',
        side='right',
    ),
    # Push the legend right of the plot so it does not cover the second axis.
    legend=go.layout.Legend(x=1.05, y=1),
)
fig = go.Figure(data=data, layout=layout)
iplot(fig, filename=model_criterion)


# Refit all three sentiment series with the Smooth Trend Model and rebuild
# the merged sentiment/price table used by the plot that follows.
model_criterion = "Smooth Trend Model"
model_fit_pos = fit_statespace_model(model_criterion, "pos")
model_fit_neg = fit_statespace_model(model_criterion, "neg")
model_fit_neutral = fit_statespace_model(model_criterion, "neutral")
model_all = pd.concat(
    [model_fit_pos['data'], model_fit_neg['data'], model_fit_neutral['data']],
    axis=1,
)
# Filtered net sentiment, computed on whole columns instead of row-wise apply.
model_all = model_all.assign(
    netf=(model_all['posf'] - model_all['negf']) * (1. - model_all['neutralf'])
)
# Right join keeps every row of price_data.
sentiment_price_data = pd.merge(model_all, price_data, left_index=True, right_index=True, how="right")
# DataFrame.ffill() replaces the deprecated fillna(method='ffill') spelling.
sentiment_price_data = sentiment_price_data.ffill()
# Raw net sentiment, computed after forward-filling as before.
sentiment_price_data = sentiment_price_data.assign(
    net=(sentiment_price_data['pos'] - sentiment_price_data['neg'])
    * (1. - sentiment_price_data['neutral'])
)


# BIDU price (blue, left axis) overlaid with the filtered net sentiment
# (orange, right axis); title and filename follow the current model_criterion.
trace1 = go.Scatter(
    x=sentiment_price_data.index,
    y=sentiment_price_data.BIDU,
    mode="lines",
    name='BIDU Price',
    line=dict(color='rgb(0, 30, 255)'),
)

trace2 = go.Scatter(
    x=sentiment_price_data.index,
    y=sentiment_price_data.netf,
    mode="lines",
    name='Filtered Net Sentiment',
    line=dict(color='rgb(255, 80, 0)'),
    yaxis='y2',
)

data = [trace1, trace2]
layout = go.Layout(
    title=model_criterion,
    yaxis=dict(title='Price'),
    yaxis2=dict(
        # title.font replaces the deprecated 'titlefont' axis attribute.
        title=dict(text='Net Sentiment', font=dict(color='rgb(255, 80, 0)')),
        tickfont=dict(color='rgb(255, 80, 0)'),
        overlaying='y',
        side='right',
    ),
    # Push the legend right of the plot so it does not cover the second axis.
    legend=go.layout.Legend(x=1.05, y=1),
)
fig = go.Figure(data=data, layout=layout)
iplot(fig, filename=model_criterion)

State Space Models on Sentiment Data - Hands-on

Read Price and Sentiment Data

Create relevance weighted probabilities

Fitting Various State Space Models

Local Level Model

Computing Filtered Net Sentiment

Plot Filtered Positive Sentiment

Plot Filtered Negative Sentiment

Plot Filtered Neutral Sentiment

Plot Raw Sentiment Price Overlay

Plot Local Level Model Filtered Sentiment on Price

Model Diagnostics

Deterministic Trend Model

Local Level with Deterministic Trend Model

Local Linear Trend Model

Smooth Trend Model