from statsmodels.datasets.macrodata import load_pandas
from statsmodels.tsa.base.datetools import dates_from_range
from statsmodels.tsa.arima_model import ARIMA
import matplotlib.pyplot as plt
import numpy as np
import statsmodels.api as sm
plt.interactive(False)

# Example: fit an ARIMA(1, 1, 1) model to quarterly CPI and inspect the
# autocorrelation structure of the differenced log series.

cpi = load_pandas().data['cpi']
dates = dates_from_range('1959q1', '2009q3')
cpi.index = dates

res = ARIMA(cpi, (1, 1, 1), freq='Q').fit()
# Parenthesised form prints the same thing on Python 2 and Python 3.
print(res.summary())

# we can look at the (first-differenced) series
cpi.diff().plot()

# maybe logs are better
log_cpi = np.log(cpi)

# First difference of log CPI; the leading element of diff() is NaN, so it
# is dropped before the series is handed to the ACF/PACF estimators.
dlog_cpi = log_cpi.diff().values[1:]

# check the ACF and PACF plots
acf, confint_acf = sm.tsa.acf(dlog_cpi, confint=95)
# The line below would re-center the confidence intervals about zero; it is
# intentionally left disabled.
#confint_acf -= confint_acf.mean(1)[:, None]
pacf = sm.tsa.pacf(dlog_cpi, method='ols')
# confidence interval is now an option to pacf; here a symmetric 95% band
# is computed by hand as z_{0.975} / sqrt(nobs), with nobs taken from the
# data rather than hard-coded.
from scipy import stats
confint_pacf = stats.norm.ppf(1 - .025) * np.sqrt(1. / len(dlog_cpi))

# Lag axes are derived from the estimator output so the plots stay correct
# if the number of returned lags changes.
acf_lags = range(len(acf))
pacf_lags = range(len(pacf))

fig = plt.figure()

# Left panel: autocorrelation with its confidence band.
ax = fig.add_subplot(121)
ax.set_title('Autocorrelation')
ax.plot(acf_lags, acf, 'bo', markersize=5)
ax.vlines(acf_lags, 0, acf)
ax.fill_between(acf_lags, confint_acf[:, 0], confint_acf[:, 1], alpha=.25)

# Right panel: partial autocorrelation, sharing the y-axis with the ACF.
ax = fig.add_subplot(122, sharey=ax)
ax.set_title('Partial Autocorrelation')
ax.vlines(pacf_lags, 0, pacf)
ax.plot(pacf_lags, pacf, 'bo', markersize=5)
ax.fill_between(pacf_lags, -confint_pacf, confint_pacf, alpha=.25)

# Lay out the figure only after both subplots exist; calling this earlier
# would arrange the first panel alone.
fig.tight_layout()


#NOTE: you'll be able to just do this when tsa-plots is in master
#sm.graphics.acf_plot(x, nlags=40)
#sm.graphics.pacf_plot(x, nlags=40)


# still some seasonality
# try an arma(1, 1) with ma(4) term
