import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
# use a font size of 18 as the default for all figures
plt.rcParams.update({'font.size': 18})
import sys
import logging
# emit log records of level INFO and above to stdout
logging.basicConfig(level=logging.INFO, stream=sys.stdout)

# seed NumPy's global random number generator so that the generated data are reproducible
np.random.seed(43)

def ar_1_process(n_samples, c, phi, eps):
    '''
    Generate a correlated random sequence with the AR(1) process.

    Every value follows ``y[i] = c + phi * y[i - 1] + shock``, where each
    shock is drawn from a normal distribution with zero mean and standard
    deviation ``eps``. The first value is drawn from a normal distribution
    with mean ``c / (1 - phi)`` and variance ``eps**2 / (1 - phi**2)``.
    All random numbers come from NumPy's global random number generator.

    Parameters
    ----------
    n_samples: :obj:`int`
        Sample size.
    c: :obj:`float`
        Constant term.
    phi: :obj:`float`
        Correlation magnitude.
    eps: :obj:`float`
        Shock magnitude.

    Returns
    -------
    :obj:`numpy.ndarray`
        The generated sequence of length ``n_samples`` (empty if
        ``n_samples`` is 0).

    Raises
    ------
    ValueError
        If ``abs(phi) >= 1``.
    '''
    # validate first so that no work is done for an invalid process
    if abs(phi) >= 1:
        raise ValueError("abs(phi) must be smaller than 1.")
    ys = np.zeros(n_samples)
    # nothing to generate; also avoids indexing into an empty array below
    if ys.size == 0:
        return ys
    # draw initial value from normal distribution with known mean and variance
    ys[0] = np.random.normal(loc=c / (1 - phi), scale=np.sqrt(eps**2 / (1 - phi**2)))
    # each value depends on its predecessor, so the sequence is built step by step
    for i in range(1, n_samples):
        ys[i] = c + phi * ys[i - 1] + np.random.normal(loc=0., scale=eps)
    return ys

# generate simulation data using the AR(1) process
# NOTE: ar_1_process draws from NumPy's global random number generator, so the
# order of the two calls below determines the values of both series.

logging.info("Generating data sets for the tutorial ...")

# number of samples in each generated time series
N_SAMPLES = 100000

# first time series: constant term, correlation magnitude and shock magnitude
C_1 = 2.0
PHI_1 = 0.85
EPS_1 = 2.0
time_series_1 = ar_1_process(N_SAMPLES, C_1, PHI_1, EPS_1)

# second time series: PHI_2 is much closer to 1 than PHI_1
C_2 = 0.05
PHI_2 = 0.999
EPS_2 = 1.0
time_series_2 = ar_1_process(N_SAMPLES, C_2, PHI_2, EPS_2)

logging.info("Done")

INFO:root:Generating data sets for the tutorial ...
INFO:root:Done


# overlay the leading 1000 samples of both generated series in a single figure
fig = plt.figure(figsize=(10, 6))
plt.plot(time_series_1[:1000], label="time series 1")
plt.plot(time_series_2[:1000], label="time series 2")
plt.title("The first 1000 samples of both time series")
plt.ylabel("$X_i$")
plt.xlabel("$i$")
plt.legend()
plt.show()


# SOLUTION CELL
# naive Python solution
# auto-covariance estimates for the lags 0..299
autocov = np.zeros(300)
# sample mean that is subtracted from every value
avg = np.average(time_series_1)
for j in range(300):
    temp = 0.
    # accumulate the products of all pairs of centered samples that are j steps apart
    for i in range(N_SAMPLES - j):
        temp += (time_series_1[i] - avg) * (time_series_1[i + j] - avg)
    # normalized by the full sample count, not by the number of pairs (N_SAMPLES - j)
    autocov[j] = temp / N_SAMPLES

# plot the estimated auto-covariance against the lag
fig = plt.figure(figsize=(10, 6))
plt.plot(autocov)
plt.xlabel("lag time $j$")
plt.ylabel(r"$\hat{K}^{XX}_j$")
plt.show()


# Numpy solution
# remove the sample mean once so that every lag works on the same centered data
time_series_1_centered = time_series_1 - time_series_1.mean()

# one dot product per lag j: the centered series against itself shifted by j steps
autocov = np.empty(1000)
for j in range(autocov.size):
    autocov[j] = time_series_1_centered[:N_SAMPLES - j].dot(time_series_1_centered[j:])
# every lag is normalized by the full sample count
autocov /= N_SAMPLES

fig = plt.figure(figsize=(10, 6))
# gray reference line at zero
plt.axhline(0, color="gray", linewidth=1)
plt.plot(autocov)
plt.ylabel(r"$\hat{K}^{XX}_j$")
plt.xlabel("lag time $j$")
plt.show()


from scipy.optimize import curve_fit

def exp_fnc(x, a, b):
    '''
    Evaluate the exponential decay ``a * exp(-x / b)``.

    Parameters
    ----------
    x: :obj:`float` or :obj:`numpy.ndarray`
        Position(s) at which the function is evaluated.
    a: :obj:`float`
        Prefactor, i.e. the function value at ``x = 0``.
    b: :obj:`float`
        Decay constant.
    '''
    decay = np.exp(-x / b)
    return a * decay

N_MAX = 1000
# integer lags 1..N_MAX-1 for the data, log-spaced points for drawing the fit
j = np.arange(1, N_MAX)
j_log = np.logspace(0, 3, num=100)

# compute analytical ACF of AR(1) process
AN_SIGMA_1 = np.sqrt(EPS_1 ** 2 / (1 - PHI_1 ** 2))
AN_TAU_EXP_1 = -1 / np.log(PHI_1)
an_acf_1 = exp_fnc(j, AN_SIGMA_1**2, AN_TAU_EXP_1)

# fit the exponential to the numerical auto-covariance, leaving out lag 0
popt, pcov = curve_fit(exp_fnc, j, autocov[1:N_MAX], p0=[15, 10])

fig = plt.figure(figsize=(10, 6))
plt.plot(j, autocov[1:N_MAX], "x", label="numerical ACF")
plt.plot(j, an_acf_1, "-.", linewidth=3, label="analytical ACF")
plt.plot(j_log, exp_fnc(j_log, *popt), label="exponential fit")
plt.xlim(1, N_MAX)
plt.xscale("log")
plt.ylabel(r"$\hat{K}^{XX}_j$")
plt.xlabel("lag time $j$")
plt.legend()
plt.show()

# the second fit parameter is the decay constant of the fitted exponential
print(f"Exponential autocorrelation time: {popt[1]:.2f} sampling intervals")

Exponential autocorrelation time: 6.15 sampling intervals


# compute the ACF: auto-covariance normalized by its value at lag 0
acf = autocov / autocov[0]

# integrate the ACF (suffix _v for vectors)
# entry j_max holds 0.5 plus the sum of the ACF over the lags 1..j_max
j_max_v = np.arange(1000)
tau_int_v = np.zeros(1000)
for j_max in j_max_v:
    tau_int_v[j_max] = acf[1:j_max + 1].sum() + 0.5

# plot the running sum together with a horizontal line at AN_TAU_EXP_1
fig = plt.figure(figsize=(10, 6))
plt.plot(j_max_v[1:], tau_int_v[1:], label="numerical summing")
plt.plot(j_max_v[[1, -1]], [AN_TAU_EXP_1, AN_TAU_EXP_1], "-.", label="analytical")
plt.xscale("log")
plt.ylabel(r"$\hat{\tau}_{X, \mathrm{int}}$")
plt.xlabel(r"sum length $j_\mathrm{max}$")
plt.legend()
plt.show()


C = 5.0

# determine j_max: advance until j_max is no longer smaller than C * tau_int(j_max)
j_max = 0
while C * tau_int_v[j_max] > j_max:
    j_max += 1


# plot both sides of the criterion and mark the point where the search stopped
fig = plt.figure(figsize=(10, 6))
plt.plot(j_max_v[1:], C * tau_int_v[1:])
plt.plot(j_max_v[1:], j_max_v[1:])
plt.plot([j_max], [C * tau_int_v[j_max]], "ro")
plt.xscale("log")
plt.ylim(0, 50)
plt.ylabel(r"$C \times \hat{\tau}_{X, \mathrm{int}}$")
plt.xlabel(r"sum length $j_\mathrm{max}$")
plt.show()

print(f"j_max = {j_max}")

j_max = 31


# read the running sum at the cutoff j_max and derive the remaining quantities
tau_int = tau_int_v[j_max]
N_eff = N_SAMPLES / (2 * tau_int)
sem = np.sqrt(autocov[0] / N_eff)

# report the results
print(f"Integrated autocorrelation time: {tau_int:.2f} time steps\n")
print(f"Original number of samples: {N_SAMPLES}")
print(f"Effective number of samples: {N_eff:.1f}")
print(f"Ratio: {N_eff / N_SAMPLES:.3f}\n")
print(f"Standard error of the mean: {sem:.4f}")

Integrated autocorrelation time: 6.10 time steps

Original number of samples: 100000
Effective number of samples: 8196.2
Ratio: 0.082

Standard error of the mean: 0.0419


# SOLUTION CELL
def autocorrelation_analysis(data, C, window):
    '''
    Estimate the standard error of the mean of a correlated time series.

    The auto-covariance is estimated for the lags ``0 .. window - 1`` and
    normalized by its lag-0 value to obtain the autocorrelation function
    (ACF). The running sum ``0.5 + sum(acf[1:j + 1])`` is evaluated for
    every lag ``j``; the cutoff ``j_max`` is the smallest lag that is not
    smaller than ``C`` times this running sum. The running sum at ``j_max``
    is the integrated autocorrelation time ``tau_int``, from which the
    effective number of samples ``len(data) / (2 * tau_int)`` and the
    standard error of the mean are derived. Two figures are shown (the ACF
    and the cutoff determination) and a summary is printed.

    Parameters
    ----------
    data: array-like of :obj:`float`
        One-dimensional time series.
    C: :obj:`float`
        Factor used in the cutoff criterion ``j_max >= C * tau_int(j_max)``.
    window: :obj:`int`
        Number of lags for which the auto-covariance is computed.

    Returns
    -------
    :obj:`float`
        Standard error of the mean.

    Raises
    ------
    ValueError
        If ``data`` is empty or not one-dimensional, or if ``window`` is
        smaller than 1.
    IndexError
        If the cutoff criterion is not met for any lag below ``window``.
    '''
    # initial processing; np.asarray lets plain Python sequences through
    data = np.asarray(data)
    if data.ndim != 1 or data.size == 0:
        raise ValueError("data must be a non-empty one-dimensional sequence.")
    if window < 1:
        raise ValueError("window must be at least 1.")
    data_size = len(data)
    avg = np.average(data)
    data_centered = data - avg

    # auto-covariance function
    autocov = np.empty(window)
    for j in range(window):
        # the shifted copy fixes the number of pairs; lags beyond the data
        # length yield empty slices and therefore a zero contribution
        shifted = data_centered[j:]
        autocov[j] = np.dot(data_centered[:shifted.size], shifted)
    autocov /= data_size

    # autocorrelation function
    acf = autocov / autocov[0]

    # integrate autocorrelation function: tau_int_v[j] = 0.5 + sum(acf[1:j + 1]),
    # computed with one cumulative sum instead of one partial sum per lag
    j_max_v = np.arange(window)
    tau_int_v = np.zeros(window)
    tau_int_v[1:] = np.cumsum(acf[1:])
    tau_int_v += 0.5

    # find j_max: first lag that is not smaller than C * tau_int_v at that lag
    cutoff_candidates = np.flatnonzero(~(j_max_v < C * tau_int_v))
    if cutoff_candidates.size == 0:
        raise IndexError(
            f"No lag below window={window} satisfies j_max >= C * tau_int; "
            "increase window."
        )
    j_max = int(cutoff_candidates[0])

    # wrap it up
    tau_int = tau_int_v[j_max]
    N_eff = data_size / (2 * tau_int)
    sem = np.sqrt(autocov[0] / N_eff)

    # create ACF plot
    # NOTE(review): the curve is the normalized ACF while the axis label uses
    # the auto-covariance symbol - confirm the intended notation
    plt.figure(figsize=(10, 6))
    plt.gca().axhline(0, color="gray", linewidth=1)
    plt.plot(acf)
    plt.xlabel("lag time $j$")
    plt.ylabel(r"$\hat{K}^{XX}_j$")
    plt.show()

    # create integrated ACF plot
    plt.figure(figsize=(10, 6))
    plt.plot(j_max_v[1:], C * tau_int_v[1:])
    plt.ylim(plt.gca().get_ylim()) # explicitly keep the limits of the first plot
    plt.plot(j_max_v[1:], j_max_v[1:])
    plt.plot([j_max], [C * tau_int_v[j_max]], "ro")
    plt.xscale("log")
    plt.xlabel(r"sum length $j_\mathrm{max}$")
    plt.ylabel(r"$C \times \hat{\tau}_{X, \mathrm{int}}$")
    plt.title("")
    plt.show()

    # print out stuff
    print(f"Mean value: {avg:.4f}")
    print(f"Standard error of the mean: {sem:.4f}")
    print(f"Integrated autocorrelation time: {tau_int:.2f} time steps")
    print(f"Effective number of samples: {N_eff:.1f}")

    return sem


# analyze the second time series with C = 5 and the auto-covariance computed for 20000 lags
sem_2 = autocorrelation_analysis(time_series_2, 5, 20000)

Mean value: 43.1782
Standard error of the mean: 2.7456
Integrated autocorrelation time: 701.00 time steps
Effective number of samples: 71.3

Tutorial: Error Estimation - Part 2 (Autocorrelation Analysis)

Table of contents

Data generation

Introduction

Computing the auto-covariance function

Exercise

Autocorrelation time

Exercise

Exercise

References