# ideal degree of ionization predicted by the Henderson-Hasselbalch equation
def ideal_alpha(pH, pK):
    """Return the ideal ionization degree of an acid with the given pK at the given pH.

    Works element-wise when either argument is a NumPy array.
    """
    # 10**(pK - pH) is the ratio of protonated to deprotonated acid
    protonated_ratio = 10**(pK - pH)
    return 1. / (1 + protonated_ratio)


%matplotlib inline
import matplotlib.pyplot as plt
plt.rcParams.update({'font.size': 18})

import numpy as np
import pint  # module for working with units and dimensions
import time

import espressomd
espressomd.assert_features(['WCA', 'ELECTROSTATICS'])
import espressomd.electrostatics
import espressomd.reaction_methods
import espressomd.polymer
import espressomd.zn
from espressomd.interactions import HarmonicBond


# unit registry used to attach physical units to the quantities defined below
ureg = pint.UnitRegistry()


# physical parameters of the model, carrying explicit units
TEMPERATURE = 298 * ureg.K
WATER_PERMITTIVITY = 78.5 # at 298 K
KT = TEMPERATURE * ureg.k_B  # thermal energy k_B * T
PARTICLE_SIZE = 0.355 * ureg.nm
# Bjerrum length e^2 / (4 pi eps0 eps_r k_B T), converted to nanometers
BJERRUM_LENGTH = (ureg.e**2 / (4 * ureg.pi * ureg.eps0 * WATER_PERMITTIVITY * KT)).to('nm')

# register the reduced units in the registry: energy in units of k_B * T,
# length in units of the particle size, charge in units of the elementary charge
ureg.define(f'reduced_energy = {TEMPERATURE} * boltzmann_constant')
ureg.define(f'reduced_length = {PARTICLE_SIZE}')
ureg.define(f'reduced_charge = 1*e')

# store the values of some parameters in dimensionless reduced units, that will be later passed to ESPResSo
KT_REDUCED = KT.to('reduced_energy').magnitude
BJERRUM_LENGTH_REDUCED = BJERRUM_LENGTH.to('reduced_length').magnitude
PARTICLE_SIZE_REDUCED = PARTICLE_SIZE.to('reduced_length').magnitude

# report the basic parameters in reduced units; the f-prefix is required so that
# the expressions in braces are evaluated instead of being printed literally
print(f"KT = {KT.to('reduced_energy'):4.3f}")
print(f"PARTICLE_SIZE = {PARTICLE_SIZE.to('reduced_length'):4.3f}")
print(f"BJERRUM_LENGTH = {BJERRUM_LENGTH.to('reduced_length'):4.3f}")

KT = {KT.to('reduced_energy'):4.3f}
PARTICLE_SIZE = {PARTICLE_SIZE.to('reduced_length'):4.3f}
BJERRUM_LENGTH = {BJERRUM_LENGTH.to('reduced_length'):4.3f}


# Parameters that define properties of the system
N_ACID = 20  # number of ionizable acid groups (beads of the polymer chain)
C_ACID = 1e-3 * ureg.molar  # concentration of the acid groups
C_SALT = 2 * C_ACID  # salt concentration, set to twice the acid concentration
pKa = 4.88  # acidity constant
pKw = 14.0  # autoprotolysis constant of water


# Number of pH values that we want to sample
NUM_PHS = 15  # number of pH values
OFFSET = 2.0  # width parameter of the sampled pH window (see pHmin and pHmax)

# the sampled window is asymmetric around pKa:
# it spans from pKa - 0.5 * OFFSET up to pKa + 1.5 * OFFSET
pHmin = pKa - 0.5 * OFFSET  # lowest pH value to be used
pHmax = pKa + 1.5 * OFFSET  # highest pH value to be used
pHs = np.linspace(pHmin, pHmax, NUM_PHS)  # list of pH values


# Switches that select which interactions are activated in the simulation.
# Simulate an interacting system with steric repulsion (Warning: it will be slower than without WCA!)
USE_WCA = True
# Simulate an interacting system with electrostatics 
USE_ELECTROSTATICS = False
# By default, we use the Debye-Huckel potential because it allows short runtime of the tutorial
# You can also use the P3M method for full electrostatics (Warning: it will be much slower than DH!)
USE_P3M = False

# electrostatics without the WCA repulsion is rejected up front
if USE_ELECTROSTATICS:
    assert USE_WCA, "Use electrostatics only with a short-range repulsive potential. Otherwise, singularity occurs at zero distance."


N_BLOCKS = 16  # number of blocks to be used in data analysis
DESIRED_BLOCK_SIZE = 10  # desired number of samples per block

PROB_INTEGRATION = 0.6 # Probability of running MD integration before the reaction move of each sample.
# This parameter changes the speed of convergence but not the limiting result
# to which the simulation converges

# number of reaction samples per each pH value
# (chosen so that each of the N_BLOCKS blocks holds DESIRED_BLOCK_SIZE samples)
NUM_SAMPLES = int(N_BLOCKS * DESIRED_BLOCK_SIZE)


# box volume chosen such that N_ACID particles correspond to the concentration C_ACID
BOX_V = (N_ACID / (ureg.avogadro_constant * C_ACID)).to("nm^3")
BOX_L = np.cbrt(BOX_V)  # edge length of the cubic box
BOX_L_REDUCED = BOX_L.to('reduced_length').magnitude

# inverse screening length computed from the salt concentration in mol/L;
# NOTE(review): the 0.304 nm prefactor is presumably the usual estimate for a
# 1:1 electrolyte in water at room temperature -- confirm
KAPPA = np.sqrt(C_SALT.to('mol/L').magnitude) / 0.304 / ureg.nm
KAPPA_REDUCED = KAPPA.to('1/reduced_length').magnitude
print(f"KAPPA = {KAPPA:.3f}")
print(f"KAPPA_REDUCED = {KAPPA_REDUCED:.3f}")
print(f"Debye_length: {1. / KAPPA:.2f} = {(1. / KAPPA).to('reduced_length'):.2f}")

# number of salt ion pairs in the box at concentration C_SALT;
# NOTE(review): int() truncates towards zero, so a product that lands marginally
# below a whole number due to floating-point rounding would lose one ion pair
N_SALT = int((C_SALT * BOX_V * ureg.avogadro_constant))
print("Calculated values:")
print(f"BOX_L = {BOX_L:.3g} = {BOX_L.to('reduced_length'):.3g}")
print(f"BOX_V  = {BOX_V:.3g} = {BOX_V.to('reduced_length^3'):.3g}")
print(f"N_SALT = {N_SALT}")

KAPPA = 0.147 / nanometer
KAPPA_REDUCED = 0.052
Debye_length: 6.80 nanometer = 19.15 reduced_length
Calculated values:
BOX_L = 32.1 nanometer = 90.5 reduced_length
BOX_V  = 3.32e+04 nanometer ** 3 = 7.42e+05 reduced_length ** 3
N_SALT = 40


# particle types of different species: each species name is mapped to
# consecutive integer type ids, starting from zero
TYPES = {
    species: type_id
    for type_id, species in enumerate(("HA", "A", "B", "Na", "Cl"))
}
# particle charges of different species, converted to the reduced charge unit;
# each species is listed together with its valency
CHARGES = {
    species: (valency * ureg.e).to("reduced_charge").magnitude
    for species, valency in (
        ("HA", 0),
        ("A", -1),
        ("B", +1),
        ("Na", +1),
        ("Cl", -1),
    )
}


# create the ESPResSo system with a cubic box of edge length BOX_L_REDUCED
system = espressomd.System(box_l=[BOX_L_REDUCED] * 3)
system.time_step = 0.01  # integration time step (reduced units)
system.cell_system.skin = 2.0
np.random.seed(seed=10)  # initialize the random number generator in numpy


# we need to define bonds before creating polymers
hb = HarmonicBond(k=30, r_0=1.0)
system.bonded_inter.add(hb)

# create the polymer positions
# (a single chain with N_ACID beads; note that the bond_length used to generate
# the positions, 0.9, differs from r_0=1.0 of the harmonic bond)
polymers = espressomd.polymer.linear_polymer_positions(n_polymers=1,
                                                       beads_per_chain=N_ACID,
                                                       bond_length=0.9, 
                                                       seed=23)

# add the polymer particles composed of ionizable acid groups, initially in the ionized state;
# every bead except the first one of a chain is bonded to its predecessor
for polymer in polymers:
    prev_particle = None
    for position in polymer:
        p = system.part.add(pos=position, type=TYPES["A"], q=CHARGES["A"])
        # test explicitly for "no predecessor" so that the decision does not
        # depend on how the truth value of a particle handle is defined
        if prev_particle is not None:
            p.add_bond((hb, prev_particle))
        prev_particle = p

# add the corresponding number of H+ ions
# (one particle of type B per acid group, placed at uniformly random positions in the box)
system.part.add(pos=np.random.random((N_ACID, 3)) * BOX_L_REDUCED,
                type=[TYPES["B"]] * N_ACID,
                q=[CHARGES["B"]] * N_ACID)

# add salt ion pairs
# (N_SALT cations and N_SALT anions, also at uniformly random positions)
system.part.add(pos=np.random.random((N_SALT, 3)) * BOX_L_REDUCED,
                type=[TYPES["Na"]] * N_SALT,
                q=[CHARGES["Na"]] * N_SALT)
system.part.add(pos=np.random.random((N_SALT, 3)) * BOX_L_REDUCED,
                type=[TYPES["Cl"]] * N_SALT,
                q=[CHARGES["Cl"]] * N_SALT)

print(f"The system contains {len(system.part)} particles")

The system contains 120 particles


# Set up the steric repulsion (if enabled), remove particle overlaps and
# attach the thermostat.
if USE_WCA:
    # set the WCA interaction between all particle pairs
    wca_sigma = 1.0
    wca_epsilon = 1.0
    for type_1 in TYPES.values():
        for type_2 in TYPES.values():
            # visit every unordered pair of types once, including equal types
            if type_1 >= type_2:
                system.non_bonded_inter[type_1, type_2].wca.set_params(epsilon=wca_epsilon, sigma=wca_sigma)

    # relax the overlaps with steepest descent
    mass = 1.0
    # force threshold, built from sigma, the mass and the time step
    FMAX = 0.01 * wca_sigma * mass / system.time_step**2

    system.integrator.set_steepest_descent(
        f_max=FMAX,
        gamma=0.1,
        max_displacement=0.1)

    system.integrator.run(5000)
    # every force component of every particle must be below the threshold
    assert np.all(np.abs(system.part.all().f)<FMAX), "Overlap removal did not converge!"

    # to switch back to velocity Verlet
    system.integrator.set_vv()

# add thermostat and short integration to let the system relax further
system.thermostat.set_langevin(kT=KT_REDUCED, gamma=1.0, seed=7)
system.integrator.run(1000)

# Activate the electrostatic solver selected by the switches USE_ELECTROSTATICS
# and USE_P3M; without electrostatics only the cell system is changed.
if USE_ELECTROSTATICS:
    # prefactor of the Coulomb interaction: Bjerrum length times kT (reduced units)
    COULOMB_PREFACTOR=BJERRUM_LENGTH_REDUCED * KT_REDUCED
    if USE_P3M:
        coulomb = espressomd.electrostatics.P3M(prefactor=COULOMB_PREFACTOR,
                                                accuracy=1e-3)
    else:
        # Debye-Hueckel potential, cut off at a distance of 1 / kappa
        coulomb = espressomd.electrostatics.DH(prefactor=COULOMB_PREFACTOR,
                                               kappa=KAPPA_REDUCED,
                                               r_cut=1. / KAPPA_REDUCED)

    system.electrostatics.solver = coulomb
else:
    # this speeds up the simulation of dilute systems with small particle numbers
    system.cell_system.set_n_square()


# SOLUTION CELL
# Create the constant-pH reaction method. The exclusion range equals the
# particle size when WCA is active and is zero otherwise.
exclusion_range = PARTICLE_SIZE_REDUCED if USE_WCA else 0.0
RE = espressomd.reaction_methods.ConstantpHEnsemble(
    kT=KT_REDUCED,
    exclusion_range=exclusion_range,
    seed=77,
    constant_pH=2  # temporary value
)
# len(TYPES) is the first integer that is not used as a type id in TYPES
RE.set_non_interacting_type(type=len(TYPES)) # this parameter helps speed up the calculation in an interacting system


# SOLUTION CELL
# Register the acid dissociation reaction HA <-> A + B with the reaction
# method; gamma is the reaction constant obtained from pKa.
RE.add_reaction(
    gamma=10**(-pKa),
    reactant_types=[TYPES["HA"]],
    product_types=[TYPES["A"], TYPES["B"]],
    default_charges={TYPES["HA"]: CHARGES["HA"],
                     TYPES["A"]: CHARGES["A"],
                     TYPES["B"]: CHARGES["B"]}
)


# SOLUTION CELL
def equilibrate_reaction(reaction_steps=1):
    """Run ``reaction_steps`` reaction steps of the module-level method ``RE``.

    No MD integration is performed and nothing is returned.
    """
    RE.reaction(steps=reaction_steps)


# appearance of the species in the visualizer: one colour per particle type ...
color = {
    TYPES[species]: hex_code
    for species, hex_code in (
        ("HA", "#7fc454"),  # green
        ("A", "#225204"),   # dark green
        ("B", "#fca000"),   # orange
        ("Na", "#ff0000"),  # red
        ("Cl", "#030ffc"),  # blue
    )
}
# ... and the same radius for all of them
radii = {TYPES[species]: 2 for species in ("HA", "A", "B", "Na", "Cl")}

# create the visualizer for the system and draw the initial configuration
vis = espressomd.zn.Visualizer(system, colors=color, radii=radii)
vis.update()

<MagicMock name='mock.Visualizer().update()' id='129434531109328'>


# SOLUTION CELL
def perform_sampling(type_A, num_samples, num_As:np.ndarray, reaction_steps,
                     prob_integration=0.5, integration_steps=1000):
    for i in range(num_samples):
        if USE_WCA and np.random.random() < prob_integration:
            for _ in range(integration_steps):
                system.integrator.run(1)
            global vis
            vis.update()
        # we should do at least one reaction attempt per reactive particle
        RE.reaction(steps=reaction_steps)
        num_As[i] = system.number_of_particles(type=type_A)


# empty numpy array as a placeholder for collecting the data
# (one row per pH value, one column per sample; -1 marks entries not yet sampled)
num_As_at_each_pH = -np.ones((len(pHs), NUM_SAMPLES))  # number of A- species observed at each sample

# run a productive simulation and collect the data
print(f"Simulated pH values: {pHs}")
for ipH, pH in enumerate(pHs):
    print(f"Run pH {pH:.2f} ...")
    RE.constant_pH = pH  # set new pH value
    start_time = time.time()
    equilibrate_reaction(reaction_steps=N_ACID + 1)  # pre-equilibrate the reaction to the new pH value
    # the row slice is passed as the output array and is filled in place
    perform_sampling(type_A=TYPES["A"],
                     num_samples=NUM_SAMPLES, 
                     num_As=num_As_at_each_pH[ipH, :],
                     reaction_steps=N_ACID + 1,
                     prob_integration=PROB_INTEGRATION)  # perform sampling / run production simulation
    runtime = (time.time() - start_time) * ureg.s
    # compare the measured average number of A- with the ideal prediction
    print(f"runtime: {runtime:.2g};",
          f"measured number of A-: {np.mean(num_As_at_each_pH[ipH]):.2f}", 
          f"(ideal: {N_ACID * ideal_alpha(pH, pKa):.2f})", 
          )
print("\nDONE")

Simulated pH values: [3.88       4.16571429 4.45142857 4.73714286 5.02285714 5.30857143
 5.59428571 5.88       6.16571429 6.45142857 6.73714286 7.02285714
 7.30857143 7.59428571 7.88      ]
Run pH 3.88 ...
runtime: 2.9 second; measured number of A-: 1.61 (ideal: 1.82)
Run pH 4.17 ...
runtime: 2.8 second; measured number of A-: 3.26 (ideal: 3.24)
Run pH 4.45 ...
runtime: 2.7 second; measured number of A-: 5.21 (ideal: 5.43)
Run pH 4.74 ...
runtime: 2.7 second; measured number of A-: 8.35 (ideal: 8.37)
Run pH 5.02 ...
runtime: 3.2 second; measured number of A-: 11.66 (ideal: 11.63)
Run pH 5.31 ...
runtime: 3 second; measured number of A-: 14.67 (ideal: 14.57)
Run pH 5.59 ...
runtime: 3.1 second; measured number of A-: 16.84 (ideal: 16.76)
Run pH 5.88 ...
runtime: 3 second; measured number of A-: 18.14 (ideal: 18.18)
Run pH 6.17 ...
runtime: 2.9 second; measured number of A-: 18.84 (ideal: 19.02)
Run pH 6.45 ...
runtime: 2.8 second; measured number of A-: 19.39 (ideal: 19.48)
Run pH 6.74 ...
runtime: 2.8 second; measured number of A-: 19.77 (ideal: 19.73)
Run pH 7.02 ...
runtime: 3 second; measured number of A-: 19.86 (ideal: 19.86)
Run pH 7.31 ...
runtime: 3 second; measured number of A-: 19.96 (ideal: 19.93)
Run pH 7.59 ...
runtime: 2.6 second; measured number of A-: 19.96 (ideal: 19.96)
Run pH 7.88 ...
runtime: 2.7 second; measured number of A-: 19.96 (ideal: 19.98)

DONE


# statistical analysis of the results
def block_analyze(input_data, n_blocks=16):
    """Analyze time series with the blocking method.

    Parameters
    ----------
    input_data
        Two-dimensional array-like of shape (number of series, number of
        samples); every row is analyzed independently.
    n_blocks
        Number of blocks each series is divided into (at least 2).

    Returns
    -------
    av_data
        Average of every series over all samples.
    err_data
        Standard error of the mean, estimated from the block averages.
    tau_data
        Estimated autocorrelation time of every series; ``-1.0`` marks a
        series whose variance is not positive, for which no estimate exists.
    block_size
        Number of samples per block.

    Raises
    ------
    ValueError
        If ``n_blocks`` is smaller than 2 or exceeds the number of samples.

    Samples beyond ``n_blocks * block_size`` do not enter the block averages
    but are included in ``av_data``. The block size is printed.
    """
    data = np.asarray(input_data)
    if n_blocks < 2:
        raise ValueError("n_blocks must be at least 2")
    # this number of blocks is recommended by Janke as a reasonable compromise
    # between the conflicting requirements on block size and number of blocks
    block_size = data.shape[1] // n_blocks
    if block_size < 1:
        raise ValueError("n_blocks must not exceed the number of samples per series")
    print(f"block_size: {block_size}")
    # initialize the array of per-block averages
    block_average = np.zeros((n_blocks, data.shape[0]))
    # calculate averages per each block
    for block in range(n_blocks):
        block_average[block] = np.average(data[:, block * block_size: (block + 1) * block_size], axis=1)
    # calculate the average and average of the square
    av_data = np.average(data, axis=1)
    av2_data = np.average(data * data, axis=1)
    # calculate the variance of the block averages
    block_var = np.var(block_average, axis=0)
    # calculate standard error of the mean
    err_data = np.sqrt(block_var / (n_blocks - 1))
    # estimate autocorrelation time using the formula given by Janke
    # this assumes that the errors have been correctly estimated
    variance = av2_data - av_data * av_data
    # unphysical value marks a failure to compute tau; the estimate divides by
    # the variance, so it is undefined whenever the series does not fluctuate
    # (e.g. all samples equal), regardless of the value of its average
    tau_data = np.full(av_data.shape, -1.0)
    computable = variance > 0
    prefactor = 0.5 * block_size * n_blocks / (n_blocks - 1)
    tau_data[computable] = prefactor * block_var[computable] / variance[computable]
    return av_data, err_data, tau_data, block_size


# estimate the statistical error and the autocorrelation time using the formula given by Janke
# (each row of num_As_at_each_pH, i.e. each pH value, is analyzed separately)
av_num_As, err_num_As, tau, block_size = block_analyze(num_As_at_each_pH, N_BLOCKS)
print(f"av = {av_num_As}")
print(f"err = {err_num_As}")
print(f"tau = {tau}")

# calculate the average ionization degree
# (the number of A- particles divided by the number of acid groups)
av_alpha = av_num_As / N_ACID
err_alpha = err_num_As / N_ACID

# plot the simulation results compared with the ideal titration curve
plt.figure(figsize=(10, 6), dpi=80)
plt.errorbar(pHs - pKa, av_alpha, err_alpha, marker='o', linestyle='none',
             label=r"simulation")
# finer pH grid over the same range for a smooth ideal curve
pHs2 = np.linspace(pHmin, pHmax, num=50)
plt.plot(pHs2 - pKa, ideal_alpha(pHs2, pKa), label=r"Henderson-Hasselbalch")
plt.xlabel(r'$\mathrm{pH} - \mathrm{p}K_{\mathrm{A}}$', fontsize=16)
plt.ylabel(r'$\alpha$', fontsize=16)
plt.legend(fontsize=16)
plt.show()

block_size: 10
av = [ 1.6125   3.2625   5.20625  8.35    11.6625  14.66875 16.8375  18.1375
 18.84375 19.39375 19.775   19.8625  19.95625 19.95625 19.9625 ]
err = [0.0625     0.10363196 0.09893969 0.17199806 0.09123002 0.14222012
 0.12209115 0.10522793 0.0605315  0.04784415 0.03708099 0.03400368
 0.01280869 0.01818596 0.0125    ]
tau = [0.21369804 0.38294217 0.23722356 0.46841498 0.14719123 0.45625668
 0.47726846 0.4015393  0.27736093 0.31780896 0.44110276 0.64417845
 0.31372549 0.63243075 0.34632035]


# check if the blocks contain enough data for reliable error estimates
# (the ratio is computed once and reused in all messages below)
samples_per_block = block_size / tau
print(f"uncorrelated samples per block:\nblock_size/tau = {samples_per_block}")
threshold = 10  # block size should be much greater than the correlation time
if np.any(samples_per_block < threshold):
    print(f"\nWarning: some blocks may contain less than {threshold} uncorrelated samples."
          "\nYour error estimates may be unreliable."
          "\nPlease, check them using a more sophisticated method or run a longer simulation.")
    print(f"? block_size/tau > threshold ? : {samples_per_block > threshold}")
else:
    # adjacent string literals are concatenated verbatim, so the separator
    # between the two sentences has to be part of one of them
    print(f"\nAll blocks seem to contain more than {threshold} uncorrelated samples."
          " Error estimates should be OK.")

uncorrelated samples per block:
block_size/tau = [46.795      26.1136033  42.15432961 21.34859155 67.93882979 21.91748745
 20.95256813 24.90416275 36.05410448 31.46544369 22.67045455 15.52364865
 31.875      15.81200787 28.875     ]

All blocks seem to contain more than 10 uncorrelated samples.Error estimates should be OK.


# plot the deviations from the ideal result
alpha_deviation = av_alpha - ideal_alpha(pHs, pKa)
shifted_pHs = pHs - pKa

plt.figure(figsize=(10, 6), dpi=80)
# symmetric vertical range, 1.5 times the largest absolute deviation
ylim = np.max(np.abs(alpha_deviation))
plt.ylim((-1.5 * ylim, 1.5 * ylim))
plt.errorbar(shifted_pHs, alpha_deviation,
             err_alpha, marker='o', linestyle='none', label=r"simulation")
# the ideal result corresponds to zero deviation at every pH
plt.plot(shifted_pHs, np.zeros_like(alpha_deviation), label=r"Henderson-Hasselbalch")
plt.xlabel(r'$\mathrm{pH} - \mathrm{p}K_{\mathrm{A}}$', fontsize=16)
plt.ylabel(r'$\alpha - \alpha_{ideal}$', fontsize=16)
plt.legend(fontsize=16)
plt.show()


# average concentration of B+ is the same as the concentration of A-
av_c_Bplus = av_alpha * C_ACID
err_c_Bplus = err_alpha * C_ACID  # error in the average concentration

# ideal concentrations on a pH grid wider than the simulated range
full_pH_range = np.linspace(2, 12, 100)
ideal_c_Aminus = ideal_alpha(full_pH_range, pKa) * C_ACID
ideal_c_OH = np.power(10.0, -(pKw - full_pH_range))*ureg.molar
ideal_c_H = np.power(10.0, -full_pH_range)*ureg.molar
# ideal_c_M is calculated from electroneutrality
# (the expression is negative wherever c_H exceeds c_A + c_OH; those points are
# excluded from the plot below by the "> 0" mask)
ideal_c_M = (ideal_c_Aminus + ideal_c_OH - ideal_c_H)

# plot the simulation results compared with the ideal results of the cations
plt.figure(figsize=(10, 6), dpi=80)
plt.errorbar(pHs,
             av_c_Bplus.magnitude,
             err_c_Bplus.magnitude,
             marker='o', c="tab:blue", linestyle='none',
             label=r"measured $c_{\mathrm{B^+}}$", zorder=2)
plt.plot(full_pH_range, 
         ideal_c_H.magnitude, 
         c="tab:green",
         label=r"ideal $c_{\mathrm{H^+}}$", 
         zorder=0)
# only strictly positive values of ideal_c_M are drawn
plt.plot(full_pH_range[np.nonzero(ideal_c_M.magnitude > 0.)], 
         ideal_c_M.magnitude[np.nonzero(ideal_c_M.magnitude > 0.)], 
         c="tab:orange",
         label=r"ideal $c_{\mathrm{M^+}}$", 
         zorder=0)
plt.plot(full_pH_range, 
         ideal_c_Aminus.magnitude, 
         c="tab:blue", 
         ls=(0, (5, 5)),
         label=r"ideal $c_{\mathrm{A^-}}$", 
         zorder=1)
plt.yscale("log")
plt.ylim(5e-6,1e-2)
plt.xlabel('input pH', fontsize=16)
plt.ylabel(r'concentration $c$ $[\mathrm{mol/L}]$', fontsize=16)
plt.legend(fontsize=16)
plt.show()


# Neutralizing ions required by electroneutrality in the ideal system.
# The net excess of negative charge (A- and OH- minus H+) is balanced by a
# cation M+ where it is positive and by an anion X- where it is negative.
# A concentration cannot be negative, so each of the two is clamped at zero;
# without the clamping X- and M+ are exact negatives of each other and cancel
# in the ionic strength.
net_anion_excess = (ideal_c_Aminus + ideal_c_OH - ideal_c_H).to('mol/L').magnitude
ideal_c_X = np.maximum(-net_anion_excess, 0.0) * ureg.molar
ideal_c_M_nonneg = np.maximum(net_anion_excess, 0.0) * ureg.molar

# ionic strength of the small ions in the ideal system
ideal_ionic_strength = 0.5 * \
    (ideal_c_X + ideal_c_M_nonneg + ideal_c_H + ideal_c_OH + 2 * C_SALT)
# in constant-pH simulation ideal_c_Aminus = ideal_c_Bplus
cpH_ionic_strength = 0.5 * (ideal_c_Aminus + 2 * C_SALT)
cpH_ionic_strength_measured = 0.5 * (av_c_Bplus + 2 * C_SALT)
cpH_error_ionic_strength_measured = 0.5 * err_c_Bplus

# compare the measured ionic strength with both ideal predictions
plt.figure(figsize=(10, 6), dpi=80)
plt.errorbar(pHs,
             cpH_ionic_strength_measured.magnitude,
             cpH_error_ionic_strength_measured.magnitude,
             c="tab:blue",
             linestyle='none', marker='o',
             label=r"measured", zorder=3)
plt.plot(full_pH_range,
         cpH_ionic_strength.magnitude,
         c="tab:blue",
         ls=(0, (5, 5)),
         label=r"constant-pH", zorder=2)
plt.plot(full_pH_range,
         ideal_ionic_strength.magnitude,
         c="tab:orange",
         linestyle='-',
         label=r"Henderson-Hasselbalch", zorder=1)

plt.ylim(1.8e-3,3e-3)
plt.xlabel('input pH', fontsize=16)
plt.ylabel(r'Ionic Strength [$\mathrm{mol/L}$]', fontsize=16)
plt.legend(fontsize=16)
plt.show()

The constant-pH ensemble method for acid-base reactions

Expected prior knowledge

Introduction

The chemical equilibrium and reaction constant

The constant pH method

Simulation setup

Set the reduced units of energy and length

Set the key physical parameters that uniquely define the system

Set the range of parameters that we want to vary

Choose which interactions should be activated

Set the number of samples to be collected

Calculate the dependent parameters

Set the particle types and charges

Initialize the ESPResSo system

Set up particles and bonded-interactions

Set up non-bonded-interactions

Set up the constant pH ensemble using the reaction ensemble module

Run the simulations

Results

Statistical analysis of the data

The Neutralizing Ion $\mathrm{B^+}$

Suggested problems for further work

References