def ideal_alpha(pH, pK):
    """Return the ideal degree of ionization of a weak acid.

    Evaluates the Henderson-Hasselbalch expression
    ``alpha = 1 / (1 + 10**(pK - pH))`` for an ideal (non-interacting) system.

    Parameters
    ----------
    pH : scalar or array-like supporting arithmetic
        pH value(s) at which alpha is evaluated.
    pK : scalar or array-like supporting arithmetic
        Acidity constant (pK) of the ionizable group.

    Returns
    -------
    Ionization degree; 0.5 when ``pH == pK``.
    """
    protonated_over_ionized = 10**(pK - pH)
    return 1. / (1 + protonated_over_ionized)

%matplotlib inline
import matplotlib.pyplot as plt

plt.rcParams.update({'font.size': 18})

import numpy as np
import pkg_resources
import pint  # module for working with units and dimensions
import time
assert pkg_resources.packaging.specifiers.SpecifierSet('>=0.10.1').contains(pint.__version__), \
  f'pint version {pint.__version__} is too old: several numpy operations can cast away the unit'

import espressomd
espressomd.assert_features(['WCA', 'ELECTROSTATICS'])
import espressomd.electrostatics
import espressomd.reaction_methods
import espressomd.polymer
from espressomd.interactions import HarmonicBond

# unit registry used for every dimensional quantity in this script
ureg = pint.UnitRegistry()

# physical constants of the model
WATER_PERMITTIVITY = 78.5  # at 298 K
TEMPERATURE = 298 * ureg.K
PARTICLE_SIZE = 0.355 * ureg.nm
KT = TEMPERATURE * ureg.k_B
BJERRUM_LENGTH = (
    ureg.e**2 / (4 * ureg.pi * ureg.eps0 * WATER_PERMITTIVITY * KT)
).to('nm')

# reduced units: energy in units of kT, length in units of the particle size,
# charge in units of the elementary charge
ureg.define(f'reduced_energy = {TEMPERATURE} * boltzmann_constant')
ureg.define(f'reduced_length = {PARTICLE_SIZE}')
ureg.define('reduced_charge = 1*e')

# dimensionless magnitudes in reduced units; these are the numbers handed to ESPResSo later
KT_REDUCED = KT.to('reduced_energy').magnitude
BJERRUM_LENGTH_REDUCED = BJERRUM_LENGTH.to('reduced_length').magnitude
PARTICLE_SIZE_REDUCED = PARTICLE_SIZE.to('reduced_length').magnitude

# Report the base quantities expressed in reduced units.
# The strings must be f-strings: without the prefix the braces are printed
# verbatim instead of being replaced by the formatted quantities.
print(f"KT = {KT.to('reduced_energy'):4.3f}")
print(f"PARTICLE_SIZE = {PARTICLE_SIZE.to('reduced_length'):4.3f}")
print(f"BJERRUM_LENGTH = {BJERRUM_LENGTH.to('reduced_length'):4.3f}")

KT = {KT.to('reduced_energy'):4.3f}
PARTICLE_SIZE = {PARTICLE_SIZE.to('reduced_length'):4.3f}
BJERRUM_LENGTH = {BJERRUM_LENGTH.to('reduced_length'):4.3f}

# Parameters that define properties of the system
N_ACID = 20  # number of ionizable acid groups
C_ACID = 1e-3 * ureg.molar  # concentration of acid groups
C_SALT = 2 * C_ACID  # salt concentration, twice the acid concentration
pKw = 14.0  # autoprotolysis constant of water
pKa = 4.88  # acidity constant

# Grid of pH values that we want to sample
NUM_PHS = 15  # number of pH values
OFFSET = 2.0  # scale of the sampled pH window around pKa

# the window is asymmetric: it spans from pKa - 0.5 * OFFSET to pKa + 1.5 * OFFSET
pHmin = pKa - 0.5 * OFFSET  # lowest pH value to be used
pHmax = pKa + 1.5 * OFFSET  # highest pH value to be used
pHs = np.linspace(pHmin, pHmax, num=NUM_PHS)  # equally spaced pH values, end points included

# Switches selecting which interactions are active.
# Steric repulsion via WCA (Warning: it will be slower than without WCA!)
USE_WCA = True
# Electrostatic interactions
USE_ELECTROSTATICS = False
# Electrostatics method: the Debye-Huckel potential is the default because it keeps the
# runtime of the tutorial short; P3M gives full electrostatics
# (Warning: it will be much slower than DH!)
USE_P3M = False

# electrostatics is only allowed together with the short-range repulsion
assert USE_WCA or not USE_ELECTROSTATICS, "Use electrostatics only with a short-range repulsive potential. Otherwise, singularity occurs at zero distance."

# block analysis layout: number of blocks and desired number of samples per block
N_BLOCKS = 16
DESIRED_BLOCK_SIZE = 10

# Probability of running MD integration after the reaction move.
# It affects how fast the simulation converges, but not the limiting
# result to which it converges.
PROB_INTEGRATION = 0.6

# total number of reaction samples collected at each pH value
NUM_SAMPLES = N_BLOCKS * DESIRED_BLOCK_SIZE

# simulation box: the volume that holds N_ACID acid groups at concentration C_ACID,
# and the edge length of the cube with that volume
BOX_V = (N_ACID / (ureg.avogadro_constant * C_ACID)).to("nm^3")
BOX_L = np.cbrt(BOX_V)
BOX_L_REDUCED = BOX_L.to('reduced_length').magnitude

# number of salt ion pairs that gives concentration C_SALT in that volume
N_SALT = int((C_SALT * BOX_V * ureg.avogadro_constant))

# inverse screening length from the salt concentration in mol/L
# NOTE(review): 0.304 nm looks like the usual prefactor of the Debye length of a
# monovalent salt in water at room temperature -- confirm
KAPPA = np.sqrt(C_SALT.to('mol/L').magnitude) / 0.304 / ureg.nm
KAPPA_REDUCED = KAPPA.to('1/reduced_length').magnitude

# report the derived values
print(f"KAPPA = {KAPPA:.3f}")
print(f"KAPPA_REDUCED = {KAPPA_REDUCED:.3f}")
print(f"Debye_length: {1. / KAPPA:.2f} = {(1. / KAPPA).to('reduced_length'):.2f}")
print("Calculated values:")
print(f"BOX_L = {BOX_L:.3g} = {BOX_L.to('reduced_length'):.3g}")
print(f"BOX_V  = {BOX_V:.3g} = {BOX_V.to('reduced_length^3'):.3g}")
print(f"N_SALT = {N_SALT}")

KAPPA = 0.147 / nanometer
KAPPA_REDUCED = 0.052
Debye_length: 6.80 nanometer = 19.15 reduced_length
Calculated values:
BOX_L = 32.1 nanometer = 90.5 reduced_length
BOX_V  = 3.32e+04 nanometer ** 3 = 7.42e+05 reduced_length ** 3
N_SALT = 40

# particle types of different species: consecutive integers in the listed order
TYPES = {
    species: index
    for index, species in enumerate(("HA", "A", "B", "Na", "Cl"))
}
# particle charges of different species, as dimensionless magnitudes in reduced charge units
CHARGES = {
    species: (valency * ureg.e).to("reduced_charge").magnitude
    for species, valency in (
        ("HA", 0),
        ("A", -1),
        ("B", +1),
        ("Na", +1),
        ("Cl", -1),
    )
}

# create the simulation system with a cubic box of edge length BOX_L_REDUCED
system = espressomd.System(box_l=[BOX_L_REDUCED] * 3)
system.time_step = 0.01  # integration time step (presumably in reduced time units -- confirm)
system.cell_system.skin = 2.0
np.random.seed(seed=10)  # initialize the random number generator in numpy

# we need to define bonds before creating polymers
hb = HarmonicBond(k=30, r_0=1.0)
system.bonded_inter.add(hb)

# create the polymer positions: a single chain with one bead per acid group
# (only positions are generated here; the particles are added to the system afterwards)
polymers = espressomd.polymer.linear_polymer_positions(n_polymers=1,
                                                       beads_per_chain=N_ACID,
                                                       bond_length=0.9, 
                                                       seed=23)

# add the polymer particles composed of ionizable acid groups, initially in the ionized state
for polymer in polymers:
    prev_particle = None  # no predecessor exists for the first bead of a chain
    for position in polymer:
        p = system.part.add(pos=position, type=TYPES["A"], q=CHARGES["A"])
        # bond every bead except the first to its predecessor; compare against None
        # explicitly so the test does not depend on the truthiness of a particle handle
        if prev_particle is not None:
            p.add_bond((hb, prev_particle))
        prev_particle = p

# add the corresponding number of H+ ions
# (one B+ particle per acid group, at uniformly random positions inside the box)
system.part.add(pos=np.random.random((N_ACID, 3)) * BOX_L_REDUCED,
                type=[TYPES["B"]] * N_ACID,
                q=[CHARGES["B"]] * N_ACID)

# add salt ion pairs
# (N_SALT cations and N_SALT anions, at uniformly random positions inside the box)
system.part.add(pos=np.random.random((N_SALT, 3)) * BOX_L_REDUCED,
                type=[TYPES["Na"]] * N_SALT,
                q=[CHARGES["Na"]] * N_SALT)
system.part.add(pos=np.random.random((N_SALT, 3)) * BOX_L_REDUCED,
                type=[TYPES["Cl"]] * N_SALT,
                q=[CHARGES["Cl"]] * N_SALT)

# total particle count: N_ACID polymer beads + N_ACID counter-ions + 2 * N_SALT salt ions
print(f"The system contains {len(system.part)} particles")

The system contains 120 particles

if USE_WCA:
    # set the WCA interaction between all particle pairs
    # (the >= test visits every unordered pair of types once, same-type pairs included)
    for type_1 in TYPES.values():
        for type_2 in TYPES.values():
            if type_1 >= type_2:
                system.non_bonded_inter[type_1, type_2].wca.set_params(epsilon=1.0, sigma=1.0)

    # relax the overlaps with steepest descent
    # NOTE(review): f_max=0 presumably disables the force-based stopping criterion,
    # so that all 20 steps are performed -- confirm against the ESPResSo documentation
    system.integrator.set_steepest_descent(f_max=0, gamma=0.1, max_displacement=0.1)
    system.integrator.run(20)
    system.integrator.set_vv()  # to switch back to velocity Verlet

# add thermostat and short integration to let the system relax further
# (this part runs regardless of USE_WCA)
system.thermostat.set_langevin(kT=KT_REDUCED, gamma=1.0, seed=7)
system.integrator.run(steps=1000)

# activate the electrostatic interaction, if requested
if USE_ELECTROSTATICS:
    # prefactor of the Coulomb interaction: Bjerrum length times kT, both in reduced units
    COULOMB_PREFACTOR=BJERRUM_LENGTH_REDUCED * KT_REDUCED
    if USE_P3M:
        # full electrostatics with P3M
        coulomb = espressomd.electrostatics.P3M(prefactor = COULOMB_PREFACTOR, 
                                                accuracy=1e-3)
    else:
        # Debye-Huckel potential, cut off at one screening length (1 / kappa)
        coulomb = espressomd.electrostatics.DH(prefactor = COULOMB_PREFACTOR, 
                                               kappa = KAPPA_REDUCED, 
                                               r_cut = 1. / KAPPA_REDUCED)
        
    system.actors.add(coulomb)
else:
    # this speeds up the simulation of dilute systems with small particle numbers
    system.cell_system.set_n_square()

# inserted particles are kept at least one particle size away from existing ones
# only when the WCA repulsion is active; otherwise no exclusion is applied
exclusion_range = PARTICLE_SIZE_REDUCED if USE_WCA else 0.0
RE = espressomd.reaction_methods.ConstantpHEnsemble(
    kT=KT_REDUCED,
    exclusion_range=exclusion_range,
    seed=77,
    constant_pH=2  # temporary value
)
# len(TYPES) is the first integer not used as a particle type by any species in TYPES
RE.set_non_interacting_type(type=len(TYPES)) # this parameter helps speed up the calculation in an interacting system

# register the acid dissociation reaction HA <-> A + B with reaction constant 10**(-pKa)
RE.add_reaction(
    gamma=10**(-pKa),
    reactant_types=[TYPES["HA"]],
    product_types=[TYPES["A"], TYPES["B"]],
    default_charges={TYPES["HA"]: CHARGES["HA"],
                     TYPES["A"]: CHARGES["A"],
                     TYPES["B"]: CHARGES["B"]}
)

def equilibrate_reaction(reaction_steps=1):
    """Perform reaction moves without recording any observable.

    Parameters
    ----------
    reaction_steps : int
        Number of reaction steps forwarded to the module-level reaction
        object ``RE``.
    """
    RE.reaction(reaction_steps=reaction_steps)

def perform_sampling(type_A, num_samples, num_As:np.ndarray, reaction_steps, 
                     prob_integration=0.5, integration_steps=1000):
    """Collect samples of the number of particles of type ``type_A``.

    Each sample consists of an optional MD integration, a batch of reaction
    moves and one measurement. The function works on the module-level
    ``system`` and ``RE`` objects and writes its results into ``num_As``.

    Parameters
    ----------
    type_A : particle type whose particles are counted
    num_samples : number of samples to collect
    num_As : array that receives the counts; entries ``0 .. num_samples - 1``
        are overwritten in place
    reaction_steps : number of reaction steps per sample; the caller should
        request at least one reaction attempt per reactive particle
    prob_integration : probability of running the MD integration before the
        reaction moves of a sample (only used when ``USE_WCA`` is set)
    integration_steps : number of MD steps per integration run
    """
    for sample in range(num_samples):
        # a random number is drawn only when the WCA interaction is active
        run_md = USE_WCA and np.random.random() < prob_integration
        if run_md:
            system.integrator.run(integration_steps)
        RE.reaction(reaction_steps=reaction_steps)
        num_As[sample] = system.number_of_particles(type=type_A)

# empty numpy array as a placeholder for collecting the data
# (one row per pH value, one column per sample; filled with -1 so that
# entries that were never written are recognizable)
num_As_at_each_pH = -np.ones((len(pHs), NUM_SAMPLES))  # number of A- species observed at each sample

# run a productive simulation and collect the data
print(f"Simulated pH values: {pHs}")
for ipH, pH in enumerate(pHs):
    print(f"Run pH {pH:.2f} ...")
    RE.constant_pH = pH  # set new pH value
    start_time = time.time()
    equilibrate_reaction(reaction_steps=N_ACID + 1)  # pre-equilibrate the reaction to the new pH value
    # the row slice is passed as the output buffer, so the samples are written
    # directly into num_As_at_each_pH
    perform_sampling(type_A=TYPES["A"],
                     num_samples=NUM_SAMPLES, 
                     num_As=num_As_at_each_pH[ipH, :],
                     reaction_steps=N_ACID + 1,
                     prob_integration=PROB_INTEGRATION)  # perform sampling / run production simulation
    # wall-clock time of equilibration plus sampling at this pH
    runtime = (time.time() - start_time) * ureg.s
    print(f"runtime: {runtime:.2g};",
          f"measured number of A-: {np.mean(num_As_at_each_pH[ipH]):.2f}", 
          f"(ideal: {N_ACID * ideal_alpha(pH, pKa):.2f})", 
          )
print("\nDONE")

Simulated pH values: [3.88       4.16571429 4.45142857 4.73714286 5.02285714 5.30857143
 5.59428571 5.88       6.16571429 6.45142857 6.73714286 7.02285714
 7.30857143 7.59428571 7.88      ]
Run pH 3.88 ...
runtime: 2.3 second; measured number of A-: 1.61 (ideal: 1.82)
Run pH 4.17 ...
runtime: 2.6 second; measured number of A-: 3.26 (ideal: 3.24)
Run pH 4.45 ...
runtime: 2.5 second; measured number of A-: 5.21 (ideal: 5.43)
Run pH 4.74 ...
runtime: 2.6 second; measured number of A-: 8.35 (ideal: 8.37)
Run pH 5.02 ...
runtime: 3 second; measured number of A-: 11.65 (ideal: 11.63)
Run pH 5.31 ...
runtime: 2.8 second; measured number of A-: 14.67 (ideal: 14.57)
Run pH 5.59 ...
runtime: 2.9 second; measured number of A-: 16.84 (ideal: 16.76)
Run pH 5.88 ...
runtime: 2.9 second; measured number of A-: 18.14 (ideal: 18.18)
Run pH 6.17 ...
runtime: 2.7 second; measured number of A-: 18.94 (ideal: 19.02)
Run pH 6.45 ...
runtime: 2.6 second; measured number of A-: 19.42 (ideal: 19.48)
Run pH 6.74 ...
runtime: 2.6 second; measured number of A-: 19.78 (ideal: 19.73)
Run pH 7.02 ...
runtime: 2.7 second; measured number of A-: 19.86 (ideal: 19.86)
Run pH 7.31 ...
runtime: 2.5 second; measured number of A-: 19.93 (ideal: 19.93)
Run pH 7.59 ...
runtime: 2.2 second; measured number of A-: 19.96 (ideal: 19.96)
Run pH 7.88 ...
runtime: 2.2 second; measured number of A-: 19.98 (ideal: 19.98)

DONE

# statistical analysis of the results
def block_analyze(input_data, n_blocks=16):
    """Estimate averages, standard errors and autocorrelation times by blocking.

    Every row of ``input_data`` is treated as an independent time series. The
    series is cut into ``n_blocks`` consecutive blocks of equal size; the
    standard error of the mean is obtained from the scatter of the block
    averages, and the autocorrelation time from the ratio of the variance of
    the block averages to the variance of the individual samples.

    Parameters
    ----------
    input_data : array-like, shape (n_series, n_samples)
        Time series, one per row.
    n_blocks : int
        Number of blocks. The default of 16 is recommended by Janke as a
        reasonable compromise between the conflicting requirements on block
        size and number of blocks.

    Returns
    -------
    av_data : ndarray, shape (n_series,)
        Average of every series over all samples.
    err_data : ndarray, shape (n_series,)
        Standard error of the mean, estimated from the block averages.
    tau_data : ndarray, shape (n_series,)
        Estimated autocorrelation time in units of samples. The unphysical
        value -1.0 marks series for which it cannot be computed because the
        samples have no variance (e.g. all samples are identical).
    block_size : int
        Number of samples per block.

    Raises
    ------
    ValueError
        If ``n_blocks`` is smaller than 2 or exceeds the number of samples.

    Notes
    -----
    If the number of samples is not a multiple of ``n_blocks``, the trailing
    samples do not enter the block averages but still enter ``av_data``.
    """
    data = np.asarray(input_data)
    if n_blocks < 2:
        raise ValueError(f"n_blocks must be at least 2, got {n_blocks}")
    block_size = int(data.shape[1] / n_blocks)
    if block_size < 1:
        raise ValueError(
            f"cannot split {data.shape[1]} samples into {n_blocks} non-empty blocks")
    print(f"block_size: {block_size}")
    # initialize the array of per-block averages
    block_average = np.zeros((n_blocks, data.shape[0]))
    # calculate averages per each block
    for block in range(n_blocks):
        block_average[block] = np.average(data[:, block * block_size: (block + 1) * block_size], axis=1)
    # calculate the average and average of the square
    av_data = np.average(data, axis=1)
    av2_data = np.average(data * data, axis=1)
    # calculate the variance of the block averages
    block_var = np.var(block_average, axis=0)
    # calculate standard error of the mean
    err_data = np.sqrt(block_var / (n_blocks - 1))
    # estimate autocorrelation time using the formula given by Janke
    # this assumes that the errors have been correctly estimated
    sample_var = av2_data - av_data * av_data
    # The formula divides by the variance of the samples, so it is the variance
    # (not the mean) that must be non-zero. For non-negative data such as
    # particle counts this also covers the case of a vanishing mean.
    tau_data = np.full(av_data.shape, -1.0)
    has_variance = sample_var > 0
    tau_data[has_variance] = 0.5 * block_size * n_blocks / (n_blocks - 1) * block_var[has_variance] \
        / sample_var[has_variance]
    return av_data, err_data, tau_data, block_size

# estimate the statistical error and the autocorrelation time using the formula given by Janke
# (each row of num_As_at_each_pH, i.e. each pH value, is analyzed as a separate time series)
av_num_As, err_num_As, tau, block_size = block_analyze(num_As_at_each_pH, N_BLOCKS)
print(f"av = {av_num_As}")
print(f"err = {err_num_As}")
print(f"tau = {tau}")

# calculate the average ionization degree
# (number of ionized groups divided by the total number of acid groups)
av_alpha = av_num_As / N_ACID
err_alpha = err_num_As / N_ACID

# plot the simulation results compared with the ideal titration curve
# (the horizontal axis is shifted by pKa)
plt.figure(figsize=(10, 6), dpi=80)
plt.errorbar(pHs - pKa, av_alpha, err_alpha, marker='o', linestyle='none',
             label=r"simulation")
# finer pH grid over the same range for a smooth ideal curve
pHs2 = np.linspace(pHmin, pHmax, num=50)
plt.plot(pHs2 - pKa, ideal_alpha(pHs2, pKa), label=r"Henderson-Hasselbalch")
plt.xlabel(r'$\mathrm{pH} - \mathrm{p}K_{\mathrm{A}}$', fontsize=16)
plt.ylabel(r'$\alpha$', fontsize=16)
plt.legend(fontsize=16)
plt.show()

block_size: 10
av = [ 1.6125   3.2625   5.20625  8.35    11.65    14.66875 16.8375  18.1375
 18.9375  19.41875 19.78125 19.8625  19.925   19.9625  19.975  ]
err = [0.0625     0.10363196 0.09893969 0.17199806 0.09036961 0.14222012
 0.12209115 0.10522793 0.06884463 0.04762593 0.0367636  0.02868652
 0.01707825 0.01547848 0.01936492]
tau = [0.21369804 0.38294217 0.23722356 0.46841498 0.14390602 0.45625668
 0.47726846 0.4015393  0.38549113 0.34179482 0.51883786 0.45846935
 0.33633634 0.53102453 0.48484848]

# check if the blocks contain enough data for reliable error estimates
print(f"uncorrelated samples per block:\nblock_size/tau = {block_size / tau}")
threshold = 10  # block size should be much greater than the correlation time
# The test is written as "not (ratio >= threshold)" so that an undefined (NaN)
# autocorrelation time is reported as unreliable as well: NaN compares False
# with any number and would pass unnoticed through "ratio < threshold".
if np.any(~(block_size / tau >= threshold)):
    print(f"\nWarning: some blocks may contain less than {threshold} uncorrelated samples."
          "\nYour error estimates may be unreliable."
          "\nPlease, check them using a more sophisticated method or run a longer simulation.")
    print(f"? block_size/tau > threshold ? : {block_size / tau > threshold}")
else:
    print(f"\nAll blocks seem to contain more than {threshold} uncorrelated samples."
          "\nError estimates should be OK.")

uncorrelated samples per block:
block_size/tau = [46.795      26.1136033  42.15432961 21.34859155 69.48979592 21.91748745
 20.95256813 24.90416275 25.94093407 29.25731917 19.27384393 21.81170886
 29.73214286 18.83152174 20.625     ]

All blocks seem to contain more than 10 uncorrelated samples.Error estimates should be OK.

# plot the deviations from the ideal result
plt.figure(figsize=(10, 6), dpi=80)
# largest absolute deviation; the vertical axis is made symmetric around zero
# and extends to 1.5 times this value
ylim = np.amax(abs(av_alpha - ideal_alpha(pHs, pKa)))
plt.ylim((-1.5 * ylim, 1.5 * ylim))
plt.errorbar(pHs - pKa, av_alpha - ideal_alpha(pHs, pKa),
             err_alpha, marker='o', linestyle='none', label=r"simulation")
# the ideal result deviates from itself by zero: a horizontal reference line at 0
plt.plot(pHs - pKa, 0.0 * ideal_alpha(pHs, pKa), label=r"Henderson-Hasselbalch")
plt.xlabel(r'$\mathrm{pH} - \mathrm{p}K_{\mathrm{A}}$', fontsize=16)
plt.ylabel(r'$\alpha - \alpha_{ideal}$', fontsize=16)
plt.legend(fontsize=16)
plt.show()

# average concentration of B+ is the same as the concentration of A-
av_c_Bplus = av_alpha * C_ACID
err_c_Bplus = err_alpha * C_ACID  # error in the average concentration

# ideal concentrations on a pH grid wider than the simulated range
full_pH_range = np.linspace(2, 12, 100)
ideal_c_Aminus = ideal_alpha(full_pH_range, pKa) * C_ACID
ideal_c_OH = np.power(10.0, -(pKw - full_pH_range))*ureg.molar
ideal_c_H = np.power(10.0, -full_pH_range)*ureg.molar
# ideal_c_M is calculated from electroneutrality
# (the expression is negative where H+ outweighs A- and OH-; those points are
# filtered out when the curve is plotted below)
ideal_c_M = (ideal_c_Aminus + ideal_c_OH - ideal_c_H)

# plot the simulation results compared with the ideal results of the cations
plt.figure(figsize=(10, 6), dpi=80)
plt.errorbar(pHs,
             av_c_Bplus.magnitude,
             err_c_Bplus.magnitude,
             marker='o', c="tab:blue", linestyle='none',
             label=r"measured $c_{\mathrm{B^+}}$", zorder=2)
plt.plot(full_pH_range, 
         ideal_c_H.magnitude, 
         c="tab:green",
         label=r"ideal $c_{\mathrm{H^+}}$", 
         zorder=0)
# only the strictly positive part of ideal_c_M is drawn
plt.plot(full_pH_range[np.nonzero(ideal_c_M.magnitude > 0.)], 
         ideal_c_M.magnitude[np.nonzero(ideal_c_M.magnitude > 0.)], 
         c="tab:orange",
         label=r"ideal $c_{\mathrm{M^+}}$", 
         zorder=0)
plt.plot(full_pH_range, 
         ideal_c_Aminus.magnitude, 
         c="tab:blue", 
         ls=(0, (5, 5)),
         label=r"ideal $c_{\mathrm{A^-}}$", 
         zorder=1)
plt.yscale("log")
plt.ylim(5e-6,1e-2)
plt.xlabel('input pH', fontsize=16)
plt.ylabel(r'concentration $c$ $[\mathrm{mol/L}]$', fontsize=16)
plt.legend(fontsize=16)
plt.show()

# Concentration of the anion X- of the added strong acid, from electroneutrality.
# X- is required only where H+ outweighs A- and OH-; elsewhere the expression is
# negative and is clipped to zero, because a concentration cannot be negative.
ideal_c_X = np.clip(
    -(ideal_c_Aminus + ideal_c_OH - ideal_c_H).to('mol/L').magnitude,
    0.0, None) * ureg.molar
# ideal_c_M is the same expression with the opposite sign, so without clipping
# the two terms would cancel exactly in the ionic strength below. Use the
# non-negative part of ideal_c_M, for the same reason as above.
ideal_c_M_nonneg = np.clip(ideal_c_M.to('mol/L').magnitude, 0.0, None) * ureg.molar

# ideal ionic strength of the small ions (all of them monovalent)
ideal_ionic_strength = 0.5 * \
    (ideal_c_X + ideal_c_M_nonneg + ideal_c_H + ideal_c_OH + 2 * C_SALT)
# in constant-pH simulation ideal_c_Aminus = ideal_c_Bplus
cpH_ionic_strength = 0.5 * (ideal_c_Aminus + 2 * C_SALT)
cpH_ionic_strength_measured = 0.5 * (av_c_Bplus + 2 * C_SALT)
cpH_error_ionic_strength_measured = 0.5 * err_c_Bplus

# compare the measured ionic strength with the constant-pH and the ideal predictions
plt.figure(figsize=(10, 6), dpi=80)
plt.errorbar(pHs,
             cpH_ionic_strength_measured.magnitude,
             cpH_error_ionic_strength_measured.magnitude,
             c="tab:blue",
             linestyle='none', marker='o',
             label=r"measured", zorder=3)
plt.plot(full_pH_range,
         cpH_ionic_strength.magnitude,
         c="tab:blue",
         ls=(0, (5, 5)),
         label=r"constant-pH", zorder=2)
plt.plot(full_pH_range,
         ideal_ionic_strength.magnitude,
         c="tab:orange",
         linestyle='-',
         label=r"Henderson-Hasselbalch", zorder=1)

plt.ylim(1.8e-3,3e-3)
plt.xlabel('input pH', fontsize=16)
plt.ylabel(r'Ionic Strength [$\mathrm{mol/L}$]', fontsize=16)
plt.legend(fontsize=16)
plt.show()

The constant-pH ensemble method for acid-base reactions

Expected prior knowledge

Introduction

The chemical equilibrium and reaction constant

The constant pH method

Simulation setup

Set the reduced units of energy and length

Set the key physical parameters that uniquely define the system

Set the range of parameters that we want to vary

Choose which interactions should be activated

Set the number of samples to be collected

Calculate the dependent parameters

Set the particle types and charges

Initialize the ESPResSo system

Set up particles and bonded-interactions

Set up non-bonded-interactions

Set up the constant pH ensemble using the reaction ensemble module

Run the simulations

Results

Statistical analysis of the data

The Neutralizing Ion $\mathrm{B^+}$

Suggested problems for further work

References