Optimization Tutorial

Trey V. Wenger (c) July 2025

Here we demonstrate how to optimize the number of cloud components in a HFSRatioModel model.

[1]:
# General imports
import time

import matplotlib.pyplot as plt
import arviz as az
import pandas as pd
import numpy as np
import pymc as pm

import pytensor
print("pytensor version:", pytensor.__version__)

print("pymc version:", pm.__version__)
print("arviz version:", az.__version__)

import bayes_spec
print("bayes_spec version:", bayes_spec.__version__)

import bayes_hfs
print("bayes_hfs version:", bayes_hfs.__version__)

# Notebook configuration
pd.options.display.max_rows = None
pytensor version: 2.30.3
pymc version: 5.22.0
arviz version: 0.22.0dev
bayes_spec version: 1.9.0
bayes_hfs version: 1+0.gc6f9e00.dirty

Preparing Molecule Data

[2]:
from bayes_hfs import get_molecule_data, supplement_molecule_data
import pickle

try:
    all_mol_data_12CN, all_mol_metadata_12CN = get_molecule_data("CN, v=0,1", fmin=100.0, fmax=200.0)
    with open("mol_data_12CN.pkl", "wb") as f:
        pickle.dump(all_mol_data_12CN, f)
    with open("mol_metadata_12CN.pkl", "wb") as f:
        pickle.dump(all_mol_metadata_12CN, f)
except:
    with open("mol_data_12CN.pkl", "rb") as f:
        all_mol_data_12CN = pickle.load(f)
    with open("mol_metadata_12CN.pkl", "rb") as f:
        all_mol_metadata_12CN = pickle.load(f)

# Keep only Kl = 0 transitions
all_mol_data_12CN = all_mol_data_12CN[all_mol_data_12CN["Kl"] == 0]

# Add GLO
all_mol_data_12CN["GLO"] = 2 * all_mol_data_12CN["F1l"]

try:
    all_mol_data_13CN, all_mol_metadata_13CN = get_molecule_data("C-13-N", fmin=100.0, fmax=200.0)
    with open("mol_data_13CN.pkl", "wb") as f:
        pickle.dump(all_mol_data_13CN, f)
    with open("mol_metadata_13CN.pkl", "wb") as f:
        pickle.dump(all_mol_metadata_13CN, f)
except:
    with open("mol_data_13CN.pkl", "rb") as f:
        all_mol_data_13CN = pickle.load(f)
    with open("mol_metadata_13CN.pkl", "rb") as f:
        all_mol_metadata_13CN = pickle.load(f)

# Add GLO
all_mol_data_13CN["GLO"] = 2 * all_mol_data_13CN["F1l"] + 1

mol_data_12CN = supplement_molecule_data(all_mol_data_12CN, all_mol_metadata_12CN)
mol_data_13CN = supplement_molecule_data(all_mol_data_13CN, all_mol_metadata_13CN)

Model Definition and Simulated Data

[3]:
from bayes_spec import SpecData
from bayes_hfs import HFSRatioModel

# spectral axis definition
freq_axis_12CN_1 = np.arange(113110.0, 113200.0, 0.2) # MHz
freq_axis_12CN_2 = np.arange(113470.0, 113530.0, 0.2) # MHz
freq_axis_13CN_1 = np.arange(108620.0, 108670.0, 0.2) # MHz
freq_axis_13CN_2 = np.arange(108770.0, 108810.0, 0.2) # MHz

# data noise can either be a scalar (assumed constant noise across the spectrum)
# or an array of the same length as the data
noise = 0.001 # K

# brightness data. In this case, we just throw in some random data for now
# since we are only doing this in order to simulate some actual data.
brightness_data_12CN_1 = noise * np.random.randn(len(freq_axis_12CN_1)) # K
brightness_data_12CN_2 = noise * np.random.randn(len(freq_axis_12CN_2)) # K
brightness_data_13CN_1 = noise * np.random.randn(len(freq_axis_13CN_1)) # K
brightness_data_13CN_2 = noise * np.random.randn(len(freq_axis_13CN_2)) # K

# CNRatioModel expects observation names to contain either "12CN" or "13CN"
observation_12CN_1 = SpecData(
    freq_axis_12CN_1,
    brightness_data_12CN_1,
    noise,
    xlabel=r"LSRK Frequency (MHz)",
    ylabel=r"CN $T_B$ (K)",
)
observation_12CN_2 = SpecData(
    freq_axis_12CN_2,
    brightness_data_12CN_2,
    noise,
    xlabel=r"LSRK Frequency (MHz)",
    ylabel=r"CN $T_B$ (K)",
)
observation_13CN_1 = SpecData(
    freq_axis_13CN_1,
    brightness_data_13CN_1,
    noise,
    xlabel=r"LSRK Frequency (MHz)",
    ylabel=r"$^{13}$CN $T_B$ (K)",
)
observation_13CN_2 = SpecData(
    freq_axis_13CN_2,
    brightness_data_13CN_2,
    noise,
    xlabel=r"LSRK Frequency (MHz)",
    ylabel=r"$^{13}$CN $T_B$ (K)",
)
dummy_data = {
    "12CN-1": observation_12CN_1,
    "12CN-2": observation_12CN_2,
    "13CN-1": observation_13CN_1,
    "13CN-2": observation_13CN_2,
}

# association each dataset with the related species
mol_keys = {
    "12CN": ["12CN-1", "12CN-2"],
    "13CN": ["13CN-1", "13CN-2"],
}
[4]:
from bayes_hfs import HFSRatioModel
from bayes_hfs import physics

# Initialize and define the model
n_clouds = 3 # number of cloud components
baseline_degree = 0 # polynomial baseline degree
model = HFSRatioModel(
    mol_data_12CN, # molecular data for species 1
    mol_data_13CN, # molecular data for species 2
    mol_keys, # dataset association
    dummy_data,
    bg_temp = 2.7, # assumed background temperature (K)
    Beff = 1.0, # beam efficiency
    Feff = 1.0, # forward efficiency
    n_clouds=n_clouds,
    baseline_degree=baseline_degree,
    seed=1234,
    verbose=True
)
model.add_priors(
    prior_log10_Ntot1 = [13.5, 0.5], # mean and width of log10 total column density prior of first species (cm-2)
    prior_ratio = 0.1, # width of the column density ratio between the second and first species
    prior_fwhm2 = 1.0, # width of FWHM^2 prior (km2 s-2)
    prior_velocity = [-3.0, 3.0], # upper and lower limit of velocity prior (km/s)
    prior_log10_Tex_CTEX = [0.75, 0.25], # mean and width of log10 CTEX excitation temperature prior (K)
    assume_CTEX1 = False, # do not assume CTEX for the first species
    assume_CTEX2 = False, # assume CTEX for the second species for speed
    prior_log10_CTEX_variance = [-4.0, 1.0], # offset and width of log10 CTEX variance prior
    clip_weights = 1.0e-9, # clip statistical weights between [clip_weights, 1-clip_weights]
    clip_tau = -10.0, # clip optical depths below to prevent masers
    prior_fwhm_L = None, # assume Gaussian line profile
    prior_baseline_coeffs = None, # use default baseline priors
)
model.add_likelihood()

sim_params = {
    "log10_Ntot_12CN": np.array([13.8, 13.9, 14.0]),
    "ratio": np.array([1.0/65.0, 1.0/60.0, 1.0/55.0]),
    "fwhm2": np.array([1.0, 1.25, 1.5])**2.0,
    "velocity": [-2.0, 0.0, 2.5],
    "log10_Tex_CTEX": np.log10([4.46, 3.98, 3.16]),
    "log10_CTEX_variance": np.array([-1.5, -2.0, -3.0]),
    "baseline_12CN-1_norm": [0.0],
    "baseline_12CN-2_norm": [0.0],
    "baseline_13CN-1_norm": [0.0],
    "baseline_13CN-2_norm": [0.0],
}

CTEX_weights_12CN = physics.calc_stat_weight(
    model.mol1_data["states"]["deg"][None, :],
    model.mol1_data["states"]["E"][None, :],
    10.0 ** sim_params["log10_Tex_CTEX"][:, None],
).eval()

CTEX_concentration_12CN = (
    len(model.mol1_data["states"]["state"])
    * CTEX_weights_12CN
    / 10.0**sim_params["log10_CTEX_variance"][:, None]
)

from scipy.stats import dirichlet

sim_params["weights_12CN"] = np.stack([
    dirichlet(CTEX_concentration_12CN[i]).rvs()[0] for i in range(n_clouds)
])

# add derived quantities to sim_params
for key in model.cloud_deterministics:
    if key not in sim_params.keys():
        sim_params[key] = model.model[key].eval(sim_params, on_unused_input="ignore")

# Evaluate and save simulated observations
sim_12CN_1 = model.model["12CN-1"].eval(sim_params, on_unused_input="ignore")
sim_12CN_2 = model.model["12CN-2"].eval(sim_params, on_unused_input="ignore")
sim_13CN_1 = model.model["13CN-1"].eval(sim_params, on_unused_input="ignore")
sim_13CN_2 = model.model["13CN-2"].eval(sim_params, on_unused_input="ignore")

# pack simulated data
observation_12CN_1 = SpecData(
    freq_axis_12CN_1,
    sim_12CN_1,
    noise,
    xlabel=r"LSRK Frequency (MHz)",
    ylabel=r"CN $T_B$ (K)",
)
observation_12CN_2 = SpecData(
    freq_axis_12CN_2,
    sim_12CN_2,
    noise,
    xlabel=r"LSRK Frequency (MHz)",
    ylabel=r"CN $T_B$ (K)",
)
observation_13CN_1 = SpecData(
    freq_axis_13CN_1,
    sim_13CN_1,
    noise,
    xlabel=r"LSRK Frequency (MHz)",
    ylabel=r"$^{13}$CN $T_B$ (K)",
)
observation_13CN_2 = SpecData(
    freq_axis_13CN_2,
    sim_13CN_2,
    noise,
    xlabel=r"LSRK Frequency (MHz)",
    ylabel=r"$^{13}$CN $T_B$ (K)",
)
data = {
    "12CN-1": observation_12CN_1,
    "12CN-2": observation_12CN_2,
    "13CN-1": observation_13CN_1,
    "13CN-2": observation_13CN_2,
}
[5]:
sim_params
[5]:
{'log10_Ntot_12CN': array([13.8, 13.9, 14. ]),
 'ratio': array([0.01538462, 0.01666667, 0.01818182]),
 'fwhm2': array([1.    , 1.5625, 2.25  ]),
 'velocity': [-2.0, 0.0, 2.5],
 'log10_Tex_CTEX': array([0.64933486, 0.59988307, 0.49968708]),
 'log10_CTEX_variance': array([-1.5, -2. , -3. ]),
 'baseline_12CN-1_norm': [0.0],
 'baseline_12CN-2_norm': [0.0],
 'baseline_13CN-1_norm': [0.0],
 'baseline_13CN-2_norm': [0.0],
 'weights_12CN': array([[0.18099997, 0.35774445, 0.05141127, 0.10060146, 0.04239329,
         0.11086714, 0.15598241],
        [0.18391739, 0.37538564, 0.05147776, 0.0953406 , 0.04756006,
         0.09716964, 0.14914891],
        [0.22015952, 0.43069068, 0.03831167, 0.07664031, 0.03802426,
         0.07752001, 0.11865354]]),
 'log10_Ntot_13CN': array([11.98708664, 12.12184875, 12.25963731]),
 'CTEX_weights_12CN': array([[1.99954842, 4.        , 0.59193533, 1.18327051, 0.58954346,
         1.17923313, 1.76919492],
        [1.99949396, 4.        , 0.51109864, 1.02161662, 0.5087849 ,
         1.01771121, 1.52690069],
        [1.99936267, 4.        , 0.35871387, 0.71691448, 0.35666979,
         0.71346443, 1.07049146]]),
 'Tex_12CN': array([[6.62377784, 4.84728125, 3.18024483],
        [7.29549498, 5.1227256 , 3.24392405],
        [3.48460532, 4.06018147, 3.16334071],
        [3.66223901, 4.2517178 , 3.22631454],
        [3.26304903, 3.72315005, 2.96168302],
        [3.77468031, 4.02382954, 3.04074123],
        [2.70675076, 4.08302572, 3.14052123],
        [3.41789579, 3.88317284, 3.01669258],
        [2.81252526, 4.2761819 , 3.20240169]]),
 'tau_12CN': array([[0.02524333, 0.0360111 , 0.06420367],
        [0.17973349, 0.2691383 , 0.50431054],
        [0.28473286, 0.31504016, 0.51418762],
        [0.33573084, 0.37674563, 0.64106031],
        [0.38104257, 0.42733998, 0.68628587],
        [0.88301926, 1.03135861, 1.74637405],
        [0.32083078, 0.32311411, 0.53005213],
        [0.26725006, 0.30419385, 0.50807069],
        [0.03592579, 0.03636557, 0.06221058]]),
 'tau_total_12CN': array([2.71350897, 3.11930731, 5.25675546]),
 'TR_12CN': array([[4.27597848, 2.62918409, 1.20296471],
        [4.91419543, 2.87859882, 1.25318148],
        [1.44740238, 1.93265667, 1.18910571],
        [1.59417095, 2.09880298, 1.23863245],
        [1.26436858, 1.64130685, 1.0295351 ],
        [1.68461726, 1.89689066, 1.09001642],
        [0.8404223 , 1.94780861, 1.16743383],
        [1.38876463, 1.77628294, 1.07132523],
        [0.9174165 , 2.11549053, 1.21586649]]),
 'CTEX_weights_13CN': array([[3.        , 0.99376119, 2.9815721 , 4.9699281 , 0.92851903,
         0.31070072, 0.93202191, 1.55316693, 0.30874373, 0.92620269,
         1.54373121, 0.92472116, 1.54142752, 2.15832747],
        [3.        , 0.9930114 , 2.9793573 , 4.96631358, 0.80605378,
         0.26984676, 0.80946217, 1.34890613, 0.26794283, 0.80380078,
         1.33972638, 0.80236011, 1.3374862 , 1.87280054],
        [3.        , 0.9912059 , 2.9740239 , 4.95760931, 0.57313394,
         0.192086  , 0.57618799, 0.96013626, 0.19038061, 0.57111701,
         0.95191395, 0.56982806, 0.94990964, 1.33015962]]),
 'Tex_13CN': array([[3.63387014, 4.0123908 , 3.08464866],
        [3.83675886, 4.4342716 , 3.08229104],
        [3.84009247, 3.76818165, 3.2042096 ],
        [3.74209719, 4.19126825, 3.15773939],
        [4.66544699, 3.82749742, 3.11567771],
        [4.52148561, 4.26470704, 3.07173897],
        [5.00127202, 4.48112345, 3.08933137],
        [5.39889112, 4.17955656, 3.20886035],
        [5.20750875, 4.7045325 , 3.16240343],
        [4.0212438 , 5.15561858, 3.02750414],
        [4.02487718, 4.28049286, 3.14440333],
        [3.89187731, 3.60468882, 2.9993573 ],
        [4.17098129, 4.06799019, 3.15767616],
        [4.19325594, 3.19195589, 3.24091144],
        [4.12801426, 3.40738764, 3.11165721],
        [4.93507743, 4.35670617, 3.05954754],
        [4.05618042, 4.56249   , 3.11278043],
        [4.01553487, 3.74768332, 3.06805292],
        [3.95529061, 4.28992818, 3.1329999 ],
        [4.40618018, 3.92832745, 3.12736718],
        [4.18768408, 4.81107389, 3.18901639],
        [4.46192129, 4.46628177, 3.31607163],
        [4.2783927 , 4.38690557, 3.08338147],
        [4.33093502, 5.06851471, 3.26663988],
        [5.75698581, 4.81377011, 3.1490468 ],
        [4.38657499, 4.66567441, 3.10011369],
        [4.41108682, 3.55311919, 3.17987158],
        [4.33931568, 3.82058713, 3.05597262]]),
 'tau_13CN': array([[6.91016104e-05, 8.62078555e-05, 1.62414632e-04],
        [8.13865602e-05, 1.16170996e-04, 1.98952714e-04],
        [3.17002408e-05, 4.90748819e-05, 7.63535671e-05],
        [1.47575919e-04, 1.80064378e-04, 3.49181577e-04],
        [7.71761587e-05, 1.30964896e-04, 2.07535796e-04],
        [2.17990215e-04, 2.89708089e-04, 5.72893422e-04],
        [2.47769861e-04, 3.43972966e-04, 6.84887892e-04],
        [7.31425101e-04, 1.29197271e-03, 2.11177097e-03],
        [1.54527530e-03, 2.10941210e-03, 4.34270099e-03],
        [8.23550053e-04, 9.98210130e-04, 2.27788743e-03],
        [2.47848809e-03, 3.32640756e-03, 6.75966969e-03],
        [1.07463295e-03, 1.45504622e-03, 2.63385994e-03],
        [1.13999026e-03, 1.77112462e-03, 2.87381935e-03],
        [8.52386088e-04, 1.47913347e-03, 2.13240347e-03],
        [7.37813449e-04, 1.23676746e-03, 1.86095314e-03],
        [3.77625764e-03, 5.59115358e-03, 1.15866992e-02],
        [3.38711307e-03, 4.11006613e-03, 8.37106802e-03],
        [9.41168554e-04, 1.25350083e-03, 2.32747602e-03],
        [6.96307786e-03, 8.62039131e-03, 1.69549542e-02],
        [3.35612901e-03, 5.45424590e-03, 8.73289194e-03],
        [1.64511631e-03, 1.99815187e-03, 4.08158818e-03],
        [1.15227317e-03, 1.76718272e-03, 2.94828359e-03],
        [1.25328535e-03, 1.59472409e-03, 3.19246242e-03],
        [8.79240704e-05, 1.04039577e-04, 2.20310632e-04],
        [1.16316006e-05, 1.78720936e-05, 3.86220767e-05],
        [2.82315246e-04, 3.78176622e-04, 8.07986128e-04],
        [7.29919449e-05, 1.12072231e-04, 2.07457051e-04],
        [1.98775501e-04, 2.92855454e-04, 5.68804114e-04]]),
 'tau_total_13CN': array([0.03338432, 0.04615867, 0.08728389]),
 'TR_13CN': array([[1.63769168, 1.96304246, 1.18617309],
        [1.8108763 , 2.33556625, 1.184289  ],
        [1.8136662 , 1.75193053, 1.28186856],
        [1.72943434, 2.11957112, 1.24428871],
        [2.54293543, 1.80263014, 1.21062722],
        [2.41313236, 2.18424104, 1.17551796],
        [2.84302083, 2.37219693, 1.18580068],
        [3.20891541, 2.10436621, 1.28139781],
        [3.03194919, 2.5728406 , 1.24385719],
        [1.96259901, 2.98090426, 1.13437175],
        [1.96568165, 2.19018492, 1.22695166],
        [1.85033635, 1.60534487, 1.11222735],
        [2.09347408, 2.00319904, 1.23749058],
        [2.11306356, 1.26499364, 1.30450211],
        [2.0557196 , 1.44064407, 1.20074399],
        [2.77867001, 2.25756471, 1.15937469],
        [1.99268517, 2.44131639, 1.20148997],
        [1.9572337 , 1.72631391, 1.16600468],
        [1.90319648, 2.19642801, 1.21615459],
        [2.29963139, 1.87982136, 1.21163864],
        [2.10608725, 2.66372918, 1.26092144],
        [2.3491845 , 2.35308013, 1.3637902 ],
        [2.18598191, 2.28226144, 1.17653557],
        [2.23236108, 2.89813924, 1.32336919],
        [3.53239079, 2.66305005, 1.2265218 ],
        [2.2756973 , 2.52556554, 1.1848887 ],
        [2.29749236, 1.55445369, 1.24843537],
        [2.23371459, 1.78103118, 1.15001426]])}

Optimize

We use the Optimize class for optimization.

[6]:
from bayes_spec import Optimize
from bayes_hfs import HFSRatioModel

# Initialize optimizer
opt = Optimize(
    HFSRatioModel,
    mol_data_12CN, # molecular data for species 1
    mol_data_13CN, # molecular data for species 2
    mol_keys, # dataset association
    data,
    bg_temp = 2.7, # assumed background temperature (K)
    Beff = 1.0, # beam efficiency
    Feff = 1.0, # forward efficiency
    max_n_clouds=5,
    baseline_degree=baseline_degree,
    seed=1234,
    verbose=True
)

# Define each model
opt.add_priors(
    prior_log10_Ntot1 = [13.5, 0.5], # mean and width of log10 total column density prior of first species (cm-2)
    prior_ratio = 0.1, # width of the column density ratio between the second and first species
    prior_fwhm2 = 1.0, # width of FWHM^2 prior (km2 s-2)
    prior_velocity = [-3.0, 3.0], # upper and lower limit of velocity prior (km/s)
    prior_log10_Tex_CTEX = [0.75, 0.25], # mean and width of log10 CTEX excitation temperature prior (K)
    assume_CTEX1 = False, # do not assume CTEX for the first species
    assume_CTEX2 = True, # assume CTEX for the second species for speed
    prior_log10_CTEX_variance = [-4.0, 1.0], # offset and width of log10 CTEX variance prior
    clip_weights = 1.0e-9, # clip statistical weights between [clip_weights, 1-clip_weights]
    clip_tau = -10.0, # clip optical depths below to prevent masers
    prior_fwhm_L = None, # assume Gaussian line profile
    prior_baseline_coeffs = None, # use default baseline priors
)
opt.add_likelihood()

Optimize has created max_n_clouds models, where opt.models[1] has n_clouds=1, opt.models[2] has n_clouds=2, etc.

[7]:
print(opt.models[4])
print(opt.models[4].n_clouds)
<bayes_hfs.hfs_ratio_model.HFSRatioModel object at 0x7bc38d9b2650>
4

By default (approx=True), the optimization algorithm first loops over every model and approximates the posterior distribution using variational inference. This is generally a bad idea, since VI is only an approximation and tends to struggle with complex models. Instead we use approx=False to sample every model with MCMC. This is slower but more robust.

We can supply arguments to fit and sample via dictionaries. Whichever model is the first to have a BIC within bic_threshold of the minimum BIC is the “best” model. The algorithm will terminate early if successive models have increasing BICs or fail to converge.

[8]:
fit_kwargs = {
    "rel_tolerance": 0.01,
    "abs_tolerance": 0.01,
    "learning_rate": 0.001,
}
sample_kwargs = {
    "chains": 8,
    "cores": 8,
    "n_init": 200_000,
    "init_kwargs": fit_kwargs,
    "nuts_kwargs": {"target_accept": 0.9},
}
solve_kwargs = {
    "init_params": "random_from_data",
    "n_init": 10,
    "max_iter": 1_000,
    "kl_div_threshold": 0.1,
}
opt.optimize(
    bic_threshold=10.0,
    sample_kwargs=sample_kwargs,
    fit_kwargs=fit_kwargs,
    solve_kwargs=solve_kwargs,
    approx=False,
    start_spread = {"velocity_norm": [0.1, 0.9]},
)
Null hypothesis BIC = 1.132e+07
Sampling n_cloud = 1 posterior...
Initializing NUTS using custom advi+adapt_diag strategy
Convergence achieved at 41000
Interrupted at 40,999 [20%]: Average Loss = 1.8552e+17
Multiprocess sampling (8 chains in 8 jobs)
NUTS: [baseline_12CN-1_norm, baseline_12CN-2_norm, baseline_13CN-1_norm, baseline_13CN-2_norm, log10_Ntot_12CN_norm, ratio_norm, fwhm2_norm, velocity_norm, log10_Tex_CTEX_norm, log10_CTEX_variance_norm, weights_12CN_norm]
Sampling 8 chains for 1_000 tune and 1_000 draw iterations (8_000 + 8_000 draws total) took 1162 seconds.
Adding log-likelihood to trace
GMM converged to unique solution
n_cloud = 1 solution = 0 BIC = 2.335e+06

Sampling n_cloud = 2 posterior...
Initializing NUTS using custom advi+adapt_diag strategy
Convergence achieved at 45400
Interrupted at 45,399 [22%]: Average Loss = 7.3459e+19
Multiprocess sampling (8 chains in 8 jobs)
NUTS: [baseline_12CN-1_norm, baseline_12CN-2_norm, baseline_13CN-1_norm, baseline_13CN-2_norm, log10_Ntot_12CN_norm, ratio_norm, fwhm2_norm, velocity_norm, log10_Tex_CTEX_norm, log10_CTEX_variance_norm, weights_12CN_norm]
Sampling 8 chains for 1_000 tune and 1_000 draw iterations (8_000 + 8_000 draws total) took 5737 seconds.
Adding log-likelihood to trace
GMM converged to unique solution
n_cloud = 2 solution = 0 BIC = 6.866e+05

Sampling n_cloud = 3 posterior...
Initializing NUTS using custom advi+adapt_diag strategy
Convergence achieved at 56200
Interrupted at 56,199 [28%]: Average Loss = 4.8623e+28
Multiprocess sampling (8 chains in 8 jobs)
NUTS: [baseline_12CN-1_norm, baseline_12CN-2_norm, baseline_13CN-1_norm, baseline_13CN-2_norm, log10_Ntot_12CN_norm, ratio_norm, fwhm2_norm, velocity_norm, log10_Tex_CTEX_norm, log10_CTEX_variance_norm, weights_12CN_norm]
Sampling 8 chains for 1_000 tune and 1_000 draw iterations (8_000 + 8_000 draws total) took 1155 seconds.
Adding log-likelihood to trace
There were 990 divergences in converged chains.
GMM converged to unique solution
6 of 8 chains appear converged.
n_cloud = 3 solution = 0 BIC = -1.277e+04

Sampling n_cloud = 4 posterior...
Initializing NUTS using custom advi+adapt_diag strategy
Convergence achieved at 65600
Interrupted at 65,599 [32%]: Average Loss = 7.3701e+21
Multiprocess sampling (8 chains in 8 jobs)
NUTS: [baseline_12CN-1_norm, baseline_12CN-2_norm, baseline_13CN-1_norm, baseline_13CN-2_norm, log10_Ntot_12CN_norm, ratio_norm, fwhm2_norm, velocity_norm, log10_Tex_CTEX_norm, log10_CTEX_variance_norm, weights_12CN_norm]
Sampling 8 chains for 1_000 tune and 1_000 draw iterations (8_000 + 8_000 draws total) took 5104 seconds.
Adding log-likelihood to trace
There were 330 divergences in converged chains.
GMM found 3 unique solutions
Solution 0: chains [np.int64(0), np.int64(1)]
Solution 1: chains [np.int64(3), np.int64(6), np.int64(7)]
Solution 2: chains [np.int64(4), np.int64(5)]
7 of 8 chains appear converged.
n_cloud = 4 solution = 0 BIC = -1.267e+04
n_cloud = 4 solution = 1 BIC = -1.267e+04
n_cloud = 4 solution = 2 BIC = -1.267e+04

Stopping criteria met.
Sampling n_cloud = 5 posterior...
Initializing NUTS using custom advi+adapt_diag strategy
Convergence achieved at 76300
Interrupted at 76,299 [38%]: Average Loss = 7.9475e+24
Multiprocess sampling (8 chains in 8 jobs)
NUTS: [baseline_12CN-1_norm, baseline_12CN-2_norm, baseline_13CN-1_norm, baseline_13CN-2_norm, log10_Ntot_12CN_norm, ratio_norm, fwhm2_norm, velocity_norm, log10_Tex_CTEX_norm, log10_CTEX_variance_norm, weights_12CN_norm]
Sampling 8 chains for 1_000 tune and 1_000 draw iterations (8_000 + 8_000 draws total) took 6427 seconds.
Adding log-likelihood to trace
There were 271 divergences in converged chains.
No solution found!
0 of 8 chains appear converged.

Stopping criteria met.
Stopping early.
[9]:
opt.best_model.n_clouds
[9]:
3
[12]:
from bayes_spec.plots import plot_predictive

posterior = opt.best_model.sample_posterior_predictive(
    thin=100, # keep one in {thin} posterior samples
)
axes = plot_predictive(opt.best_model.data, posterior.posterior_predictive)
Sampling: [12CN-1, 12CN-2, 13CN-1, 13CN-2]
../_images/notebooks_optimization_15_3.png
[13]:
plt.plot(opt.bics.keys(), opt.bics.values(), 'ko')
plt.xlabel("Number of Components")
plt.ylabel("BIC")
[13]:
Text(0, 0.5, 'BIC')
../_images/notebooks_optimization_16_1.png
[ ]: