HMM

Hidden Markov Models in Pyro

Pyro

Python

HMM

Hidden Markov Model with Poisson emissions

Authors

Tommaso Piscitelli

Erik De Luca

Published

May 24, 2025

Load dataset

Code

# !pip install pyprojroot

Collecting pyprojroot
  Downloading pyprojroot-0.3.0-py3-none-any.whl.metadata (4.8 kB)
Requirement already satisfied: typing-extensions in c:\users\erik4\appdata\local\programs\python\python313\lib\site-packages (from pyprojroot) (4.13.0)
Downloading pyprojroot-0.3.0-py3-none-any.whl (7.6 kB)
Installing collected packages: pyprojroot
Successfully installed pyprojroot-0.3.0
d:\GitHub\Blood-Donors\data

  WARNING: Retrying (Retry(total=4, connect=None, read=None, redirect=None, status=None)) after connection broken by 'ProtocolError('Connection aborted.', ConnectionResetError(10054, "Connessione in corso interrotta forzatamente dall'host remoto", None, 10054, None))': /packages/53/9b/eef01392be945c0fe86a8d084ba9188b1e2b22af037d7109b9f40a962cd0/pyprojroot-0.3.0-py3-none-any.whl.metadata

Code

import pandas as pd
import numpy as np
from hmmlearn import hmm
import matplotlib.pyplot as plt
from pyprojroot import here

df = pd.read_csv(here("data/recent_donations.csv"))
df

	unique_number	class_year	birth_year	first_donation_year	gender	y_2009	y_2010	y_2011	y_2012	y_2013	y_2014	y_2015	y_2016	y_2018	y_2019	y_2017	y_2020	y_2021	y_2022	y_2023
0	26308560	(1960,1970]	1965	1985	M	0	0	0	0	0	0	0	0	0	0	0	1	1	3	1
1	26309283	(1960,1970]	1966	2002	M	2	1	2	2	1	1	3	3	4	1	3	3	3	3	4
2	26317365	(1960,1970]	1961	1984	M	4	2	3	3	3	4	3	3	2	3	3	2	0	1	0
3	26318451	(1960,1970]	1967	1989	M	0	3	3	4	4	4	2	3	3	1	2	3	1	0	0
4	26319465	(1960,1970]	1964	1994	F	1	2	2	1	2	1	1	0	0	2	1	1	1	1	1
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
9231	27220599	(1970,1980]	1980	2022	M	0	0	0	0	0	0	0	0	0	0	0	0	0	2	0
9232	27220806	(2000,2010]	2002	2022	M	0	0	0	0	0	0	0	0	0	0	0	0	0	2	3
9233	27221247	(1990,2000]	2000	2022	F	0	0	0	0	0	0	0	0	0	0	0	0	0	2	0
9234	27221274	(1960,1970]	1966	2022	F	0	0	0	0	0	0	0	0	0	0	0	0	0	2	3
9235	27221775	(2000,2010]	2004	2022	M	0	0	0	0	0	0	0	0	0	0	0	0	0	2	1

9236 rows × 20 columns

Code

df["gender"] = df["gender"].map({"M": 0, "F": 1})

from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler()
df[["birth_year", "first_donation_year"]] = scaler.fit_transform(df[["birth_year", "first_donation_year"]])

year_cols = sorted([col for col in df.columns if col.startswith("y_")])
donation_data = df[year_cols].values  # matrice [n_donatori, T]

features = df[["gender", "birth_year", "first_donation_year"]].values  # shape: [n_donatori, n_features]

Let’s start with a simple Hidden Markov Model. In this case we assume that the emmissions are continuous and not discrete

Code

from hmmlearn import hmm

# Estrai le colonne delle osservazioni
y_cols = [col for col in df.columns if col.startswith("y_")]
sequences = df[y_cols].to_numpy()

# Prepara i dati per hmmlearn
X = sequences.reshape(-1, 1)  # hmmlearn richiede forma (n_samples, n_features)
lengths = [len(y_cols)] * sequences.shape[0]  # una sequenza per ogni individuo

# Costruisci l'HMM (es: GaussianHMM, Poisson non è supportato direttamente)
model = hmm.GaussianHMM(n_components=3, covariance_type="diag", n_iter=100, random_state=42)
model.fit(X, lengths)

GaussianHMM(n_components=3, n_iter=100, random_state=42)

In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.

Code

states = model.predict(X)  # X = osservazioni (n_donors * T, 1)

plt.figure(figsize=(15, 4))
plt.plot(X, label="Donazioni")
plt.plot(states, label="Stato latente", alpha=0.7)
plt.legend()
plt.title("Donazioni e stati latenti (HMM Gaussian)")
plt.show()

Code

import seaborn as sns

for i in range(model.n_components):
    mu = model.means_[i][0]
    sigma = np.sqrt(model.covars_[i][0])
    sns.kdeplot(np.random.normal(mu, sigma, 1000), label=f"Stato {i}")

plt.title("Distribuzioni delle emissioni")
plt.legend()
plt.show()

Pyro

Code

# Estrai la matrice y: righe = individui, colonne = anni
donation_cols = [col for col in df.columns if col.startswith("y_")]
Y = df[donation_cols].fillna(0).astype(int).to_numpy()  # [n_donors, T]
Y_tensor = torch.tensor(Y, dtype=torch.float32)  # Pyro lavora con torch

Code

import pyro
import pyro.distributions as dist
from pyro.nn import PyroParam
from pyro.infer import SVI, TraceEnum_ELBO, config_enumerate
from pyro.optim import Adam
from torch.distributions import constraints
from pyro import poutine
import torch

class SimpleDiscreteHMM:
    def __init__(self, num_states=3):
        self.num_states = num_states
        self.lambdas = PyroParam(torch.ones(num_states), constraint=constraints.positive)
        self.start_probs = PyroParam(torch.ones(num_states) / num_states, constraint=constraints.simplex)
        self.trans_probs = PyroParam(torch.ones(num_states, num_states) / num_states, constraint=constraints.simplex)

    @config_enumerate
    def model(self, Y):
        n_donors, T = Y.shape
        num_states = self.num_states

        with pyro.plate("donors", n_donors, dim=-2):
            z_prev = pyro.sample("z_0", dist.Categorical(pyro.param("start_probs")))

            for t in range(T):
                z_t = pyro.sample(f"z_{t}", dist.Categorical(pyro.param("trans_probs")[z_prev]))
                pyro.sample(f"y_{t}", dist.Poisson(self.lambdas[z_t]), obs=Y[:, t])
                z_prev = z_t

from pyro.nn import PyroModule

class Guide(PyroModule):
    def __init__(self, num_states):
        super().__init__()
        self.num_states = num_states

    @config_enumerate
    def forward(self, Y):
        pass  # nessun parametro latente globale da approssimare qui

Code

model = SimpleDiscreteHMM(num_states=3)
guide = Guide(num_states=3)

optimizer = Adam({"lr": 0.01})
svi = SVI(model.model, guide, optimizer, loss=TraceEnum_ELBO())

for step in range(500):
    loss = svi.step(Y_tensor)
    if step % 50 == 0:
        print(f"Step {step}: loss = {loss:.2f}")

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
Cell In[62], line 8
      5 svi = SVI(model.model, guide, optimizer, loss=TraceEnum_ELBO())
      7 for step in range(500):
----> 8     loss = svi.step(Y_tensor)
      9     if step % 50 == 0:
     10         print(f"Step {step}: loss = {loss:.2f}")

File c:\Users\erik4\AppData\Local\Programs\Python\Python313\Lib\site-packages\pyro\infer\svi.py:145, in SVI.step(self, *args, **kwargs)
    143 # get loss and compute gradients
    144 with poutine.trace(param_only=True) as param_capture:
--> 145     loss = self.loss_and_grads(self.model, self.guide, *args, **kwargs)
    147 params = set(
    148     site["value"].unconstrained() for site in param_capture.trace.nodes.values()
    149 )
    151 # actually perform gradient steps
    152 # torch.optim objects gets instantiated for any params that haven't been seen yet

File c:\Users\erik4\AppData\Local\Programs\Python\Python313\Lib\site-packages\pyro\infer\traceenum_elbo.py:451, in TraceEnum_ELBO.loss_and_grads(self, model, guide, *args, **kwargs)
    443 """
    444 :returns: an estimate of the ELBO
    445 :rtype: float
   (...)    448 Performs backward on the ELBO of each particle.
    449 """
    450 elbo = 0.0
--> 451 for model_trace, guide_trace in self._get_traces(model, guide, args, kwargs):
    452     elbo_particle = _compute_dice_elbo(model_trace, guide_trace)
    453     if is_identically_zero(elbo_particle):

File c:\Users\erik4\AppData\Local\Programs\Python\Python313\Lib\site-packages\pyro\infer\traceenum_elbo.py:374, in TraceEnum_ELBO._get_traces(self, model, guide, args, kwargs)
    372     raise NotImplementedError("TraceEnum_ELBO does not support GuideMessenger")
    373 if self.max_plate_nesting == float("inf"):
--> 374     self._guess_max_plate_nesting(model, guide, args, kwargs)
    375 if self.vectorize_particles:
    376     guide = self._vectorized_num_particles(guide)

File c:\Users\erik4\AppData\Local\Programs\Python\Python313\Lib\site-packages\pyro\infer\elbo.py:153, in ELBO._guess_max_plate_nesting(self, model, guide, args, kwargs)
    151 with poutine.block():
    152     guide_trace = poutine.trace(guide).get_trace(*args, **kwargs)
--> 153     model_trace = poutine.trace(
    154         poutine.replay(model, trace=guide_trace)
    155     ).get_trace(*args, **kwargs)
    156 guide_trace = prune_subsample_sites(guide_trace)
    157 model_trace = prune_subsample_sites(model_trace)

File c:\Users\erik4\AppData\Local\Programs\Python\Python313\Lib\site-packages\pyro\poutine\trace_messenger.py:216, in TraceHandler.get_trace(self, *args, **kwargs)
    208 def get_trace(self, *args, **kwargs) -> Trace:
    209     """
    210     :returns: data structure
    211     :rtype: pyro.poutine.Trace
   (...)    214     Calls this poutine and returns its trace instead of the function's return value.
    215     """
--> 216     self(*args, **kwargs)
    217     return self.msngr.get_trace()

File c:\Users\erik4\AppData\Local\Programs\Python\Python313\Lib\site-packages\pyro\poutine\trace_messenger.py:191, in TraceHandler.__call__(self, *args, **kwargs)
    187 self.msngr.trace.add_node(
    188     "_INPUT", name="_INPUT", type="args", args=args, kwargs=kwargs
    189 )
    190 try:
--> 191     ret = self.fn(*args, **kwargs)
    192 except (ValueError, RuntimeError) as e:
    193     exc_type, exc_value, traceback = sys.exc_info()

File c:\Users\erik4\AppData\Local\Programs\Python\Python313\Lib\site-packages\pyro\poutine\messenger.py:32, in _context_wrap(context, fn, *args, **kwargs)
     25 def _context_wrap(
     26     context: "Messenger",
     27     fn: Callable,
     28     *args: Any,
     29     **kwargs: Any,
     30 ) -> Any:
     31     with context:
---> 32         return fn(*args, **kwargs)

File c:\Users\erik4\AppData\Local\Programs\Python\Python313\Lib\site-packages\pyro\poutine\messenger.py:32, in _context_wrap(context, fn, *args, **kwargs)
     25 def _context_wrap(
     26     context: "Messenger",
     27     fn: Callable,
     28     *args: Any,
     29     **kwargs: Any,
     30 ) -> Any:
     31     with context:
---> 32         return fn(*args, **kwargs)

Cell In[61], line 22, in SimpleDiscreteHMM.model(self, Y)
     19 num_states = self.num_states
     21 with pyro.plate("donors", n_donors, dim=-2):
---> 22     z_prev = pyro.sample("z_0", dist.Categorical(pyro.param("start_probs")))
     24     for t in range(T):
     25         z_t = pyro.sample(f"z_{t}", dist.Categorical(pyro.param("trans_probs")[z_prev]))

File c:\Users\erik4\AppData\Local\Programs\Python\Python313\Lib\site-packages\pyro\primitives.py:88, in param(name, init_tensor, constraint, event_dim)
     86 # Note effectful(-) requires the double passing of name below.
     87 args = (name,) if init_tensor is None else (name, init_tensor)
---> 88 value = _param(*args, constraint=constraint, event_dim=event_dim, name=name)
     89 assert value is not None  # type narrowing guaranteed by _param
     90 return value

File c:\Users\erik4\AppData\Local\Programs\Python\Python313\Lib\site-packages\pyro\poutine\runtime.py:461, in effectful.<locals>._fn(name, infer, obs, *args, **kwargs)
    444 msg = Message(
    445     type=type,
    446     name=name,
   (...)    458     infer=infer if infer is not None else {},
    459 )
    460 # apply the stack and return its return value
--> 461 apply_stack(msg)
    462 if TYPE_CHECKING:
    463     assert msg["value"] is not None

File c:\Users\erik4\AppData\Local\Programs\Python\Python313\Lib\site-packages\pyro\poutine\runtime.py:383, in apply_stack(initial_msg)
    380     if msg["stop"]:
    381         break
--> 383 default_process_message(msg)
    385 for frame in stack[-pointer:]:
    386     frame._postprocess_message(msg)

File c:\Users\erik4\AppData\Local\Programs\Python\Python313\Lib\site-packages\pyro\poutine\runtime.py:345, in default_process_message(msg)
    342     msg["done"] = True
    343     return
--> 345 msg["value"] = msg["fn"](*msg["args"], **msg["kwargs"])
    347 # after fn has been called, update msg to prevent it from being called again.
    348 msg["done"] = True

File c:\Users\erik4\AppData\Local\Programs\Python\Python313\Lib\site-packages\pyro\params\param_store.py:249, in ParamStoreDict.get_param(self, name, init_tensor, constraint, event_dim)
    233 """
    234 Get parameter from its name. If it does not yet exist in the
    235 ParamStore, it will be created and stored.
   (...)    246 :rtype: torch.Tensor
    247 """
    248 if init_tensor is None:
--> 249     return self[name]
    250 else:
    251     return self.setdefault(name, init_tensor, constraint)

File c:\Users\erik4\AppData\Local\Programs\Python\Python313\Lib\site-packages\pyro\params\param_store.py:129, in ParamStoreDict.__getitem__(self, name)
    125 def __getitem__(self, name: str) -> torch.Tensor:
    126     """
    127     Get the *constrained* value of a named parameter.
    128     """
--> 129     unconstrained_value = self._params[name]
    131     # compute the constrained value
    132     constraint = self._constraints[name]

KeyError: 'start_probs'

Code

print("Lambda per stato:", model.lambdas.detach().numpy())
print("Probabilità iniziali:", model.start_probs.detach().numpy())
print("Matrice di transizione:", model.trans_probs.detach().numpy())

---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
Cell In[63], line 1
----> 1 print("Lambda per stato:", model.lambdas.detach().numpy())
      2 print("Probabilità iniziali:", model.start_probs.detach().numpy())
      3 print("Matrice di transizione:", model.trans_probs.detach().numpy())

AttributeError: 'PyroParam' object has no attribute 'detach'