Skip to content



Base class for all LLaMPPL models.

Your models should subclass this class. Minimally, you should provide an __init__ method that calls super().__init__(), and a step method.

Source code in hfppl/
class Model:
    """Base class for all LLaMPPL models.

    Your models should subclass this class. Minimally, you should provide an `__init__` method
    that calls `super().__init__()`, and a `step` method.

    Attributes:
        weight: the particle's log-weight; `score` adds to it.
        finished: whether `finish` has been called; reported by `done_stepping`.
        mode: sampling mode label, `"sample"` by default (only the disabled beam-search
            logic in `sample` refers to it).
        beam_idx: beam index, only referenced by the disabled beam-search logic in `sample`.
        force_eos: flag initialised to `False`; it is not read anywhere in this class.
        twist_amount: running total of the log-amounts applied by `twist` since the last
            `untwist`.
    """

    def __init__(self):
        self.weight = 0.0
        self.finished = False
        self.mode = "sample"
        self.beam_idx = 0
        self.force_eos = False
        self.twist_amount = 0.0

    def reset(self):
        """Restore every bookkeeping attribute to the value assigned by `__init__`."""
        self.weight = 0.0
        self.finished = False
        self.mode = "sample"
        self.beam_idx = 0
        self.force_eos = False
        self.twist_amount = 0.0

    def immutable_properties(self):
        """Return a `set[str]` of properties that LLaMPPL may assume do not change during execution of `step`.
        This set is empty by default but can be overridden by subclasses to speed up inference.

        Returns:
            properties (set[str]): a set of immutable property names"""
        return set()

    def __deepcopy__(self, memo):
        """Copy the particle, sharing immutable properties and deep-copying everything else.

        Args:
            memo: the memo dictionary supplied by `copy.deepcopy`.

        Returns:
            A new instance of the same class, created without calling `__init__`.
        """
        cpy = type(self).__new__(type(self))
        immutable = self.immutable_properties()

        for k, v in self.__dict__.items():
            if k in immutable:
                # Declared immutable: sharing the reference is safe and avoids a copy.
                setattr(cpy, k, v)
            else:
                setattr(cpy, k, copy.deepcopy(v, memo))

        return cpy

    def twist(self, amt):
        """Multiply this particle's weight by `exp(amt)`, but divide it back out before the next `step`.

        Use this method to provide heuristic guidance about whether a particle is "on the right track"
        without changing the ultimate target distribution.

        Args:
            amt: the logarithm of the amount by which to (temporarily) multiply this particle's weight.
        """
        # Remember the total so that `untwist` can remove exactly what was added.
        self.twist_amount += amt
        self.score(amt)

    def untwist(self):
        """Remove every pending twist from the weight and clear `twist_amount`."""
        self.score(-self.twist_amount)
        self.twist_amount = 0.0

    def finish(self):
        """Mark the particle as finished so that `done_stepping` returns `True`."""
        # A finished particle must not keep a temporary twist in its final weight,
        # otherwise the twist would change the target distribution.
        self.untwist()
        self.finished = True

    def done_stepping(self):
        """Return `True` once `finish` has been called."""
        return self.finished

    async def step(self):
        """Defines the computation performed in each step of the model.

        All subclasses should override this method."""

        if not self.done_stepping():
            raise NotImplementedError("Model.step() must be implemented by subclasses")

    def __str__(self):
        return "Particle"

    async def start(self):
        """Asynchronous hook that does nothing in the base class.

        NOTE(review): presumably awaited by the inference engine before the first
        `step` -- confirm against the caller.
        """
        pass

    def score(self, score):
        """Multiply this particle's weight by `exp(score)`.

        The `score` method is a low-level way to change the target distribution.
        For many use cases, it is sufficient to use `sample`, `observe`, `condition`,
        and `twist`, all of which are implemented in terms of `score`.

        Args:
            score: logarithm of the amount by which the particle's weight should be multiplied.
        """
        self.weight += score

    def condition(self, b):
        """Constrain a given Boolean expression to be `True`.

        If the condition is False, the particle's weight is set to zero and `self.finish()`
        is called, so that no further `step` calls are made.

        Args:
            b: the Boolean expression whose value is constrained to be True.
        """
        if not b:
            # Weights are kept in log space, so a weight of zero is -inf.
            self.score(float("-inf"))
            self.finish()

    async def intervene(self, dist, x):
        """Force the distribution to take on the value `x`, but do not _condition_ on this result.

        This is useful primarily with distributions that have side effects (e.g., modifying some state).
        For example, a model with the code

            token_1 = await self.sample(self.stateful_lm.next_token())
            await self.observe(self.stateful_lm.next_token(), token_2)

        encodes a posterior inference problem, to find `token_1` values that *likely preceded* `token_2`. By contrast,

            token_1 = await self.sample(self.stateful_lm.next_token())
            await self.intervene(self.stateful_lm.next_token(), token_2)

        encodes a much easier task: freely generate `token_1` and then force-feed `token_2` as the following token.

        Args:
            dist (hfppl.distributions.distribution.Distribution): the distribution on which to intervene.
            x: the value to intervene with.

        Returns:
            The value `x`, unchanged.
        """
        # The log-probability is deliberately discarded: the call is made only for
        # the distribution's side effects, and the weight is left untouched.
        await dist.log_prob(x)
        return x

    async def observe(self, dist, x):
        """Condition the model on the value `x` being sampled from the distribution `dist`.

        For discrete distributions `dist`, `await self.observe(dist, x)` specifies the same constraint as

            val = await self.sample(dist)
            self.condition(val == x)

        but can be much more efficient.

        Args:
            dist: a `Distribution` object from which to observe
            x: the value observed from `dist`

        Returns:
            The value `x`, unchanged.
        """
        p = await dist.log_prob(x)
        self.score(p)
        return x

    async def sample(self, dist, proposal=None):
        """Extend the model with a sample from a given `Distribution`, with support for autobatching.
        If specified, the Distribution `proposal` is used during inference to generate informed hypotheses.

        Args:
            dist: the `Distribution` object from which to sample
            proposal: if provided, inference algorithms will use this `Distribution` object to generate proposed samples, rather than `dist`.
              However, importance weights will be adjusted so that the target posterior is independent of the proposal.

        Returns:
            value: the value sampled from the distribution.
        """
        # Special logic for beam search (currently disabled)
        # if self.mode == "beam":
        #     d = dist if proposal is None else proposal
        #     x, w = d.argmax(self.beam_idx)
        #     if proposal is not None:
        #         self.score(dist.log_prob(x))
        #     else:
        #         self.score(w)
        #     return x

        if proposal is None:
            # Sampling directly from the target needs no weight correction.
            x, _ = await dist.sample()
            return x

        # Importance correction: target log-probability minus proposal log-probability.
        x, q = await proposal.sample()
        p = await dist.log_prob(x)
        self.score(p - q)
        return x

    async def call(self, submodel):
        """Run `submodel` with this model as its parent and return its result.

        Args:
            submodel: an object exposing an awaitable `run_with_parent(parent)` method.
        """
        return await submodel.run_with_parent(self)

    def string_for_serialization(self):
        """Return a string representation of the particle for serialization purposes.

        Returns:
            str: a string representation of the particle.
        """
        return str(self)


Constrain a given Boolean expression to be True.

If the condition is False, the particle's weight is set to zero and self.finish() is called, so that no further step calls are made.


Name Type Description Default

b: the Boolean expression whose value is constrained to be True. (required)

Source code in hfppl/
def condition(self, b):
    """Constrain a given Boolean expression to be `True`.

    If the condition is False, the particle's weight is set to zero and `self.finish()`
    is called, so that no further `step` calls are made.

    Args:
        b: the Boolean expression whose value is constrained to be True.
    """
    if not b:
        # Weights are kept in log space, so a weight of zero is -inf.
        self.score(float("-inf"))
        self.finish()


Return a set[str] of properties that LLaMPPL may assume do not change during execution of step. This set is empty by default but can be overridden by subclasses to speed up inference.


Name Type Description
properties set[str]

a set of immutable property names

Source code in hfppl/
def immutable_properties(self):
    """Name the properties LLaMPPL may treat as unchanged while `step` runs.

    The base implementation declares nothing immutable; subclasses can override it
    to speed up inference.

    Returns:
        properties (set[str]): a set of immutable property names
    """
    no_immutable_names = set()
    return no_immutable_names

intervene(dist, x) async

Force the distribution to take on the value x, but do not condition on this result.

This is useful primarily with distributions that have side effects (e.g., modifying some state). For example, a model with the code

token_1 = await self.sample(self.stateful_lm.next_token())
await self.observe(self.stateful_lm.next_token(), token_2)

encodes a posterior inference problem, to find token_1 values that likely preceded token_2. By contrast,

token_1 = await self.sample(self.stateful_lm.next_token())
await self.intervene(self.stateful_lm.next_token(), token_2)

encodes a much easier task: freely generate token_1 and then force-feed token_2 as the following token.


Name Type Description Default
dist Distribution

the distribution on which to intervene.


x: the value to intervene with. (required)

Source code in hfppl/
async def intervene(self, dist, x):
    """Force the distribution to take on the value `x`, but do not _condition_ on this result.

    This is useful primarily with distributions that have side effects (e.g., modifying some state).
    For example, a model with the code

        token_1 = await self.sample(self.stateful_lm.next_token())
        await self.observe(self.stateful_lm.next_token(), token_2)

    encodes a posterior inference problem, to find `token_1` values that *likely preceded* `token_2`. By contrast,

        token_1 = await self.sample(self.stateful_lm.next_token())
        await self.intervene(self.stateful_lm.next_token(), token_2)

    encodes a much easier task: freely generate `token_1` and then force-feed `token_2` as the following token.

    Args:
        dist (hfppl.distributions.distribution.Distribution): the distribution on which to intervene.
        x: the value to intervene with.

    Returns:
        The value `x`, unchanged.
    """
    # The log-probability is deliberately discarded: the call is made only for
    # the distribution's side effects, and the weight is left untouched.
    await dist.log_prob(x)
    return x

observe(dist, x) async

Condition the model on the value x being sampled from the distribution dist.

For discrete distributions dist, await self.observe(dist, x) specifies the same constraint as

val = await self.sample(dist)
self.condition(val == x)
but can be much more efficient.


Name Type Description Default

dist: a Distribution object from which to observe. (required)


x: the value observed from dist. (required)

Source code in hfppl/
async def observe(self, dist, x):
    """Condition the model on the value `x` being sampled from the distribution `dist`.

    For discrete distributions `dist`, `await self.observe(dist, x)` specifies the same constraint as

        val = await self.sample(dist)
        self.condition(val == x)

    but can be much more efficient.

    Args:
        dist: a `Distribution` object from which to observe
        x: the value observed from `dist`

    Returns:
        The value `x`, unchanged.
    """
    p = await dist.log_prob(x)
    # Conditioning means weighting the particle by the probability of the observation.
    self.score(p)
    return x

sample(dist, proposal=None) async

Extend the model with a sample from a given Distribution, with support for autobatching. If specified, the Distribution proposal is used during inference to generate informed hypotheses.


Name Type Description Default

dist: the Distribution object from which to sample. (required)


proposal: if provided, inference algorithms will use this Distribution object to generate proposed samples, rather than dist. However, importance weights will be adjusted so that the target posterior is independent of the proposal. (default: None)



Name Type Description

value: the value sampled from the distribution.

Source code in hfppl/
async def sample(self, dist, proposal=None):
    """Extend the model with a sample from a given `Distribution`, with support for autobatching.
    If specified, the Distribution `proposal` is used during inference to generate informed hypotheses.

    Args:
        dist: the `Distribution` object from which to sample
        proposal: if provided, inference algorithms will use this `Distribution` object to generate proposed samples, rather than `dist`.
          However, importance weights will be adjusted so that the target posterior is independent of the proposal.

    Returns:
        value: the value sampled from the distribution.
    """
    # Special logic for beam search (currently disabled)
    # if self.mode == "beam":
    #     d = dist if proposal is None else proposal
    #     x, w = d.argmax(self.beam_idx)
    #     if proposal is not None:
    #         self.score(dist.log_prob(x))
    #     else:
    #         self.score(w)
    #     return x

    if proposal is None:
        # Sampling directly from the target needs no weight correction.
        x, _ = await dist.sample()
        return x

    # Importance correction: target log-probability minus proposal log-probability.
    x, q = await proposal.sample()
    p = await dist.log_prob(x)
    self.score(p - q)
    return x


Multiply this particle's weight by exp(score).

The score method is a low-level way to change the target distribution. For many use cases, it is sufficient to use sample, observe, condition, and twist, all of which are implemented in terms of score.


Name Type Description Default

score: logarithm of the amount by which the particle's weight should be multiplied. (required)

Source code in hfppl/
def score(self, score):
    """Multiply this particle's weight by `exp(score)`.

    The `score` method is a low-level way to change the target distribution.
    For many use cases, it is sufficient to use `sample`, `observe`, `condition`,
    and `twist`, all of which are implemented in terms of `score`.

    Args:
        score: logarithm of the amount by which the particle's weight should be multiplied.
    """
    # The weight is stored in log space, so multiplying becomes adding.
    self.weight += score

step() async

Defines the computation performed in each step of the model.

All subclasses should override this method.

Source code in hfppl/
async def step(self):
    """Perform one step of the model's computation.

    This base version is a placeholder that every subclass is expected to override:
    it returns quietly when the particle is already done stepping and raises otherwise.

    Raises:
        NotImplementedError: if the particle is not yet done stepping.
    """
    if self.done_stepping():
        return
    raise NotImplementedError("Model.step() must be implemented by subclasses")


Return a string representation of the particle for serialization purposes.


Name Type Description

str: a string representation of the particle.

Source code in hfppl/
def string_for_serialization(self):
    """Return a string representation of the particle for serialization purposes.

    Returns:
        str: a string representation of the particle.
    """
    return str(self)


Multiply this particle's weight by exp(amt), but divide it back out before the next step.

Use this method to provide heuristic guidance about whether a particle is "on the right track" without changing the ultimate target distribution.


Name Type Description Default

amt: the logarithm of the amount by which to (temporarily) multiply this particle's weight. (required)

Source code in hfppl/
def twist(self, amt):
    """Multiply this particle's weight by `exp(amt)`, but divide it back out before the next `step`.

    Use this method to provide heuristic guidance about whether a particle is "on the right track"
    without changing the ultimate target distribution.

    Args:
        amt: the logarithm of the amount by which to (temporarily) multiply this particle's weight.
    """
    # Keep a running total so the temporary adjustment can be removed later.
    self.twist_amount += amt
    self.score(amt)


Decorator to create a SubModel implementation from an async function.

For example:

@submodel
async def sample_two_tokens(self, context):
    token1 = await self.sample(context.next_token())
    token2 = await self.sample(context.next_token())
    return token1, token2

This SubModel can then be used from another model or submodel, using the syntax await self.call(sample_two_tokens(context)).

Source code in hfppl/
def submodel(f):
    """Decorator to create a SubModel implementation from an async function.

    For example:

        @submodel
        async def sample_two_tokens(self, context):
            token1 = await self.sample(context.next_token())
            token2 = await self.sample(context.next_token())
            return token1, token2

    This SubModel can then be used from another model or submodel, using the syntax
    `await self.call(sample_two_tokens(context))`.

    Args:
        f: an async function whose first parameter receives the SubModel instance,
            followed by the arguments given when the SubModel is constructed.

    Returns:
        A `SubModel` subclass whose `forward` awaits `f` with the stored arguments.
    """

    @functools.wraps(f, updated=())  # unclear if this is the best way to do it
    class SubModelImpl(SubModel):
        def __init__(self, *args, **kwargs):
            # Initialise the SubModel base before storing the call arguments.
            super().__init__()
            self.args = args
            self.kwargs = kwargs

        async def forward(self):
            return await f(self, *self.args, **self.kwargs)

    return SubModelImpl