Source code for sympy.stats.rv

"""
Main Random Variables Module

Defines abstract random variable type.
Contains interfaces for probability space object (PSpace) as well as standard
operators, P, E, sample, density, where

See Also
========
sympy.stats.crv
sympy.stats.frv
sympy.stats.rv_interface
"""

from sympy import Basic, S, Expr, Symbol, Tuple, And, Add, Eq, lambdify
from sympy.core.sets import FiniteSet, ProductSet

[docs]class RandomDomain(Basic): """ Represents a set of variables and the values which they can take See Also ======== sympy.stats.crv.ContinuousDomain sympy.stats.frv.FiniteDomain """ is_ProductDomain = False is_Finite = False is_Continuous = False def __new__(cls, symbols, *args): symbols = FiniteSet(*symbols) return Basic.__new__(cls, symbols, *args) @property def symbols(self): return self.args[0] @property def set(self): return self.args[1] def __contains__(self, other): raise NotImplementedError() def integrate(self, expr): raise NotImplementedError()
[docs]class SingleDomain(RandomDomain): """ A single variable and its domain See Also ======== sympy.stats.crv.SingleContinuousDomain sympy.stats.frv.SingleFiniteDomain """ def __new__(cls, symbol, set): assert symbol.is_Symbol symbols = FiniteSet(symbol) return RandomDomain.__new__(cls, symbols, set) @property def symbol(self): return tuple(self.symbols)[0] def __contains__(self, other): if len(other)!=1: return False sym, val = tuple(other)[0] return self.symbol == sym and val in self.set
[docs]class ConditionalDomain(RandomDomain): """ A RandomDomain with an attached condition See Also ======== sympy.stats.crv.ConditionalContinuousDomain sympy.stats.frv.ConditionalFiniteDomain """ def __new__(cls, fulldomain, condition): condition = condition.subs(dict((rs,rs.symbol) for rs in random_symbols(condition))) return RandomDomain.__new__( cls, fulldomain.symbols, fulldomain, condition) @property def fulldomain(self): return self.args[1] @property def condition(self): return self.args[2] @property def set(self): raise NotImplementedError("Set of Conditional Domain not Implemented") def as_boolean(self): return And(self.fulldomain.as_boolean(), self.condition)
[docs]class PSpace(Basic): """ A Probability Space Probability Spaces encode processes that equal different values probabalistically. These underly Random Symbols which occur in SymPy expressions and contain the mechanics to evaluate statistical statements. See Also ======== sympy.stats.crv.ContinuousPSpace sympy.stats.frv.FinitePSpace """ is_Finite = None is_Continuous = None @property def domain(self): return self.args[0] @property def density(self): return self.args[1] @property def values(self): return frozenset(RandomSymbol(self, sym) for sym in self.domain.symbols) @property def symbols(self): return self.domain.symbols def where(self, condition): raise NotImplementedError() def compute_density(self, expr): raise NotImplementedError() def sample(self): raise NotImplementedError() def probability(self, condition): raise NotImplementedError() def integrate(self, expr): raise NotImplementedError()
[docs]class SinglePSpace(PSpace): """ Represents the probabilities of a set of random events that can be attributed to a single variable/symbol. """ @property def value(self): return tuple(self.values)[0]
[docs]class RandomSymbol(Symbol): """ Random Symbols represent ProbabilitySpaces in SymPy Expressions In principle they can take on any value that their symbol can take on within the associated PSpace with probability determined by the PSpace Density. Random Symbols contain pspace and symbol properties. The pspace property points to the represented Probability Space The symbol is a standard SymPy Symbol that is used in that probability space for example in defining a density. You can form normal SymPy expressions using RandomSymbols and operate on those expressions with the Functions E - Expectation of a random expression P - Probability of a condition density - Probability Density of an expression given - A new random expression (with new random symbols) given a condition An object of the RandomSymbol type should almost never be created by the user. They tend to be created instead by the PSpace class's value method. Traditionally a user doesn't even do this but instead calls one of the convenience functions Normal, Exponential, Coin, Die, FiniteRV, etc.... """ is_bounded=True is_finite=True def __new__(cls, *args): obj = Basic.__new__(cls) obj.pspace = args[0] obj.symbol = args[1] return obj @property def name(self): return self.symbol.name @property def is_commutative(self): return self.symbol.is_commutative def _hashable_content(self): return self.pspace, self.symbol
[docs]class ProductPSpace(PSpace): """ A probability space resulting from the merger of two independent probability spaces. Often created using the function, pspace """ def __new__(cls, *spaces): rs_space_dict = {} for space in spaces: for value in space.values: rs_space_dict[value] = space symbols = FiniteSet(val.symbol for val in rs_space_dict.keys()) # Overlapping symbols if len(symbols) < sum(len(space.symbols) for space in spaces): raise ValueError("Overlapping Random Variables") if all(space.is_Finite for space in spaces): from sympy.stats.frv import ProductFinitePSpace cls = ProductFinitePSpace if all(space.is_Continuous for space in spaces): from sympy.stats.crv import ProductContinuousPSpace cls = ProductContinuousPSpace obj = Basic.__new__(cls, symbols, FiniteSet(*spaces)) obj.rs_space_dict = rs_space_dict return obj @property def spaces(self): return self.args[1] @property def values(self): return sumsets(space.values for space in self.spaces) def integrate(self, expr, rvs=None, **kwargs): rvs = rvs or self.values rvs = frozenset(rvs) for space in self.spaces: expr = space.integrate(expr, rvs & space.values, **kwargs) return expr @property def domain(self): return ProductDomain(*[space.domain for space in self.spaces]) @property def density(self): raise NotImplementedError("Density not available for ProductSpaces") def sample(self): return dict([(k,v) for space in self.spaces for k,v in space.sample().items()])
[docs]class ProductDomain(RandomDomain): """ A domain resulting from the merger of two independent domains See Also ======== sympy.stats.crv.ProductContinuousDomain sympy.stats.frv.ProductFiniteDomain """ is_ProductDomain = True def __new__(cls, *domains): symbols = sumsets([domain.symbols for domain in domains]) # Flatten any product of products domains2 = [] for domain in domains: if not domain.is_ProductDomain: domains2.append(domain) else: domains2.extend(domain.domains) domains2 = FiniteSet(domains2) sym_domain_dict = {} for domain in domains2: for symbol in domain.symbols: sym_domain_dict[symbol] = domain if all(domain.is_Finite for domain in domains2): from sympy.stats.frv import ProductFiniteDomain cls = ProductFiniteDomain if all(domain.is_Continuous for domain in domains2): from sympy.stats.crv import ProductContinuousDomain cls = ProductContinuousDomain obj = RandomDomain.__new__(cls, symbols, domains2) obj.sym_domain_dict = sym_domain_dict return obj @property def domains(self): return self.args[1] @property def set(self): return ProductSet(domain.set for domain in self.domains) def __contains__(self, other): # Split event into each subdomain for domain in self.domains: # Collect the parts of this event which associate to this domain elem = frozenset([item for item in other if item[0] in domain.symbols]) # Test this sub-event if elem not in domain: return False # All subevents passed return True def as_boolean(self): return And(*[domain.as_boolean() for domain in self.domains])
[docs]def random_symbols(expr): """ Returns all RandomSymbols within a SymPy Expression. """ try: return list(expr.atoms(RandomSymbol)) except AttributeError: return []
[docs]def pspace(expr): """ Returns the underlying Probability Space of a random expression. For internal use. Examples ======== >>> from sympy.stats import pspace, Normal >>> from sympy.stats.rv import ProductPSpace >>> X = Normal('X', 0, 1) >>> pspace(2*X + 1) == X.pspace True """ rvs = random_symbols(expr) if not rvs: return None # If only one space present if all(rv.pspace == rvs[0].pspace for rv in rvs): return rvs[0].pspace # Otherwise make a product space return ProductPSpace(*[rv.pspace for rv in rvs])
def sumsets(sets): """ Union of sets """ return reduce(frozenset.union, sets, frozenset())
[docs]def rs_swap(a,b): """ Build a dictionary to swap RandomSymbols based on their underlying symbol. i.e. if X = ('x', pspace1) and Y = ('x', pspace2) then X and Y match and the key, value pair {X:Y} will appear in the result Inputs: collections a and b of random variables which share common symbols Output: dict mapping RVs in a to RVs in b """ d = {} for rsa in a: d[rsa] = [rsb for rsb in b if rsa.symbol==rsb.symbol][0] return d
def given(expr, condition=None, **kwargs): """ From a random expression and a condition on that expression creates a new probability space from the condition and returns the same expression on that conditional probability space. Examples ======== >>> from sympy.stats import given, density, Die >>> X = Die('X', 6) >>> Y = given(X, X>3) >>> density(Y) {4: 1/3, 5: 1/3, 6: 1/3} """ if not random_symbols(condition) or pspace_independent(expr, condition): return expr # Get full probability space of both the expression and the condition fullspace = pspace(Tuple(expr, condition)) # Build new space given the condition space = fullspace.conditional_space(condition, **kwargs) # Dictionary to swap out RandomSymbols in expr with new RandomSymbols # That point to the new conditional space swapdict = rs_swap(fullspace.values, space.values) # Swap random variables in the expression expr = expr.subs(swapdict) return expr def expectation(expr, condition=None, numsamples=None, **kwargs): """ Returns the expected value of a random expression Parameters ---------- expr : Expr containing RandomSymbols The expression of which you want to compute the expectation value given : Expr containing RandomSymbols A conditional expression. E(X, X>0) is expectation of X given X > 0 numsamples : int Enables sampling and approximates the expectation with this many samples evalf : Bool (defaults to True) If sampling return a number rather than a complex expression evaluate : Bool (defaults to True) In case of continuous systems return unevaluated integral Examples ======== >>> from sympy.stats import E, Die >>> X = Die('X', 6) >>> E(X) 7/2 >>> E(2*X + 1) 8 >>> E(X, X>3) # Expectation of X given that it is above 3 5 """ if not random_symbols(expr): # expr isn't random? return expr if numsamples: # Computing by monte carlo sampling? return sampling_E(expr, condition, numsamples=numsamples, **kwargs) # Create new expr and recompute E if condition is not None: # If there is a condition return expectation(given(expr, condition, **kwargs), **kwargs) # A few known statements for efficiency if expr.is_Add: # We know that E is Linear return Add(*[expectation(arg, **kwargs) for arg in expr.args]) # Otherwise case is simple, pass work off to the ProbabilitySpace return pspace(expr).integrate(expr, **kwargs) def probability(condition, given_condition=None, numsamples=None, **kwargs): """ Probability that a condition is true, optionally given a second condition Parameters ---------- expr : Relational containing RandomSymbols The condition of which you want to compute the probability given_condition : Relational containing RandomSymbols A conditional expression. P(X>1, X>0) is expectation of X>1 given X>0 numsamples : int Enables sampling and approximates the probability with this many samples evalf : Bool (defaults to True) If sampling return a number rather than a complex expression evaluate : Bool (defaults to True) In case of continuous systems return unevaluated integral Examples ======== >>> from sympy.stats import P, Die >>> from sympy import Eq >>> X, Y = Die('X', 6), Die('Y', 6) >>> P(X>3) 1/2 >>> P(Eq(X, 5), X>2) # Probability that X == 5 given that X > 2 1/4 >>> P(X>Y) 5/12 """ if numsamples: return sampling_P(condition, given_condition, numsamples=numsamples, **kwargs) if given_condition is not None: # If there is a condition # Recompute on new conditional expr return probability(given(condition, given_condition, **kwargs),**kwargs) # Otherwise pass work off to the ProbabilitySpace return pspace(condition).probability(condition, **kwargs) def density(expr, condition=None, **kwargs): """ Probability density of a random expression Optionally given a second condition This density will take on different forms for different types of probability spaces. Discrete variables produce Dicts. Continuous variables produce Lambdas. Examples ======== >>> from sympy.stats import density, Die, Normal >>> from sympy import Symbol >>> D = Die('D', 6) >>> X = Normal('x', 0, 1) >>> density(D) {1: 1/6, 2: 1/6, 3: 1/6, 4: 1/6, 5: 1/6, 6: 1/6} >>> density(2*D) {2: 1/6, 4: 1/6, 6: 1/6, 8: 1/6, 10: 1/6, 12: 1/6} >>> density(X) Lambda(_x, sqrt(2)*exp(-_x**2/2)/(2*sqrt(pi))) """ if condition is not None: # If there is a condition # Recompute on new conditional expr return density(given(expr, condition, **kwargs), **kwargs) # Otherwise pass work off to the ProbabilitySpace return pspace(expr).compute_density(expr, **kwargs) def cdf(expr, condition=None, **kwargs): """ Cumulative Distribution Function of a random expression. optionally given a second condition This density will take on different forms for different types of probability spaces. Discrete variables produce Dicts. Continuous variables produce Lambdas. Examples ======== >>> from sympy.stats import density, Die, Normal, cdf >>> from sympy import Symbol >>> D = Die('D', 6) >>> X = Normal('X', 0, 1) >>> density(D) {1: 1/6, 2: 1/6, 3: 1/6, 4: 1/6, 5: 1/6, 6: 1/6} >>> cdf(D) {1: 1/6, 2: 1/3, 3: 1/2, 4: 2/3, 5: 5/6, 6: 1} >>> cdf(3*D, D>2) {9: 1/4, 12: 1/2, 15: 3/4, 18: 1} >>> cdf(X) Lambda(_z, erf(sqrt(2)*_z/2)/2 + 1/2) """ if condition is not None: # If there is a condition # Recompute on new conditional expr return cdf(given(expr, condition, **kwargs), **kwargs) # Otherwise pass work off to the ProbabilitySpace return pspace(expr).compute_cdf(expr, **kwargs) def where(condition, given_condition=None, **kwargs): """ Returns the domain where a condition is True. Examples ======== >>> from sympy.stats import where, Die, Normal >>> from sympy import symbols, And >>> D1, D2 = Die('a', 6), Die('b', 6) >>> a, b = D1.symbol, D2.symbol >>> X = Normal('x', 0, 1) >>> where(X**2<1) Domain: And(-1 < x, x < 1) >>> where(X**2<1).set (-1, 1) >>> where(And(D1<=D2 , D2<3)) Domain: Or(And(a == 1, b == 1), And(a == 1, b == 2), And(a == 2, b == 2)) """ if given_condition is not None: # If there is a condition # Recompute on new conditional expr return where(given(condition, given_condition, **kwargs), **kwargs) # Otherwise pass work off to the ProbabilitySpace return pspace(condition).where(condition, **kwargs) def sample(expr, condition=None, **kwargs): """ A realization of the random expression Examples ======== >>> from sympy.stats import Die, sample >>> X, Y, Z = Die('X', 6), Die('Y', 6), Die('Z', 6) >>> die_roll = sample(X+Y+Z) # A random realization of three dice """ return sample_iter(expr, condition, numsamples=1).next() def sample_iter(expr, condition=None, numsamples=S.Infinity, **kwargs): """ Returns an iterator of realizations from the expression given a condition expr: Random expression to be realized condition: A conditional expression (optional) numsamples: Length of the iterator (defaults to infinity) Examples -------- >>> from sympy.stats import Normal, sample_iter >>> X = Normal('X', 0, 1) >>> expr = X*X + 3 >>> iterator = sample_iter(expr, numsamples=3) >>> list(iterator) # doctest: +SKIP [12, 4, 7] See Also ======== Sample sampling_P sampling_E sample_iter_lambdify sample_iter_subs """ # lambdify is much faster but not as robust try: return sample_iter_lambdify(expr, condition, numsamples, **kwargs) # use subs when lambdify fails except TypeError: return sample_iter_subs(expr, condition, numsamples, **kwargs) def sample_iter_lambdify(expr, condition=None, numsamples=S.Infinity, **kwargs): """ See sample_iter Uses lambdify for computation. This is fast but does not always work. """ if condition: ps = pspace(Tuple(expr, condition)) else: ps = pspace(expr) rvs = list(ps.values) fn = lambdify(rvs, expr, **kwargs) if condition: given_fn = lambdify(rvs, condition, **kwargs) # Check that lambdify can handle the expression # Some operations like Sum can prove difficult try: d = ps.sample() # a dictionary that maps RVs to values args = [d[rv] for rv in rvs] fn(*args) if condition: given_fn(*args) except: raise TypeError("Expr/condition too complex for lambdify") def return_generator(): count = 0 while count < numsamples: d = ps.sample() # a dictionary that maps RVs to values args = [d[rv] for rv in rvs] if condition: # Check that these values satisfy the condition gd = given_fn(*args) if not isinstance(gd, bool): raise ValueError("Conditions must not contain free symbols") if gd == False: # If the values don't satisfy then try again continue yield fn(*args) count += 1 return return_generator() def sample_iter_subs(expr, condition=None, numsamples=S.Infinity, **kwargs): """ See sample_iter Uses subs for computation. This is slow but almost always works. """ if condition: ps = pspace(Tuple(expr, condition)) else: ps = pspace(expr) count = 0 while count < numsamples: d = ps.sample() # a dictionary that maps RVs to values if condition: # Check that these values satisfy the condition gd = condition.subs(d) if not isinstance(gd, bool): raise ValueError("Conditions must not contain free symbols") if gd == False: # If the values don't satisfy then try again continue yield expr.subs(d) count += 1 def sampling_P(condition, given_condition=None, numsamples=1, evalf=True, **kwargs): """ Sampling version of P See Also ======== P sampling_E """ count_true = 0 count_false = 0 samples = sample_iter(condition, given_condition, numsamples=numsamples, **kwargs) for x in samples: if not isinstance(x, bool): raise ValueError("Conditions must not contain free symbols") if x==True: count_true += 1 else: count_false += 1 result = S(count_true) / numsamples if evalf: return result.evalf() else: return result def sampling_E(condition, given_condition=None, numsamples=1, evalf=True, **kwargs): """ Sampling version of E See Also ======== P sampling_P """ samples = sample_iter(condition, given_condition, numsamples=numsamples, **kwargs) result = Add(*list(samples)) / numsamples if evalf: return result.evalf() else: return result def dependent(a, b): """ Dependence of two random expressions Two expressions are independent if knowledge of one does not change computations on the other. Examples ======== >>> from sympy.stats import Normal, dependent, given >>> from sympy import Tuple, Eq >>> X, Y = Normal('X', 0, 1), Normal('Y', 0, 1) >>> dependent(X, Y) False >>> dependent(2*X + Y, -Y) True >>> X, Y = given(Tuple(X, Y), Eq(X+Y,3)) >>> dependent(X, Y) True See Also ======== independent """ if pspace_independent(a,b): return False z = Symbol('z', real=True) # Dependent if density is unchanged when one is given information about # the other return (density(a, Eq(b, z)) != density(a) or density(b, Eq(a, z)) != density(b)) def independent(a, b): """ Independence of two random expressions Two expressions are independent if knowledge of one does not change computations on the other. Examples ======== >>> from sympy.stats import Normal, independent, given >>> from sympy import Tuple, Eq >>> X, Y = Normal('X', 0, 1), Normal('Y', 0, 1) >>> independent(X, Y) True >>> independent(2*X + Y, -Y) False >>> X, Y = given(Tuple(X, Y), Eq(X+Y,3)) >>> independent(X, Y) False See Also ======== dependent """ return not dependent(a, b) def pspace_independent(a,b): """ Tests for independence between a and b by checking if their PSpaces have overlapping symbols. This is a sufficient but not necessary condition for independence and is intended to be used internally. Note: pspace_independent(a,b) implies independent(a,b) independent(a,b) does not imply pspace_independent(a,b) """ a_symbols = pspace(b).symbols b_symbols = pspace(a).symbols if len(a_symbols.intersect(b_symbols)) == 0: return True return None def rv_subs(expr, symbols=None): """ Given a random expression replace all random variables with their symbols. If symbols keyword is given restrict the swap to only the symbols listed. """ if symbols is None: symbols = random_symbols(expr) swapdict = dict([(rv, rv.symbol) for rv in symbols]) return expr.subs(swapdict)