Source code for sympy.parsing.sympy_parser

"""Transform a string with Python-like source code into SymPy expression. """

from sympy_tokenize import \
    generate_tokens, untokenize, TokenError, \
    NUMBER, STRING, NAME, OP, ENDMARKER

from keyword import iskeyword
from StringIO import StringIO
import re
import unicodedata

from sympy.core.basic import Basic, C

_re_repeated = re.compile(r"^(\d*)\.(\d*)\[(\d+)\]$")

def _token_splittable(token):
    """
    Predicate for whether a token name can be split into multiple tokens.

    A token is splittable if it does not contain an underscore character and
    it is not the name of a Greek letter. This is used to implicitly convert
    expressions like 'xyz' into 'x*y*z'.
    """
    if '_' in token:
        return False
    else:
        try:
            return not unicodedata.lookup('GREEK SMALL LETTER ' + token)
        except KeyError:
            pass
    if len(token) > 1:
        return True
    return False


def _token_callable(token, local_dict, global_dict, nextToken=None):
    """
    Predicate for whether a token name represents a callable function.
    """
    func = local_dict.get(token[1])
    if not func:
        func = global_dict.get(token[1])
    is_Function = getattr(func, 'is_Function', False)
    if (is_Function or
        (callable(func) and not hasattr(func, 'is_Function')) or
            isinstance(nextToken, AppliedFunction)):
        return True
    return False


def _add_factorial_tokens(name, result):
    if result == [] or result[-1][1] == '(':
        raise TokenError()

    beginning = [(NAME, name), (OP, '(')]
    end = [(OP, ')')]

    diff = 0
    length = len(result)

    for index, token in enumerate(result[::-1]):
        toknum, tokval = token
        i = length - index - 1

        if tokval == ')':
            diff += 1
        elif tokval == '(':
            diff -= 1

        if diff == 0:
            if i - 1 >= 0 and result[i - 1][0] == NAME:
                return result[:i - 1] + beginning + result[i - 1:] + end
            else:
                return result[:i] + beginning + result[i:] + end

    return result


class AppliedFunction(object):
    """
    A group of tokens representing a function and its arguments.

    `exponent` is for handling the shorthand sin^2, ln^2, etc.
    """
    def __init__(self, function, args, exponent=None):
        if exponent is None:
            exponent = []
        self.function = function
        self.args = args
        self.exponent = exponent
        self.items = ['function', 'args', 'exponent']

    def expand(self):
        """Return a list of tokens representing the function"""
        result = []
        result.append(self.function)
        result.extend(self.args)
        return result

    def __getitem__(self, index):
        return getattr(self, self.items[index])

    def __repr__(self):
        return "AppliedFunction(%s, %s, %s)" % (self.function, self.args,
                                                self.exponent)


class ParenthesisGroup(list):
    """List of tokens representing an expression in parentheses."""
    pass


def _flatten(result):
    result2 = []
    for tok in result:
        if isinstance(tok, AppliedFunction):
            result2.extend(tok.expand())
        else:
            result2.append(tok)
    return result2


def _group_parentheses(recursor):
    def _inner(tokens, local_dict, global_dict):
        """Group tokens between parentheses with ParenthesisGroup.

        Also processes those tokens recursively.

        """
        result = []
        stacks = []
        stacklevel = 0
        for token in tokens:
            if token[0] == OP:
                if token[1] == '(':
                    stacks.append(ParenthesisGroup([]))
                    stacklevel += 1
                elif token[1] == ')':
                    stacks[-1].append(token)
                    stack = stacks.pop()

                    if len(stacks) > 0:
                        # We don't recurse here since the upper-level stack
                        # would reprocess these tokens
                        stacks[-1].extend(stack)
                    else:
                        # Recurse here to handle nested parentheses
                        # Strip off the outer parentheses to avoid an infinite loop
                        inner = stack[1:-1]
                        inner = recursor(inner,
                                         local_dict,
                                         global_dict)
                        parenGroup = [stack[0]] + inner + [stack[-1]]
                        result.append(ParenthesisGroup(parenGroup))
                    stacklevel -= 1
                    continue
            if stacklevel:
                stacks[-1].append(token)
            else:
                result.append(token)
        return result
    return _inner


def _apply_functions(tokens, local_dict, global_dict):
    """Convert a NAME token + ParenthesisGroup into an AppliedFunction.

    Note that ParenthesisGroups, if not applied to any function, are
    converted back into lists of tokens.

    """
    result = []
    symbol = None
    for tok in tokens:
        if tok[0] == NAME:
            symbol = tok
            result.append(tok)
        elif isinstance(tok, ParenthesisGroup):
            if symbol:
                result[-1] = AppliedFunction(symbol, tok)
                symbol = None
            else:
                result.extend(tok)
        else:
            symbol = None
            result.append(tok)
    return result


def _implicit_multiplication(tokens, local_dict, global_dict):
    """Implicitly adds '*' tokens.

    Cases:

    - Two AppliedFunctions next to each other ("sin(x)cos(x)")

    - AppliedFunction next to an open parenthesis ("sin x (cos x + 1)")

    - A close parenthesis next to an AppliedFunction ("(x+2)sin x")\

    - A close parenthesis next to an open parenthesis ("(x+2)(x+3)")

    - AppliedFunction next to an implicitly applied function ("sin(x)cos x")

    """
    result = []
    for tok, nextTok in zip(tokens, tokens[1:]):
        result.append(tok)
        if (isinstance(tok, AppliedFunction) and
                isinstance(nextTok, AppliedFunction)):
            result.append((OP, '*'))
        elif (isinstance(tok, AppliedFunction) and
              nextTok[0] == OP and nextTok[1] == '('):
            # Applied function followed by an open parenthesis
            result.append((OP, '*'))
        elif (tok[0] == OP and tok[1] == ')' and
              isinstance(nextTok, AppliedFunction)):
            # Close parenthesis followed by an applied function
            result.append((OP, '*'))
        elif (tok[0] == OP and tok[1] == ')' and
              nextTok[0] == NAME):
            # Close parenthesis followed by an implicitly applied function
            result.append((OP, '*'))
        elif (tok[0] == nextTok[0] == OP
              and tok[1] == ')' and nextTok[1] == '('):
            # Close parenthesis followed by an open parenthesis
            result.append((OP, '*'))
        elif (isinstance(tok, AppliedFunction) and nextTok[0] == NAME):
            # Applied function followed by implicitly applied function
            result.append((OP, '*'))
    result.append(tokens[-1])
    return result


def _implicit_application(tokens, local_dict, global_dict):
    """Adds parentheses as needed after functions."""
    result = []
    appendParen = 0  # number of closing parentheses to add
    skip = 0  # number of tokens to delay before adding a ')' (to
              # capture **, ^, etc.)
    exponentSkip = False  # skipping tokens before inserting parentheses to
                          # work with function exponentiation
    for tok, nextTok in zip(tokens, tokens[1:]):
        result.append(tok)
        if (tok[0] == NAME and
            nextTok[0] != OP and
            nextTok[0] != ENDMARKER):
            if _token_callable(tok, local_dict, global_dict, nextTok):
                result.append((OP, '('))
                appendParen += 1
        # name followed by exponent - function exponentiation
        elif (tok[0] == NAME and nextTok[0] == OP and nextTok[1] == '**'):
            if _token_callable(tok, local_dict, global_dict):
                exponentSkip = True
        elif exponentSkip:
            # if the last token added was an applied function (i.e. the
            # power of the function exponent) OR a multiplication (as
            # implicit multiplication would have added an extraneous
            # multiplication)
            if (isinstance(tok, AppliedFunction)
                or (tok[0] == OP and tok[1] == '*')):
                # don't add anything if the next token is a multiplication
                # or if there's already a parenthesis (if parenthesis, still
                # stop skipping tokens)
                if not (nextTok[0] == OP and nextTok[1] == '*'):
                    if not(nextTok[0] == OP and nextTok[1] == '('):
                        result.append((OP, '('))
                        appendParen += 1
                    exponentSkip = False
        elif appendParen:
            if nextTok[0] == OP and nextTok[1] in ('^', '**', '*'):
                skip = 1
                continue
            if skip:
                skip -= 1
                continue
            result.append((OP, ')'))
            appendParen -= 1

    result.append(tokens[-1])

    if appendParen:
        result.extend([(OP, ')')] * appendParen)
    return result


[docs]def function_exponentiation(tokens, local_dict, global_dict): """Allows functions to be exponentiated, e.g. ``cos**2(x)``. Example: >>> from sympy.parsing.sympy_parser import (parse_expr, ... standard_transformations, function_exponentiation) >>> transformations = standard_transformations + (function_exponentiation,) >>> parse_expr('sin**4(x)', transformations=transformations) sin(x)**4 """ result = [] exponent = [] consuming_exponent = False level = 0 for tok, nextTok in zip(tokens, tokens[1:]): if tok[0] == NAME and nextTok[0] == OP and nextTok[1] == '**': if _token_callable(tok, local_dict, global_dict): consuming_exponent = True elif consuming_exponent: exponent.append(tok) # only want to stop after hitting ) if tok[0] == nextTok[0] == OP and tok[1] == ')' and nextTok[1] == '(': consuming_exponent = False # if implicit multiplication was used, we may have )*( instead if tok[0] == nextTok[0] == OP and tok[1] == '*' and nextTok[1] == '(': consuming_exponent = False del exponent[-1] continue elif exponent and not consuming_exponent: if tok[0] == OP: if tok[1] == '(': level += 1 elif tok[1] == ')': level -= 1 if level == 0: result.append(tok) result.extend(exponent) exponent = [] continue result.append(tok) result.append(tokens[-1]) if exponent: result.extend(exponent) return result
[docs]def split_symbols_custom(predicate): """Creates a transformation that splits symbol names. ``predicate`` should return True if the symbol name is to be split. For instance, to retain the default behavior but avoid splitting certain symbol names, a predicate like this would work: >>> from sympy.parsing.sympy_parser import (parse_expr, _token_splittable, ... standard_transformations, implicit_multiplication, ... split_symbols_custom) >>> def can_split(symbol): ... if symbol not in ('list', 'of', 'unsplittable', 'names'): ... return _token_splittable(symbol) ... return False ... >>> transformation = split_symbols_custom(can_split) >>> parse_expr('unsplittable', transformations=standard_transformations + ... (transformation, implicit_multiplication)) unsplittable """ def _split_symbols(tokens, local_dict, global_dict): result = [] split = False for tok in tokens: if tok[0] == NAME and tok[1] == 'Symbol': split = True elif split and tok[0] == NAME: symbol = tok[1][1:-1] if predicate(symbol): for char in symbol: result.extend([(NAME, "'%s'" % char), (OP, ')'), (NAME, 'Symbol'), (OP, '(')]) # Delete the last three tokens: get rid of the extraneous # Symbol( we just added, and also get rid of the last ) # because the closing parenthesis of the original Symbol is # still there del result[-3:] split = False continue else: split = False result.append(tok) return result return _split_symbols #: Splits symbol names for implicit multiplication. #: #: Intended to let expressions like ``xyz`` be parsed as ``x*y*z``. Does not #: split Greek character names, so ``theta`` will *not* become #: ``t*h*e*t*a``. Generally this should be used with #: ``implicit_multiplication``.
split_symbols = split_symbols_custom(_token_splittable)
[docs]def implicit_multiplication(result, local_dict, global_dict): """Makes the multiplication operator optional in most cases. Use this before :func:`implicit_application`, otherwise expressions like ``sin 2x`` will be parsed as ``x * sin(2)`` rather than ``sin(2*x)``. Example: >>> from sympy.parsing.sympy_parser import (parse_expr, ... standard_transformations, implicit_multiplication) >>> transformations = standard_transformations + (implicit_multiplication,) >>> parse_expr('3 x y', transformations=transformations) 3*x*y """ # These are interdependent steps, so we don't expose them separately for step in (_group_parentheses(implicit_multiplication), _apply_functions, _implicit_multiplication): result = step(result, local_dict, global_dict) result = _flatten(result) return result
[docs]def implicit_application(result, local_dict, global_dict): """Makes parentheses optional in some cases for function calls. Use this after :func:`implicit_multiplication`, otherwise expressions like ``sin 2x`` will be parsed as ``x * sin(2)`` rather than ``sin(2*x)``. Example: >>> from sympy.parsing.sympy_parser import (parse_expr, ... standard_transformations, implicit_application) >>> transformations = standard_transformations + (implicit_application,) >>> parse_expr('cot z + csc z', transformations=transformations) csc(z) + cot(z) """ for step in (_group_parentheses(implicit_application), _apply_functions, _implicit_application,): result = step(result, local_dict, global_dict) result = _flatten(result) return result
[docs]def implicit_multiplication_application(result, local_dict, global_dict): """Allows a slightly relaxed syntax. - Parentheses for single-argument method calls are optional. - Multiplication is implicit. - Symbol names can be split (i.e. spaces are not needed between symbols). - Functions can be exponentiated. Example: >>> from sympy.parsing.sympy_parser import (parse_expr, ... standard_transformations, implicit_multiplication_application) >>> parse_expr("10sin**2 x**2 + 3xyz + tan theta", ... transformations=(standard_transformations + ... (implicit_multiplication_application,))) 3*x*y*z + 10*sin(x**2)**2 + tan(theta) """ for step in (split_symbols, implicit_multiplication, implicit_application, function_exponentiation): result = step(result, local_dict, global_dict) return result
[docs]def auto_symbol(tokens, local_dict, global_dict): """Inserts calls to ``Symbol`` for undefined variables.""" result = [] prevTok = (None, None) tokens.append((None, None)) # so zip traverses all tokens for tok, nextTok in zip(tokens, tokens[1:]): tokNum, tokVal = tok nextTokNum, nextTokVal = nextTok if tokNum == NAME: name = tokVal if (name in ['True', 'False', 'None'] or iskeyword(name) or name in local_dict # Don't convert attribute access or (prevTok[0] == OP and prevTok[1] == '.') # Don't convert keyword arguments or (prevTok[0] == OP and prevTok[1] in ('(', ',') and nextTokNum == OP and nextTokVal == '=')): result.append((NAME, name)) continue elif name in global_dict: obj = global_dict[name] if isinstance(obj, (Basic, type)) or callable(obj): result.append((NAME, name)) continue result.extend([ (NAME, 'Symbol'), (OP, '('), (NAME, repr(str(name))), (OP, ')'), ]) else: result.append((tokNum, tokVal)) prevTok = (tokNum, tokVal) return result
[docs]def factorial_notation(tokens, local_dict, global_dict): """Allows standard notation for factorial.""" result = [] prevtoken = '' for toknum, tokval in tokens: if toknum == OP: op = tokval if op == '!!': if prevtoken == '!' or prevtoken == '!!': raise TokenError result = _add_factorial_tokens('factorial2', result) elif op == '!': if prevtoken == '!' or prevtoken == '!!': raise TokenError result = _add_factorial_tokens('factorial', result) else: result.append((OP, op)) else: result.append((toknum, tokval)) prevtoken = tokval return result
[docs]def convert_xor(tokens, local_dict, global_dict): """Treats XOR, ``^``, as exponentiation, ``**``.""" result = [] for toknum, tokval in tokens: if toknum == OP: if tokval == '^': result.append((OP, '**')) else: result.append((toknum, tokval)) else: result.append((toknum, tokval)) return result
[docs]def auto_number(tokens, local_dict, global_dict): """Converts numeric literals to use SymPy equivalents. Complex numbers use ``I``; integer literals use ``Integer``, float literals use ``Float``, and repeating decimals use ``Rational``. """ result = [] prevtoken = '' for toknum, tokval in tokens: if toknum == NUMBER: number = tokval postfix = [] if number.endswith('j') or number.endswith('J'): number = number[:-1] postfix = [(OP, '*'), (NAME, 'I')] if '.' in number or (('e' in number or 'E' in number) and not (number.startswith('0x') or number.startswith('0X'))): match = _re_repeated.match(number) if match is not None: # Clear repeating decimals, e.g. 3.4[31] -> (3 + 4/10 + 31/990) pre, post, repetend = match.groups() zeros = '0'*len(post) post, repetends = [w.lstrip('0') for w in [post, repetend]] # or else interpreted as octal a = pre or '0' b, c = post or '0', '1' + zeros d, e = repetends, ('9'*len(repetend)) + zeros seq = [ (OP, '('), (NAME, 'Integer'), (OP, '('), (NUMBER, a), (OP, ')'), (OP, '+'), (NAME, 'Rational'), (OP, '('), ( NUMBER, b), (OP, ','), (NUMBER, c), (OP, ')'), (OP, '+'), (NAME, 'Rational'), (OP, '('), ( NUMBER, d), (OP, ','), (NUMBER, e), (OP, ')'), (OP, ')'), ] else: seq = [(NAME, 'Float'), (OP, '('), (NUMBER, repr(str(number))), (OP, ')')] else: seq = [(NAME, 'Integer'), (OP, '('), ( NUMBER, number), (OP, ')')] result.extend(seq + postfix) else: result.append((toknum, tokval)) return result
[docs]def rationalize(tokens, local_dict, global_dict): """Converts floats into ``Rational``. Run AFTER ``auto_number``.""" result = [] passed_float = False for toknum, tokval in tokens: if toknum == NAME: if tokval == 'Float': passed_float = True tokval = 'Rational' result.append((toknum, tokval)) elif passed_float == True and toknum == NUMBER: passed_float = False result.append((STRING, tokval)) else: result.append((toknum, tokval)) return result #: Standard transformations for :func:`parse_expr`. #: Inserts calls to :class:`Symbol`, :class:`Integer`, and other SymPy #: datatypes and allows the use of standard factorial notation (e.g. ``x!``).
standard_transformations = (auto_symbol, auto_number, factorial_notation)
[docs]def stringify_expr(s, local_dict, global_dict, transformations): """ Converts the string ``s`` to Python code, in ``local_dict`` Generally, ``parse_expr`` should be used. """ tokens = [] input_code = StringIO(s.strip()) for toknum, tokval, _, _, _ in generate_tokens(input_code.readline): tokens.append((toknum, tokval)) for transform in transformations: tokens = transform(tokens, local_dict, global_dict) return untokenize(tokens)
[docs]def eval_expr(code, local_dict, global_dict): """ Evaluate Python code generated by ``stringify_expr``. Generally, ``parse_expr`` should be used. """ expr = eval( code, global_dict, local_dict) # take local objects in preference return expr
[docs]def parse_expr(s, local_dict=None, transformations=standard_transformations, global_dict=None): """Converts the string ``s`` to a SymPy expression, in ``local_dict`` Parameters ========== s : str The string to parse. local_dict : dict, optional A dictionary of local variables to use when parsing. global_dict : dict, optional A dictionary of global variables. By default, this is initialized with ``from sympy import *``; provide this parameter to override this behavior (for instance, to parse ``"Q & S"``). transformations : tuple, optional A tuple of transformation functions used to modify the tokens of the parsed expression before evaluation. The default transformations convert numeric literals into their SymPy equivalents, convert undefined variables into SymPy symbols, and allow the use of standard mathematical factorial notation (e.g. ``x!``). Examples ======== >>> from sympy.parsing.sympy_parser import parse_expr >>> parse_expr("1/2") 1/2 >>> type(_) <class 'sympy.core.numbers.Half'> >>> from sympy.parsing.sympy_parser import standard_transformations,\\ ... implicit_multiplication_application >>> transformations = (standard_transformations + ... (implicit_multiplication_application,)) >>> parse_expr("2x", transformations=transformations) 2*x See Also ======== stringify_expr, eval_expr, standard_transformations, implicit_multiplication_application """ if local_dict is None: local_dict = {} if global_dict is None: global_dict = {} exec 'from sympy import *' in global_dict code = stringify_expr(s, local_dict, global_dict, transformations) return eval_expr(code, local_dict, global_dict)