# numpycuda_parsing.py


"""
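Module containing all the pyparsing-based code: the grammar for kernel
declarations, the declaration classes built from the parse result, and the
passes that rewrite the numpy-style kernel body into C-compatible syntax.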


"""

import numpy as np

from pyparsing import *

import pycuda.tools
# Lookup from dtype name (string) to the C type name registered with pycuda.
# Only entries of DTYPE_TO_NAME whose key is a string are kept.
# .items() is used rather than the Python-2-only .iteritems(), so the module
# also imports under Python 3; on Python 2 the result is the same.
dtype_to_ctype = {k:v for k,v in pycuda.compyte.dtypes.DTYPE_TO_NAME.items() if isinstance(k, str)}


#some general grammar definitions

#any of the known dtype names; stored in the parse result as 'dtype'
dtype_term = oneOf(' '.join(dtype_to_ctype.keys())).setResultsName('dtype')


#a letter or underscore followed by letters, digits or underscores; stored as 'identifier'
identifier = Word(alphas+'_', alphanums+'_').setResultsName('identifier')
#exactly one lowercase letter; used below as a named placeholder for a dimension size
dummy = Word(alphas.lower(),exact=1)


def sign_wrap(expr):
    """Return a grammar matching `expr` with an optional leading '-', combined into one token."""
    optional_minus = Optional(Literal('-'))
    return Combine(optional_minus + expr)
#numeric literals
#one or more digits
positive_integer = Word(nums)
#digits with an optional leading minus sign
integer = sign_wrap(positive_integer)

#a decimal point with optional digits on either side; note that both sides are optional
positive_floating = Combine( Optional( positive_integer) + '.' + Optional(positive_integer))

floating = sign_wrap(positive_floating)

#either an integer or a floating literal
number = Or([integer, floating])

colon = Literal(':')

#a dimension of an argument shape: a constant size, a colon, or a single-letter dummy
dimension = Or([positive_integer, colon, dummy])
#bracketed, comma-delimited list of dimensions; stored in the parse result as 'shape'
shape_expression  = nestedExpr('[',']',     delimitedList(dimension) ).setResultsName('shape')
#dtype, optional shape, identifier
input_argument = dtype_term + Optional(shape_expression).setResultsName('shape') + identifier

#'= number'; the '=' itself is suppressed and the number is stored as 'default'
default_value = Suppress( Literal('=')) + number.setResultsName('default')
#NOTE: dimension and shape_expression are rebound here WITHOUT the colon alternative.
#input_argument above was already built from the earlier, colon-accepting definitions
#and is not affected; only code that reads shape_expression after this point
#(decl_grammar uses it for the kernel shape) sees the colon-free version.
dimension = Or([positive_integer, dummy])
shape_expression  = nestedExpr('[',']',     delimitedList(dimension) ).setResultsName('shape')
#an output argument is an input argument followed by an optional default value
output_argument = input_argument + Optional(default_value)

def argument_list(argument):
    """
    Return a grammar for a nested expression (nestedExpr's default delimiters)
    whose content is a comma-delimited list of `argument`, each one grouped.
    """
    grouped_argument = Group(argument)
    return nestedExpr(content=delimitedList(grouped_argument))
#full kernel declaration:  outputs << kernel shape << inputs :
decl_grammar = (
    argument_list(output_argument).setResultsName('outputs')
    + '<<'
    + shape_expression
    + '<<'
    + argument_list(input_argument).setResultsName('inputs')
    + colon
)


class ArgumentDeclaration(object):
    """
    Declaration of a single kernel argument: identifier, dtype name and shape.

    The string builders below treat each shape entry as one of:
    an np.uint32 (constant size), the string ':' (size passed in as an extra
    argument), or anything else, which is emitted as 'dummy_<entry>'.

    argument_string reads self.immutable, which this class does not define;
    it is set by the subclasses.
    """

    def __init__(self, identifier, dtype, shape = ()):
        self.dtype = dtype
        self.shape = shape
        self.identifier = identifier

    @property
    def ndim(self):
        """number of entries in the shape"""
        return len(self.shape)

    @property
    def is_scalar(self):
        """True when the shape is empty"""
        return self.ndim == 0

    @property
    def is_array(self):
        """True when the shape is not empty"""
        return not self.is_scalar

    @property
    def ctype(self):
        """C type name looked up from the dtype name"""
        return dtype_to_ctype[self.dtype]

    @property
    def argument_string(self):
        """
        C parameter text for this argument, followed by one extra
        'unsigned const' shape parameter per ':' entry in the shape.
        """
        name = self.identifier
        pieces = ['{mutable}{type}{ptr} const {restrict}{identifier}'.format(
            mutable='const '*self.immutable,
            type=self.ctype,
            ptr='*'*self.is_array,
            restrict='__restrict__ '*self.is_array,
            identifier=name)]
        for axis, extent in enumerate(self.shape):
            if extent == ':':
                pieces.append(
                    'unsigned const {identifier}_shape_{dimension}'.format(identifier=name, dimension=axis))
        return ', '.join(pieces)

    @property
    def shape_string(self):
        """
        C definitions of the shape variables for every entry that is not ':',
        initialised from the constant itself or from the matching dummy variable.
        """
        lines = []
        for axis, extent in enumerate(self.shape):
            if extent == ':':
                continue    #these arrive as arguments instead; see argument_string
            if isinstance(extent, np.uint32):
                value = extent
            else:
                value = 'dummy_{}'.format(extent)
            lines.append(
                'unsigned const {identifier}_shape_{dimension} = {size};'.format(
                    identifier=self.identifier, dimension=axis, size=value))
        return '\n'.join(lines)

    @property
    def stride_string(self):
        """
        C definitions of the stride variables, last axis first (stride 1),
        each earlier stride being the next stride times the next axis' shape,
        followed by a total '<identifier>_size' variable.
        """
        name = self.identifier
        last_axis = len(self.shape) - 1
        current = '{identifier}_stride_{dimension}'.format(identifier=name, dimension=last_axis)
        lines = ['unsigned const {identifier} = {stride};'.format(identifier=current, stride=1)]
        #walk the remaining axes from second-to-last down to the first
        for axis in range(last_axis - 1, -1, -1):
            outer = '{identifier}_stride_{dimension}'.format(identifier=name, dimension=axis)
            extent = '{identifier}_shape_{dimension}'.format(identifier=name, dimension=axis+1)
            lines.append(
                'unsigned const {this} = {prev} * {size};'.format(this=outer, prev=current, size=extent))
            current = outer
        #add total element size as well, for good measure, even though not used anywhere atm
        first_extent = '{identifier}_shape_{dimension}'.format(identifier=name, dimension=0)
        lines.append(
            'unsigned const {identifier}_size = {prev} * {size};'.format(
                identifier=name, prev=current, size=first_extent))
        return '\n'.join(lines)


class InputDeclaration(ArgumentDeclaration):
    """argument declaration flagged as immutable"""

    immutable = True
class OutputDeclaration(ArgumentDeclaration):
    """
    argument declaration flagged as mutable, with an optional default value

    identifier: name of the argument
    dtype:      numpy dtype name; also used to look up the numpy scalar type
                that the default is converted with
    default:    default value, or None for no default
    shape:      shape of the argument; when empty or None a singleton shape is used
    """
    immutable = False

    def __init__(self, identifier, dtype, default = None, shape = None):
        self.identifier = identifier
        self.dtype = dtype
        #scalar outputs are upcast to singleton arrays
        self.shape = shape if shape else (np.uint32(1),)
        #compare against None explicitly: a falsy default such as 0 must still
        #be converted to the declared dtype instead of being stored as-is
        self.default = getattr(np, dtype)(default) if default is not None else None


from collections import OrderedDict
class KernelDeclaration(object):
    """
    holds all info defining a kernel declaration
    plus data structures to facilitate runtime argument parsing

    attributes:
        shape       kernel shape, as a tuple after scrubbing
        inputs      list of InputDeclaration
        outputs     list of OutputDeclaration
        dummies     set of dummy names found in any of the shapes
        arguments   OrderedDict identifier -> declaration; inputs first, and an
                    output with the same identifier replaces the input entry

    planned runtime use:
    at runtime, create dummy dict
    as we scan argument (value, decl) pairs, we check dummy[decl.shape[i]]==value.shape[i]
    if not set, set it
    this gives dummy:size dict we can add to kwargs
    when we concat all colons and dummies, we should have sufficient arguments

    build list of expected arguments?
    """
    def __init__(self, decl):
        """decl: the pyparsing result of the declaration grammar"""
        #store pyparsing result in a format conducive to further processing
        self.shape =  decl.shape
        self.inputs  = [InputDeclaration(**dict(arg)) for arg in decl.inputs[0]]
        self.outputs = [OutputDeclaration(**dict(arg)) for arg in decl.outputs[0]]

        self.dummies = set()
        def shape_scrubbing(terms):
            """
            postprocess shape objects:
            constant sizes become np.uint32, ':' is kept, anything else is
            recorded as a dummy; the result is stored back as a tuple
            """
            for term in terms:
                try:
                    shape = term.shape[0]        #take root of nestedexpr
                    newshape = []
                    for size in shape:
                        try:
                            #dimension known at compile time
                            size = np.uint32(size)
                        except Exception:
                            #runtime specified dimension
                            if not size == ':':
                                self.dummies.add(size)
                        newshape.append(size)

                    term.shape = tuple(newshape)
                except Exception:
                    #deliberate best effort: a term whose shape is not a parsed
                    #nested expression (e.g. a default shape) is left untouched.
                    #Exception rather than a bare except, so that
                    #KeyboardInterrupt and SystemExit are not swallowed
                    pass
        shape_scrubbing([self])
        shape_scrubbing(self.inputs)
        shape_scrubbing(self.outputs)

        self.arguments = OrderedDict()
        #outputs go last, so they overwrite in/out params with their output version;
        #that is as planned, but need a consistency check here
        for arg in self.inputs + self.outputs:
            self.arguments[arg.identifier] = arg


    @property
    def identifiers(self):
        """list of argument identifiers, in declaration order"""
        return list(self.arguments.keys())

    @property
    def dummy_string(self):
        """C parameter text for all dummies; empty string when there are none"""
        return ', '.join('unsigned const dummy_{}'.format(dummy) for dummy in self.dummies)
    @property
    def argument_string(self):
        """C parameter text for all arguments, followed by the dummies if any"""
        #.values() rather than the Python-2-only .itervalues()
        args = [arg.argument_string for arg in self.arguments.values()]
        dummy = self.dummy_string
        #only append the dummies when there are any; joining an empty string
        #would leave a dangling ',' at the end of the parameter list
        if dummy:
            args.append(dummy)
        return ',\n'.join(args)
    @property
    def shape_string(self):
        """C definitions of the kernel_shape_<i> variables"""
        shape_argument = 'unsigned const kernel_shape_{dimension} = {size};'
        shape_arguments = [
            shape_argument.format(dimension=dimension, size = size if isinstance(size, np.uint32) else 'dummy_{}'.format(size))
                for dimension, size in enumerate(self.shape) if not size == ':'] #either from constant or dummy
        return '\n'.join(shape_arguments)
    @property
    def init_string(self):
        """kernel shape, array shape and array stride definitions, separated by blank lines"""
        arrays = [arg for arg in self.arguments.values() if arg.is_array]
        kernel =  self.shape_string
        shapes =  '\n'.join([arg.shape_string  for arg in arrays])
        strides = '\n'.join([arg.stride_string for arg in arrays])
        return '\n\n'.join([kernel, shapes, strides])

def parsing(source):
    """
    split source into declaration and body, and rewrite the body

    returns (decl, body): the KernelDeclaration built from the declaration,
    and the body text after all replace_* passes have been applied
    """
    declaration_part = Group(decl_grammar).setResultsName('decl')
    body_part = Word(printables+' \t\r\n').setResultsName('body')
    parsed = (declaration_part + body_part).parseString(source)

    decl = KernelDeclaration(parsed.decl)

    #transform the body according to the declaration, one pass after another
    body = replace_typing(parsed.body)
    for rewrite in (replace_shape_syntax, replace_array_syntax, replace_for_syntax):
        body = rewrite(body, decl)

    return decl, body


def replace_typing(source):
    """
    swap every numpy dtype name found in source for its c-type name

    plain find and replace over the text; no semantic analysis is done,
    so this could be made more efficient and intelligent
    """
    def to_ctype(s, l, t):
        """parse action: the matched dtype name is replaced by its c-type"""
        return dtype_to_ctype[t[0]]

    #work on a copy, so the shared dtype_term does not get a parse action attached
    return dtype_term.copy().setParseAction(to_ctype).transformString(source)


def replace_shape_syntax(source, decl):
    """
    rewrite 'arrayidentifier.shape[dim]' into the C variable 'arrayidentifier_shape_dim'

    text that does not match the whole pattern (e.g. a misspelled 'shape')
    is silently left as it is; the cuda compiler is expected to complain about it.
    open question: would it be sufficient and correct to flag every
    'arrayidentifier.' + whatever that fails to match the whole syntax?

    raises ParseFatalException for an unknown array or an invalid dimension,
    and a plain Exception when the brackets hold anything but a single index
    """
    array_name = Word(alphanums+'_').setResultsName('identifier') # + Optional( Word(alphanums))
    dim_index = Word(nums)
    bracketed = nestedExpr('[',']', dim_index).setResultsName('dimension')
    shape_expr = array_name + Suppress( Literal('.shape')) + bracketed

    def to_c_name(s, l, t):
        """parse action: validate the match and return the c-compatible name"""
        name = t.identifier
        dims = t.dimension[0]
        if len(dims) != 1:
            raise Exception('only simple shape indexing allows')
        dim = dims[0]

        if name not in decl.arguments:
            raise ParseFatalException("array '{identifier}' is not defined".format(identifier=name))
        arg = decl.arguments[name]

        try:
            #the value is not needed; the lookup only checks that the dimension exists
            arg.shape[int(dim)]
        except Exception:
            raise ParseFatalException('{identifier}.shape[{dimension}] is invalid'.format(identifier=name, dimension=dim))

        return '{identifier}_shape_{dimension}'.format(identifier=name, dimension=dim)

    shape_expr.setParseAction(to_c_name)
    return shape_expr.transformString(source)


def replace_array_syntax(source, decl):
    """
    rewrite weave.blitz style indexing 'name(i, j)' into 'name[name_stride_0*i+name_stride_1*j]',
    i.e. an inner product of the indices with the strides

    only the identifiers declared in decl are matched.
    partial indexing is disallowed for now: the number of indices must equal
    the ndim of the argument, otherwise an Exception is raised.
    bounds checking code could optionally be inserted here as a debugging aid
    """
    array_name = oneOf(' '.join(decl.identifiers)).setResultsName('identifier')
    single_index = Or([identifier, positive_integer])
    index_list = nestedExpr('(',')', delimitedList( single_index)).setResultsName('indices')
    index_expr = array_name + index_list

    def to_flat_index(s, l, t):
        """parse action: validate the match and return the c-compatible indexing"""
        name = t.identifier
        indices = t.indices[0]

        if name not in decl.arguments:
            raise ParseFatalException("array '{identifier}' is not defined".format(identifier=name))
        arg = decl.arguments[name]

        if len(indices) != arg.ndim:
            raise Exception("indexing '{identifier}' requires {ndim} arguments".format(identifier=name, ndim=arg.ndim))

        #one stride*index term per axis
        terms = []
        for i, idx in enumerate(indices):
            terms.append('{identifier}_stride_{i}*{idx}'.format(identifier=name, i=i, idx=idx))
        return '{identifier}[{offset}]'.format(identifier=name, offset='+'.join(terms))

    index_expr.setParseAction(to_flat_index)
    return index_expr.transformString(source)

def replace_for_syntax(source, arg_info):
    """
    replace: 'for (id in start:stop:step)'
    with:    'for (int id=start; id < stop; id+=step)'
    the step is optional and defaults to 1
    rather trivial syntactic sugar indeed

    arg_info is accepted but not used here.
    an unrolling mechanism could be implemented here too,
    in case all params are known at compile time, and the loop is small?
    """
    bound = Or([sign_wrap(identifier), integer])
    separator = Suppress(Literal(':'))
    start = bound.setResultsName('start')
    stop = bound.setResultsName('stop')
    step = Optional(Combine(separator + bound), '1').setResultsName('step')
    span = start + separator + stop + step
    loop_expr = Literal('for') + '(' + identifier.setResultsName('index') + Literal('in') + span + ')'

    def to_c_loop(s, l, t):
        """parse action: fill the named results into a C for-loop header"""
        return 'for (int {index}={start}; {index} < {stop}; {index}+={step})'.format(**dict(t))

    loop_expr.setParseAction(to_c_loop)
    return loop_expr.transformString(source)


def replace_output_reference_syntax(source):
    """
    scalar output args are passed by reference; they are actually shape=(1,) arrays
    we might need them by reference for atomic operations
    perhaps just add a & in those cases; treat it as a value type by placing a * everywhere
    in the body of code. does cuda C handle &*ptr==ptr correctly?
    """