|
#!/usr/bin/env python |
|
# -*- coding: utf-8 -*- |
|
|
|
# cy.py: a transpiler for an alternate syntax for C.
|
# Note: this is an incomplete work-in-progress. |
|
|
|
|
|
# Data structures: |
|
|
|
# A token is a tuple. |
|
# First element is a string 'token'. |
|
# Second element is a string which is the token type (e.g. 'OBRACKET'). |
|
# Third element is a string which is the source text of the token (e.g. '['). |
|
# e.g. ('token', 'OBRACKET', '[') |
|
# e.g. ('token', 'IDENTIFIER', 'foo') |
|
# e.g. ('token', 'S', ' ') |
|
|
|
|
|
# An AST node is a tuple. |
|
# First element is a string 'ast'. |
|
# Second element is a string which is the AST node type (e.g. 'vardecl'). |
|
# Third element is a list of the child tuples (AST nodes or tokens).
|
# e.g. ('ast', 'type', [('token', 'IDENTIFIER', 'int')]) |
|
|
|
|
|
# The grammar below uses this EBNF notation:
|
# - lowercase are AST nodes |
|
# - ALLCAPS are tokens (terminals) |
|
# - Concatenation: rule1 = rule2 rule3 rule4 |
|
# - Alternation: rule1 = rule2 | rule3 | rule4 |
|
# - Repetition: rule1 = rule2 { rule3 } |
|
# - Grouping: rule1 = (rule2 rule3) | rule4 |
|
|
|
# Grammar: |
|
# |
|
# program = statement { statement } |
|
# |
|
# statement = ( linestmt NL ) | blockstmt |
|
# linestmt = declassign | assign | vardecl | rvalue | return | BREAK | CONTINUE | INCLUDE | DEFINE |
|
# blockstmt = fundecl | if | while | for | struct | scope | comment | blankline |
|
# comment = [S] ( LCOMMENT | BCOMMENT ) NL |
|
# blankline = [S] NL |
|
# |
|
# declassign = vardecl S EQ S rvalue |
|
# assign = rvalue S aoperator S rvalue |
|
# aoperator = PLUSEQ | MINUSEQ | STAREQ | SLASHEQ | PERCENTEQ |
|
# | AMPEQ | PIPEEQ | CARATEQ | LTLTEQ | GTGTEQ |
|
# | EQ |
|
# vardecl = [storage S] IDENTIFIER COLON S type |
|
# |
|
# storage = [ AUTO | REGISTER | STATIC | EXTERN ] [THREADLOCAL] |
|
# |
|
# fundecl = [storage S] {fspecifier S} FUNC S IDENTIFIER fundeclargs [fundeclret] (scope | NL) |
|
# fspecifier = INLINE | NORETURN |
|
# fundeclargs = OPAREN [ vardecl { COMMA S vardecl } ] CPAREN |
|
# fundeclret = S ARROW S type |
|
# |
|
# return = RETURN [ S rvalue ] |
|
# |
|
# if = IF S rvalue scope { elif } [else] |
|
# elif = ELIF S rvalue scope |
|
# else = ELSE scope |
|
# while = WHILE S rvalue scope |
|
# for = FOR S [linestmt] SEMICOLON S rvalue SEMICOLON S linestmt { COMMA S linestmt } scope |
|
# scope = COLON NL INDENT statement { statement } DEDENT |
|
# struct = STRUCT S IDENTIFIER structscope
|
# structscope = COLON NL INDENT vardecl NL { vardecl NL } DEDENT |
|
# |
|
# rvalue = ( ( ternary | binary | unary | IDENTIFIER ) { dotted | arrowed | indexed | called } ) |
|
# | type | cast | INTLIT | FLOATLIT | STRINGLIT | CHARLIT | BOOLLIT |
|
# cast = CAST called |
|
# |
|
# unary = uoperator OPAREN rvalue CPAREN |
|
# uoperator = MINUS | AMP | STAR | BANG | TILDE |
|
# binary = OPAREN rvalue S boperator S rvalue CPAREN |
|
# boperator = LTLT | GTGT | LTEQ | GTEQ | EQEQ | BANGEQ | AMPAMP | BARBAR |
|
# | PLUS | MINUS | STAR | SLASH | PERCENT | LT | GT |
|
# | BANG | AMP | BAR | TILDE | CARAT |
|
# ternary = OPAREN rvalue S QMARK S rvalue S COLON S rvalue CPAREN
|
# |
|
# dotted = PERIOD IDENTIFIER |
|
# arrowed = ARROW IDENTIFIER |
|
# indexed = OBRACKET rvalue CBRACKET |
|
# called = OPAREN [ rvalue { COMMA S rvalue } ] CPAREN |
|
# |
|
# type = ARRAY OBRACKET rvalue CBRACKET LT type GT |
|
# | ( ARRAY | POINTER | FUNCTION | tqualifier ) LT type GT |
|
# | IDENTIFIER { S IDENTIFIER } |
|
# tqualifier = CONST | VOLATILE | RESTRICT | ATOMIC |
|
# |
|
# ws = (S | NL) { (S | NL) } |
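#
# An illustrative sketch of the intended translation (the concrete token
# definitions live in tokendefs.txt and keywords.txt):
#
#   func add(a: int, b: int) -> int:
#       return (a + b)
#
# should transpile to:
#
#   int add(int a, int b) {
#       return (a + b);
#   }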
|
|
|
|
|
# Token data structure utils: |
|
|
|
def is_token(token): |
|
"""Returns whether the arg is a token.""" |
|
return isinstance(token, tuple) and len(token) > 0 and token[0] == 'token' |
|
|
|
|
|
def token_type(token): |
|
"""Returns the type of the token (e.g. 'IDENTIFIER').""" |
|
assert is_token(token) |
|
return token[1] |
|
|
|
|
|
def token_text(token): |
|
"""Returns the source text of the token (e.g. 'foo' for a 'IDENTIFIER' token).""" |
|
assert is_token(token) |
|
return token[2] |
|
|
|
|
|
def is_toktype(token, toktype): |
|
""" |
|
Returns whether the token is of the given type. |
|
_toktype_ is e.g. 'IDENTIFIER', 'COLON', etc. |
|
""" |
|
return is_token(token) and token_type(token) == toktype |
|
|
|
|
|
|
|
# Tokenizer implementation: |
|
|
|
def tokenize(tokendefs, keyword_map, input, indent=0): |
|
"""Uses tokendefs to tokenize the 'input' string and return a list of tokens""" |
|
tokens = [] |
|
offset = 0 |
|
previous = None |
|
while offset < len(input): |
|
for (token_name, regex) in tokendefs: |
|
m = regex.match(input, offset) |
|
if m is not None: |
|
matched_text = m.group(0) |
|
offset += len(matched_text) |
|
token = ('token', token_name, matched_text) |
|
|
|
# special-case: distinguish keywords from identifiers |
|
token = keyword_specialcases(token, keyword_map) |
|
|
|
# special-case: implement "offside-rule" |
|
(tokens2, indent) = offside_rule(token, previous, indent) |
|
|
|
tokens += tokens2 |
|
previous = token |
|
break |
|
else: |
|
raise Exception("Couldn't tokenize starting at '%s...'" % input[offset:offset+16]) |
|
|
|
# special-case: add any needed implicit trailing DEDENT tokens |
|
while indent > 0: |
|
tokens.append(('token', 'DEDENT', '')) |
|
indent -= 1 |
|
|
|
return tokens |
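
# A minimal usage sketch (with hypothetical inline token definitions; the
# real ones are loaded from tokendefs.txt via load_tokendefs below):
#
#   import re
#   tokendefs = [
#       ('IDENTIFIER', re.compile(r'[a-zA-Z][a-zA-Z0-9]*')),
#       ('COLON', re.compile(r':')),
#       ('S', re.compile(r' +')),
#       ('NL', re.compile(r'\n')),
#   ]
#   tokenize(tokendefs, {}, 'x: int\n')
#   # => [('token', 'IDENTIFIER', 'x'), ('token', 'COLON', ':'),
#   #     ('token', 'S', ' '), ('token', 'IDENTIFIER', 'int'),
#   #     ('token', 'NL', '\n')]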
|
|
|
|
|
INDENT_UNIT = 4
|
def offside_rule(token, previous, indent_level): |
|
""" |
|
Implement the "offside rule" indentation-based scoping mechanism. |
|
See https://en.wikipedia.org/wiki/Off-side_rule |
|
|
|
Args: |
|
- token: the current token |
|
- previous: the previous token |
|
    - indent_level: the indent level before encountering this token
|
|
|
Returns a list of replacement tokens and the new indentation level. |
|
|
|
If the previous token was a NL and the indentation level has changed, |
|
inject INDENT or DEDENT tokens as needed. |
|
Otherwise, return the token unmodified. |
|
|
|
Big-picture example: the token stream |
|
COLON NL S RETURN NL |
|
should become |
|
COLON NL INDENT RETURN NL DEDENT |
|
""" |
|
|
|
def get_indent_level(text): |
|
""" |
|
Determine the indentation level of the text. |
|
TODO: this implementation is pretty naive. |
|
""" |
|
indentation = text.split('\n')[-1] |
|
assert len(indentation) % INDENT_UNIT == 0 |
|
        return len(indentation) // INDENT_UNIT  # integer division keeps the level an int
|
|
|
assert is_token(token) |
|
|
|
if previous is None: |
|
return ([token], indent_level) |
|
assert is_token(previous) |
|
|
|
if not is_toktype(previous, 'NL'): |
|
return ([token], indent_level) |
|
|
|
tokens = [] |
|
if is_toktype(token, 'S'): |
|
text = token_text(token) |
|
new_level = get_indent_level(text) |
|
else: |
|
new_level = 0 |
|
|
|
for _ in range(new_level - indent_level): |
|
tokens.append(('token', 'INDENT', ' ')) |
|
for _ in range(indent_level - new_level): |
|
tokens.append(('token', 'DEDENT', '')) |
|
|
|
if not is_toktype(token, 'S'): |
|
tokens.append(token) |
|
|
|
return (tokens, new_level) |
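
# For example, immediately after a NL token at indent level 0, a four-space
# S token becomes a single INDENT (a sketch, assuming the S/NL token types
# from tokendefs.txt):
#
#   offside_rule(('token', 'S', '    '), ('token', 'NL', '\n'), 0)
#   # => ([('token', 'INDENT', '    ')], 1)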
|
|
|
|
|
def keyword_specialcases(token, keyword_map): |
|
""" |
|
Add a special-case which turns IDENTIFIER tokens into keyword tokens. |
|
|
|
e.g.: |
|
- 'array' changes from an IDENTIFIER token to an ARRAY token. |
|
- 'true' changes from an IDENTIFIER token to a BOOLLIT token. |
|
""" |
|
if is_toktype(token, 'IDENTIFIER'): |
|
text = token_text(token) |
|
if text in keyword_map.keys(): |
|
toktype = keyword_map[text] |
|
return ('token', toktype, text) |
|
return token |
|
|
|
|
|
def load_tokendefs(fpath): |
|
""" |
|
Load the token definitions from the file at fpath. |
|
|
|
Returns a list of tuples of the form (<token type>, <compiled regex>). |
|
|
|
The format of the tokendefs file should be pairs of lines: |
|
- a line which is the token type (e.g. 'IDENTIFIER', 'OBRACKET', etc), |
|
- followed by a line which is the regex which recognizes that token. |
|
|
|
Example tokendefs.txt: |
|
IDENTIFIER |
|
[a-zA-Z][a-zA-Z0-9]* |
|
OPAREN |
|
\( |
|
CPAREN |
|
    \)
|
""" |
|
import re |
|
tokendefs = [] |
|
with open(fpath) as f: |
|
for line in f: |
|
token_name = line.rstrip('\n') |
|
            pattern = next(f).rstrip('\n')
|
regex = re.compile(pattern) |
|
tokendef = (token_name, regex) |
|
tokendefs.append(tokendef) |
|
return tokendefs |
|
|
|
|
|
def load_keywords(fpath): |
|
""" |
|
Load the keyword definitions from the file at the path. |
|
|
|
Returns a dict which maps keywords to toktypes. |
|
|
|
The format of the keywords file should be pairs of lines: |
|
    - a keyword (e.g. 'true'),
|
- followed by a line which is the toktype of that keyword (e.g. 'BOOLLIT'). |
|
|
|
Example keywords.txt file: |
|
true |
|
BOOLLIT |
|
false |
|
BOOLLIT |
|
array |
|
ARRAY |
|
return |
|
RETURN |
|
""" |
|
keyword_map = {} |
|
with open(fpath) as f: |
|
for line in f: |
|
keyword = line.rstrip('\n') |
|
            toktype = next(f).rstrip('\n')
|
keyword_map[keyword] = toktype |
|
return keyword_map |
|
|
|
|
|
|
|
# AST node data structure utils: |
|
|
|
def is_ast(ast): |
|
"""Returns whether the arg is an AST node.""" |
|
return isinstance(ast, tuple) and len(ast) > 0 and ast[0] == 'ast' |
|
|
|
|
|
def ast_type(ast): |
|
"""Returns the type of the AST (e.g. 'vardecl', 'statement', etc.)""" |
|
assert is_ast(ast) |
|
return ast[1] |
|
|
|
|
|
def is_asttype(ast, atype): |
|
""" |
|
Returns whether the AST node is of the given type. |
|
atype is e.g. 'vardecl', 'statement', etc. |
|
""" |
|
return is_ast(ast) and ast_type(ast) == atype |
|
|
|
|
|
def ast_children(ast): |
|
"""Return the child nodes of the given AST.""" |
|
assert is_ast(ast) |
|
return ast[2] |
|
|
|
|
|
|
|
# Parser helpers: |
|
# See https://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_form#Table_of_symbols |
|
|
|
def alternation(tokens, funcs): |
|
"""Try each of the parsing functions until one succeeds.""" |
|
failure = (None, tokens) |
|
for f in funcs: |
|
(ast, tokens) = f(tokens) |
|
if ast is not None: |
|
return (ast, tokens) |
|
return failure |
|
|
|
|
|
def concatenation(tokens, funcs): |
|
""" |
|
All of the parsing functions must succeed as a whole. |
|
    Returns (<list of ASTs>, <remaining tokens>).
|
""" |
|
failure = (None, tokens) |
|
asts = [] |
|
for f in funcs: |
|
(ast, tokens) = f(tokens) |
|
if ast is None: |
|
return failure |
|
asts.append(ast) |
|
return (asts, tokens) |
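
# For example, the vardecl parser below composes terminal parsers like this:
#
#   (asts, tokens) = concatenation(tokens, [
#       parse_IDENTIFIER,
#       parse_COLON,
#       parse_S,
#       parse_type
#   ])
#
# which either yields all four parsed pieces or fails and hands back the
# original token list.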
|
|
|
|
|
|
|
# Parsing functions: |
|
|
|
# Note: all parse_x functions share a similar format: |
|
# - Their first argument is the list of tokens. |
|
# - They return a tuple of (<parsed result>, <remaining tokens>). |
|
# - <parsed result> is either an AST node or a token (a terminal). |
|
# - <remaining tokens> is the list of unconsumed tokens. |
|
# - If the parse fails, (None, <tokens>) is returned, where <tokens> |
|
# is the list of tokens which was passed in. |
|
|
|
def parse(tokens): |
|
"""Attempts to parse the tokens into an AST.""" |
|
(ast, tokens) = parse_program(tokens) |
|
if ast is None: |
|
raise Exception("Couldn't parse starting at %s" % tokens[:8]) |
|
if len(tokens) > 0: |
|
raise Exception("Leftover tokens: %s" % tokens) |
|
return ast |
|
|
|
|
|
def parse_program(tokens): |
|
""" |
|
Attempts to parse a program. |
|
|
|
Grammar: |
|
program = statement { statement } |
|
""" |
|
failure = (None, tokens) |
|
statement_asts = [] |
|
|
|
(ast, tokens) = parse_statement(tokens) |
|
if ast is None: |
|
return failure |
|
statement_asts.append(ast) |
|
|
|
while True: |
|
(ast, tokens) = parse_statement(tokens) |
|
if ast is None: |
|
break |
|
statement_asts.append(ast) |
|
|
|
ast = ('ast', 'program', statement_asts) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_statement(tokens): |
|
""" |
|
Attempts to parse a statement. |
|
|
|
Grammar: |
|
statement = ( linestmt NL ) | blockstmt |
|
""" |
|
failure = (None, tokens) |
|
|
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_linestmt, |
|
parse_NL |
|
]) |
|
if asts is not None: |
|
line_ast = asts[0] |
|
ast = ('ast', 'statement', [line_ast]) |
|
return (ast, tokens) |
|
|
|
(block_ast, tokens) = parse_blockstmt(tokens) |
|
if block_ast is not None: |
|
ast = ('ast', 'statement', [block_ast]) |
|
return (ast, tokens) |
|
|
|
return failure |
|
|
|
|
|
def parse_linestmt(tokens): |
|
""" |
|
Attempts to parse a "line" statement. |
|
|
|
Grammar: |
|
linestmt = declassign | assign | vardecl | rvalue | return | BREAK | CONTINUE | INCLUDE | DEFINE |
|
""" |
|
failure = (None, tokens) |
|
(ast, tokens) = alternation(tokens, [ |
|
parse_declassign, |
|
parse_assign, |
|
parse_vardecl, |
|
parse_rvalue, |
|
parse_return, |
|
parse_BREAK, |
|
parse_CONTINUE, |
|
parse_INCLUDE, |
|
parse_DEFINE |
|
]) |
|
if ast is None: |
|
return failure |
|
line_ast = ('ast', 'linestmt', [ast]) |
|
return (line_ast, tokens) |
|
|
|
|
|
def parse_blockstmt(tokens): |
|
""" |
|
Attempts to parse a "block" statement. |
|
|
|
Grammar: |
|
blockstmt = fundecl | if | while | for | struct | scope | comment | blankline |
|
""" |
|
failure = (None, tokens) |
|
(ast, tokens) = alternation(tokens, [ |
|
parse_fundecl, |
|
parse_if, |
|
parse_while, |
|
parse_for, |
|
parse_struct, |
|
parse_scope, |
|
parse_comment, |
|
parse_blankline |
|
]) |
|
if ast is None: |
|
return failure |
|
block_ast = ('ast', 'blockstmt', [ast]) |
|
return (block_ast, tokens) |
|
|
|
|
|
def parse_comment(tokens): |
|
""" |
|
Attempts to parse a (line or block-level) comment. |
|
|
|
Grammar: |
|
comment = [S] ( LCOMMENT | BCOMMENT ) NL |
|
""" |
|
failure = (None, tokens) |
|
    (_, tokens) = parse_S(tokens)
|
(token, tokens) = alternation(tokens, [ |
|
parse_LCOMMENT, |
|
parse_BCOMMENT |
|
]) |
|
if token is None: |
|
return failure |
|
ast = ('ast', 'comment', [token]) |
|
(nl_ast, tokens) = parse_NL(tokens) |
|
if nl_ast is None: |
|
return failure |
|
return (ast, tokens) |
|
|
|
|
|
def parse_blankline(tokens): |
|
""" |
|
Attempts to parse a blank line. |
|
|
|
Grammar: |
|
blankline = [S] NL |
|
""" |
|
failure = (None, tokens) |
|
(_, tokens) = parse_S(tokens) |
|
(ast, tokens) = parse_NL(tokens) |
|
if ast is None: |
|
return failure |
|
ast = ('ast', 'blankline', []) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_declassign(tokens): |
|
""" |
|
Attempts to parse a declaration-assignment statement. |
|
|
|
Grammar: |
|
declassign = vardecl S EQ S rvalue |
|
""" |
|
failure = (None, tokens) |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_vardecl, |
|
parse_S, |
|
parse_EQ, |
|
parse_S, |
|
parse_rvalue |
|
]) |
|
if asts is None: |
|
return failure |
|
vardecl_ast = asts[0] |
|
expr_ast = asts[4] |
|
ast = ('ast', 'declassign', [vardecl_ast, expr_ast]) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_assign(tokens): |
|
""" |
|
Attempts to parse an assignment statement. |
|
|
|
Grammar: |
|
assign = rvalue S aoperator S rvalue |
|
""" |
|
failure = (None, tokens) |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_rvalue, |
|
parse_S, |
|
parse_aoperator, |
|
parse_S, |
|
parse_rvalue |
|
]) |
|
if asts is None: |
|
return failure |
|
ast = ('ast', 'assign', [asts[0], asts[2], asts[4]]) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_aoperator(tokens): |
|
""" |
|
Attempts to parse an assignment operator. |
|
|
|
Grammar: |
|
aoperator = PLUSEQ | MINUSEQ | STAREQ | SLASHEQ | PERCENTEQ |
|
| AMPEQ | PIPEEQ | CARATEQ | LTLTEQ | GTGTEQ |
|
| EQ |
|
""" |
|
failure = (None, tokens) |
|
(token, tokens) = alternation(tokens, [ |
|
parse_PLUSEQ, |
|
parse_MINUSEQ, |
|
parse_STAREQ, |
|
parse_SLASHEQ, |
|
parse_PERCENTEQ, |
|
parse_AMPEQ, |
|
parse_PIPEEQ, |
|
parse_CARATEQ, |
|
parse_LTLTEQ, |
|
parse_GTGTEQ, |
|
parse_EQ |
|
]) |
|
if token is None: |
|
return failure |
|
ast = ('ast', 'aoperator', [token]) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_vardecl(tokens): |
|
""" |
|
Attempts to parse a vardecl AST node. |
|
|
|
Grammar: |
|
vardecl = [storage S] IDENTIFIER COLON S type |
|
""" |
|
failure = (None, tokens) |
|
|
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_storage, |
|
parse_S |
|
]) |
|
if asts is not None: |
|
storage_ast = asts[0] |
|
else: |
|
storage_ast = None |
|
|
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_IDENTIFIER, |
|
parse_COLON, |
|
parse_S, |
|
parse_type |
|
]) |
|
if asts is None: |
|
return failure |
|
identifier_token = asts[0] |
|
type_ast = asts[3] |
|
|
|
if storage_ast is None: |
|
subnodes = [identifier_token, type_ast] |
|
else: |
|
subnodes = [storage_ast, identifier_token, type_ast] |
|
|
|
vardecl_ast = ('ast', 'vardecl', subnodes) |
|
return (vardecl_ast, tokens) |
|
|
|
|
|
def parse_storage(tokens): |
|
""" |
|
Attempts to parse a declaration storage type. |
|
|
|
Grammar: |
|
storage = [ AUTO | REGISTER | STATIC | EXTERN ] [THREADLOCAL] |
|
""" |
|
failure = (None, tokens) |
|
asts = [] |
|
(ast, tokens) = alternation(tokens, [ |
|
parse_AUTO, |
|
parse_REGISTER, |
|
parse_STATIC, |
|
parse_EXTERN |
|
]) |
|
if ast is not None: |
|
asts.append(ast) |
|
(ast, tokens) = parse_THREADLOCAL(tokens) |
|
if ast is not None: |
|
asts.append(ast) |
|
if len(asts) == 0: |
|
return failure |
|
else: |
|
ast = ('ast', 'storage', asts) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_return(tokens): |
|
""" |
|
Attempts to parse a return statement. |
|
|
|
Grammar: |
|
return = RETURN [ S rvalue ] |
|
""" |
|
failure = (None, tokens) |
|
(ast, tokens) = parse_RETURN(tokens) |
|
if ast is None: |
|
return failure |
|
return_ast = ('ast', 'return', []) |
|
|
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_S, |
|
parse_rvalue |
|
]) |
|
if asts is not None: |
|
expr_ast = asts[1] |
|
return_ast = ('ast', 'return', [expr_ast]) |
|
|
|
return (return_ast, tokens) |
|
|
|
|
|
# fundecl examples: |
|
# func f(): |
|
# func f(a: int): |
|
# func f() -> int: |
|
# func f(a: int) -> int: |
|
# func f(a: int, b: int): |
|
# func f(a: int, b: int) -> int: |
|
def parse_fundecl(tokens): |
|
""" |
|
Attempts to parse a function declaration. |
|
|
|
Grammar: |
|
fundecl = [storage S] {fspecifier S} FUNC S IDENTIFIER fundeclargs [fundeclret] (scope | NL) |
|
""" |
|
failure = (None, tokens) |
|
subnodes = [] |
|
(asts, tokens) = concatenation(tokens, [parse_storage, parse_S]) |
|
if asts is not None: |
|
subnodes.append(asts[0]) |
|
while True: |
|
(asts, tokens) = concatenation(tokens, [parse_fspecifier, parse_S]) |
|
if asts is None: |
|
break |
|
else: |
|
subnodes.append(asts[0]) |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_FUNC, |
|
parse_S, |
|
parse_IDENTIFIER, |
|
parse_fundeclargs |
|
]) |
|
if asts is None: |
|
return failure |
|
subnodes.append(asts[2]) |
|
subnodes.append(asts[3]) |
|
(ast, tokens) = parse_fundeclret(tokens) |
|
if ast is not None: |
|
subnodes.append(ast) |
|
(ast, tokens) = alternation(tokens, [parse_scope, parse_NL]) |
|
if ast is None: |
|
return failure |
|
if is_asttype(ast, 'scope'): |
|
subnodes.append(ast) |
|
ast = ('ast', 'fundecl', subnodes) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_fspecifier(tokens): |
|
""" |
|
Attempts to parse a function specifier. |
|
|
|
Grammar: |
|
fspecifier = INLINE | NORETURN |
|
""" |
|
failure = (None, tokens) |
|
(ast, tokens) = alternation(tokens, [parse_INLINE, parse_NORETURN]) |
|
if ast is None: |
|
return failure |
|
else: |
|
ast = ('ast', 'fspecifier', [ast]) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_funtype(tokens): |
|
""" |
|
Attempts to parse the "type" of a function. |
|
|
|
Grammar: |
|
funtype = FUNC |
|
| ( STATIC LT FUNC GT ) | ( STATIC LT INLINE LT FUNC GT GT ) |
|
| ( EXTERN LT FUNC GT ) | ( EXTERN LT INLINE LT FUNC GT GT ) |
|
| ( INLINE LT FUNC GT ) |
|
""" |
|
failure = (None, tokens) |
|
(ast, tokens) = parse_FUNC(tokens) |
|
if ast is not None: |
|
ast = ('ast', 'funtype', []) |
|
return (ast, tokens) |
|
(asts, tokens) = concatenation(tokens, [parse_STATIC, parse_LT, parse_FUNC, parse_GT]) |
|
if asts is not None: |
|
ast = ('ast', 'funtype', [asts[0]]) |
|
return (ast, tokens) |
|
(asts, tokens) = concatenation(tokens, [parse_EXTERN, parse_LT, parse_FUNC, parse_GT]) |
|
if asts is not None: |
|
ast = ('ast', 'funtype', [asts[0]]) |
|
return (ast, tokens) |
|
(asts, tokens) = concatenation(tokens, [parse_INLINE, parse_LT, parse_FUNC, parse_GT]) |
|
if asts is not None: |
|
ast = ('ast', 'funtype', [asts[0]]) |
|
return (ast, tokens) |
|
(asts, tokens) = concatenation(tokens, [parse_STATIC, parse_LT, parse_INLINE, parse_LT, parse_FUNC, parse_GT, parse_GT]) |
|
if asts is not None: |
|
ast = ('ast', 'funtype', [asts[0], asts[2]]) |
|
return (ast, tokens) |
|
(asts, tokens) = concatenation(tokens, [parse_EXTERN, parse_LT, parse_INLINE, parse_LT, parse_FUNC, parse_GT, parse_GT]) |
|
if asts is not None: |
|
ast = ('ast', 'funtype', [asts[0], asts[2]]) |
|
return (ast, tokens) |
|
return failure |
|
|
|
|
|
def parse_fundeclargs(tokens): |
|
""" |
|
Attempts to parse a list of function declaration arguments. |
|
|
|
Grammar: |
|
fundeclargs = OPAREN [ vardecl { COMMA S vardecl } ] CPAREN |
|
""" |
|
failure = (None, tokens) |
|
|
|
def list_of_args(tokens): |
|
""" |
|
Grammar: vardecl { COMMA S vardecl } |
|
Returns (<list of args>, <remaining tokens>) |
|
""" |
|
args = [] |
|
(vardecl_ast, tokens) = parse_vardecl(tokens) |
|
if vardecl_ast is None: |
|
return (args, tokens) |
|
args.append(vardecl_ast) |
|
|
|
while True: |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_COMMA, |
|
parse_S, |
|
parse_vardecl |
|
]) |
|
if asts is None: |
|
break |
|
vardecl_ast = asts[2] |
|
args.append(vardecl_ast) |
|
|
|
return (args, tokens) |
|
|
|
(ast, tokens) = parse_OPAREN(tokens) |
|
if ast is None: |
|
return failure |
|
|
|
(args, tokens) = list_of_args(tokens) |
|
|
|
(ast, tokens) = parse_CPAREN(tokens) |
|
if ast is None: |
|
return failure |
|
|
|
ast = ('ast', 'fundeclargs', args) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_fundeclret(tokens): |
|
""" |
|
Attempts to parse a function declaration return value. |
|
|
|
Grammar: |
|
fundeclret = S ARROW S type |
|
""" |
|
failure = (None, tokens) |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_S, |
|
parse_ARROW, |
|
parse_S, |
|
parse_type |
|
]) |
|
if asts is None: |
|
return failure |
|
type_ast = asts[3] |
|
ast = ('ast', 'fundeclret', [type_ast]) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_if(tokens): |
|
""" |
|
Attempts to parse an if statement. |
|
|
|
Grammar: |
|
if = IF S rvalue scope { elif } [else] |
|
""" |
|
failure = (None, tokens) |
|
|
|
def parse_elif(tokens): |
|
""" |
|
Attempts to parse an elif statement. |
|
|
|
Grammar: |
|
elif = ELIF S rvalue scope |
|
""" |
|
failure = (None, tokens) |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_ELIF, |
|
parse_S, |
|
parse_rvalue, |
|
parse_scope |
|
]) |
|
if asts is None: |
|
return failure |
|
ast = ('ast', 'elif', [asts[2], asts[3]]) |
|
return (ast, tokens) |
|
|
|
def parse_else(tokens): |
|
""" |
|
Attempts to parse an else statement. |
|
|
|
Grammar: |
|
else = ELSE scope |
|
""" |
|
failure = (None, tokens) |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_ELSE, |
|
parse_scope |
|
]) |
|
if asts is None: |
|
return failure |
|
ast = ('ast', 'else', [asts[1]]) |
|
return (ast, tokens) |
|
|
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_IF, |
|
parse_S, |
|
parse_rvalue, |
|
parse_scope |
|
]) |
|
if asts is None: |
|
return failure |
|
sub_asts = [asts[2], asts[3]] |
|
|
|
elifs = [] |
|
while True: |
|
(ast, tokens) = parse_elif(tokens) |
|
if ast is None: |
|
break |
|
elifs.append(ast) |
|
sub_asts += elifs |
|
|
|
(else_ast, tokens) = parse_else(tokens) |
|
if else_ast is not None: |
|
sub_asts.append(else_ast) |
|
|
|
if_ast = ('ast', 'if', sub_asts) |
|
return (if_ast, tokens) |
|
|
|
|
|
def parse_while(tokens): |
|
""" |
|
Attempts to parse a while statement. |
|
|
|
Grammar: |
|
while = WHILE S rvalue scope |
|
""" |
|
failure = (None, tokens) |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_WHILE, |
|
parse_S, |
|
parse_rvalue, |
|
parse_scope |
|
]) |
|
if asts is None: |
|
return failure |
|
expr_ast = asts[2] |
|
scope_ast = asts[3] |
|
ast = ('ast', 'while', [expr_ast, scope_ast]) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_for(tokens): |
|
""" |
|
Attempts to parse a for statement. |
|
|
|
Grammar: |
|
for = FOR S [linestmt] SEMICOLON S rvalue SEMICOLON S linestmt { COMMA S linestmt } scope |
|
""" |
|
failure = (None, tokens) |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_FOR, |
|
parse_S |
|
]) |
|
if asts is None: |
|
return failure |
|
(linestmt1_ast, tokens) = parse_linestmt(tokens) |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_SEMICOLON, |
|
parse_S, |
|
parse_rvalue, |
|
parse_SEMICOLON, |
|
parse_S, |
|
parse_linestmt |
|
]) |
|
if asts is None: |
|
return failure |
|
expr2_ast = asts[2] |
|
linestmt3_asts = [asts[5]] |
|
while True: |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_COMMA, |
|
parse_S, |
|
parse_linestmt |
|
]) |
|
if asts is None: |
|
break |
|
linestmt3_asts.append(asts[2]) |
|
(scope_ast, tokens) = parse_scope(tokens) |
|
if scope_ast is None: |
|
return failure |
|
ast = ('ast', 'for', [linestmt1_ast, expr2_ast] + linestmt3_asts + [scope_ast]) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_scope(tokens): |
|
""" |
|
Attempts to parse a scoped sequence of statements. |
|
|
|
Grammar: |
|
scope = COLON NL INDENT statement { statement } DEDENT |
|
""" |
|
failure = (None, tokens) |
|
statements = [] |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_COLON, |
|
parse_NL, |
|
parse_INDENT, |
|
parse_statement |
|
]) |
|
if asts is None: |
|
return failure |
|
statements.append(asts[3]) |
|
|
|
while True: |
|
(ast, tokens) = parse_statement(tokens) |
|
if ast is None: |
|
break |
|
statements.append(ast) |
|
|
|
(ast, tokens) = parse_DEDENT(tokens) |
|
if ast is None: |
|
return failure |
|
|
|
ast = ('ast', 'scope', statements) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_struct(tokens): |
|
""" |
|
Attempts to parse a struct. |
|
|
|
Grammar: |
|
        struct = STRUCT S IDENTIFIER structscope
|
""" |
|
failure = (None, tokens) |
|
|
|
def parse_structscope(tokens): |
|
""" |
|
Grammar: |
|
structscope = COLON NL INDENT vardecl NL { vardecl NL } DEDENT |
|
""" |
|
failure = (None, tokens) |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_COLON, |
|
parse_NL, |
|
parse_INDENT, |
|
parse_vardecl, |
|
parse_NL |
|
]) |
|
if asts is None: |
|
return failure |
|
vardecls = [asts[3]] |
|
while True: |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_vardecl, |
|
parse_NL |
|
]) |
|
if asts is None: |
|
break |
|
vardecls.append(asts[0]) |
|
(dedent, tokens) = parse_DEDENT(tokens) |
|
if dedent is None: |
|
return failure |
|
ast = ('ast', 'structscope', vardecls) |
|
return (ast, tokens) |
|
|
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_STRUCT, |
|
parse_S, |
|
parse_IDENTIFIER, |
|
parse_structscope |
|
]) |
|
if asts is None: |
|
return failure |
|
identifier_token = asts[2] |
|
scope_ast = asts[3] |
|
ast = ('ast', 'struct', [identifier_token, scope_ast]) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_rvalue(tokens): |
|
""" |
|
Attempts to parse an rvalue (the right-hand side of an assignment statement). |
|
    I.e. an expression which results in a value.
|
|
|
Grammar: |
|
rvalue = ( ( ternary | binary | unary | IDENTIFIER ) { dotted | arrowed | indexed | called } ) |
|
| type | cast | INTLIT | FLOATLIT | STRINGLIT | CHARLIT | BOOLLIT |
|
""" |
|
failure = (None, tokens) |
|
|
|
def parse_rvalue1(tokens): |
|
""" |
|
Grammar fragment: |
|
( ternary | binary | unary | IDENTIFIER ) { dotted | arrowed | indexed | called } |
|
""" |
|
failure = (None, tokens) |
|
(ast1, tokens) = alternation(tokens, [ |
|
parse_ternary, |
|
parse_binary, |
|
parse_unary, |
|
parse_IDENTIFIER |
|
]) |
|
if ast1 is None: |
|
return failure |
|
asts = [] |
|
while True: |
|
(ast, tokens) = alternation(tokens, [ |
|
parse_dotted, |
|
parse_arrowed, |
|
parse_indexed, |
|
parse_called, |
|
]) |
|
if ast is None: |
|
break |
|
asts.append(ast) |
|
ast = ('ast', 'rvalue', [ast1] + asts) |
|
return (ast, tokens) |
|
|
|
(ast, tokens) = alternation(tokens, [ |
|
parse_rvalue1, |
|
parse_type, |
|
parse_cast, |
|
parse_INTLIT, |
|
parse_FLOATLIT, |
|
parse_STRINGLIT, |
|
parse_CHARLIT, |
|
parse_BOOLLIT |
|
]) |
|
if ast is None: |
|
return failure |
|
if is_asttype(ast, 'rvalue'): |
|
return (ast, tokens) |
|
else: |
|
return (('ast', 'rvalue', [ast]), tokens) |
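
# For example, 'foo.bar[0]' parses (roughly) as:
#
#   ('ast', 'rvalue', [
#       ('token', 'IDENTIFIER', 'foo'),
#       ('ast', 'dotted', [('token', 'IDENTIFIER', 'bar')]),
#       ('ast', 'indexed', [
#           ('ast', 'rvalue', [('token', 'INTLIT', '0')])
#       ])
#   ])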
|
|
|
|
|
def parse_cast(tokens): |
|
""" |
|
Attempts to parse a cast statement. |
|
|
|
Grammar: |
|
cast = CAST called |
|
""" |
|
failure = (None, tokens) |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_CAST, |
|
parse_called |
|
]) |
|
if asts is None: |
|
return failure |
|
assert len(ast_children(asts[1])) == 2 |
|
var_ast = ast_children(asts[1])[0] |
|
type_ast = ast_children(asts[1])[1] |
|
ast = ('ast', 'cast', [var_ast, type_ast]) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_dotted(tokens): |
|
""" |
|
Attempt to parse a member access (dot) expression. |
|
|
|
Grammar: |
|
dotted = PERIOD IDENTIFIER |
|
""" |
|
failure = (None, tokens) |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_PERIOD, |
|
parse_IDENTIFIER |
|
]) |
|
if asts is None: |
|
return failure |
|
ast = ('ast', 'dotted', [asts[1]]) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_arrowed(tokens): |
|
""" |
|
Attempt to parse a pointer access (arrow) expression. |
|
|
|
Grammar: |
|
arrowed = ARROW IDENTIFIER |
|
""" |
|
failure = (None, tokens) |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_ARROW, |
|
parse_IDENTIFIER |
|
]) |
|
if asts is None: |
|
return failure |
|
ast = ('ast', 'arrowed', [asts[1]]) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_indexed(tokens): |
|
""" |
|
Attempt to parse an array access (bracket) expression. |
|
|
|
Grammar: |
|
indexed = OBRACKET rvalue CBRACKET |
|
""" |
|
failure = (None, tokens) |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_OBRACKET, |
|
parse_rvalue, |
|
parse_CBRACKET |
|
]) |
|
if asts is None: |
|
return failure |
|
ast = ('ast', 'indexed', [asts[1]]) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_called(tokens): |
|
""" |
|
Grammar: |
|
        called = OPAREN [ rvalue { COMMA S rvalue } ] CPAREN
|
""" |
|
failure = (None, tokens) |
|
|
|
def list_of_exprs(tokens): |
|
""" |
|
Grammar: rvalue { COMMA S rvalue } |
|
Returns (<list of exprs>, <remaining tokens>) |
|
""" |
|
exprs = [] |
|
(expr_ast, tokens) = parse_rvalue(tokens) |
|
if expr_ast is None: |
|
return (exprs, tokens) |
|
exprs.append(expr_ast) |
|
|
|
while True: |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_COMMA, |
|
parse_S, |
|
parse_rvalue |
|
]) |
|
if asts is None: |
|
break |
|
expr_ast = asts[2] |
|
exprs.append(expr_ast) |
|
|
|
return (exprs, tokens) |
|
|
|
(ast, tokens) = parse_OPAREN(tokens) |
|
if ast is None: |
|
return failure |
|
(exprs, tokens) = list_of_exprs(tokens) |
|
(ast, tokens) = parse_CPAREN(tokens) |
|
if ast is None: |
|
return failure |
|
ast = ('ast', 'called', exprs) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_ternary(tokens): |
|
""" |
|
Attempts to parse a ternary statement. |
|
|
|
Grammar: |
|
        ternary = OPAREN rvalue S QMARK S rvalue S COLON S rvalue CPAREN
|
""" |
|
failure = (None, tokens) |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_OPAREN, |
|
parse_rvalue, |
|
parse_S, |
|
parse_QMARK, |
|
parse_S, |
|
parse_rvalue, |
|
parse_S, |
|
parse_COLON, |
|
parse_S, |
|
parse_rvalue, |
|
parse_CPAREN |
|
]) |
|
if asts is None: |
|
return failure |
|
ast = ('ast', 'ternary', [asts[1], asts[5], asts[9]]) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_binary(tokens): |
|
""" |
|
Attempts to parse a binary expression. |
|
|
|
Grammar: |
|
        binary = OPAREN rvalue S boperator S rvalue CPAREN
|
""" |
|
failure = (None, tokens) |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_OPAREN, |
|
parse_rvalue, |
|
parse_S, |
|
parse_boperator, |
|
parse_S, |
|
parse_rvalue, |
|
parse_CPAREN |
|
]) |
|
if asts is None: |
|
return failure |
|
expr1 = asts[1] |
|
op = asts[3] |
|
expr2 = asts[5] |
|
ast = ('ast', 'binary', [expr1, op, expr2]) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_boperator(tokens): |
|
""" |
|
Attempts to parse a binary operator. |
|
|
|
Grammar: |
|
boperator = LTLT | GTGT | LTEQ | GTEQ | EQEQ | BANGEQ | AMPAMP | BARBAR |
|
| PLUS | MINUS | STAR | SLASH | PERCENT | LT | GT |
|
| BANG | AMP | BAR | TILDE | CARAT |
|
""" |
|
failure = (None, tokens) |
|
(token, tokens) = alternation(tokens, [ |
|
parse_LTLT, |
|
parse_GTGT, |
|
parse_LTEQ, |
|
parse_GTEQ, |
|
parse_EQEQ, |
|
parse_BANGEQ, |
|
parse_AMPAMP, |
|
parse_BARBAR, |
|
parse_PLUS, |
|
parse_MINUS, |
|
parse_STAR, |
|
parse_SLASH, |
|
parse_PERCENT, |
|
parse_LT, |
|
parse_GT, |
|
parse_BANG, |
|
parse_AMP, |
|
parse_BAR, |
|
parse_TILDE, |
|
parse_CARAT |
|
]) |
|
if token is None: |
|
return failure |
|
ast = ('ast', 'boperator', [token]) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_unary(tokens): |
|
""" |
|
Attempts to parse a unary expression. |
|
|
|
Grammar: |
|
unary = uoperator OPAREN rvalue CPAREN |
|
""" |
|
failure = (None, tokens) |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_uoperator, |
|
parse_OPAREN, |
|
parse_rvalue, |
|
parse_CPAREN |
|
]) |
|
if asts is None: |
|
return failure |
|
op = asts[0] |
|
expr = asts[2] |
|
ast = ('ast', 'unary', [op, expr]) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_uoperator(tokens): |
|
""" |
|
Attempts to parse a unary operator. |
|
|
|
Grammar: |
|
uoperator = MINUS | AMP | STAR | BANG | TILDE |
|
""" |
|
failure = (None, tokens) |
|
(token, tokens) = alternation(tokens, [ |
|
parse_MINUS, |
|
parse_AMP, |
|
parse_STAR, |
|
parse_BANG, |
|
parse_TILDE |
|
]) |
|
if token is None: |
|
return failure |
|
ast = ('ast', 'uoperator', [token]) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_type(tokens): |
|
""" |
|
    Attempts to parse a type declaration.
|
|
|
Grammar: |
|
type = ARRAY OBRACKET rvalue CBRACKET LT type GT |
|
| ( ARRAY | POINTER | FUNCTION | tqualifier ) LT type GT |
|
| IDENTIFIER { S IDENTIFIER } |
|
""" |
|
failure = (None, tokens) |
|
|
|
def parse_type1(tokens): |
|
""" |
|
Grammar fragment: |
|
ARRAY OBRACKET rvalue CBRACKET LT type GT |
|
|
|
'array[8]<int>' becomes: |
|
('ast', 'type', [ |
|
('token', 'ARRAY', 'array'), |
|
            ('ast', 'rvalue', [('token', 'INTLIT', '8')]),
|
('ast', 'type', [ |
|
('token', 'IDENTIFIER', 'int') |
|
]) |
|
]) |
|
""" |
|
failure = (None, tokens) |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_ARRAY, |
|
parse_OBRACKET, |
|
parse_rvalue, |
|
parse_CBRACKET, |
|
parse_LT, |
|
parse_type, |
|
parse_GT |
|
]) |
|
if asts is None: |
|
return failure |
|
        array_token = asts[0]
|
rvalue_ast = asts[2] |
|
subtype_ast = asts[5] |
|
        ast = ('ast', 'type', [array_token, rvalue_ast, subtype_ast])
|
return (ast, tokens) |
|
|
|
def parse_type2(tokens): |
|
""" |
|
Grammar fragment: |
|
( ARRAY | POINTER | FUNCTION | tqualifier ) LT type GT |
|
|
|
'array<int>' becomes: |
|
('ast', 'type', [ |
|
('token', 'ARRAY', 'array'), |
|
('ast', 'type', [ |
|
('token', 'IDENTIFIER', 'int') |
|
]) |
|
]) |
|
|
|
'volatile<char>' becomes: |
|
('ast', 'type', [ |
|
('ast', 'tqualifier', [ |
|
('token', 'VOLATILE', 'volatile') |
|
]), |
|
('ast', 'type', [ |
|
('token', 'IDENTIFIER', 'char') |
|
]) |
|
]) |
|
|
|
'array<pointer<char>>' becomes: |
|
('ast', 'type', [ |
|
('token', 'ARRAY', 'array'), |
|
('ast', 'type', [ |
|
('token', 'POINTER', 'pointer'), |
|
('ast', 'type', [ |
|
('token', 'IDENTIFIER', 'char') |
|
]) |
|
]) |
|
]) |
|
|
|
|
""" |
|
failure = (None, tokens) |
|
(ast1, tokens) = alternation(tokens, [ |
|
parse_ARRAY, |
|
parse_POINTER, |
|
parse_FUNCTION, |
|
parse_tqualifier |
|
]) |
|
if ast1 is None: |
|
return failure |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_LT, |
|
parse_type, |
|
parse_GT |
|
]) |
|
if asts is None: |
|
return failure |
|
ast = ('ast', 'type', [ast1, asts[1]]) |
|
return (ast, tokens) |
|
|
|
def parse_type3(tokens): |
|
""" |
|
Grammar fragment: |
|
IDENTIFIER { S IDENTIFIER } |
|
|
|
'int' becomes: |
|
('ast', 'type', [('token', 'IDENTIFIER', 'int')]) |
|
|
|
'unsigned char' becomes: |
|
('ast', 'type', [('token', 'IDENTIFIER', 'unsigned'), ('token', 'IDENTIFIER', 'char')]) |
|
""" |
|
failure = (None, tokens) |
|
types = [] |
|
(identifier_token, tokens) = parse_IDENTIFIER(tokens) |
|
if identifier_token is None: |
|
return failure |
|
types.append(identifier_token) |
|
while True: |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_S, |
|
parse_IDENTIFIER |
|
]) |
|
if asts is None: |
|
break |
|
types.append(asts[1]) |
|
type_ast = ('ast', 'type', types) |
|
return (type_ast, tokens) |
|
|
|
(ast, tokens) = alternation(tokens, [ |
|
parse_type1, |
|
parse_type2, |
|
parse_type3 |
|
]) |
|
if ast is None: |
|
return failure |
|
else: |
|
return (ast, tokens) |
|
|
|
|
|
def parse_tqualifier(tokens): |
|
""" |
|
Attempts to parse a type qualifier. |
|
|
|
Grammar: |
|
tqualifier = CONST | VOLATILE | RESTRICT | ATOMIC |
|
""" |
|
failure = (None, tokens) |
|
(ast, tokens) = alternation(tokens, [ |
|
parse_CONST, |
|
parse_VOLATILE, |
|
parse_RESTRICT, |
|
parse_ATOMIC |
|
]) |
|
if ast is None: |
|
return failure |
|
else: |
|
ast = ('ast', 'tqualifier', [ast]) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_ws(tokens): |
|
""" |
|
Attempts to parse any amount of whitespace. |
|
|
|
Grammar: |
|
ws = (S | NL) { (S | NL) } |
|
""" |
|
failure = (None, tokens) |
|
subnodes = [] |
|
(token, tokens) = alternation(tokens, [ |
|
parse_S, |
|
parse_NL |
|
]) |
|
if token is None: |
|
return failure |
|
subnodes.append(token) |
|
|
|
while True: |
|
(token, tokens) = alternation(tokens, [ |
|
parse_S, |
|
parse_NL |
|
]) |
|
if token is None: |
|
break |
|
subnodes.append(token) |
|
|
|
ast = ('ast', 'ws', subnodes) |
|
return (ast, tokens) |
|
|
|
|
|
# Terminal parsing: parsing individual tokens. |
|
|
|
def parse_terminal(tokens, toktype): |
|
""" |
|
Attempts to parse a terminal node of type _toktype_. |
|
Note that the parsed result of a terminal is the token itself, not an AST node. |
|
""" |
|
failure = (None, tokens) |
|
if len(tokens) > 0 and is_toktype(tokens[0], toktype): |
|
return (tokens[0], tokens[1:]) |
|
else: |
|
return failure |
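
# A terminal parser simply consumes one matching token, e.g.:
#
#   parse_COLON([('token', 'COLON', ':'), ('token', 'S', ' ')])
#   # => (('token', 'COLON', ':'), [('token', 'S', ' ')])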
|
|
|
def parse_FUNC(tokens): |
|
return parse_terminal(tokens, 'FUNC') |
|
|
|
def parse_ARRAY(tokens): |
|
return parse_terminal(tokens, 'ARRAY') |
|
|
|
def parse_POINTER(tokens): |
|
return parse_terminal(tokens, 'POINTER') |
|
|
|
def parse_FUNCTION(tokens): |
|
return parse_terminal(tokens, 'FUNCTION') |
|
|
|
def parse_COMMA(tokens): |
|
return parse_terminal(tokens, 'COMMA') |
|
|
|
def parse_PERIOD(tokens): |
|
return parse_terminal(tokens, 'PERIOD') |
|
|
|
def parse_ARROW(tokens): |
|
return parse_terminal(tokens, 'ARROW') |
|
|
|
def parse_IDENTIFIER(tokens): |
|
return parse_terminal(tokens, 'IDENTIFIER') |
|
|
|
def parse_COLON(tokens): |
|
return parse_terminal(tokens, 'COLON') |
|
|
|
def parse_SEMICOLON(tokens): |
|
return parse_terminal(tokens, 'SEMICOLON') |
|
|
|
def parse_INTLIT(tokens): |
|
return parse_terminal(tokens, 'INTLIT') |
|
|
|
def parse_FLOATLIT(tokens): |
|
return parse_terminal(tokens, 'FLOATLIT') |
|
|
|
def parse_STRINGLIT(tokens): |
|
return parse_terminal(tokens, 'STRINGLIT') |
|
|
|
def parse_CHARLIT(tokens): |
|
return parse_terminal(tokens, 'CHARLIT') |
|
|
|
def parse_BOOLLIT(tokens): |
|
return parse_terminal(tokens, 'BOOLLIT') |
|
|
|
def parse_OPAREN(tokens): |
|
return parse_terminal(tokens, 'OPAREN') |
|
|
|
def parse_CPAREN(tokens): |
|
return parse_terminal(tokens, 'CPAREN') |
|
|
|
def parse_OBRACKET(tokens): |
|
return parse_terminal(tokens, 'OBRACKET') |
|
|
|
def parse_CBRACKET(tokens): |
|
return parse_terminal(tokens, 'CBRACKET') |
|
|
|
def parse_MINUS(tokens): |
|
return parse_terminal(tokens, 'MINUS') |
|
|
|
def parse_AMP(tokens): |
|
return parse_terminal(tokens, 'AMP') |
|
|
|
def parse_STAR(tokens): |
|
return parse_terminal(tokens, 'STAR') |
|
|
|
def parse_BANG(tokens): |
|
return parse_terminal(tokens, 'BANG') |
|
|
|
def parse_PLUS(tokens): |
|
return parse_terminal(tokens, 'PLUS') |
|
|
|
def parse_SLASH(tokens): |
|
return parse_terminal(tokens, 'SLASH') |
|
|
|
def parse_PERCENT(tokens): |
|
return parse_terminal(tokens, 'PERCENT') |
|
|
|
def parse_LT(tokens): |
|
return parse_terminal(tokens, 'LT') |
|
|
|
def parse_LTEQ(tokens): |
|
return parse_terminal(tokens, 'LTEQ') |
|
|
|
def parse_GT(tokens): |
|
return parse_terminal(tokens, 'GT') |
|
|
|
def parse_GTEQ(tokens): |
|
return parse_terminal(tokens, 'GTEQ') |
|
|
|
def parse_EQ(tokens): |
|
return parse_terminal(tokens, 'EQ') |
|
|
|
def parse_EQEQ(tokens): |
|
return parse_terminal(tokens, 'EQEQ') |
|
|
|
def parse_BANGEQ(tokens): |
|
return parse_terminal(tokens, 'BANGEQ') |
|
|
|
def parse_AMPAMP(tokens): |
|
return parse_terminal(tokens, 'AMPAMP') |
|
|
|
def parse_BARBAR(tokens): |
|
return parse_terminal(tokens, 'BARBAR') |
|
|
|
def parse_BAR(tokens): |
|
return parse_terminal(tokens, 'BAR') |
|
|
|
def parse_LTLT(tokens): |
|
return parse_terminal(tokens, 'LTLT') |
|
|
|
def parse_GTGT(tokens): |
|
# So, I'm not super happy about this, but here's why this has to be special-cased: |
|
# If we just treated this like all of the other tokenization and parsing functions, |
|
# we would have a token for '>>', and this would be a one-liner: |
|
# return parse_terminal(tokens, 'GTGT') |
|
# However, that would break `pointer<pointer<char>>`, because the last two |
|
    # characters would be a single GTGT token, rather than two separate GT tokens.
|
# So we have to implement parse_GTGT by looking for two distinct GT tokens. |
|
failure = (None, tokens) |
|
(gt_tokens, tokens) = concatenation(tokens, [parse_GT, parse_GT]) |
|
if gt_tokens is None: |
|
return failure |
|
token = ('token', 'GTGT', '>>') |
|
return (token, tokens) |
|
|
|
def parse_GTGTEQ(tokens): |
|
return parse_terminal(tokens, 'GTGTEQ') |
|
|
|
def parse_LTLTEQ(tokens): |
|
return parse_terminal(tokens, 'LTLTEQ') |
|
|
|
def parse_TILDE(tokens): |
|
return parse_terminal(tokens, 'TILDE') |
|
|
|
def parse_CARAT(tokens): |
|
return parse_terminal(tokens, 'CARAT') |
|
|
|
def parse_PLUSEQ(tokens): |
|
return parse_terminal(tokens, 'PLUSEQ') |
|
|
|
def parse_MINUSEQ(tokens): |
|
return parse_terminal(tokens, 'MINUSEQ') |
|
|
|
def parse_STAREQ(tokens): |
|
return parse_terminal(tokens, 'STAREQ') |
|
|
|
def parse_SLASHEQ(tokens): |
|
return parse_terminal(tokens, 'SLASHEQ') |
|
|
|
def parse_PERCENTEQ(tokens): |
|
return parse_terminal(tokens, 'PERCENTEQ') |
|
|
|
def parse_AMPEQ(tokens): |
|
return parse_terminal(tokens, 'AMPEQ') |
|
|
|
def parse_PIPEEQ(tokens): |
|
return parse_terminal(tokens, 'PIPEEQ') |
|
|
|
def parse_CARATEQ(tokens): |
|
return parse_terminal(tokens, 'CARATEQ') |
|
|
|
def parse_INDENT(tokens): |
|
return parse_terminal(tokens, 'INDENT') |
|
|
|
def parse_DEDENT(tokens): |
|
return parse_terminal(tokens, 'DEDENT') |
|
|
|
def parse_IF(tokens): |
|
return parse_terminal(tokens, 'IF') |
|
|
|
def parse_ELIF(tokens): |
|
return parse_terminal(tokens, 'ELIF') |
|
|
|
def parse_ELSE(tokens): |
|
return parse_terminal(tokens, 'ELSE') |
|
|
|
def parse_WHILE(tokens): |
|
return parse_terminal(tokens, 'WHILE') |
|
|
|
def parse_FOR(tokens): |
|
return parse_terminal(tokens, 'FOR') |
|
|
|
def parse_BREAK(tokens): |
|
return parse_terminal(tokens, 'BREAK') |
|
|
|
def parse_CONTINUE(tokens): |
|
return parse_terminal(tokens, 'CONTINUE') |
|
|
|
def parse_NL(tokens): |
|
return parse_terminal(tokens, 'NL') |
|
|
|
def parse_S(tokens): |
|
return parse_terminal(tokens, 'S') |
|
|
|
def parse_LCOMMENT(tokens): |
|
return parse_terminal(tokens, 'LCOMMENT') |
|
|
|
def parse_BCOMMENT(tokens): |
|
return parse_terminal(tokens, 'BCOMMENT') |
|
|
|
def parse_INCLUDE(tokens): |
|
return parse_terminal(tokens, 'INCLUDE') |
|
|
|
def parse_DEFINE(tokens): |
|
return parse_terminal(tokens, 'DEFINE') |
|
|
|
def parse_STRUCT(tokens): |
|
return parse_terminal(tokens, 'STRUCT') |
|
|
|
def parse_RETURN(tokens): |
|
return parse_terminal(tokens, 'RETURN') |
|
|
|
def parse_QMARK(tokens): |
|
return parse_terminal(tokens, 'QMARK') |
|
|
|
def parse_CAST(tokens): |
|
return parse_terminal(tokens, 'CAST') |
|
|
|
def parse_AUTO(tokens): |
|
return parse_terminal(tokens, 'AUTO') |
|
|
|
def parse_REGISTER(tokens): |
|
return parse_terminal(tokens, 'REGISTER') |
|
|
|
def parse_STATIC(tokens): |
|
return parse_terminal(tokens, 'STATIC') |
|
|
|
def parse_EXTERN(tokens): |
|
return parse_terminal(tokens, 'EXTERN') |
|
|
|
def parse_THREADLOCAL(tokens): |
|
return parse_terminal(tokens, 'THREADLOCAL') |
|
|
|
def parse_INLINE(tokens): |
|
return parse_terminal(tokens, 'INLINE') |
|
|
|
def parse_NORETURN(tokens): |
|
return parse_terminal(tokens, 'NORETURN') |
|
|
|
def parse_CONST(tokens): |
|
return parse_terminal(tokens, 'CONST') |
|
|
|
def parse_VOLATILE(tokens): |
|
return parse_terminal(tokens, 'VOLATILE') |
|
|
|
def parse_RESTRICT(tokens): |
|
return parse_terminal(tokens, 'RESTRICT') |
|
|
|
def parse_ATOMIC(tokens): |
|
return parse_terminal(tokens, 'ATOMIC') |
|
|
|
|
|
# Output generator implementation: |
|
|
|
# Note: all generate_x functions share a similar format: |
|
# - Their first argument is an AST.
|
# - They return a string (of C code). |
|
# If the passed AST is valid, then errors are not possible. |
|
# Exceptions are thrown otherwise. |
|
|
|
def generate(ast): |
|
"""Generates C code from the given AST.""" |
|
return generate_program(ast) |
|
|
|
|
|
def generate_program(ast): |
|
"""Generates a C program.""" |
|
assert is_asttype(ast, 'program') |
|
outputs = [] |
|
for statement_ast in ast_children(ast): |
|
statement = generate_statement(statement_ast) |
|
outputs.append(statement) |
|
output = '\n'.join(outputs) |
|
if not output.endswith('\n'): |
|
output += '\n' # always include a trailing newline. |
|
return output |
|
|
|
|
|
def generate_statement(ast): |
|
"""Generates a C statement.""" |
|
assert is_asttype(ast, 'statement') |
|
sub_ast = ast_children(ast)[0] |
|
assert is_ast(sub_ast) |
|
if is_asttype(sub_ast, 'linestmt'): |
|
return generate_linestmt(sub_ast) |
|
elif is_asttype(sub_ast, 'blockstmt'): |
|
return generate_blockstmt(sub_ast) |
|
else: |
|
raise Exception("generate_statement: don't know how to generate %s" % str(sub_ast)) |
|
|
|
|
|
def generate_blockstmt(ast): |
|
"""Generates a C "block" statement.""" |
|
assert is_asttype(ast, 'blockstmt') |
|
sub_ast = ast_children(ast)[0] |
|
assert is_ast(sub_ast) |
|
if is_asttype(sub_ast, 'fundecl'): |
|
return generate_fundecl(sub_ast) |
|
elif is_asttype(sub_ast, 'if'): |
|
return generate_if(sub_ast) |
|
elif is_asttype(sub_ast, 'while'): |
|
return generate_while(sub_ast) |
|
elif is_asttype(sub_ast, 'for'): |
|
return generate_for(sub_ast) |
|
elif is_asttype(sub_ast, 'struct'): |
|
return generate_struct(sub_ast) |
|
elif is_asttype(sub_ast, 'scope'): |
|
return generate_scope(sub_ast) |
|
elif is_asttype(sub_ast, 'comment'): |
|
return generate_comment(sub_ast) |
|
elif is_asttype(sub_ast, 'blankline'): |
|
return generate_blankline(sub_ast) |
|
else: |
|
raise Exception("generate_blockstmt: don't know how to generate %s" % str(sub_ast)) |
|
|
|
|
|
def generate_linestmt(ast): |
|
"""Generates a C "line" statement.""" |
|
assert is_asttype(ast, 'linestmt') |
|
subnode = ast_children(ast)[0] |
|
if is_asttype(subnode, 'declassign'): |
|
return generate_declassign(subnode) |
|
elif is_asttype(subnode, 'assign'): |
|
return generate_assign(subnode) |
|
elif is_asttype(subnode, 'vardecl'): |
|
return generate_vardecl(subnode) + ';' |
|
elif is_asttype(subnode, 'rvalue'): |
|
return generate_rvalue(subnode) + ';' |
|
elif is_asttype(subnode, 'return'): |
|
return generate_return(subnode) |
|
elif is_toktype(subnode, 'BREAK'): |
|
return "break;" |
|
elif is_toktype(subnode, 'CONTINUE'): |
|
return "continue;" |
|
elif is_toktype(subnode, 'INCLUDE'): |
|
return token_text(subnode) |
|
elif is_toktype(subnode, 'DEFINE'): |
|
return token_text(subnode) |
|
else: |
|
raise Exception("generate_linestmt: don't know how to generate %s" % str(subnode)) |
|
|
|
|
|
def generate_blankline(ast): |
|
"""Generates a blank line.""" |
|
assert is_asttype(ast, 'blankline') |
|
return "" |
|
|
|
|
|
def generate_comment(ast): |
|
"""Generates a C (line or block-level) comment.""" |
|
assert is_asttype(ast, 'comment') |
|
comment_token = ast_children(ast)[0] |
|
assert is_toktype(comment_token, 'LCOMMENT') or is_toktype(comment_token, 'BCOMMENT') |
|
return token_text(comment_token) |
|
|
|
|
|
def generate_declassign(ast): |
|
"""Generates a C declaration-assignment statement.""" |
|
assert is_asttype(ast, 'declassign') |
|
children = ast_children(ast) |
|
vardecl_ast = children[0] |
|
vardecl = generate_vardecl(vardecl_ast) |
|
expr_ast = children[1] |
|
expr = generate_rvalue(expr_ast) |
|
return "%s = %s;" % (vardecl, expr) |
|
|
|
|
|
def generate_assign(ast): |
|
"""Generates a C assignment statement.""" |
|
assert is_asttype(ast, 'assign') |
|
children = ast_children(ast) |
|
|
|
rvalue1_ast = children[0] |
|
rvalue1 = generate_rvalue(rvalue1_ast) |
|
|
|
op_ast = children[1] |
|
assert is_asttype(op_ast, 'aoperator') |
|
op_token = ast_children(op_ast)[0] |
|
assert is_token(op_token) |
|
op = token_text(op_token) |
|
|
|
rvalue2_ast = children[2] |
|
rvalue2 = generate_rvalue(rvalue2_ast) |
|
return "%s %s %s;" % (rvalue1, op, rvalue2) |
|
|
|
|
|
def generate_vardecl(ast): |
|
""" |
|
Generates a C variable declaration. |
|
|
|
Note: we don't append the trailing ';' here because this is also used for function args. |
|
""" |
|
assert is_asttype(ast, 'vardecl') |
|
children = ast_children(ast) |
|
|
|
i = 0 |
|
if is_asttype(children[i], 'storage'): |
|
output = generate_storage(children[i]) + " " |
|
i += 1 |
|
else: |
|
output = "" |
|
|
|
identifier_token = children[i] |
|
assert is_toktype(identifier_token, 'IDENTIFIER') |
|
var_name = token_text(identifier_token) |
|
i += 1 |
|
|
|
type_ast = children[i] |
|
|
|
# e.g. 'int a', 'char* b', 'static int c[8]', 'extern char** d', 'volatile int* d[3]', etc. |
|
output += generate_type(type_ast, var_name) |
|
return output |
|
|
|
|
|
def generate_call(ast): |
|
"""Generates a parenthetical C function call.""" |
|
assert is_asttype(ast, 'call') |
|
return "(%s)" % ", ".join([generate_rvalue(child) for child in ast_children(ast)]) |
|
|
|
|
|
def generate_return(ast): |
|
"""Generates a C return statement.""" |
|
assert is_asttype(ast, 'return') |
|
children = ast_children(ast) |
|
if len(children) == 0: |
|
return "return;" |
|
else: |
|
expr_ast = children[0] |
|
return "return %s;" % generate_rvalue(expr_ast) |
|
|
|
|
|
def generate_storage(ast): |
|
"""Generates a C storage classifier.""" |
|
assert is_asttype(ast, 'storage') |
|
return token_text(ast_children(ast)[0]) |
|
|
|
|
|
def generate_fundecl(ast): |
|
"""Generates a C function declaration.""" |
|
|
|
def generate_funtype(ast): |
|
"""Generates the "type" of a C function.""" |
|
assert is_asttype(ast, 'funtype') |
|
asts = ast_children(ast) |
|
if len(asts) == 0: |
|
return None |
|
else: |
|
return " ".join([token_text(token) for token in asts]) |
|
|
|
def generate_fspecifier(ast): |
|
"""Generates a C function declaration specifier.""" |
|
assert is_asttype(ast, 'fspecifier') |
|
return token_text(ast_children(ast)[0]) |
|
|
|
def generate_fundeclargs(ast): |
|
"""Generates a list of arguments in a C function declaration.""" |
|
assert is_asttype(ast, 'fundeclargs') |
|
arg_asts = ast_children(ast) |
|
args = [] |
|
for vardecl_ast in arg_asts: |
|
arg = generate_vardecl(vardecl_ast) |
|
args.append(arg) |
|
if len(args) == 0: |
|
return "void" |
|
else: |
|
return ", ".join(args) |
|
|
|
def generate_fundeclret(ast): |
|
"""Generates the return type of a C function declaration.""" |
|
assert is_asttype(ast, 'fundeclret') |
|
return generate_type(ast_children(ast)[0]) |
|
|
|
assert is_asttype(ast, 'fundecl') |
|
asts = ast_children(ast) |
|
i = 0 |
|
|
|
prefix = "" |
|
if is_asttype(asts[i], 'storage'): |
|
prefix += generate_storage(asts[i]) + " " |
|
i += 1 |
|
|
|
while is_asttype(asts[i], 'fspecifier'): |
|
prefix += generate_fspecifier(asts[i]) + " " |
|
i += 1 |
|
|
|
identifier = token_text(asts[i]) |
|
i += 1 |
|
|
|
args = "%s" % generate_fundeclargs(asts[i]) |
|
i += 1 |
|
|
|
if i < len(asts) and is_asttype(asts[i], 'fundeclret'): |
|
ret = generate_fundeclret(asts[i]) |
|
i += 1 |
|
else: |
|
ret = "void" |
|
|
|
if i < len(asts) and is_asttype(asts[i], 'scope'): |
|
scope = generate_scope(asts[i]) |
|
else: |
|
scope = None |
|
|
|
output = "%s %s(%s)" % (ret, identifier, args) |
|
if scope is None: |
|
output += ";" |
|
else: |
|
output += " %s" % scope |
|
return output |
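
# Expected shapes of the output (derived from the rules above):
#   'func f(a: int) -> int:' plus a body generates 'int f(int a) { ... }'
#   'func f()' with no body generates the prototype 'void f(void);'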
|
|
|
|
|
def generate_if(ast): |
|
"""Generates a C if statement""" |
|
assert is_asttype(ast, 'if') |
|
children = ast_children(ast) |
|
|
|
predicate_ast = children[0] |
|
predicate = generate_rvalue(predicate_ast) |
|
output = "if (%s) " % (predicate) |
|
|
|
scope_ast = children[1] |
|
output += generate_scope(scope_ast) |
|
|
|
for ast in children[2:]: |
|
assert is_ast(ast) |
|
if ast_type(ast) == 'elif': |
|
elif_children = ast_children(ast) |
|
|
|
predicate_ast = elif_children[0] |
|
predicate = generate_rvalue(predicate_ast) |
|
output += " else if (%s) " % (predicate) |
|
|
|
scope_ast = elif_children[1] |
|
output += generate_scope(scope_ast) |
|
|
|
elif ast_type(ast) == 'else': |
|
else_children = ast_children(ast) |
|
|
|
scope_ast = else_children[0] |
|
output += " else " + generate_scope(scope_ast) |
|
|
|
else: |
|
raise Exception("generate_if: unexpected AST type %s" % ast_type(ast)) |
|
|
|
return output |
|
|
|
|
|
def generate_while(ast): |
|
"""Generates a C while statement.""" |
|
assert is_asttype(ast, 'while') |
|
children = ast_children(ast) |
|
expr_ast = children[0] |
|
expr = generate_rvalue(expr_ast) |
|
scope_ast = children[1] |
|
scope = generate_scope(scope_ast) |
|
output = "while (%s) %s" % (expr, scope) |
|
return output |
|
|
|
|
|
def generate_for(ast): |
|
"""Generates a C for statement.""" |
|
assert is_asttype(ast, 'for') |
|
children = ast_children(ast) |
|
output = "for (" |
|
|
|
linestmt1_ast = children[0] |
|
if linestmt1_ast is not None: |
|
output += generate_linestmt(linestmt1_ast) + " " |
|
else: |
|
output += "; " |
|
|
|
expr2_ast = children[1] |
|
output += generate_rvalue(expr2_ast) + "; " |
|
|
|
stmt3s = [] |
|
for child in children[2:-1]: |
|
stmt = generate_linestmt(child) |
|
if stmt.endswith(';'): |
|
stmt = stmt[:-1] |
|
stmt3s.append(stmt) |
|
output += ", ".join(stmt3s) |
|
|
|
output += ") " |
|
scope_ast = children[-1] |
|
output += generate_scope(scope_ast) |
|
return output |
|
|
|
|
|
def generate_struct(ast): |
|
"""Generates a C struct declaration.""" |
|
assert is_asttype(ast, 'struct') |
|
identifier_token = ast_children(ast)[0] |
|
assert is_toktype(identifier_token, 'IDENTIFIER') |
|
identifier = token_text(identifier_token) |
|
output = "typedef struct _struct_%s %s;\n" % (identifier, identifier) |
|
output += "struct _struct_%s {\n" % (identifier) |
|
scope_ast = ast_children(ast)[1] |
|
assert is_asttype(scope_ast, 'structscope') |
|
vardecl_asts = ast_children(scope_ast) |
|
for ast in vardecl_asts: |
|
output += " " + generate_vardecl(ast) + ";\n" |
|
output += "};" |
|
return output |
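
# For example, 'struct point:' with fields 'x: int' and 'y: int' should
# generate:
#
#   typedef struct _struct_point point;
#   struct _struct_point {
#       int x;
#       int y;
#   };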
|
|
|
|
|
def generate_scope(ast): |
|
"""Generates a C scope.""" |
|
assert is_asttype(ast, 'scope') |
|
output = "{" |
|
for statement_ast in ast_children(ast): |
|
statement = generate_statement(statement_ast) |
|
output += ("\n" + indent_text(statement)) |
|
line = "\n}" |
|
output += line |
|
return output |
|
|
|
|
|
def generate_rvalue(ast): |
|
"""Generates a C rvalue expression.""" |
|
assert is_asttype(ast, 'rvalue') |
|
output = "" |
|
for child in ast_children(ast): |
|
if is_asttype(child, 'ternary'): |
|
output += generate_ternary(child) |
|
elif is_asttype(child, 'binary'): |
|
output += generate_binary(child) |
|
elif is_asttype(child, 'unary'): |
|
output += generate_unary(child) |
|
elif is_asttype(child, 'dotted'): |
|
output += generate_dotted(child) |
|
elif is_asttype(child, 'arrowed'): |
|
output += generate_arrowed(child) |
|
elif is_asttype(child, 'indexed'): |
|
output += generate_indexed(child) |
|
elif is_asttype(child, 'called'): |
|
output += generate_called(child) |
|
elif is_asttype(child, 'type'): |
|
output += generate_type(child) |
|
elif is_asttype(child, 'cast'): |
|
output += generate_cast(child) |
|
        elif is_token(child) and token_type(child) in (
                'INTLIT', 'FLOATLIT', 'STRINGLIT', 'CHARLIT',
                'BOOLLIT', 'IDENTIFIER'):
|
output += token_text(child) |
|
else: |
|
raise Exception("generate_rvalue: unexpcted ast %s" % str(ast)) |
|
return output |
|
|
|
|
|
def generate_ternary(ast): |
|
"""Generates a C ternary statement.""" |
|
assert is_asttype(ast, 'ternary') |
|
rval1_ast = ast_children(ast)[0] |
|
rval1 = generate_rvalue(rval1_ast) |
|
rval2_ast = ast_children(ast)[1] |
|
rval2 = generate_rvalue(rval2_ast) |
|
rval3_ast = ast_children(ast)[2] |
|
rval3 = generate_rvalue(rval3_ast) |
|
return "(%s ? %s : %s)" % (rval1, rval2, rval3) |
|
|
|
|
|
def generate_binary(ast): |
|
"""Generates a binary-operator C statement.""" |
|
assert is_asttype(ast, 'binary') |
|
children = ast_children(ast) |
|
|
|
expr1_ast = children[0] |
|
expr1 = generate_rvalue(expr1_ast) |
|
|
|
op_ast = children[1] |
|
assert is_asttype(op_ast, 'boperator') |
|
op_token = ast_children(op_ast)[0] |
|
assert is_token(op_token) |
|
op = token_text(op_token) |
|
|
|
expr2_ast = children[2] |
|
expr2 = generate_rvalue(expr2_ast) |
|
|
|
return "(%s %s %s)" % (expr1, op, expr2) |
|
|
|
|
|
def generate_unary(ast): |
|
"""Generates a unary-operator C statement.""" |
|
assert is_asttype(ast, 'unary') |
|
children = ast_children(ast) |
|
op_ast = children[0] |
|
assert is_asttype(op_ast, 'uoperator') |
|
op_token = ast_children(op_ast)[0] |
|
assert is_token(op_token) |
|
op = token_text(op_token) |
|
expr_ast = children[1] |
|
expr = generate_rvalue(expr_ast) |
|
return "(%s(%s))" % (op, expr) |
|
|
|
|
|
def generate_dotted(ast): |
|
"""Generates a C dotted member access expression.""" |
|
assert is_asttype(ast, 'dotted') |
|
return ".%s" % token_text(ast_children(ast)[0]) |
|
|
|
|
|
def generate_arrowed(ast): |
|
"""Generates a C arrow access expression.""" |
|
assert is_asttype(ast, 'arrowed') |
|
return "->%s" % token_text(ast_children(ast)[0]) |
|
|
|
|
|
def generate_indexed(ast): |
|
"""Generates a C indexed access expression.""" |
|
assert is_asttype(ast, 'indexed') |
|
return "[%s]" % generate_rvalue(ast_children(ast)[0]) |
|
|
|
|
|
def generate_called(ast): |
|
"""Generates a C function call suffix.""" |
|
assert is_asttype(ast, 'called') |
|
output = "(" |
|
output += ", ".join([generate_rvalue(child) for child in ast_children(ast)]) |
|
output += ")" |
|
return output |
|
|
|
|
|
def generate_type(ast, identifier=""): |
|
""" |
|
Generates a C type declaration. |
|
|
|
If identifier is not given, this is an abstract type declaration (e.g. a cast). |
|
""" |
|
def did_switch_direction(previous, current): |
|
""" |
|
        Detect a change in 'direction' while interpreting the type stack.
|
|
|
C type declaration operator precedence requires that we "go right" first. |
|
i.e. 'int *foo[]' is "array of pointer to int", not "pointer to array of int". |
|
In order to express "pointer to array of int", we have to use parenthesis |
|
to change overcome operator precedence, i.e. 'int (*foo)[]'. |
|
Any time we need to "change direction", we need to wrap in parenthesis. |
|
See http://unixwiz.net/techtips/reading-cdecl.html |
|
""" |
|
lefts = ['pointer'] |
|
rights = ['array', 'function'] |
|
return (previous in lefts and current in rights) \ |
|
or (previous in rights and current in lefts) |
|
|
|
assert is_asttype(ast, 'type') |
|
types = type_ast_as_list(ast) |
|
output = identifier |
|
previous = None |
|
for t in types[:-1]: |
|
if t == 'pointer': |
|
if did_switch_direction(previous, t): |
|
output = "*(%s)" % output |
|
else: |
|
output = "*%s" % output |
|
elif t == 'array': |
|
if did_switch_direction(previous, t): |
|
output = "(%s)[]" % output |
|
else: |
|
output = "%s[]" % output |
|
elif t == 'function': |
|
if did_switch_direction(previous, t): |
|
output = "(%s)()" % output |
|
else: |
|
output = "%s()" % output |
|
elif t.startswith('array['): |
|
array_size = t.split('[')[1].split(']')[0] |
|
if did_switch_direction(previous, 'array'): |
|
output = "(%s)[%s]" % (output, array_size) |
|
else: |
|
output = "%s[%s]" % (output, array_size) |
|
elif t in ['const', 'volatile', 'restrict', '_Atomic']: |
|
output = "%s %s" % (t, output) |
|
else: |
|
raise Exception("generate_type: don't know how to handle %s" % t) |
|
|
|
if t.startswith('array'): |
|
previous = 'array' |
|
elif t in ['pointer', 'function']: |
|
previous = t |
|
|
|
base_type = types[-1] |
|
if identifier == "": |
|
output = "%s%s" % (base_type, output) |
|
else: |
|
output = "%s %s" % (base_type, output) |
|
return output |
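
# A worked example of the direction-switching rule above:
#   'pointer<array[8]<int>>' flattens to ['pointer', 'array[8]', 'int'].
#   Starting from the identifier 'foo':
#     'pointer'  -> '*foo'
#     'array[8]' -> direction switch (pointer, then array), so '(*foo)[8]'
#   Prepending the base type gives 'int (*foo)[8]',
#   i.e. "foo is a pointer to an array of 8 ints".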
|
|
|
|
|
def type_ast_as_list(ast): |
|
""" |
|
Return the type AST as a list of types. |
|
|
|
e.g. |
|
- 'int' results in ['int'] |
|
- 'pointer<array<char>>' results in ['pointer', 'array', 'char'] |
|
- 'array[8]<int>' results in ['array[8]', 'int'] |
|
- 'volatile<pointer<char>>' results in ['volatile', 'pointer', 'char'] |
|
""" |
|
assert is_asttype(ast, 'type') |
|
children = ast_children(ast) |
|
if is_token(children[0]) and is_toktype(children[0], 'IDENTIFIER'): |
|
for token in children: |
|
assert is_toktype(token, 'IDENTIFIER') |
|
# return e.g. ['int'], ['unsigned char'], etc. |
|
composite_type = " ".join([token_text(token) for token in children]) |
|
return [composite_type] |
|
elif len(children) == 2 and is_token(children[0]) and is_asttype(children[1], 'type'): |
|
token = children[0] |
|
sub_ast = children[1] |
|
# return e.g. ['array', <recursive call>] |
|
return [token_text(token)] + type_ast_as_list(sub_ast) |
|
elif len(children) == 3 and is_toktype(children[0], 'ARRAY'): |
|
# this is a dimensioned array (e.g. array[8]) |
|
size_ast = children[1] |
|
array_size = generate_rvalue(size_ast) |
|
assert is_ast(children[2]) |
|
sub_ast = children[2] |
|
assert is_asttype(sub_ast, 'type') |
|
# return e.g. ['array[8]', <recursive call>] |
|
return ["array[%s]" % (array_size)] + type_ast_as_list(sub_ast) |
|
elif is_asttype(children[0], 'tqualifier'): |
|
qualifier_ast = children[0] |
|
qualifier = token_text(ast_children(qualifier_ast)[0]) |
|
assert is_asttype(children[1], 'type') |
|
return [qualifier] + type_ast_as_list(children[1]) |
|
else: |
|
raise Exception("type_ast_as_list: don't know how to handle %s" % str(ast)) |
|
|
|
|
|
def generate_cast(ast): |
|
"""Generates a C cast statement.""" |
|
assert is_asttype(ast, 'cast') |
|
var = generate_rvalue(ast_children(ast)[0]) |
|
type_ = generate_rvalue(ast_children(ast)[1]) |
|
output = "((%s)%s)" % (type_, var) |
|
return output |
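
# e.g. 'cast(x, int)' generates '((int)x)'.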
|
|
|
|
|
def indent_text(text): |
|
"""Add indentation to the given block of text.""" |
|
indent = " " * INDENT_UNIT |
|
indented_lines = [indent + line for line in text.splitlines()] |
|
return "\n".join(indented_lines) |
|
|
|
|
|
if __name__ == "__main__": |
|
import sys |
|
import pprint |
|
tdefs = load_tokendefs("tokendefs.txt") |
|
keywords = load_keywords("keywords.txt") |
|
input = None |
|
if len(sys.argv) > 1 and not sys.argv[-1].startswith('--'): |
|
input = open(sys.argv[-1]).read() |
|
else: |
|
input = sys.stdin.read() |
|
tokens = tokenize(tdefs, keywords, input) |
|
if '--tokens' in sys.argv: |
|
pprint.pprint(tokens) |
|
sys.exit(0) |
|
ast = parse(tokens) |
|
if '--ast' in sys.argv: |
|
pprint.pprint(ast) |
|
sys.exit(0) |
|
output = generate(ast) |
|
sys.stdout.write(output) |