|
#!/usr/bin/env python |
|
# -*- coding: utf-8 -*- |
|
|
|
# cy.py: a transpiler for an alternate syntax for C.
|
# Note: this is an incomplete work-in-progress. |
|
|
|
|
|
# Data structures: |
|
|
|
# A token is a tuple. |
|
# First element is a string 'token'. |
|
# Second element is a string which is the token type (e.g. 'OBRACKET'). |
|
# Third element is a string which is the source text of the token (e.g. '['). |
|
# e.g. ('token', 'OBRACKET', '[') |
|
# e.g. ('token', 'IDENTIFIER', 'foo') |
|
# e.g. ('token', 'S', ' ') |
|
|
|
|
|
# An AST node is a tuple. |
|
# First element is a string 'ast'. |
|
# Second element is a string which is the AST node type (e.g. 'vardecl'). |
|
# Third element is a list of the child tuples (AST nodes or tokens).
|
# e.g. ('ast', 'type', [('token', 'IDENTIFIER', 'int')]) |
|
|
|
|
|
# The grammar below uses this EBNF notation:
|
# - lowercase are AST nodes |
|
# - ALLCAPS are tokens (terminals) |
|
# - Concatenation: rule1 = rule2 rule3 rule4 |
|
# - Alternation: rule1 = rule2 | rule3 | rule4 |
|
# - Repetition: rule1 = rule2 { rule3 } |
|
# - Grouping: rule1 = (rule2 rule3) | rule4 |
|
|
|
# Grammar: |
|
# |
|
# program = statement { statement } |
|
# |
|
# statement = ( linestmt NL ) | blockstmt |
|
# linestmt = declassign | assign | vardecl | rvalue | return | BREAK | CONTINUE | INCLUDE | DEFINE |
|
# blockstmt = fundecl | if | while | for | struct | scope | comment | blankline |
|
# comment = [S] ( LCOMMENT | BCOMMENT ) NL |
|
# blankline = [S] NL |
|
# |
|
# declassign = vardecl S EQ S rvalue |
|
# assign = rvalue S aoperator S rvalue |
|
# aoperator = PLUSEQ | MINUSEQ | STAREQ | SLASHEQ | PERCENTEQ |
|
# | AMPEQ | PIPEEQ | CARATEQ | LTLTEQ | GTGTEQ |
|
# | EQ |
|
# vardecl = [storage S] IDENTIFIER COLON S type |
|
# |
|
# storage = [ AUTO | REGISTER | STATIC | EXTERN ] [THREADLOCAL] |
|
# |
|
# fundecl = [storage S] {fspecifier S} FUNC S IDENTIFIER fundeclargs [fundeclret] (scope | NL) |
|
# fspecifier = INLINE | NORETURN |
|
# fundeclargs = OPAREN [ vardecl { COMMA S vardecl } ] CPAREN |
|
# fundeclret = S ARROW S type |
|
# |
|
# return = RETURN [ S rvalue ] |
|
# |
|
# if = IF S rvalue scope { elif } [else] |
|
# elif = ELIF S rvalue scope |
|
# else = ELSE scope |
|
# while = WHILE S rvalue scope |
|
# for = FOR S [linestmt] SEMICOLON S rvalue SEMICOLON S linestmt { COMMA S linestmt } scope |
|
# scope = COLON NL INDENT statement { statement } DEDENT |
|
# struct = STRUCT S IDENTIFIER structscope
|
# structscope = COLON NL INDENT vardecl NL { vardecl NL } DEDENT |
|
# |
|
# rvalue = ( ( ternary | binary | unary | IDENTIFIER ) { dotted | arrowed | indexed | called } ) |
|
# | type | cast | INTLIT | FLOATLIT | STRINGLIT | CHARLIT | BOOLLIT |
|
# cast = CAST called |
|
# |
|
# unary = uoperator OPAREN rvalue CPAREN |
|
# uoperator = MINUS | AMP | STAR | BANG | TILDE |
|
# binary = OPAREN rvalue S boperator S rvalue CPAREN |
|
# boperator = LTLT | GTGT | LTEQ | GTEQ | EQEQ | BANGEQ | AMPAMP | BARBAR |
|
# | PLUS | MINUS | STAR | SLASH | PERCENT | LT | GT |
|
# | BANG | AMP | BAR | TILDE | CARAT |
|
# ternary = OPAREN rvalue S QMARK S rvalue S COLON S rvalue CPAREN
|
# |
|
# dotted = PERIOD IDENTIFIER |
|
# arrowed = ARROW IDENTIFIER |
|
# indexed = OBRACKET rvalue CBRACKET |
|
# called = OPAREN [ rvalue { COMMA S rvalue } ] CPAREN |
|
# |
|
# type = ARRAY OBRACKET rvalue CBRACKET LT type GT |
|
# | ( ARRAY | POINTER | FUNCTION | tqualifier ) LT type GT |
|
# | IDENTIFIER { S IDENTIFIER } |
|
# tqualifier = CONST | VOLATILE | RESTRICT | ATOMIC |
|
# |
|
# ws = (S | NL) { (S | NL) } |
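#
# An illustrative sketch of the intended translation (the concrete token
# definitions live in tokendefs.txt and keywords.txt):
#
#   func add(a: int, b: int) -> int:
#       return (a + b)
#
# should transpile to:
#
#   int add(int a, int b) {
#       return (a + b);
#   }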
|
|
|
|
|
# Token data structure utils: |
|
|
|
def is_token(token): |
|
"""Returns whether the arg is a token.""" |
|
return isinstance(token, tuple) and len(token) > 0 and token[0] == 'token' |
|
|
|
|
|
def token_type(token): |
|
"""Returns the type of the token (e.g. 'IDENTIFIER').""" |
|
assert is_token(token) |
|
return token[1] |
|
|
|
|
|
def token_text(token): |
|
"""Returns the source text of the token (e.g. 'foo' for a 'IDENTIFIER' token).""" |
|
assert is_token(token) |
|
return token[2] |
|
|
|
|
|
def is_toktype(token, toktype): |
|
""" |
|
Returns whether the token is of the given type. |
|
_toktype_ is e.g. 'IDENTIFIER', 'COLON', etc. |
|
""" |
|
return is_token(token) and token_type(token) == toktype |
|
|
|
|
|
|
|
# Tokenizer implementation: |
|
|
|
def tokenize(tokendefs, keyword_map, input, indent=0): |
|
"""Uses tokendefs to tokenize the 'input' string and return a list of tokens""" |
|
tokens = [] |
|
offset = 0 |
|
previous = None |
|
while offset < len(input): |
|
for (token_name, regex) in tokendefs: |
|
m = regex.match(input, offset) |
|
if m is not None: |
|
matched_text = m.group(0) |
|
offset += len(matched_text) |
|
token = ('token', token_name, matched_text) |
|
|
|
# special-case: distinguish keywords from identifiers |
|
token = keyword_specialcases(token, keyword_map) |
|
|
|
# special-case: implement "offside-rule" |
|
(tokens2, indent) = offside_rule(token, previous, indent) |
|
|
|
tokens += tokens2 |
|
previous = token |
|
break |
|
else: |
|
raise Exception("Couldn't tokenize starting at '%s...'" % input[offset:offset+16]) |
|
|
|
# special-case: add any needed implicit trailing DEDENT tokens |
|
while indent > 0: |
|
tokens.append(('token', 'DEDENT', '')) |
|
indent -= 1 |
|
|
|
return tokens |
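
# A minimal usage sketch (with hypothetical inline token definitions; the
# real ones are loaded from tokendefs.txt via load_tokendefs below):
#
#   import re
#   tokendefs = [
#       ('IDENTIFIER', re.compile(r'[a-zA-Z][a-zA-Z0-9]*')),
#       ('COLON', re.compile(r':')),
#       ('S', re.compile(r' +')),
#       ('NL', re.compile(r'\n')),
#   ]
#   tokenize(tokendefs, {}, 'x: int\n')
#   # => [('token', 'IDENTIFIER', 'x'), ('token', 'COLON', ':'),
#   #     ('token', 'S', ' '), ('token', 'IDENTIFIER', 'int'),
#   #     ('token', 'NL', '\n')]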
|
|
|
|
|
INDENT_UNIT = 4
|
def offside_rule(token, previous, indent_level): |
|
""" |
|
Implement the "offside rule" indentation-based scoping mechanism. |
|
See https://en.wikipedia.org/wiki/Off-side_rule |
|
|
|
Args: |
|
- token: the current token |
|
- previous: the previous token |
|
    - indent_level: the indent level before encountering this token
|
|
|
Returns a list of replacement tokens and the new indentation level. |
|
|
|
If the previous token was a NL and the indentation level has changed, |
|
inject INDENT or DEDENT tokens as needed. |
|
Otherwise, return the token unmodified. |
|
|
|
Big-picture example: the token stream |
|
COLON NL S RETURN NL |
|
should become |
|
COLON NL INDENT RETURN NL DEDENT |
|
""" |
|
|
|
def get_indent_level(text): |
|
""" |
|
Determine the indentation level of the text. |
|
TODO: this implementation is pretty naive. |
|
""" |
|
indentation = text.split('\n')[-1] |
|
assert len(indentation) % INDENT_UNIT == 0 |
|
        return len(indentation) // INDENT_UNIT  # integer division keeps the level an int
|
|
|
assert is_token(token) |
|
|
|
if previous is None: |
|
return ([token], indent_level) |
|
assert is_token(previous) |
|
|
|
if not is_toktype(previous, 'NL'): |
|
return ([token], indent_level) |
|
|
|
tokens = [] |
|
if is_toktype(token, 'S'): |
|
text = token_text(token) |
|
new_level = get_indent_level(text) |
|
else: |
|
new_level = 0 |
|
|
|
for _ in range(new_level - indent_level): |
|
tokens.append(('token', 'INDENT', ' ')) |
|
for _ in range(indent_level - new_level): |
|
tokens.append(('token', 'DEDENT', '')) |
|
|
|
if not is_toktype(token, 'S'): |
|
tokens.append(token) |
|
|
|
return (tokens, new_level) |
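
# For example, immediately after a NL token at indent level 0, a four-space
# S token becomes a single INDENT (a sketch, assuming the S/NL token types
# from tokendefs.txt):
#
#   offside_rule(('token', 'S', '    '), ('token', 'NL', '\n'), 0)
#   # => ([('token', 'INDENT', '    ')], 1)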
|
|
|
|
|
def keyword_specialcases(token, keyword_map): |
|
""" |
|
Add a special-case which turns IDENTIFIER tokens into keyword tokens. |
|
|
|
e.g.: |
|
- 'array' changes from an IDENTIFIER token to an ARRAY token. |
|
- 'true' changes from an IDENTIFIER token to a BOOLLIT token. |
|
""" |
|
if is_toktype(token, 'IDENTIFIER'): |
|
text = token_text(token) |
|
if text in keyword_map.keys(): |
|
toktype = keyword_map[text] |
|
return ('token', toktype, text) |
|
return token |
|
|
|
|
|
def load_tokendefs(fpath): |
|
""" |
|
Load the token definitions from the file at fpath. |
|
|
|
Returns a list of tuples of the form (<token type>, <compiled regex>). |
|
|
|
The format of the tokendefs file should be pairs of lines: |
|
- a line which is the token type (e.g. 'IDENTIFIER', 'OBRACKET', etc), |
|
- followed by a line which is the regex which recognizes that token. |
|
|
|
Example tokendefs.txt: |
|
IDENTIFIER |
|
[a-zA-Z][a-zA-Z0-9]* |
|
OPAREN |
|
\( |
|
CPAREN |
|
    \)
|
""" |
|
import re |
|
tokendefs = [] |
|
with open(fpath) as f: |
|
for line in f: |
|
token_name = line.rstrip('\n') |
|
            pattern = next(f).rstrip('\n')
|
regex = re.compile(pattern) |
|
tokendef = (token_name, regex) |
|
tokendefs.append(tokendef) |
|
return tokendefs |
|
|
|
|
|
def load_keywords(fpath): |
|
""" |
|
Load the keyword definitions from the file at the path. |
|
|
|
Returns a dict which maps keywords to toktypes. |
|
|
|
The format of the keywords file should be pairs of lines: |
|
    - a keyword (e.g. 'true'),
|
- followed by a line which is the toktype of that keyword (e.g. 'BOOLLIT'). |
|
|
|
Example keywords.txt file: |
|
true |
|
BOOLLIT |
|
false |
|
BOOLLIT |
|
array |
|
ARRAY |
|
return |
|
RETURN |
|
""" |
|
keyword_map = {} |
|
with open(fpath) as f: |
|
for line in f: |
|
keyword = line.rstrip('\n') |
|
            toktype = next(f).rstrip('\n')
|
keyword_map[keyword] = toktype |
|
return keyword_map |
|
|
|
|
|
|
|
# AST node data structure utils: |
|
|
|
def is_ast(ast): |
|
"""Returns whether the arg is an AST node.""" |
|
return isinstance(ast, tuple) and len(ast) > 0 and ast[0] == 'ast' |
|
|
|
|
|
def ast_type(ast): |
|
"""Returns the type of the AST (e.g. 'vardecl', 'statement', etc.)""" |
|
assert is_ast(ast) |
|
return ast[1] |
|
|
|
|
|
def is_asttype(ast, atype): |
|
""" |
|
Returns whether the AST node is of the given type. |
|
atype is e.g. 'vardecl', 'statement', etc. |
|
""" |
|
return is_ast(ast) and ast_type(ast) == atype |
|
|
|
|
|
def ast_children(ast): |
|
"""Return the child nodes of the given AST.""" |
|
assert is_ast(ast) |
|
return ast[2] |
|
|
|
|
|
|
|
# Parser helpers: |
|
# See https://en.wikipedia.org/wiki/Extended_Backus%E2%80%93Naur_form#Table_of_symbols |
|
|
|
def alternation(tokens, funcs): |
|
"""Try each of the parsing functions until one succeeds.""" |
|
failure = (None, tokens) |
|
for f in funcs: |
|
(ast, tokens) = f(tokens) |
|
if ast is not None: |
|
return (ast, tokens) |
|
return failure |
|
|
|
|
|
def concatenation(tokens, funcs): |
|
""" |
|
All of the parsing functions must succeed as a whole. |
|
    Returns (<list of ASTs>, <remaining tokens>).
|
""" |
|
failure = (None, tokens) |
|
asts = [] |
|
for f in funcs: |
|
(ast, tokens) = f(tokens) |
|
if ast is None: |
|
return failure |
|
asts.append(ast) |
|
return (asts, tokens) |
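
# For example, the vardecl parser below composes terminal parsers like this:
#
#   (asts, tokens) = concatenation(tokens, [
#       parse_IDENTIFIER,
#       parse_COLON,
#       parse_S,
#       parse_type
#   ])
#
# which either yields all four parsed pieces or fails and hands back the
# original token list.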
|
|
|
|
|
|
|
# Parsing functions: |
|
|
|
# Note: all parse_x functions share a similar format: |
|
# - Their first argument is the list of tokens. |
|
# - They return a tuple of (<parsed result>, <remaining tokens>). |
|
# - <parsed result> is either an AST node or a token (a terminal). |
|
# - <remaining tokens> is the list of unconsumed tokens. |
|
# - If the parse fails, (None, <tokens>) is returned, where <tokens> |
|
# is the list of tokens which was passed in. |
|
|
|
def parse(tokens): |
|
"""Attempts to parse the tokens into an AST.""" |
|
(ast, tokens) = parse_program(tokens) |
|
if ast is None: |
|
raise Exception("Couldn't parse starting at %s" % tokens[:8]) |
|
if len(tokens) > 0: |
|
raise Exception("Leftover tokens: %s" % tokens) |
|
return ast |
|
|
|
|
|
def parse_program(tokens): |
|
""" |
|
Attempts to parse a program. |
|
|
|
Grammar: |
|
program = statement { statement } |
|
""" |
|
failure = (None, tokens) |
|
statement_asts = [] |
|
|
|
(ast, tokens) = parse_statement(tokens) |
|
if ast is None: |
|
return failure |
|
statement_asts.append(ast) |
|
|
|
while True: |
|
(ast, tokens) = parse_statement(tokens) |
|
if ast is None: |
|
break |
|
statement_asts.append(ast) |
|
|
|
ast = ('ast', 'program', statement_asts) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_statement(tokens): |
|
""" |
|
Attempts to parse a statement. |
|
|
|
Grammar: |
|
statement = ( linestmt NL ) | blockstmt |
|
""" |
|
failure = (None, tokens) |
|
|
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_linestmt, |
|
parse_NL |
|
]) |
|
if asts is not None: |
|
line_ast = asts[0] |
|
ast = ('ast', 'statement', [line_ast]) |
|
return (ast, tokens) |
|
|
|
(block_ast, tokens) = parse_blockstmt(tokens) |
|
if block_ast is not None: |
|
ast = ('ast', 'statement', [block_ast]) |
|
return (ast, tokens) |
|
|
|
return failure |
|
|
|
|
|
def parse_linestmt(tokens): |
|
""" |
|
Attempts to parse a "line" statement. |
|
|
|
Grammar: |
|
linestmt = declassign | assign | vardecl | rvalue | return | BREAK | CONTINUE | INCLUDE | DEFINE |
|
""" |
|
failure = (None, tokens) |
|
(ast, tokens) = alternation(tokens, [ |
|
parse_declassign, |
|
parse_assign, |
|
parse_vardecl, |
|
parse_rvalue, |
|
parse_return, |
|
parse_BREAK, |
|
parse_CONTINUE, |
|
parse_INCLUDE, |
|
parse_DEFINE |
|
]) |
|
if ast is None: |
|
return failure |
|
line_ast = ('ast', 'linestmt', [ast]) |
|
return (line_ast, tokens) |
|
|
|
|
|
def parse_blockstmt(tokens): |
|
""" |
|
Attempts to parse a "block" statement. |
|
|
|
Grammar: |
|
blockstmt = fundecl | if | while | for | struct | scope | comment | blankline |
|
""" |
|
failure = (None, tokens) |
|
(ast, tokens) = alternation(tokens, [ |
|
parse_fundecl, |
|
parse_if, |
|
parse_while, |
|
parse_for, |
|
parse_struct, |
|
parse_scope, |
|
parse_comment, |
|
parse_blankline |
|
]) |
|
if ast is None: |
|
return failure |
|
block_ast = ('ast', 'blockstmt', [ast]) |
|
return (block_ast, tokens) |
|
|
|
|
|
def parse_comment(tokens): |
|
""" |
|
Attempts to parse a (line or block-level) comment. |
|
|
|
Grammar: |
|
comment = [S] ( LCOMMENT | BCOMMENT ) NL |
|
""" |
|
failure = (None, tokens) |
|
    (_, tokens) = parse_S(tokens)
|
(token, tokens) = alternation(tokens, [ |
|
parse_LCOMMENT, |
|
parse_BCOMMENT |
|
]) |
|
if token is None: |
|
return failure |
|
ast = ('ast', 'comment', [token]) |
|
(nl_ast, tokens) = parse_NL(tokens) |
|
if nl_ast is None: |
|
return failure |
|
return (ast, tokens) |
|
|
|
|
|
def parse_blankline(tokens): |
|
""" |
|
Attempts to parse a blank line. |
|
|
|
Grammar: |
|
blankline = [S] NL |
|
""" |
|
failure = (None, tokens) |
|
(_, tokens) = parse_S(tokens) |
|
(ast, tokens) = parse_NL(tokens) |
|
if ast is None: |
|
return failure |
|
ast = ('ast', 'blankline', []) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_declassign(tokens): |
|
""" |
|
Attempts to parse a declaration-assignment statement. |
|
|
|
Grammar: |
|
declassign = vardecl S EQ S rvalue |
|
""" |
|
failure = (None, tokens) |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_vardecl, |
|
parse_S, |
|
parse_EQ, |
|
parse_S, |
|
parse_rvalue |
|
]) |
|
if asts is None: |
|
return failure |
|
vardecl_ast = asts[0] |
|
expr_ast = asts[4] |
|
ast = ('ast', 'declassign', [vardecl_ast, expr_ast]) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_assign(tokens): |
|
""" |
|
Attempts to parse an assignment statement. |
|
|
|
Grammar: |
|
assign = rvalue S aoperator S rvalue |
|
""" |
|
failure = (None, tokens) |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_rvalue, |
|
parse_S, |
|
parse_aoperator, |
|
parse_S, |
|
parse_rvalue |
|
]) |
|
if asts is None: |
|
return failure |
|
ast = ('ast', 'assign', [asts[0], asts[2], asts[4]]) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_aoperator(tokens): |
|
""" |
|
Attempts to parse an assignment operator. |
|
|
|
Grammar: |
|
aoperator = PLUSEQ | MINUSEQ | STAREQ | SLASHEQ | PERCENTEQ |
|
| AMPEQ | PIPEEQ | CARATEQ | LTLTEQ | GTGTEQ |
|
| EQ |
|
""" |
|
failure = (None, tokens) |
|
(token, tokens) = alternation(tokens, [ |
|
parse_PLUSEQ, |
|
parse_MINUSEQ, |
|
parse_STAREQ, |
|
parse_SLASHEQ, |
|
parse_PERCENTEQ, |
|
parse_AMPEQ, |
|
parse_PIPEEQ, |
|
parse_CARATEQ, |
|
parse_LTLTEQ, |
|
parse_GTGTEQ, |
|
parse_EQ |
|
]) |
|
if token is None: |
|
return failure |
|
ast = ('ast', 'aoperator', [token]) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_vardecl(tokens): |
|
""" |
|
Attempts to parse a vardecl AST node. |
|
|
|
Grammar: |
|
vardecl = [storage S] IDENTIFIER COLON S type |
|
""" |
|
failure = (None, tokens) |
|
|
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_storage, |
|
parse_S |
|
]) |
|
if asts is not None: |
|
storage_ast = asts[0] |
|
else: |
|
storage_ast = None |
|
|
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_IDENTIFIER, |
|
parse_COLON, |
|
parse_S, |
|
parse_type |
|
]) |
|
if asts is None: |
|
return failure |
|
identifier_token = asts[0] |
|
type_ast = asts[3] |
|
|
|
if storage_ast is None: |
|
subnodes = [identifier_token, type_ast] |
|
else: |
|
subnodes = [storage_ast, identifier_token, type_ast] |
|
|
|
vardecl_ast = ('ast', 'vardecl', subnodes) |
|
return (vardecl_ast, tokens) |
|
|
|
|
|
def parse_storage(tokens): |
|
""" |
|
Attempts to parse a declaration storage type. |
|
|
|
Grammar: |
|
storage = [ AUTO | REGISTER | STATIC | EXTERN ] [THREADLOCAL] |
|
""" |
|
failure = (None, tokens) |
|
asts = [] |
|
(ast, tokens) = alternation(tokens, [ |
|
parse_AUTO, |
|
parse_REGISTER, |
|
parse_STATIC, |
|
parse_EXTERN |
|
]) |
|
if ast is not None: |
|
asts.append(ast) |
|
(ast, tokens) = parse_THREADLOCAL(tokens) |
|
if ast is not None: |
|
asts.append(ast) |
|
if len(asts) == 0: |
|
return failure |
|
else: |
|
ast = ('ast', 'storage', asts) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_return(tokens): |
|
""" |
|
Attempts to parse a return statement. |
|
|
|
Grammar: |
|
return = RETURN [ S rvalue ] |
|
""" |
|
failure = (None, tokens) |
|
(ast, tokens) = parse_RETURN(tokens) |
|
if ast is None: |
|
return failure |
|
return_ast = ('ast', 'return', []) |
|
|
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_S, |
|
parse_rvalue |
|
]) |
|
if asts is not None: |
|
expr_ast = asts[1] |
|
return_ast = ('ast', 'return', [expr_ast]) |
|
|
|
return (return_ast, tokens) |
|
|
|
|
|
# fundecl examples: |
|
# func f(): |
|
# func f(a: int): |
|
# func f() -> int: |
|
# func f(a: int) -> int: |
|
# func f(a: int, b: int): |
|
# func f(a: int, b: int) -> int: |
|
def parse_fundecl(tokens): |
|
""" |
|
Attempts to parse a function declaration. |
|
|
|
Grammar: |
|
fundecl = [storage S] {fspecifier S} FUNC S IDENTIFIER fundeclargs [fundeclret] (scope | NL) |
|
""" |
|
failure = (None, tokens) |
|
subnodes = [] |
|
(asts, tokens) = concatenation(tokens, [parse_storage, parse_S]) |
|
if asts is not None: |
|
subnodes.append(asts[0]) |
|
while True: |
|
(asts, tokens) = concatenation(tokens, [parse_fspecifier, parse_S]) |
|
if asts is None: |
|
break |
|
else: |
|
subnodes.append(asts[0]) |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_FUNC, |
|
parse_S, |
|
parse_IDENTIFIER, |
|
parse_fundeclargs |
|
]) |
|
if asts is None: |
|
return failure |
|
subnodes.append(asts[2]) |
|
subnodes.append(asts[3]) |
|
(ast, tokens) = parse_fundeclret(tokens) |
|
if ast is not None: |
|
subnodes.append(ast) |
|
(ast, tokens) = alternation(tokens, [parse_scope, parse_NL]) |
|
if ast is None: |
|
return failure |
|
if is_asttype(ast, 'scope'): |
|
subnodes.append(ast) |
|
ast = ('ast', 'fundecl', subnodes) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_fspecifier(tokens): |
|
""" |
|
Attempts to parse a function specifier. |
|
|
|
Grammar: |
|
fspecifier = INLINE | NORETURN |
|
""" |
|
failure = (None, tokens) |
|
(ast, tokens) = alternation(tokens, [parse_INLINE, parse_NORETURN]) |
|
if ast is None: |
|
return failure |
|
else: |
|
ast = ('ast', 'fspecifier', [ast]) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_funtype(tokens): |
|
""" |
|
Attempts to parse the "type" of a function. |
|
|
|
Grammar: |
|
funtype = FUNC |
|
| ( STATIC LT FUNC GT ) | ( STATIC LT INLINE LT FUNC GT GT ) |
|
| ( EXTERN LT FUNC GT ) | ( EXTERN LT INLINE LT FUNC GT GT ) |
|
| ( INLINE LT FUNC GT ) |
|
""" |
|
failure = (None, tokens) |
|
(ast, tokens) = parse_FUNC(tokens) |
|
if ast is not None: |
|
ast = ('ast', 'funtype', []) |
|
return (ast, tokens) |
|
(asts, tokens) = concatenation(tokens, [parse_STATIC, parse_LT, parse_FUNC, parse_GT]) |
|
if asts is not None: |
|
ast = ('ast', 'funtype', [asts[0]]) |
|
return (ast, tokens) |
|
(asts, tokens) = concatenation(tokens, [parse_EXTERN, parse_LT, parse_FUNC, parse_GT]) |
|
if asts is not None: |
|
ast = ('ast', 'funtype', [asts[0]]) |
|
return (ast, tokens) |
|
(asts, tokens) = concatenation(tokens, [parse_INLINE, parse_LT, parse_FUNC, parse_GT]) |
|
if asts is not None: |
|
ast = ('ast', 'funtype', [asts[0]]) |
|
return (ast, tokens) |
|
(asts, tokens) = concatenation(tokens, [parse_STATIC, parse_LT, parse_INLINE, parse_LT, parse_FUNC, parse_GT, parse_GT]) |
|
if asts is not None: |
|
ast = ('ast', 'funtype', [asts[0], asts[2]]) |
|
return (ast, tokens) |
|
(asts, tokens) = concatenation(tokens, [parse_EXTERN, parse_LT, parse_INLINE, parse_LT, parse_FUNC, parse_GT, parse_GT]) |
|
if asts is not None: |
|
ast = ('ast', 'funtype', [asts[0], asts[2]]) |
|
return (ast, tokens) |
|
return failure |
|
|
|
|
|
def parse_fundeclargs(tokens): |
|
""" |
|
Attempts to parse a list of function declaration arguments. |
|
|
|
Grammar: |
|
fundeclargs = OPAREN [ vardecl { COMMA S vardecl } ] CPAREN |
|
""" |
|
failure = (None, tokens) |
|
|
|
def list_of_args(tokens): |
|
""" |
|
Grammar: vardecl { COMMA S vardecl } |
|
Returns (<list of args>, <remaining tokens>) |
|
""" |
|
args = [] |
|
(vardecl_ast, tokens) = parse_vardecl(tokens) |
|
if vardecl_ast is None: |
|
return (args, tokens) |
|
args.append(vardecl_ast) |
|
|
|
while True: |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_COMMA, |
|
parse_S, |
|
parse_vardecl |
|
]) |
|
if asts is None: |
|
break |
|
vardecl_ast = asts[2] |
|
args.append(vardecl_ast) |
|
|
|
return (args, tokens) |
|
|
|
(ast, tokens) = parse_OPAREN(tokens) |
|
if ast is None: |
|
return failure |
|
|
|
(args, tokens) = list_of_args(tokens) |
|
|
|
(ast, tokens) = parse_CPAREN(tokens) |
|
if ast is None: |
|
return failure |
|
|
|
ast = ('ast', 'fundeclargs', args) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_fundeclret(tokens): |
|
""" |
|
Attempts to parse a function declaration return value. |
|
|
|
Grammar: |
|
fundeclret = S ARROW S type |
|
""" |
|
failure = (None, tokens) |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_S, |
|
parse_ARROW, |
|
parse_S, |
|
parse_type |
|
]) |
|
if asts is None: |
|
return failure |
|
type_ast = asts[3] |
|
ast = ('ast', 'fundeclret', [type_ast]) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_if(tokens): |
|
""" |
|
Attempts to parse an if statement. |
|
|
|
Grammar: |
|
if = IF S rvalue scope { elif } [else] |
|
""" |
|
failure = (None, tokens) |
|
|
|
def parse_elif(tokens): |
|
""" |
|
Attempts to parse an elif statement. |
|
|
|
Grammar: |
|
elif = ELIF S rvalue scope |
|
""" |
|
failure = (None, tokens) |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_ELIF, |
|
parse_S, |
|
parse_rvalue, |
|
parse_scope |
|
]) |
|
if asts is None: |
|
return failure |
|
ast = ('ast', 'elif', [asts[2], asts[3]]) |
|
return (ast, tokens) |
|
|
|
def parse_else(tokens): |
|
""" |
|
Attempts to parse an else statement. |
|
|
|
Grammar: |
|
else = ELSE scope |
|
""" |
|
failure = (None, tokens) |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_ELSE, |
|
parse_scope |
|
]) |
|
if asts is None: |
|
return failure |
|
ast = ('ast', 'else', [asts[1]]) |
|
return (ast, tokens) |
|
|
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_IF, |
|
parse_S, |
|
parse_rvalue, |
|
parse_scope |
|
]) |
|
if asts is None: |
|
return failure |
|
sub_asts = [asts[2], asts[3]] |
|
|
|
elifs = [] |
|
while True: |
|
(ast, tokens) = parse_elif(tokens) |
|
if ast is None: |
|
break |
|
elifs.append(ast) |
|
sub_asts += elifs |
|
|
|
(else_ast, tokens) = parse_else(tokens) |
|
if else_ast is not None: |
|
sub_asts.append(else_ast) |
|
|
|
if_ast = ('ast', 'if', sub_asts) |
|
return (if_ast, tokens) |
|
|
|
|
|
def parse_while(tokens): |
|
""" |
|
Attempts to parse a while statement. |
|
|
|
Grammar: |
|
while = WHILE S rvalue scope |
|
""" |
|
failure = (None, tokens) |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_WHILE, |
|
parse_S, |
|
parse_rvalue, |
|
parse_scope |
|
]) |
|
if asts is None: |
|
return failure |
|
expr_ast = asts[2] |
|
scope_ast = asts[3] |
|
ast = ('ast', 'while', [expr_ast, scope_ast]) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_for(tokens): |
|
""" |
|
Attempts to parse a for statement. |
|
|
|
Grammar: |
|
for = FOR S [linestmt] SEMICOLON S rvalue SEMICOLON S linestmt { COMMA S linestmt } scope |
|
""" |
|
failure = (None, tokens) |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_FOR, |
|
parse_S |
|
]) |
|
if asts is None: |
|
return failure |
|
(linestmt1_ast, tokens) = parse_linestmt(tokens) |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_SEMICOLON, |
|
parse_S, |
|
parse_rvalue, |
|
parse_SEMICOLON, |
|
parse_S, |
|
parse_linestmt |
|
]) |
|
if asts is None: |
|
return failure |
|
expr2_ast = asts[2] |
|
linestmt3_asts = [asts[5]] |
|
while True: |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_COMMA, |
|
parse_S, |
|
parse_linestmt |
|
]) |
|
if asts is None: |
|
break |
|
linestmt3_asts.append(asts[2]) |
|
(scope_ast, tokens) = parse_scope(tokens) |
|
if scope_ast is None: |
|
return failure |
|
ast = ('ast', 'for', [linestmt1_ast, expr2_ast] + linestmt3_asts + [scope_ast]) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_scope(tokens): |
|
""" |
|
Attempts to parse a scoped sequence of statements. |
|
|
|
Grammar: |
|
scope = COLON NL INDENT statement { statement } DEDENT |
|
""" |
|
failure = (None, tokens) |
|
statements = [] |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_COLON, |
|
parse_NL, |
|
parse_INDENT, |
|
parse_statement |
|
]) |
|
if asts is None: |
|
return failure |
|
statements.append(asts[3]) |
|
|
|
while True: |
|
(ast, tokens) = parse_statement(tokens) |
|
if ast is None: |
|
break |
|
statements.append(ast) |
|
|
|
(ast, tokens) = parse_DEDENT(tokens) |
|
if ast is None: |
|
return failure |
|
|
|
ast = ('ast', 'scope', statements) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_struct(tokens): |
|
""" |
|
Attempts to parse a struct. |
|
|
|
Grammar: |
|
        struct = STRUCT S IDENTIFIER structscope
|
""" |
|
failure = (None, tokens) |
|
|
|
def parse_structscope(tokens): |
|
""" |
|
Grammar: |
|
structscope = COLON NL INDENT vardecl NL { vardecl NL } DEDENT |
|
""" |
|
failure = (None, tokens) |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_COLON, |
|
parse_NL, |
|
parse_INDENT, |
|
parse_vardecl, |
|
parse_NL |
|
]) |
|
if asts is None: |
|
return failure |
|
vardecls = [asts[3]] |
|
while True: |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_vardecl, |
|
parse_NL |
|
]) |
|
if asts is None: |
|
break |
|
vardecls.append(asts[0]) |
|
(dedent, tokens) = parse_DEDENT(tokens) |
|
if dedent is None: |
|
return failure |
|
ast = ('ast', 'structscope', vardecls) |
|
return (ast, tokens) |
|
|
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_STRUCT, |
|
parse_S, |
|
parse_IDENTIFIER, |
|
parse_structscope |
|
]) |
|
if asts is None: |
|
return failure |
|
identifier_token = asts[2] |
|
scope_ast = asts[3] |
|
ast = ('ast', 'struct', [identifier_token, scope_ast]) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_rvalue(tokens): |
|
""" |
|
Attempts to parse an rvalue (the right-hand side of an assignment statement). |
|
    I.e. an expression which results in a value.
|
|
|
Grammar: |
|
rvalue = ( ( ternary | binary | unary | IDENTIFIER ) { dotted | arrowed | indexed | called } ) |
|
| type | cast | INTLIT | FLOATLIT | STRINGLIT | CHARLIT | BOOLLIT |
|
""" |
|
failure = (None, tokens) |
|
|
|
def parse_rvalue1(tokens): |
|
""" |
|
Grammar fragment: |
|
( ternary | binary | unary | IDENTIFIER ) { dotted | arrowed | indexed | called } |
|
""" |
|
failure = (None, tokens) |
|
(ast1, tokens) = alternation(tokens, [ |
|
parse_ternary, |
|
parse_binary, |
|
parse_unary, |
|
parse_IDENTIFIER |
|
]) |
|
if ast1 is None: |
|
return failure |
|
asts = [] |
|
while True: |
|
(ast, tokens) = alternation(tokens, [ |
|
parse_dotted, |
|
parse_arrowed, |
|
parse_indexed, |
|
parse_called, |
|
]) |
|
if ast is None: |
|
break |
|
asts.append(ast) |
|
ast = ('ast', 'rvalue', [ast1] + asts) |
|
return (ast, tokens) |
|
|
|
(ast, tokens) = alternation(tokens, [ |
|
parse_rvalue1, |
|
parse_type, |
|
parse_cast, |
|
parse_INTLIT, |
|
parse_FLOATLIT, |
|
parse_STRINGLIT, |
|
parse_CHARLIT, |
|
parse_BOOLLIT |
|
]) |
|
if ast is None: |
|
return failure |
|
if is_asttype(ast, 'rvalue'): |
|
return (ast, tokens) |
|
else: |
|
return (('ast', 'rvalue', [ast]), tokens) |
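
# For example, 'foo.bar[0]' parses (roughly) as:
#
#   ('ast', 'rvalue', [
#       ('token', 'IDENTIFIER', 'foo'),
#       ('ast', 'dotted', [('token', 'IDENTIFIER', 'bar')]),
#       ('ast', 'indexed', [
#           ('ast', 'rvalue', [('token', 'INTLIT', '0')])
#       ])
#   ])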
|
|
|
|
|
def parse_cast(tokens): |
|
""" |
|
Attempts to parse a cast statement. |
|
|
|
Grammar: |
|
cast = CAST called |
|
""" |
|
failure = (None, tokens) |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_CAST, |
|
parse_called |
|
]) |
|
if asts is None: |
|
return failure |
|
assert len(ast_children(asts[1])) == 2 |
|
var_ast = ast_children(asts[1])[0] |
|
type_ast = ast_children(asts[1])[1] |
|
ast = ('ast', 'cast', [var_ast, type_ast]) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_dotted(tokens): |
|
""" |
|
Attempt to parse a member access (dot) expression. |
|
|
|
Grammar: |
|
dotted = PERIOD IDENTIFIER |
|
""" |
|
failure = (None, tokens) |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_PERIOD, |
|
parse_IDENTIFIER |
|
]) |
|
if asts is None: |
|
return failure |
|
ast = ('ast', 'dotted', [asts[1]]) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_arrowed(tokens): |
|
""" |
|
Attempt to parse a pointer access (arrow) expression. |
|
|
|
Grammar: |
|
arrowed = ARROW IDENTIFIER |
|
""" |
|
failure = (None, tokens) |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_ARROW, |
|
parse_IDENTIFIER |
|
]) |
|
if asts is None: |
|
return failure |
|
ast = ('ast', 'arrowed', [asts[1]]) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_indexed(tokens): |
|
""" |
|
Attempt to parse an array access (bracket) expression. |
|
|
|
Grammar: |
|
indexed = OBRACKET rvalue CBRACKET |
|
""" |
|
failure = (None, tokens) |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_OBRACKET, |
|
parse_rvalue, |
|
parse_CBRACKET |
|
]) |
|
if asts is None: |
|
return failure |
|
ast = ('ast', 'indexed', [asts[1]]) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_called(tokens): |
|
""" |
|
Grammar: |
|
        called = OPAREN [ rvalue { COMMA S rvalue } ] CPAREN
|
""" |
|
failure = (None, tokens) |
|
|
|
def list_of_exprs(tokens): |
|
""" |
|
Grammar: rvalue { COMMA S rvalue } |
|
Returns (<list of exprs>, <remaining tokens>) |
|
""" |
|
exprs = [] |
|
(expr_ast, tokens) = parse_rvalue(tokens) |
|
if expr_ast is None: |
|
return (exprs, tokens) |
|
exprs.append(expr_ast) |
|
|
|
while True: |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_COMMA, |
|
parse_S, |
|
parse_rvalue |
|
]) |
|
if asts is None: |
|
break |
|
expr_ast = asts[2] |
|
exprs.append(expr_ast) |
|
|
|
return (exprs, tokens) |
|
|
|
(ast, tokens) = parse_OPAREN(tokens) |
|
if ast is None: |
|
return failure |
|
(exprs, tokens) = list_of_exprs(tokens) |
|
(ast, tokens) = parse_CPAREN(tokens) |
|
if ast is None: |
|
return failure |
|
ast = ('ast', 'called', exprs) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_ternary(tokens): |
|
""" |
|
Attempts to parse a ternary statement. |
|
|
|
Grammar: |
|
        ternary = OPAREN rvalue S QMARK S rvalue S COLON S rvalue CPAREN
|
""" |
|
failure = (None, tokens) |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_OPAREN, |
|
parse_rvalue, |
|
parse_S, |
|
parse_QMARK, |
|
parse_S, |
|
parse_rvalue, |
|
parse_S, |
|
parse_COLON, |
|
parse_S, |
|
parse_rvalue, |
|
parse_CPAREN |
|
]) |
|
if asts is None: |
|
return failure |
|
ast = ('ast', 'ternary', [asts[1], asts[5], asts[9]]) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_binary(tokens): |
|
""" |
|
Attempts to parse a binary expression. |
|
|
|
Grammar: |
|
        binary = OPAREN rvalue S boperator S rvalue CPAREN
|
""" |
|
failure = (None, tokens) |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_OPAREN, |
|
parse_rvalue, |
|
parse_S, |
|
parse_boperator, |
|
parse_S, |
|
parse_rvalue, |
|
parse_CPAREN |
|
]) |
|
if asts is None: |
|
return failure |
|
expr1 = asts[1] |
|
op = asts[3] |
|
expr2 = asts[5] |
|
ast = ('ast', 'binary', [expr1, op, expr2]) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_boperator(tokens): |
|
""" |
|
Attempts to parse a binary operator. |
|
|
|
Grammar: |
|
boperator = LTLT | GTGT | LTEQ | GTEQ | EQEQ | BANGEQ | AMPAMP | BARBAR |
|
| PLUS | MINUS | STAR | SLASH | PERCENT | LT | GT |
|
| BANG | AMP | BAR | TILDE | CARAT |
|
""" |
|
failure = (None, tokens) |
|
(token, tokens) = alternation(tokens, [ |
|
parse_LTLT, |
|
parse_GTGT, |
|
parse_LTEQ, |
|
parse_GTEQ, |
|
parse_EQEQ, |
|
parse_BANGEQ, |
|
parse_AMPAMP, |
|
parse_BARBAR, |
|
parse_PLUS, |
|
parse_MINUS, |
|
parse_STAR, |
|
parse_SLASH, |
|
parse_PERCENT, |
|
parse_LT, |
|
parse_GT, |
|
parse_BANG, |
|
parse_AMP, |
|
parse_BAR, |
|
parse_TILDE, |
|
parse_CARAT |
|
]) |
|
if token is None: |
|
return failure |
|
ast = ('ast', 'boperator', [token]) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_unary(tokens): |
|
""" |
|
Attempts to parse a unary expression. |
|
|
|
Grammar: |
|
unary = uoperator OPAREN rvalue CPAREN |
|
""" |
|
failure = (None, tokens) |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_uoperator, |
|
parse_OPAREN, |
|
parse_rvalue, |
|
parse_CPAREN |
|
]) |
|
if asts is None: |
|
return failure |
|
op = asts[0] |
|
expr = asts[2] |
|
ast = ('ast', 'unary', [op, expr]) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_uoperator(tokens): |
|
""" |
|
Attempts to parse a unary operator. |
|
|
|
Grammar: |
|
uoperator = MINUS | AMP | STAR | BANG | TILDE |
|
""" |
|
failure = (None, tokens) |
|
(token, tokens) = alternation(tokens, [ |
|
parse_MINUS, |
|
parse_AMP, |
|
parse_STAR, |
|
parse_BANG, |
|
parse_TILDE |
|
]) |
|
if token is None: |
|
return failure |
|
ast = ('ast', 'uoperator', [token]) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_type(tokens): |
|
""" |
|
    Attempts to parse a type declaration.
|
|
|
Grammar: |
|
type = ARRAY OBRACKET rvalue CBRACKET LT type GT |
|
| ( ARRAY | POINTER | FUNCTION | tqualifier ) LT type GT |
|
| IDENTIFIER { S IDENTIFIER } |
|
""" |
|
failure = (None, tokens) |
|
|
|
def parse_type1(tokens): |
|
""" |
|
Grammar fragment: |
|
ARRAY OBRACKET rvalue CBRACKET LT type GT |
|
|
|
'array[8]<int>' becomes: |
|
('ast', 'type', [ |
|
('token', 'ARRAY', 'array'), |
|
            ('ast', 'rvalue', [('token', 'INTLIT', '8')]),
|
('ast', 'type', [ |
|
('token', 'IDENTIFIER', 'int') |
|
]) |
|
]) |
|
""" |
|
failure = (None, tokens) |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_ARRAY, |
|
parse_OBRACKET, |
|
parse_rvalue, |
|
parse_CBRACKET, |
|
parse_LT, |
|
parse_type, |
|
parse_GT |
|
]) |
|
if asts is None: |
|
return failure |
|
        array_token = asts[0]
|
rvalue_ast = asts[2] |
|
subtype_ast = asts[5] |
|
        ast = ('ast', 'type', [array_token, rvalue_ast, subtype_ast])
|
return (ast, tokens) |
|
|
|
def parse_type2(tokens): |
|
""" |
|
Grammar fragment: |
|
( ARRAY | POINTER | FUNCTION | tqualifier ) LT type GT |
|
|
|
'array<int>' becomes: |
|
('ast', 'type', [ |
|
('token', 'ARRAY', 'array'), |
|
('ast', 'type', [ |
|
('token', 'IDENTIFIER', 'int') |
|
]) |
|
]) |
|
|
|
'volatile<char>' becomes: |
|
('ast', 'type', [ |
|
('ast', 'tqualifier', [ |
|
('token', 'VOLATILE', 'volatile') |
|
]), |
|
('ast', 'type', [ |
|
('token', 'IDENTIFIER', 'char') |
|
]) |
|
]) |
|
|
|
'array<pointer<char>>' becomes: |
|
('ast', 'type', [ |
|
('token', 'ARRAY', 'array'), |
|
('ast', 'type', [ |
|
('token', 'POINTER', 'pointer'), |
|
('ast', 'type', [ |
|
('token', 'IDENTIFIER', 'char') |
|
]) |
|
]) |
|
]) |
|
|
|
|
""" |
|
failure = (None, tokens) |
|
(ast1, tokens) = alternation(tokens, [ |
|
parse_ARRAY, |
|
parse_POINTER, |
|
parse_FUNCTION, |
|
parse_tqualifier |
|
]) |
|
if ast1 is None: |
|
return failure |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_LT, |
|
parse_type, |
|
parse_GT |
|
]) |
|
if asts is None: |
|
return failure |
|
ast = ('ast', 'type', [ast1, asts[1]]) |
|
return (ast, tokens) |
|
|
|
def parse_type3(tokens): |
|
""" |
|
Grammar fragment: |
|
IDENTIFIER { S IDENTIFIER } |
|
|
|
'int' becomes: |
|
('ast', 'type', [('token', 'IDENTIFIER', 'int')]) |
|
|
|
'unsigned char' becomes: |
|
('ast', 'type', [('token', 'IDENTIFIER', 'unsigned'), ('token', 'IDENTIFIER', 'char')]) |
|
""" |
|
failure = (None, tokens) |
|
types = [] |
|
(identifier_token, tokens) = parse_IDENTIFIER(tokens) |
|
if identifier_token is None: |
|
return failure |
|
types.append(identifier_token) |
|
while True: |
|
(asts, tokens) = concatenation(tokens, [ |
|
parse_S, |
|
parse_IDENTIFIER |
|
]) |
|
if asts is None: |
|
break |
|
types.append(asts[1]) |
|
type_ast = ('ast', 'type', types) |
|
return (type_ast, tokens) |
|
|
|
(ast, tokens) = alternation(tokens, [ |
|
parse_type1, |
|
parse_type2, |
|
parse_type3 |
|
]) |
|
if ast is None: |
|
return failure |
|
else: |
|
return (ast, tokens) |
|
|
|
|
|
def parse_tqualifier(tokens): |
|
""" |
|
Attempts to parse a type qualifier. |
|
|
|
Grammar: |
|
tqualifier = CONST | VOLATILE | RESTRICT | ATOMIC |
|
""" |
|
failure = (None, tokens) |
|
(ast, tokens) = alternation(tokens, [ |
|
parse_CONST, |
|
parse_VOLATILE, |
|
parse_RESTRICT, |
|
parse_ATOMIC |
|
]) |
|
if ast is None: |
|
return failure |
|
else: |
|
ast = ('ast', 'tqualifier', [ast]) |
|
return (ast, tokens) |
|
|
|
|
|
def parse_ws(tokens): |
|
""" |
|
Attempts to parse any amount of whitespace. |
|
|
|
Grammar: |
|
ws = (S | NL) { (S | NL) } |
|
""" |
|
failure = (None, tokens) |
|
subnodes = [] |
|
(token, tokens) = alternation(tokens, [ |
|
parse_S, |
|
parse_NL |
|
]) |
|
if token is None: |
|
return failure |
|
subnodes.append(token) |
|
|
|
while True: |
|
(token, tokens) = alternation(tokens, [ |
|
parse_S, |
|
parse_NL |
|
]) |
|
if token is None: |
|
break |
|
subnodes.append(token) |
|
|
|
ast = ('ast', 'ws', subnodes) |
|
return (ast, tokens) |
|
|
|
|
|
# Terminal parsing: parsing individual tokens. |
|
|
|
def parse_terminal(tokens, toktype): |
|
""" |
|
Attempts to parse a terminal node of type _toktype_. |
|
Note that the parsed result of a terminal is the token itself, not an AST node. |
|
""" |
|
failure = (None, tokens) |
|
if len(tokens) > 0 and is_toktype(tokens[0], toktype): |
|
return (tokens[0], tokens[1:]) |
|
else: |
|
return failure |
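
# A terminal parser simply consumes one matching token, e.g.:
#
#   parse_COLON([('token', 'COLON', ':'), ('token', 'S', ' ')])
#   # => (('token', 'COLON', ':'), [('token', 'S', ' ')])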
|
|
|
def parse_FUNC(tokens): |
|
return parse_terminal(tokens, 'FUNC') |
|
|
|
def parse_ARRAY(tokens): |
|
return parse_terminal(tokens, 'ARRAY') |
|
|
|
def parse_POINTER(tokens): |
|
return parse_terminal(tokens, 'POINTER') |
|
|
|
def parse_FUNCTION(tokens): |
|
return parse_terminal(tokens, 'FUNCTION') |
|
|
|
def parse_COMMA(tokens): |
|
return parse_terminal(tokens, 'COMMA') |
|
|
|
def parse_PERIOD(tokens): |
|
return parse_terminal(tokens, 'PERIOD') |
|
|
|
def parse_ARROW(tokens): |
|
return parse_terminal(tokens, 'ARROW') |
|
|
|
def parse_IDENTIFIER(tokens): |
|
return parse_terminal(tokens, 'IDENTIFIER') |
|
|
|
def parse_COLON(tokens): |
|
return parse_terminal(tokens, 'COLON') |
|
|
|
def parse_SEMICOLON(tokens): |
|
return parse_terminal(tokens, 'SEMICOLON') |
|
|
|
def parse_INTLIT(tokens): |
|
return parse_terminal(tokens, 'INTLIT') |
|
|
|
def parse_FLOATLIT(tokens): |
|
return parse_terminal(tokens, 'FLOATLIT') |
|
|
|
def parse_STRINGLIT(tokens): |
|
return parse_terminal(tokens, 'STRINGLIT') |
|
|
|
def parse_CHARLIT(tokens): |
|
return parse_terminal(tokens, 'CHARLIT') |
|
|
|
def parse_BOOLLIT(tokens): |
|
return parse_terminal(tokens, 'BOOLLIT') |
|
|
|
def parse_OPAREN(tokens): |
|
return parse_terminal(tokens, 'OPAREN') |
|
|
|
def parse_CPAREN(tokens): |
|
return parse_terminal(tokens, 'CPAREN') |
|
|
|
def parse_OBRACKET(tokens): |
|
return parse_terminal(tokens, 'OBRACKET') |
|
|
|
def parse_CBRACKET(tokens): |
|
return parse_terminal(tokens, 'CBRACKET') |
|
|
|
def parse_MINUS(tokens): |
|
return parse_terminal(tokens, 'MINUS') |
|
|
|
def parse_AMP(tokens): |
|
return parse_terminal(tokens, 'AMP') |
|
|
|
def parse_STAR(tokens): |
|
return parse_terminal(tokens, 'STAR') |
|
|
|
def parse_BANG(tokens): |
|
return parse_terminal(tokens, 'BANG') |
|
|
|
def parse_PLUS(tokens): |
|
return parse_terminal(tokens, 'PLUS') |
|
|
|
def parse_SLASH(tokens): |
|
return parse_terminal(tokens, 'SLASH') |
|
|
|
def parse_PERCENT(tokens): |
|
return parse_terminal(tokens, 'PERCENT') |
|
|
|
def parse_LT(tokens): |
|
return parse_terminal(tokens, 'LT') |
|
|
|
def parse_LTEQ(tokens): |
|
return parse_terminal(tokens, 'LTEQ') |
|
|
|
def parse_GT(tokens): |
|
return parse_terminal(tokens, 'GT') |
|
|
|
def parse_GTEQ(tokens): |
|
return parse_terminal(tokens, 'GTEQ') |
|
|
|
def parse_EQ(tokens): |
|
return parse_terminal(tokens, 'EQ') |
|
|
|
def parse_EQEQ(tokens): |
|
return parse_terminal(tokens, 'EQEQ') |
|
|
|
def parse_BANGEQ(tokens): |
|
return parse_terminal(tokens, 'BANGEQ') |
|
|
|
def parse_AMPAMP(tokens): |
|
return parse_terminal(tokens, 'AMPAMP') |
|
|
|
def parse_BARBAR(tokens): |
|
return parse_terminal(tokens, 'BARBAR') |
|
|
|
def parse_BAR(tokens): |
|
return parse_terminal(tokens, 'BAR') |
|
|
|
def parse_LTLT(tokens): |
|
return parse_terminal(tokens, 'LTLT') |
|
|
|
def parse_GTGT(tokens): |
|
# So, I'm not super happy about this, but here's why this has to be special-cased: |
|
# If we just treated this like all of the other tokenization and parsing functions, |
|
# we would have a token for '>>', and this would be a one-liner: |
|
# return parse_terminal(tokens, 'GTGT') |
|
# However, that would break `pointer<pointer<char>>`, because the last two |
|
    # characters would be a single GTGT token, rather than two separate GT tokens.
|
# So we have to implement parse_GTGT by looking for two distinct GT tokens. |
|
failure = (None, tokens) |
|
(gt_tokens, tokens) = concatenation(tokens, [parse_GT, parse_GT]) |
|
if gt_tokens is None: |
|
return failure |
|
token = ('token', 'GTGT', '>>') |
|
return (token, tokens) |
|
|
|
def parse_GTGTEQ(tokens): |
|
return parse_terminal(tokens, 'GTGTEQ') |
|
|
|
def parse_LTLTEQ(tokens): |
|
return parse_terminal(tokens, 'LTLTEQ') |
|
|
|
def parse_TILDE(tokens): |
|
return parse_terminal(tokens, 'TILDE') |
|
|
|
def parse_CARAT(tokens): |
|
return parse_terminal(tokens, 'CARAT') |
|
|
|
def parse_PLUSEQ(tokens): |
|
return parse_terminal(tokens, 'PLUSEQ') |
|
|
|
def parse_MINUSEQ(tokens): |
|
return parse_terminal(tokens, 'MINUSEQ') |
|
|
|
def parse_STAREQ(tokens): |
|
return parse_terminal(tokens, 'STAREQ') |
|
|
|
def parse_SLASHEQ(tokens): |
|
return parse_terminal(tokens, 'SLASHEQ') |
|
|
|
def parse_PERCENTEQ(tokens): |
|
return parse_terminal(tokens, 'PERCENTEQ') |
|
|
|
def parse_AMPEQ(tokens): |
|
return parse_terminal(tokens, 'AMPEQ') |
|
|
|
def parse_PIPEEQ(tokens): |
|
return parse_terminal(tokens, 'PIPEEQ') |
|
|
|
def parse_CARATEQ(tokens): |
|
return parse_terminal(tokens, 'CARATEQ') |
|
|
|
def parse_INDENT(tokens): |
|
return parse_terminal(tokens, 'INDENT') |
|
|
|
def parse_DEDENT(tokens): |
|
return parse_terminal(tokens, 'DEDENT') |
|
|
|
def parse_IF(tokens): |
|
return parse_terminal(tokens, 'IF') |
|
|
|
def parse_ELIF(tokens): |
|
return parse_terminal(tokens, 'ELIF') |
|
|
|
def parse_ELSE(tokens): |
|
return parse_terminal(tokens, 'ELSE') |
|
|
|
def parse_WHILE(tokens): |
|
return parse_terminal(tokens, 'WHILE') |
|
|
|
def parse_FOR(tokens): |
|
return parse_terminal(tokens, 'FOR') |
|
|
|
def parse_BREAK(tokens): |
|
return parse_terminal(tokens, 'BREAK') |
|
|
|
def parse_CONTINUE(tokens): |
|
return parse_terminal(tokens, 'CONTINUE') |
|
|
|
def parse_NL(tokens): |
|
return parse_terminal(tokens, 'NL') |
|
|
|
def parse_S(tokens): |
|
return parse_terminal(tokens, 'S') |
|
|
|
def parse_LCOMMENT(tokens): |
|
return parse_terminal(tokens, 'LCOMMENT') |
|
|
|
def parse_BCOMMENT(tokens): |
|
return parse_terminal(tokens, 'BCOMMENT') |
|
|
|
def parse_INCLUDE(tokens): |
|
return parse_terminal(tokens, 'INCLUDE') |
|
|
|
def parse_DEFINE(tokens): |
|
return parse_terminal(tokens, 'DEFINE') |
|
|
|
def parse_STRUCT(tokens): |
|
return parse_terminal(tokens, 'STRUCT') |
|
|
|
def parse_RETURN(tokens): |
|
return parse_terminal(tokens, 'RETURN') |
|
|
|
def parse_QMARK(tokens): |
|
return parse_terminal(tokens, 'QMARK') |
|
|
|
def parse_CAST(tokens): |
|
return parse_terminal(tokens, 'CAST') |
|
|
|
def parse_AUTO(tokens): |
|
return parse_terminal(tokens, 'AUTO') |
|
|
|
def parse_REGISTER(tokens): |
|
return parse_terminal(tokens, 'REGISTER') |
|
|
|
def parse_STATIC(tokens): |
|
return parse_terminal(tokens, 'STATIC') |
|
|
|
def parse_EXTERN(tokens): |
|
return parse_terminal(tokens, 'EXTERN') |
|
|
|
def parse_THREADLOCAL(tokens): |
|
return parse_terminal(tokens, 'THREADLOCAL') |
|
|
|
def parse_INLINE(tokens): |
|
return parse_terminal(tokens, 'INLINE') |
|
|
|
def parse_NORETURN(tokens): |
|
return parse_terminal(tokens, 'NORETURN') |
|
|
|
def parse_CONST(tokens): |
|
return parse_terminal(tokens, 'CONST') |
|
|
|
def parse_VOLATILE(tokens): |
|
return parse_terminal(tokens, 'VOLATILE') |
|
|
|
def parse_RESTRICT(tokens): |
|
return parse_terminal(tokens, 'RESTRICT') |
|
|
|
def parse_ATOMIC(tokens): |
|
return parse_terminal(tokens, 'ATOMIC') |
|
|
|
|
|
# Output generator implementation: |
|
|
|
# Note: all generate_x functions share a similar format: |
|
# - Their first argument is an AST.
|
# - They return a string (of C code). |
|
# If the passed AST is valid, then errors are not possible. |
|
# Exceptions are thrown otherwise. |
|
|
|
def generate(ast): |
|
"""Generates C code from the given AST.""" |
|
return generate_program(ast) |
|
|
|
|
|
def generate_program(ast): |
|
"""Generates a C program.""" |
|
assert is_asttype(ast, 'program') |
|
outputs = [] |
|
for statement_ast in ast_children(ast): |
|
statement = generate_statement(statement_ast) |
|
outputs.append(statement) |
|
output = '\n'.join(outputs) |
|
if not output.endswith('\n'): |
|
output += '\n' # always include a trailing newline. |
|
return output |
|
|
|
|
|
def generate_statement(ast): |
|
"""Generates a C statement.""" |
|
assert is_asttype(ast, 'statement') |
|
sub_ast = ast_children(ast)[0] |
|
assert is_ast(sub_ast) |
|
if is_asttype(sub_ast, 'linestmt'): |
|
return generate_linestmt(sub_ast) |
|
elif is_asttype(sub_ast, 'blockstmt'): |
|
return generate_blockstmt(sub_ast) |
|
else: |
|
raise Exception("generate_statement: don't know how to generate %s" % str(sub_ast)) |
|
|
|
|
|
def generate_blockstmt(ast): |
|
"""Generates a C "block" statement.""" |
|
assert is_asttype(ast, 'blockstmt') |
|
sub_ast = ast_children(ast)[0] |
|
assert is_ast(sub_ast) |
|
if is_asttype(sub_ast, 'fundecl'): |
|
return generate_fundecl(sub_ast) |
|
elif is_asttype(sub_ast, 'if'): |
|
return generate_if(sub_ast) |
|
elif is_asttype(sub_ast, 'while'): |
|
return generate_while(sub_ast) |
|
elif is_asttype(sub_ast, 'for'): |
|
return generate_for(sub_ast) |
|
elif is_asttype(sub_ast, 'struct'): |
|
return generate_struct(sub_ast) |
|
elif is_asttype(sub_ast, 'scope'): |
|
return generate_scope(sub_ast) |
|
elif is_asttype(sub_ast, 'comment'): |
|
return generate_comment(sub_ast) |
|
elif is_asttype(sub_ast, 'blankline'): |
|
return generate_blankline(sub_ast) |
|
else: |
|
raise Exception("generate_blockstmt: don't know how to generate %s" % str(sub_ast)) |
|
|
|
|
|
def generate_linestmt(ast): |
|
"""Generates a C "line" statement.""" |
|
assert is_asttype(ast, 'linestmt') |
|
subnode = ast_children(ast)[0] |
|
if is_asttype(subnode, 'declassign'): |
|
return generate_declassign(subnode) |
|
elif is_asttype(subnode, 'assign'): |
|
return generate_assign(subnode) |
|
elif is_asttype(subnode, 'vardecl'): |
|
return generate_vardecl(subnode) + ';' |
|
elif is_asttype(subnode, 'rvalue'): |
|
return generate_rvalue(subnode) + ';' |
|
elif is_asttype(subnode, 'return'): |
|
return generate_return(subnode) |
|
elif is_toktype(subnode, 'BREAK'): |
|
return "break;" |
|
elif is_toktype(subnode, 'CONTINUE'): |
|
return "continue;" |
|
elif is_toktype(subnode, 'INCLUDE'): |
|
return token_text(subnode) |
|
elif is_toktype(subnode, 'DEFINE'): |
|
return token_text(subnode) |
|
else: |
|
raise Exception("generate_linestmt: don't know how to generate %s" % str(subnode)) |
|
|
|
|
|
def generate_blankline(ast): |
|
"""Generates a blank line.""" |
|
assert is_asttype(ast, 'blankline') |
|
return "" |
|
|
|
|
|
def generate_comment(ast): |
|
"""Generates a C (line or block-level) comment.""" |
|
assert is_asttype(ast, 'comment') |
|
comment_token = ast_children(ast)[0] |
|
assert is_toktype(comment_token, 'LCOMMENT') or is_toktype(comment_token, 'BCOMMENT') |
|
return token_text(comment_token) |
|
|
|
|
|
def generate_declassign(ast): |
|
"""Generates a C declaration-assignment statement.""" |
|
assert is_asttype(ast, 'declassign') |
|
children = ast_children(ast) |
|
vardecl_ast = children[0] |
|
vardecl = generate_vardecl(vardecl_ast) |
|
expr_ast = children[1] |
|
expr = generate_rvalue(expr_ast) |
|
return "%s = %s;" % (vardecl, expr) |
|
|
|
|
|
def generate_assign(ast): |
|
"""Generates a C assignment statement.""" |
|
assert is_asttype(ast, 'assign') |
|
children = ast_children(ast) |
|
|
|
rvalue1_ast = children[0] |
|
rvalue1 = generate_rvalue(rvalue1_ast) |
|
|
|
op_ast = children[1] |
|
assert is_asttype(op_ast, 'aoperator') |
|
op_token = ast_children(op_ast)[0] |
|
assert is_token(op_token) |
|
op = token_text(op_token) |
|
|
|
rvalue2_ast = children[2] |
|
rvalue2 = generate_rvalue(rvalue2_ast) |
|
return "%s %s %s;" % (rvalue1, op, rvalue2) |
|
|
|
|
|
def generate_vardecl(ast): |
|
""" |
|
Generates a C variable declaration. |
|
|
|
Note: we don't append the trailing ';' here because this is also used for function args. |
|
""" |
|
assert is_asttype(ast, 'vardecl') |
|
children = ast_children(ast) |
|
|
|
i = 0 |
|
if is_asttype(children[i], 'storage'): |
|
output = generate_storage(children[i]) + " " |
|
i += 1 |
|
else: |
|
output = "" |
|
|
|
identifier_token = children[i] |
|
assert is_toktype(identifier_token, 'IDENTIFIER') |
|
var_name = token_text(identifier_token) |
|
i += 1 |
|
|
|
type_ast = children[i] |
|
|
|
# e.g. 'int a', 'char* b', 'static int c[8]', 'extern char** d', 'volatile int* d[3]', etc. |
|
output += generate_type(type_ast, var_name) |
|
return output |
|
|
|
|
|
def generate_call(ast): |
|
"""Generates a parenthetical C function call.""" |
|
assert is_asttype(ast, 'call') |
|
return "(%s)" % ", ".join([generate_rvalue(child) for child in ast_children(ast)]) |
|
|
|
|
|
def generate_return(ast): |
|
"""Generates a C return statement.""" |
|
assert is_asttype(ast, 'return') |
|
children = ast_children(ast) |
|
if len(children) == 0: |
|
return "return;" |
|
else: |
|
expr_ast = children[0] |
|
return "return %s;" % generate_rvalue(expr_ast) |
|
|
|
|
|
def generate_storage(ast): |
|
"""Generates a C storage classifier.""" |
|
assert is_asttype(ast, 'storage') |
|
return token_text(ast_children(ast)[0]) |
|
|
|
|
|
def generate_fundecl(ast): |
|
"""Generates a C function declaration.""" |
|
|
|
def generate_funtype(ast): |
|
"""Generates the "type" of a C function.""" |
|
assert is_asttype(ast, 'funtype') |
|
asts = ast_children(ast) |
|
if len(asts) == 0: |
|
return None |
|
else: |
|
return " ".join([token_text(token) for token in asts]) |
|
|
|
def generate_fspecifier(ast): |
|
"""Generates a C function declaration specifier.""" |
|
assert is_asttype(ast, 'fspecifier') |
|
return token_text(ast_children(ast)[0]) |
|
|
|
def generate_fundeclargs(ast): |
|
"""Generates a list of arguments in a C function declaration.""" |
|
assert is_asttype(ast, 'fundeclargs') |
|
arg_asts = ast_children(ast) |
|
args = [] |
|
for vardecl_ast in arg_asts: |
|
arg = generate_vardecl(vardecl_ast) |
|
args.append(arg) |
|
if len(args) == 0: |
|
return "void" |
|
else: |
|
return ", ".join(args) |
|
|
|
def generate_fundeclret(ast): |
|
"""Generates the return type of a C function declaration.""" |
|
assert is_asttype(ast, 'fundeclret') |
|
return generate_type(ast_children(ast)[0]) |
|
|
|
assert is_asttype(ast, 'fundecl') |
|
asts = ast_children(ast) |
|
i = 0 |
|
|
|
prefix = "" |
|
if is_asttype(asts[i], 'storage'): |
|
prefix += generate_storage(asts[i]) + " " |
|
i += 1 |
|
|
|
while is_asttype(asts[i], 'fspecifier'): |
|
prefix += generate_fspecifier(asts[i]) + " " |
|
i += 1 |
|
|
|
identifier = token_text(asts[i]) |
|
i += 1 |
|
|
|
args = "%s" % generate_fundeclargs(asts[i]) |
|
i += 1 |
|
|
|
if i < len(asts) and is_asttype(asts[i], 'fundeclret'): |
|
ret = generate_fundeclret(asts[i]) |
|
i += 1 |
|
else: |
|
ret = "void" |
|
|
|
if i < len(asts) and is_asttype(asts[i], 'scope'): |
|
scope = generate_scope(asts[i]) |
|
else: |
|
scope = None |
|
|
|
output = "%s %s(%s)" % (ret, identifier, args) |
|
if scope is None: |
|
output += ";" |
|
else: |
|
output += " %s" % scope |
|
return output |
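
# Expected shapes of the output (derived from the rules above):
#   'func f(a: int) -> int:' plus a body generates 'int f(int a) { ... }'
#   'func f()' with no body generates the prototype 'void f(void);'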
|
|
|
|
|
def generate_if(ast): |
|
"""Generates a C if statement""" |
|
assert is_asttype(ast, 'if') |
|
children = ast_children(ast) |
|
|
|
predicate_ast = children[0] |
|
predicate = generate_rvalue(predicate_ast) |
|
output = "if (%s) " % (predicate) |
|
|
|
scope_ast = children[1] |
|
output += generate_scope(scope_ast) |
|
|
|
for ast in children[2:]: |
|
assert is_ast(ast) |
|
if ast_type(ast) == 'elif': |
|
elif_children = ast_children(ast) |
|
|
|
predicate_ast = elif_children[0] |
|
predicate = generate_rvalue(predicate_ast) |
|
output += " else if (%s) " % (predicate) |
|
|
|
scope_ast = elif_children[1] |
|
output += generate_scope(scope_ast) |
|
|
|
elif ast_type(ast) == 'else': |
|
else_children = ast_children(ast) |
|
|
|
scope_ast = else_children[0] |
|
output += " else " + generate_scope(scope_ast) |
|
|
|
else: |
|
raise Exception("generate_if: unexpected AST type %s" % ast_type(ast)) |
|
|
|
return output |
|
|
|
|
|
def generate_while(ast): |
|
"""Generates a C while statement.""" |
|
assert is_asttype(ast, 'while') |
|
children = ast_children(ast) |
|
expr_ast = children[0] |
|
expr = generate_rvalue(expr_ast) |
|
scope_ast = children[1] |
|
scope = generate_scope(scope_ast) |
|
output = "while (%s) %s" % (expr, scope) |
|
return output |
|
|
|
|
|
def generate_for(ast): |
|
"""Generates a C for statement.""" |
|
assert is_asttype(ast, 'for') |
|
children = ast_children(ast) |
|
output = "for (" |
|
|
|
linestmt1_ast = children[0] |
|
if linestmt1_ast is not None: |
|
output += generate_linestmt(linestmt1_ast) + " " |
|
else: |
|
output += "; " |
|
|
|
expr2_ast = children[1] |
|
output += generate_rvalue(expr2_ast) + "; " |
|
|
|
stmt3s = [] |
|
for child in children[2:-1]: |
|
stmt = generate_linestmt(child) |
|
if stmt.endswith(';'): |
|
stmt = stmt[:-1] |
|
stmt3s.append(stmt) |
|
output += ", ".join(stmt3s) |
|
|
|
output += ") " |
|
scope_ast = children[-1] |
|
output += generate_scope(scope_ast) |
|
return output |
|
|
|
|
|
def generate_struct(ast): |
|
"""Generates a C struct declaration.""" |
|
assert is_asttype(ast, 'struct') |
|
identifier_token = ast_children(ast)[0] |
|
assert is_toktype(identifier_token, 'IDENTIFIER') |
|
identifier = token_text(identifier_token) |
|
output = "typedef struct _struct_%s %s;\n" % (identifier, identifier) |
|
output += "struct _struct_%s {\n" % (identifier) |
|
scope_ast = ast_children(ast)[1] |
|
assert is_asttype(scope_ast, 'structscope') |
|
vardecl_asts = ast_children(scope_ast) |
|
for ast in vardecl_asts: |
|
output += " " + generate_vardecl(ast) + ";\n" |
|
output += "};" |
|
return output |
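
# For example, 'struct point:' with fields 'x: int' and 'y: int' should
# generate:
#
#   typedef struct _struct_point point;
#   struct _struct_point {
#       int x;
#       int y;
#   };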
|
|
|
|
|
def generate_scope(ast): |
|
"""Generates a C scope.""" |
|
assert is_asttype(ast, 'scope') |
|
output = "{" |
|
for statement_ast in ast_children(ast): |
|
statement = generate_statement(statement_ast) |
|
output += ("\n" + indent_text(statement)) |
|
line = "\n}" |
|
output += line |
|
return output |
|
|
|
|
|
def generate_rvalue(ast): |
|
"""Generates a C rvalue expression.""" |
|
assert is_asttype(ast, 'rvalue') |
|
output = "" |
|
for child in ast_children(ast): |
|
if is_asttype(child, 'ternary'): |
|
output += generate_ternary(child) |
|
elif is_asttype(child, 'binary'): |
|
output += generate_binary(child) |
|
elif is_asttype(child, 'unary'): |
|
output += generate_unary(child) |
|
elif is_asttype(child, 'dotted'): |
|
output += generate_dotted(child) |
|
elif is_asttype(child, 'arrowed'): |
|
output += generate_arrowed(child) |
|
elif is_asttype(child, 'indexed'): |
|
output += generate_indexed(child) |
|
elif is_asttype(child, 'called'): |
|
output += generate_called(child) |
|
elif is_asttype(child, 'type'): |
|
output += generate_type(child) |
|
elif is_asttype(child, 'cast'): |
|
output += generate_cast(child) |
|
        elif is_token(child) and token_type(child) in (
                'INTLIT', 'FLOATLIT', 'STRINGLIT', 'CHARLIT',
                'BOOLLIT', 'IDENTIFIER'):
|
output += token_text(child) |
|
else: |
|
raise Exception("generate_rvalue: unexpcted ast %s" % str(ast)) |
|
return output |
|
|
|
|
|
def generate_ternary(ast): |
|
"""Generates a C ternary statement.""" |
|
assert is_asttype(ast, 'ternary') |
|
rval1_ast = ast_children(ast)[0] |
|
rval1 = generate_rvalue(rval1_ast) |
|
rval2_ast = ast_children(ast)[1] |
|
rval2 = generate_rvalue(rval2_ast) |
|
rval3_ast = ast_children(ast)[2] |
|
rval3 = generate_rvalue(rval3_ast) |
|
return "(%s ? %s : %s)" % (rval1, rval2, rval3) |
|
|
|
|
|
def generate_binary(ast): |
|
"""Generates a binary-operator C statement.""" |
|
assert is_asttype(ast, 'binary') |
|
children = ast_children(ast) |
|
|
|
expr1_ast = children[0] |
|
expr1 = generate_rvalue(expr1_ast) |
|
|
|
op_ast = children[1] |
|
assert is_asttype(op_ast, 'boperator') |
|
op_token = ast_children(op_ast)[0] |
|
assert is_token(op_token) |
|
op = token_text(op_token) |
|
|
|
expr2_ast = children[2] |
|
expr2 = generate_rvalue(expr2_ast) |
|
|
|
return "(%s %s %s)" % (expr1, op, expr2) |
|
|
|
|
|
def generate_unary(ast): |
|
"""Generates a unary-operator C statement.""" |
|
assert is_asttype(ast, 'unary') |
|
children = ast_children(ast) |
|
op_ast = children[0] |
|
assert is_asttype(op_ast, 'uoperator') |
|
op_token = ast_children(op_ast)[0] |
|
assert is_token(op_token) |
|
op = token_text(op_token) |
|
expr_ast = children[1] |
|
expr = generate_rvalue(expr_ast) |
|
return "(%s(%s))" % (op, expr) |
|
|
|
|
|
def generate_dotted(ast): |
|
"""Generates a C dotted member access expression.""" |
|
assert is_asttype(ast, 'dotted') |
|
return ".%s" % token_text(ast_children(ast)[0]) |
|
|
|
|
|
def generate_arrowed(ast): |
|
"""Generates a C arrow access expression.""" |
|
assert is_asttype(ast, 'arrowed') |
|
return "->%s" % token_text(ast_children(ast)[0]) |
|
|
|
|
|
def generate_indexed(ast): |
|
"""Generates a C indexed access expression.""" |
|
assert is_asttype(ast, 'indexed') |
|
return "[%s]" % generate_rvalue(ast_children(ast)[0]) |
|
|
|
|
|
def generate_called(ast): |
|
"""Generates a C function call suffix.""" |
|
assert is_asttype(ast, 'called') |
|
output = "(" |
|
output += ", ".join([generate_rvalue(child) for child in ast_children(ast)]) |
|
output += ")" |
|
return output |
|
|
|
|
|
def generate_type(ast, identifier=""): |
|
""" |
|
Generates a C type declaration. |
|
|
|
If identifier is not given, this is an abstract type declaration (e.g. a cast). |
|
""" |
|
def did_switch_direction(previous, current): |
|
""" |
|
        Detect a change in 'direction' while interpreting the type stack.
|
|
|
C type declaration operator precedence requires that we "go right" first. |
|
i.e. 'int *foo[]' is "array of pointer to int", not "pointer to array of int". |
|
In order to express "pointer to array of int", we have to use parenthesis |
|
to change overcome operator precedence, i.e. 'int (*foo)[]'. |
|
Any time we need to "change direction", we need to wrap in parenthesis. |
|
See http://unixwiz.net/techtips/reading-cdecl.html |
|
""" |
|
lefts = ['pointer'] |
|
rights = ['array', 'function'] |
|
return (previous in lefts and current in rights) \ |
|
or (previous in rights and current in lefts) |
|
|
|
assert is_asttype(ast, 'type') |
|
types = type_ast_as_list(ast) |
|
output = identifier |
|
previous = None |
|
for t in types[:-1]: |
|
if t == 'pointer': |
|
if did_switch_direction(previous, t): |
|
output = "*(%s)" % output |
|
else: |
|
output = "*%s" % output |
|
elif t == 'array': |
|
if did_switch_direction(previous, t): |
|
output = "(%s)[]" % output |
|
else: |
|
output = "%s[]" % output |
|
elif t == 'function': |
|
if did_switch_direction(previous, t): |
|
output = "(%s)()" % output |
|
else: |
|
output = "%s()" % output |
|
elif t.startswith('array['): |
|
array_size = t.split('[')[1].split(']')[0] |
|
if did_switch_direction(previous, 'array'): |
|
output = "(%s)[%s]" % (output, array_size) |
|
else: |
|
output = "%s[%s]" % (output, array_size) |
|
elif t in ['const', 'volatile', 'restrict', '_Atomic']: |
|
output = "%s %s" % (t, output) |
|
else: |
|
raise Exception("generate_type: don't know how to handle %s" % t) |
|
|
|
if t.startswith('array'): |
|
previous = 'array' |
|
elif t in ['pointer', 'function']: |
|
previous = t |
|
|
|
base_type = types[-1] |
|
if identifier == "": |
|
output = "%s%s" % (base_type, output) |
|
else: |
|
output = "%s %s" % (base_type, output) |
|
return output |
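
# A worked example of the direction-switching rule above:
#   'pointer<array[8]<int>>' flattens to ['pointer', 'array[8]', 'int'].
#   Starting from the identifier 'foo':
#     'pointer'  -> '*foo'
#     'array[8]' -> direction switch (pointer, then array), so '(*foo)[8]'
#   Prepending the base type gives 'int (*foo)[8]',
#   i.e. "foo is a pointer to an array of 8 ints".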
|
|
|
|
|
def type_ast_as_list(ast): |
|
""" |
|
Return the type AST as a list of types. |
|
|
|
e.g. |
|
- 'int' results in ['int'] |
|
- 'pointer<array<char>>' results in ['pointer', 'array', 'char'] |
|
- 'array[8]<int>' results in ['array[8]', 'int'] |
|
- 'volatile<pointer<char>>' results in ['volatile', 'pointer', 'char'] |
|
""" |
|
assert is_asttype(ast, 'type') |
|
children = ast_children(ast) |
|
if is_token(children[0]) and is_toktype(children[0], 'IDENTIFIER'): |
|
for token in children: |
|
assert is_toktype(token, 'IDENTIFIER') |
|
# return e.g. ['int'], ['unsigned char'], etc. |
|
composite_type = " ".join([token_text(token) for token in children]) |
|
return [composite_type] |
|
elif len(children) == 2 and is_token(children[0]) and is_asttype(children[1], 'type'): |
|
token = children[0] |
|
sub_ast = children[1] |
|
# return e.g. ['array', <recursive call>] |
|
return [token_text(token)] + type_ast_as_list(sub_ast) |
|
elif len(children) == 3 and is_toktype(children[0], 'ARRAY'): |
|
# this is a dimensioned array (e.g. array[8]) |
|
size_ast = children[1] |
|
array_size = generate_rvalue(size_ast) |
|
assert is_ast(children[2]) |
|
sub_ast = children[2] |
|
assert is_asttype(sub_ast, 'type') |
|
# return e.g. ['array[8]', <recursive call>] |
|
return ["array[%s]" % (array_size)] + type_ast_as_list(sub_ast) |
|
elif is_asttype(children[0], 'tqualifier'): |
|
qualifier_ast = children[0] |
|
qualifier = token_text(ast_children(qualifier_ast)[0]) |
|
assert is_asttype(children[1], 'type') |
|
return [qualifier] + type_ast_as_list(children[1]) |
|
else: |
|
raise Exception("type_ast_as_list: don't know how to handle %s" % str(ast)) |
|
|
|
|
|
def generate_cast(ast): |
|
"""Generates a C cast statement.""" |
|
assert is_asttype(ast, 'cast') |
|
var = generate_rvalue(ast_children(ast)[0]) |
|
type_ = generate_rvalue(ast_children(ast)[1]) |
|
output = "((%s)%s)" % (type_, var) |
|
return output |
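
# e.g. 'cast(x, int)' generates '((int)x)'.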
|
|
|
|
|
def indent_text(text): |
|
"""Add indentation to the given block of text.""" |
|
indent = " " * INDENT_UNIT |
|
indented_lines = [indent + line for line in text.splitlines()] |
|
return "\n".join(indented_lines) |
|
|
|
|
|
if __name__ == "__main__": |
|
import sys |
|
import pprint |
|
tdefs = load_tokendefs("tokendefs.txt") |
|
keywords = load_keywords("keywords.txt") |
|
input = None |
|
if len(sys.argv) > 1 and not sys.argv[-1].startswith('--'): |
|
input = open(sys.argv[-1]).read() |
|
else: |
|
input = sys.stdin.read() |
|
tokens = tokenize(tdefs, keywords, input) |
|
if '--tokens' in sys.argv: |
|
pprint.pprint(tokens) |
|
sys.exit(0) |
|
ast = parse(tokens) |
|
if '--ast' in sys.argv: |
|
pprint.pprint(ast) |
|
sys.exit(0) |
|
output = generate(ast) |
|
sys.stdout.write(output) |