Created
June 7, 2016 02:01
-
-
Save kiran/24935714f36145cff4d15e352f15b827 to your computer and use it in GitHub Desktop.
writing a simple lisp parser in python
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# turn lisp statements into an AST | |
# going off a super basic grammar: | |
# - atoms are numbers or symbols | |
Number = (int, float) | |
Symbol = str | |
Atom = (Number, Symbol) | |
def parse(program): | |
# lex | |
tokens = tokenize(program) | |
# and parse | |
ast = build_ast(tokens) | |
return ast | |
def tokenize(program): | |
"turn a string into a series of tokens" | |
return program.replace('(', ' ( ').replace(')', ' ) ').split() | |
def build_ast(tokens): | |
"build an expression from lexed tokens" | |
ast = [] | |
if len(tokens) == 0: | |
raise SyntaxError('unexpected EOF while reading string') | |
tok = tokens.pop(0) | |
if tok == '(': | |
subtree = [] | |
while tokens[0] != ')': # peek to see if we've hit the end of the expression | |
subtree.append(build_ast(tokens)) | |
tokens.pop(0) # pop off the ')' | |
return subtree | |
elif tok == ')': | |
# hit the end of an expression without an ( | |
raise SyntaxError('unexpected ) while reading string') | |
else: | |
return atom(tok) | |
def atom(token): | |
try: | |
return int(token) | |
except ValueError: | |
try: | |
return float(token) | |
except ValueError: | |
return Symbol(token) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import unittest | |
import lisp_parser | |
class Lexer(unittest.TestCase): | |
def test_simplest_statement(self): | |
self.assertEqual(lisp_parser.tokenize('()'), ['(', ')']) | |
def test_nested_statement(self): | |
nested_statement = '(penguins (add n1 n2))' | |
expected_list = ['(', 'penguins', '(', 'add', 'n1', 'n2', ')', ')'] | |
self.assertEqual(lisp_parser.tokenize(nested_statement), expected_list) | |
class Parser(unittest.TestCase): | |
def test_statement(self): | |
self.assertEqual(lisp_parser.parse('(+ 1 2)'), ['+', 1, 2]) | |
def test_nesting(self): | |
nested_statement = '(penguins (+ 1 2) 3)' | |
expected_list = ['penguins', ['+', 1, 2], 3] | |
self.assertEqual(lisp_parser.parse(nested_statement), expected_list) | |
if __name__ == '__main__': | |
unittest.main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment