Last active
July 17, 2021 01:08
-
-
Save t-sin/aaa782a2ea71b4b319c4a33e336be5e1 to your computer and use it in GitHub Desktop.
入力が不完全なとき停止して、新たな入力から再開できるパーサーの実験
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
EMPTY = 0 | |
class Stack(): | |
def __init__(self): | |
self.stack = [] | |
def push(self, x): | |
self.stack.append(x) | |
def pop(self): | |
return self.stack.pop() | |
def peek(self): | |
if len(self.stack) == 0: | |
return EMPTY | |
return self.stack[-1] | |
def depth(self): | |
return len(self.stack) | |
EOF = 0 | |
class InputStream(): | |
def __init__(self, str): | |
self.input = str | |
self.pos = 0 | |
def peek(self): | |
if self.pos >= len(self.input): | |
return EOF | |
return self.input[self.pos] | |
def read(self): | |
if self.pos >= len(self.input): | |
return EOF | |
ch = self.input[self.pos] | |
self.pos += 1 | |
return ch | |
def parse_symbol(input): | |
symbol = [] | |
ch = input.peek() | |
while not ch in [EOF, '(', ')', ' ']: | |
symbol.append(input.read()) | |
ch = input.peek() | |
return ''.join([c for c in symbol if c != EOF]) | |
PARSE_INCOMPLETE = 'incomplete' | |
PARSING_LIST = 1 | |
def parse(stack, input): | |
state = None | |
parsed = None | |
while True: | |
ch = input.peek() | |
if ch == EOF: | |
if stack.depth() == 0: | |
# パース中のものがないとき | |
return parsed | |
else: | |
# パース中のものがあるとき | |
return PARSE_INCOMPLETE | |
elif ch == '(': | |
input.read() | |
# リストここまでというマーカーをいれておく | |
stack.push(PARSING_LIST) | |
elif ch == ')': | |
input.read() | |
tmp_list = [] | |
while stack.peek() != PARSING_LIST: | |
tmp_list.append(stack.pop()) | |
tmp_list = tmp_list[::-1] | |
stack.pop() | |
if len(tmp_list) < 1: | |
raise Error('assertion failed: tmp_list is too short!') | |
if stack.depth() == 0: | |
# トップレベルのリスト | |
parsed = tmp_list | |
else: | |
# リストの中のリスト | |
stack.push(tmp_list) | |
elif ch == ' ': | |
input.read() | |
else: | |
symbol = parse_symbol(input) | |
if stack.depth() == 0: | |
# トップレベルのシンボル | |
parsed = symbol | |
else: | |
# リストの中のシンボル | |
stack.push(symbol) | |
COMPLETE_INPUT = "(defun hoge (n) (unless (zerop n) (print n)))" | |
INCOMPLETE_INPUT1 = "(defun hoge (n) (unless" | |
INCOMPLETE_INPUT2 = "(zerop n)" | |
INCOMPLETE_INPUT3 = " (print n)))" | |
INCOMPLETE_INPUT_CONT = [INCOMPLETE_INPUT2, INCOMPLETE_INPUT3] | |
# stream = InputStream('abced') | |
# stream = InputStream(COMPLETE_INPUT) | |
stream = InputStream(INCOMPLETE_INPUT1) | |
stack = Stack() | |
res = parse(stack, stream) | |
if res == PARSE_INCOMPLETE: | |
idx = 0 | |
while res == PARSE_INCOMPLETE: | |
print('incomplete input!') | |
res = parse(stack, InputStream(INCOMPLETE_INPUT_CONT[idx])) | |
idx += 1 | |
print('parsed: {}'.format(res)) | |
else: | |
print('parsed: {}'.format(res)) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment