Skip to content

Instantly share code, notes, and snippets.

@t-sin
Last active July 17, 2021 01:08
Show Gist options
  • Save t-sin/aaa782a2ea71b4b319c4a33e336be5e1 to your computer and use it in GitHub Desktop.
Save t-sin/aaa782a2ea71b4b319c4a33e336be5e1 to your computer and use it in GitHub Desktop.
入力が不完全なとき停止して、新たな入力から再開できるパーサーの実験
EMPTY = 0
class Stack():
def __init__(self):
self.stack = []
def push(self, x):
self.stack.append(x)
def pop(self):
return self.stack.pop()
def peek(self):
if len(self.stack) == 0:
return EMPTY
return self.stack[-1]
def depth(self):
return len(self.stack)
EOF = 0
class InputStream():
def __init__(self, str):
self.input = str
self.pos = 0
def peek(self):
if self.pos >= len(self.input):
return EOF
return self.input[self.pos]
def read(self):
if self.pos >= len(self.input):
return EOF
ch = self.input[self.pos]
self.pos += 1
return ch
def parse_symbol(input):
symbol = []
ch = input.peek()
while not ch in [EOF, '(', ')', ' ']:
symbol.append(input.read())
ch = input.peek()
return ''.join([c for c in symbol if c != EOF])
PARSE_INCOMPLETE = 'incomplete'
PARSING_LIST = 1
def parse(stack, input):
state = None
parsed = None
while True:
ch = input.peek()
if ch == EOF:
if stack.depth() == 0:
# パース中のものがないとき
return parsed
else:
# パース中のものがあるとき
return PARSE_INCOMPLETE
elif ch == '(':
input.read()
# リストここまでというマーカーをいれておく
stack.push(PARSING_LIST)
elif ch == ')':
input.read()
tmp_list = []
while stack.peek() != PARSING_LIST:
tmp_list.append(stack.pop())
tmp_list = tmp_list[::-1]
stack.pop()
if len(tmp_list) < 1:
raise Error('assertion failed: tmp_list is too short!')
if stack.depth() == 0:
# トップレベルのリスト
parsed = tmp_list
else:
# リストの中のリスト
stack.push(tmp_list)
elif ch == ' ':
input.read()
else:
symbol = parse_symbol(input)
if stack.depth() == 0:
# トップレベルのシンボル
parsed = symbol
else:
# リストの中のシンボル
stack.push(symbol)
COMPLETE_INPUT = "(defun hoge (n) (unless (zerop n) (print n)))"
INCOMPLETE_INPUT1 = "(defun hoge (n) (unless"
INCOMPLETE_INPUT2 = "(zerop n)"
INCOMPLETE_INPUT3 = " (print n)))"
INCOMPLETE_INPUT_CONT = [INCOMPLETE_INPUT2, INCOMPLETE_INPUT3]
# stream = InputStream('abced')
# stream = InputStream(COMPLETE_INPUT)
stream = InputStream(INCOMPLETE_INPUT1)
stack = Stack()
res = parse(stack, stream)
if res == PARSE_INCOMPLETE:
idx = 0
while res == PARSE_INCOMPLETE:
print('incomplete input!')
res = parse(stack, InputStream(INCOMPLETE_INPUT_CONT[idx]))
idx += 1
print('parsed: {}'.format(res))
else:
print('parsed: {}'.format(res))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment