Created
March 26, 2012 23:41
-
-
Save dgraham/2210713 to your computer and use it in GitHub Desktop.
Fibered Recursive Descent JSON Parser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
require 'fiber' | |
# A resumable, recursive descent JSON parser, using Fibers. | |
# http://www.negativecode.com/posts/2012/03/26/fibers-resumable-recursive-descent-json-parsing/ | |
class Parser | |
Result = Struct.new(:value) | |
def initialize | |
@finished = false | |
@scanner = Scanner.new | |
@fiber = Fiber.new do | |
result = parse.value | |
@finished = true | |
error unless @scanner.empty? | |
Fiber.yield result | |
end | |
end | |
def <<(data) | |
@scanner << data | |
if @finished | |
error unless @scanner.empty? | |
else | |
@fiber.resume | |
end | |
end | |
private | |
def parse | |
whitespace | |
parse_object || | |
parse_array || | |
error | |
end | |
def parse_value | |
whitespace | |
parse_object || | |
parse_array || | |
parse_string || | |
parse_number || | |
parse_true || | |
parse_false || | |
parse_null || | |
error | |
ensure | |
whitespace | |
end | |
def parse_object | |
parse_container('{}', {}) do |object| | |
key = parse_string || error | |
error unless @scanner.scan(':') | |
object[key.value] = parse_value.value | |
end | |
end | |
def parse_array | |
parse_container('[]', []) do |array| | |
array << parse_value.value | |
end | |
end | |
def parse_container(delimiters, result) | |
open, close = delimiters[0], delimiters[1] | |
return unless @scanner.scan(open) | |
whitespace | |
more = false | |
while @scanner.peek != close | |
yield result | |
ch = @scanner.peek | |
error unless ch == ',' || ch == close | |
more = (ch == ',') | |
@scanner.scan(',') if ch == ',' | |
end | |
error if more | |
@scanner.scan(close) | |
Result.new(result) | |
end | |
def parse_true | |
parse_keyword('true', true) | |
end | |
def parse_false | |
parse_keyword('false', false) | |
end | |
def parse_null | |
parse_keyword('null', nil) | |
end | |
def parse_keyword(word, value) | |
match = word.each_char.all? do |ch| | |
@scanner.scan(ch) | |
end | |
match ? Result.new(value) : nil | |
end | |
def parse_number | |
if ch = @scanner.scan(/\d/) | |
ch << @scanner.scan_until(/\D/) | |
Result.new(ch.to_i) | |
end | |
end | |
def parse_string | |
whitespace | |
return unless @scanner.scan('"') | |
value = @scanner.scan_until(/"/) | |
@scanner.scan('"') | |
whitespace | |
Result.new(value) | |
end | |
def whitespace | |
@scanner.skip(/\s/) | |
end | |
def error | |
raise "grammar error at character #{@scanner.position}" | |
end | |
end | |
class Scanner | |
attr_reader :position | |
def initialize | |
@buf = [] | |
@current = nil | |
@position = 0 | |
end | |
def empty? | |
[email protected]? {|ch| /\S/ =~ ch } | |
end | |
def <<(data) | |
@buf += data.split('') | |
end | |
def peek | |
return @current if @current | |
Fiber.yield if @buf.empty? | |
@buf.shift.tap do |ch| | |
@current = ch | |
@position += 1 | |
end | |
end | |
def skip(re) | |
while scan(re); end | |
end | |
def scan(re) | |
ch = peek | |
if re.is_a?(String) ? ch == re : re.match(ch) | |
@current = nil | |
ch | |
end | |
end | |
def scan_until(re) | |
''.tap do |buf| | |
until (ch = peek) =~ re || ch == nil | |
buf << ch | |
@current = nil | |
end | |
end | |
end | |
end | |
parser = Parser.new | |
json = '[1, 2, 3, [4, 5, 6]]' | |
json.each_char do |ch| | |
if result = parser << ch | |
puts result.inspect | |
end | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment