Skip to content

Instantly share code, notes, and snippets.

@dabrahams
Last active May 20, 2022 20:16
Show Gist options
  • Save dabrahams/56669d75236d16cd72fd4d9a1077a5db to your computer and use it in GitHub Desktop.
Save dabrahams/56669d75236d16cd72fd4d9a1077a5db to your computer and use it in GitHub Desktop.
Quick/Dirty Chart Parser.
/// A simple bottom-up chart parser.
///
/// - The chart is a lookup table of sets of partial parses (a.k.a. dotted
/// rules), indexed by (predicted-symbol, source-location) pairs.
///
/// - A partial parse is a triple (B, R, N) where:
/// - B is the position in the token stream where the partially-parsed input begins.
/// - R is the partially-recognized BNF rule.
/// - N is the number of RHS symbols of R that have been recognized.
///
/// - A chart location L is described by a pair (S, E) where, for all partial
/// parses P = (B, R, N) at L
/// - E is the position in the token stream where the input corresponding to P ends.
/// - The Nth RHS symbol of R is S
///
struct ChartParser {
/// Terminals and nonterminals are identified by a symbol ID.
typealias SymbolID = Int
/// The symbols of all rules, stored end-to-end.
typealias RuleStore = [SymbolID]
/// The RHS symbols of a parse rule followed by the rule's LHS symbol.
typealias Rule = RuleStore.SubSequence
/// Range of positions constituting a rule's RHS in `ruleStore`.
typealias RuleID = Range<RuleStore.Index>
/// A position in the input.
typealias SourcePosition = Int
/// A range of positions in the input.
typealias SourceRegion = Range<SourcePosition>
/// A location in the parse chart.
struct ChartLocation: Hashable {
let expecting: SymbolID
let at: SourcePosition
}
/// A parse rule being matched.
struct PartialParse {
/// The positions in ruleStore of yet-to-be recognized RHS symbols.
var expected: Range<RuleStore.Index>
/// The position in the token stream where the partially-parsed input begins.
var start: SourcePosition
/// True iff the entire RHS has been recognized.
var isComplete: Bool { expected.isEmpty }
}
/// Storage for all the rules.
var ruleStore: RuleStore = []
/// Rules indexed by their first RHS symbol.
var rulesByRHSStart: [SymbolID: [RuleID]] = [:]
/// All the partial parses so far.
var chart: [ChartLocation: [PartialParse]] = [:]
}
/// Initialization and algorithm.
extension ChartParser {
/// Creates a parser for the given grammar.
init<Grammar: Collection, RHS: Collection>(_ grammar: Grammar)
where Grammar.Element == (lhs: SymbolID, rhs: RHS),
RHS.Element == SymbolID
{
for r in grammar {
let start = ruleStore.count
ruleStore.append(contentsOf: r.rhs)
rulesByRHSStart[r.rhs.first!, default: []].append(start..<ruleStore.count)
ruleStore.append(r.lhs)
}
}
/// Parses the sequence of symbols in `source`.
mutating func parse<S: Collection>(_ source: S) where S.Element == SymbolID {
print("parse:", source.map(symbolName))
// Recognize each token over its range in the source.
for (i, s) in source.enumerated() {
recognize(s, spanningInput: i ..< i + 1)
}
}
/// Recognizes the expected (post-dot) symbol of `r`, ending at `l`.
private mutating func advance(_ r: PartialParse, to l: SourcePosition) {
print("\(r.start..<l): advance\t", ruleString(r.expected), "to", l)
var p = r
_ = p.expected.popFirst()
if p.isComplete {
let lhs = ruleStore[p.expected.upperBound]
recognize(lhs, spanningInput: p.start..<l)
}
else {
let postdot = ruleStore[p.expected.first!]
chart[ChartLocation(expecting: postdot, at: l), default: []].append(p)
}
}
/// Recognizes `s` over `g` in the input.
private mutating func recognize(_ s: SymbolID, spanningInput g: SourceRegion) {
print("\(g): recognize\t", symbolName(s))
// Advance all rules that are expecting s at g.lowerBound.
if let advancingRules = chart[ChartLocation(expecting: s, at: g.lowerBound)] {
for p in advancingRules {
advance(p, to: g.upperBound)
}
}
// Initiate all rules that start by expecting s.
for r in rulesByRHSStart[s, default: []] {
print("\(g): initiate\t", ruleString(r))
advance(PartialParse(expected: r, start: g.lowerBound), to: g.upperBound)
}
}
}
/// ------ Testing and diagnostics ------
/// Diagnostic output
extension ChartParser {
/// Returns the name of symbol s
func symbolName(_ s: SymbolID) -> String {
"\(Symbol.names[s])"
}
func ruleString(_ r: RuleID) -> String {
let r1 = rulesByRHSStart.values.lazy.joined().first { $0.upperBound == r.upperBound }!
let head = "\(symbolName(ruleStore[r.upperBound])) => "
let rhsPrefix = r1 == r ? ""
: (ruleStore[r1.lowerBound..<r.lowerBound].map(symbolName) + ["."])
.joined(separator: " ")
let tail = ruleStore[r].lazy.map(symbolName).joined(separator: " ")
return head + rhsPrefix + tail
}
}
/// Grammar definition
enum Symbol: Int, CaseIterable {
case LPAREN, RPAREN, NUMBER, PLUS, MINUS, TIMES, DIVIDE
case term, factor, expr
static let names = Array(allCases)
}
/// A tiny DSL for defining grammar rules.
infix operator => : AssignmentPrecedence
infix operator ~: MultiplicationPrecedence
func ~(a: Symbol, b: Symbol) -> [Symbol] { [a, b] }
func ~(a: [Symbol], b: Symbol) -> [Symbol] { a + [b] }
func =>(lhs: Symbol, rhs: Symbol) -> [Symbol] { [lhs, rhs] }
func =>(lhs: Symbol, rhs: [Symbol]) -> [Symbol] { [lhs] + rhs }
let grammar: [[Symbol]] = [
.expr => .term,
.expr => .term ~ .PLUS ~ .term,
.expr => .term ~ .MINUS ~ .term,
.term => .factor,
.term => .factor ~ .TIMES ~ .factor,
.term => .factor ~ .DIVIDE ~ .factor,
.factor => .NUMBER,
.factor => .LPAREN ~ .expr ~ .RPAREN,
.factor => .PLUS ~ .factor,
.factor => .MINUS ~ .factor,
]
var parser = ChartParser(
grammar.lazy.map {
(lhs: $0.first!.rawValue, rhs: $0.dropFirst().lazy.map { $0.rawValue })
}
)
let input: [Symbol] = [
.MINUS, .LPAREN, .NUMBER, .PLUS, .NUMBER, .TIMES, .MINUS, .NUMBER, .RPAREN,
.DIVIDE, .NUMBER,
.PLUS, .NUMBER]
parser.parse(input.lazy.map {$0.rawValue})
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment