Last active
November 7, 2023 12:53
-
-
Save pchiusano/1d7e498063dc1f9f4e24 to your computer and use it in GitHub Desktop.
Simple JSON parser combinator library that does not use zippers
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// WARNING! totally untested, I have only compiled the code! :) | |
package json | |
import collection.immutable.Map | |
import scalaz.{\/, MonadPlus} | |
import scalaz.\/._ | |
import scalaz.std.vector._ | |
import scalaz.std.map._ | |
import scalaz.std.list._ | |
import scalaz.syntax.traverse._ | |
import Parser._ | |
object Example { | |
val M: MonadPlus[Parser] = Parser.parser | |
case class Foo(label: String, values: Vector[Double]) | |
/* | |
* A parser that parses a `Foo` from JSON documents of this form: | |
* [x, { "foo" : ["foolabel", [1.0, 2.0, 3.0]], ... }, ...] | |
*/ | |
val p: Parser[Foo] = | |
1 :: "foo" :: M.apply2(0 :: string, | |
1 :: array(double))(Foo(_,_)) | |
// NB: if we like, we could introduce `product2(p1, p2)(f)` | |
// as alias for `M.apply2(0 :: p1, 1 :: p2)(f)`, and so on | |
// for other arities | |
/* | |
* Unions are easy, too. This parses documents of the form: | |
* | |
* { "tag" : "theleft", | |
* "value" : ["uno", "dos", "tres"] | |
* } | |
* OR | |
* { "tag" : "theright", | |
* "value" : { "name" : "json", | |
* "status" : "annoying", | |
* "zippers-useful-for-parsing" : "no" } | |
* } | |
*/ | |
val p2: Parser[Either[List[String], Map[String,String]]] = | |
union("tag", "value") { | |
case "theleft" => array(string).map(_.toList).map(Left(_)) | |
case "theright" => obj(string).map(Right(_)) | |
case other => fail("unknown tag: " + other) | |
} | |
} | |
/** A JSON AST. */ | |
trait Json | |
object Json { | |
case object Null extends Json | |
case class N(value: Double) extends Json | |
case class B(value: Boolean) extends Json | |
case class S(value: String) extends Json | |
case class Array(values: Vector[Json]) extends Json | |
case class Object(values: Map[String,Json]) extends Json | |
} | |
/* | |
* The simplest thing that could possibly work. Parsers are just | |
* regular functions from the AST to an `Either`. Notice that with | |
* this model, we need not perform allocation unless we need to actually | |
* return a result (modulo allocating objects for `Map` lookups, etc). | |
* We just use Scala's regular call stack! | |
* | |
* In contrast, using zippers means that combinators like `at` are making | |
* relative movements by transforming `Cursor` objects. The `Cursor` is | |
* effectively a substitute for the call stack. Rather than simply extracting | |
* a subtree from the AST and passing that to a subparser, we are transforming | |
* the cursor to point to a different location and then calling the subparser | |
* to achieve the same effect! This is all a bit overkill. There's nothing wrong | |
* with just using Scala's regular call stack, with the movement patterns wrapped | |
* up into combinators like `Parser.array`, `Parser.object`, `Parser.at`, etc. | |
* | |
* Note that this is the same basic model used by Haskell's Aeson library, which | |
* has excellent performance. That library has some CPS'ing thrown in, but it is | |
* the same idea as here. | |
*/ | |
case class Parser[+A](run: Json => Error \/ A) { | |
def flatMap[B](f: A => Parser[B]): Parser[B] = | |
Parser { j => run(j) flatMap (f andThen (_ run j)) } | |
def map[B](f: A => B): Parser[B] = | |
Parser { j => run(j) map f } | |
def or[A2>:A](p2: => Parser[A2]): Parser[A2] = | |
Parser { j => run(j) orElse p2.run(j) } | |
// a bit of syntax sugar, for supporting | |
// "foo" :: "bar" :: Parser.array(Parser.number) | |
def ::(key: String): Parser[A] = | |
Parser.at(key)(this) | |
def ::(index: Int): Parser[A] = | |
Parser.at(index)(this) | |
} | |
object Parser { | |
import Json._ | |
implicit val parser = new MonadPlus[Parser] { | |
def point[A](a: => A): Parser[A] = Parser.unit(a) | |
def bind[A,B](a: Parser[A])(f: A => Parser[B]): Parser[B] = a flatMap f | |
def empty[A]: Parser[A] = zero | |
def plus[A](p: Parser[A], p2: => Parser[A]): Parser[A] = p or p2 | |
} | |
type Error = List[String] // stack of what | |
/** The parser which always succeeds with the given value. */ | |
def unit[A](a: => A): Parser[A] = Parser { _ => right(a) } | |
/** The parser which always fails and adds nothing to the error stack. */ | |
def zero[A]: Parser[A] = Parser { _ => left(Nil) } | |
/** The parser which always fails and adds `msg` to the error stack. */ | |
def fail[A](msg: String): Parser[A] = Parser { _ => left(List(msg)) } | |
/** Return the current subtree. */ | |
def value: Parser[Json] = Parser { j => right(j) } | |
/** Parse a `Double` from the current location, assumed to be a `Json.N`. */ | |
def double: Parser[Double] = Parser { | |
case N(n) => right(n) | |
case j => left(List("not a number: " + j)) | |
} | |
/** Parse an `Int` from the current location, assumed to be a `Json.N`. */ | |
def floor: Parser[Int] = double.map(_.toInt) | |
/** Parse a `String` from the current location, assumed to be a `Json.S`. */ | |
def string: Parser[String] = Parser { | |
case S(s) => right(s) | |
case j => left(List("not a string: " + j)) | |
} | |
/** Parse a `String` from the current location, assumed to be a `Json.B`. */ | |
def boolean: Parser[Boolean] = Parser { | |
case B(b) => right(b) | |
case j => left(List("not a boolean: " + j)) | |
} | |
/** Pushes `label` onto the `Error` stack if `p` fails. */ | |
def scope[A](label: String)(p: Parser[A]): Parser[A] = | |
Parser { j => p.run(j).leftMap { label :: _ } } | |
/** | |
* Run `p` at the subtree of the input corresponding | |
* to the given `Object` key. | |
*/ | |
def at[A](key: String)(p: Parser[A]): Parser[A] = | |
scope(key) { Parser { | |
case Object(m) => m.get(key) match { | |
case None => left(List("key not found in: " + m)) | |
case Some(j) => p.run(j) // notice `p` gets no access to parents | |
// also notice that `p` need not traverse | |
// from the root of the AST | |
} | |
case j => left(List("not an object: " + j)) | |
}} | |
/** | |
* Run `p` at the subtree of the input corresponding | |
* to the given `Array` index (0-based). | |
*/ | |
def at[A](index: Int)(p: Parser[A]): Parser[A] = | |
scope("index " + index) { Parser { | |
case Array(v) => | |
if (index < v.length) p.run(v(index)) | |
else left(List("invalid index in " + v)) | |
case j => left(List("not an array: " + j)) | |
}} | |
/** | |
* Parse a `Vector` of `A` values from the current subtree, | |
* assumed to be a `Json.Array`. | |
*/ | |
def array[A](p: Parser[A]): Parser[Vector[A]] = | |
Parser { | |
case Array(v) => v.traverseU(p run _) | |
case j => left(List("not an array: " + j)) | |
} | |
/** | |
* Parse a `Map` of `A` values from the current subtree, | |
* assumed to be a `Json.Object`. | |
*/ | |
def obj[A](p: Parser[A]): Parser[Map[String,A]] = | |
Parser { | |
// case Object(m) => m.traverseU(p run _) // this "should" work | |
case Object(m) => | |
val entries = m.toList | |
entries.map(_._2).traverseU(p run _).fold( | |
left, | |
as => right(entries.view.map(_._1).zip(as).toMap) | |
) | |
case j => left(List("not an object: " + j)) | |
} | |
/** | |
* Parse a union from a `Json.Object`, where the `tag` field is | |
* used to select a `Parser` using `f`, which is run on the `v` field. | |
*/ | |
def union[A](tag: String, value: String)(f: String => Parser[A]): Parser[A] = | |
(tag :: string).flatMap { t => value :: f(t) } | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment