Skip to content

Instantly share code, notes, and snippets.

@pchiusano
Last active November 7, 2023 12:53
Show Gist options
  • Save pchiusano/1d7e498063dc1f9f4e24 to your computer and use it in GitHub Desktop.
Save pchiusano/1d7e498063dc1f9f4e24 to your computer and use it in GitHub Desktop.
Simple JSON parser combinator library that does not use zippers
// WARNING! totally untested, I have only compiled the code! :)
package json
import collection.immutable.Map
import scalaz.{\/, MonadPlus}
import scalaz.\/._
import scalaz.std.vector._
import scalaz.std.map._
import scalaz.std.list._
import scalaz.syntax.traverse._
import Parser._
object Example {
val M: MonadPlus[Parser] = Parser.parser
case class Foo(label: String, values: Vector[Double])
/*
* A parser that parses a `Foo` from JSON documents of this form:
* [x, { "foo" : ["foolabel", [1.0, 2.0, 3.0]], ... }, ...]
*/
val p: Parser[Foo] =
1 :: "foo" :: M.apply2(0 :: string,
1 :: array(double))(Foo(_,_))
// NB: if we like, we could introduce `product2(p1, p2)(f)`
// as alias for `M.apply2(0 :: p1, 1 :: p2)(f)`, and so on
// for other arities
/*
* Unions are easy, too. This parses documents of the form:
*
* { "tag" : "theleft",
* "value" : ["uno", "dos", "tres"]
* }
* OR
* { "tag" : "theright",
* "value" : { "name" : "json",
* "status" : "annoying",
* "zippers-useful-for-parsing" : "no" }
* }
*/
val p2: Parser[Either[List[String], Map[String,String]]] =
union("tag", "value") {
case "theleft" => array(string).map(_.toList).map(Left(_))
case "theright" => obj(string).map(Right(_))
case other => fail("unknown tag: " + other)
}
}
/** A JSON AST. */
trait Json
object Json {
case object Null extends Json
case class N(value: Double) extends Json
case class B(value: Boolean) extends Json
case class S(value: String) extends Json
case class Array(values: Vector[Json]) extends Json
case class Object(values: Map[String,Json]) extends Json
}
/*
* The simplest thing that could possibly work. Parsers are just
* regular functions from the AST to an `Either`. Notice that with
* this model, we need not perform allocation unless we need to actually
* return a result (modulo allocating objects for `Map` lookups, etc).
* We just use Scala's regular call stack!
*
* In contrast, using zippers means that combinators like `at` are making
* relative movements by transforming `Cursor` objects. The `Cursor` is
* effectively a substitute for the call stack. Rather than simply extracting
* a subtree from the AST and passing that to a subparser, we are transforming
* the cursor to point to a different location and then calling the subparser
* to achieve the same effect! This is all a bit overkill. There's nothing wrong
* with just using Scala's regular call stack, with the movement patterns wrapped
* up into combinators like `Parser.array`, `Parser.object`, `Parser.at`, etc.
*
* Note that this is the same basic model used by Haskell's Aeson library, which
* has excellent performance. That library has some CPS'ing thrown in, but it is
* the same idea as here.
*/
case class Parser[+A](run: Json => Error \/ A) {
def flatMap[B](f: A => Parser[B]): Parser[B] =
Parser { j => run(j) flatMap (f andThen (_ run j)) }
def map[B](f: A => B): Parser[B] =
Parser { j => run(j) map f }
def or[A2>:A](p2: => Parser[A2]): Parser[A2] =
Parser { j => run(j) orElse p2.run(j) }
// a bit of syntax sugar, for supporting
// "foo" :: "bar" :: Parser.array(Parser.number)
def ::(key: String): Parser[A] =
Parser.at(key)(this)
def ::(index: Int): Parser[A] =
Parser.at(index)(this)
}
object Parser {
import Json._
implicit val parser = new MonadPlus[Parser] {
def point[A](a: => A): Parser[A] = Parser.unit(a)
def bind[A,B](a: Parser[A])(f: A => Parser[B]): Parser[B] = a flatMap f
def empty[A]: Parser[A] = zero
def plus[A](p: Parser[A], p2: => Parser[A]): Parser[A] = p or p2
}
type Error = List[String] // stack of what
/** The parser which always succeeds with the given value. */
def unit[A](a: => A): Parser[A] = Parser { _ => right(a) }
/** The parser which always fails and adds nothing to the error stack. */
def zero[A]: Parser[A] = Parser { _ => left(Nil) }
/** The parser which always fails and adds `msg` to the error stack. */
def fail[A](msg: String): Parser[A] = Parser { _ => left(List(msg)) }
/** Return the current subtree. */
def value: Parser[Json] = Parser { j => right(j) }
/** Parse a `Double` from the current location, assumed to be a `Json.N`. */
def double: Parser[Double] = Parser {
case N(n) => right(n)
case j => left(List("not a number: " + j))
}
/** Parse an `Int` from the current location, assumed to be a `Json.N`. */
def floor: Parser[Int] = double.map(_.toInt)
/** Parse a `String` from the current location, assumed to be a `Json.S`. */
def string: Parser[String] = Parser {
case S(s) => right(s)
case j => left(List("not a string: " + j))
}
/** Parse a `String` from the current location, assumed to be a `Json.B`. */
def boolean: Parser[Boolean] = Parser {
case B(b) => right(b)
case j => left(List("not a boolean: " + j))
}
/** Pushes `label` onto the `Error` stack if `p` fails. */
def scope[A](label: String)(p: Parser[A]): Parser[A] =
Parser { j => p.run(j).leftMap { label :: _ } }
/**
* Run `p` at the subtree of the input corresponding
* to the given `Object` key.
*/
def at[A](key: String)(p: Parser[A]): Parser[A] =
scope(key) { Parser {
case Object(m) => m.get(key) match {
case None => left(List("key not found in: " + m))
case Some(j) => p.run(j) // notice `p` gets no access to parents
// also notice that `p` need not traverse
// from the root of the AST
}
case j => left(List("not an object: " + j))
}}
/**
* Run `p` at the subtree of the input corresponding
* to the given `Array` index (0-based).
*/
def at[A](index: Int)(p: Parser[A]): Parser[A] =
scope("index " + index) { Parser {
case Array(v) =>
if (index < v.length) p.run(v(index))
else left(List("invalid index in " + v))
case j => left(List("not an array: " + j))
}}
/**
* Parse a `Vector` of `A` values from the current subtree,
* assumed to be a `Json.Array`.
*/
def array[A](p: Parser[A]): Parser[Vector[A]] =
Parser {
case Array(v) => v.traverseU(p run _)
case j => left(List("not an array: " + j))
}
/**
* Parse a `Map` of `A` values from the current subtree,
* assumed to be a `Json.Object`.
*/
def obj[A](p: Parser[A]): Parser[Map[String,A]] =
Parser {
// case Object(m) => m.traverseU(p run _) // this "should" work
case Object(m) =>
val entries = m.toList
entries.map(_._2).traverseU(p run _).fold(
left,
as => right(entries.view.map(_._1).zip(as).toMap)
)
case j => left(List("not an object: " + j))
}
/**
* Parse a union from a `Json.Object`, where the `tag` field is
* used to select a `Parser` using `f`, which is run on the `v` field.
*/
def union[A](tag: String, value: String)(f: String => Parser[A]): Parser[A] =
(tag :: string).flatMap { t => value :: f(t) }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment