Skip to content

Instantly share code, notes, and snippets.

@joneshf
Forked from natefaubion/Grammar.purs
Created June 19, 2017 13:26
Show Gist options
  • Save joneshf/cdace1eeaf826b5c2a2a0fca5b99ce9e to your computer and use it in GitHub Desktop.
Save joneshf/cdace1eeaf826b5c2a2a0fca5b99ce9e to your computer and use it in GitHub Desktop.
module Grammar where
import Prelude
import Control.Applicative.Free (FreeAp, liftFreeAp, foldFreeAp)
import Control.Alt (class Alt, (<|>))
import Control.Alternative (class Alternative)
import Control.Monad.Eff (Eff)
import Control.Monad.Eff.Console (log, CONSOLE)
import Control.Plus (class Plus, empty)
import Data.Const (Const)
import Data.Either (Either)
import Data.Exists (Exists, mkExists, runExists)
import Data.Foldable (class Foldable, foldl)
import Data.Identity (Identity)
import Data.Leibniz (type (~), coerceSymm)
import Data.List (List(..))
import Data.List as List
import Data.Newtype (wrap, unwrap)
import Data.String as String
import Text.Parsing.Parser (ParserT)
import Text.Parsing.Parser (runParser, ParseError) as Parser
import Text.Parsing.Parser.Combinators (notFollowedBy, choice) as Parser
import Text.Parsing.Parser.String (char, anyChar) as Parser
import Unsafe.Coerce (unsafeCoerce)
-------------------------------------------------------------------------------
--- | Grammar | ---
-------------------------------------------------------------------------------
data GTerm (r ∷ * → *) (p ∷ * → *) t a
= Terminal (a ~ t) t
| NonTerminal (r a)
| Primitive (p a)
data Inj (f ∷ * → *) (g ∷ * → *) a = Inj (a → g a) (Exists f)
mkInj ∷ ∀ f g a b. Applicative g ⇒ (g a → b) → f a → Inj f g b
mkInj _ = Inj pure <<< mkExists
runInj ∷ ∀ f g h a. (∀ b. f b → h (g b)) → Inj f g a → h a
runInj f (Inj _ x) = runExists (unsafeCoerce <<< f) x
data Grammar (r ∷ * → *) (p ∷ * → *) t a
= Ap (FreeAp (Grammar r p t) a)
| Alt (List (Grammar r p t a))
| Repeat (Inj (Grammar r p t) List a)
| Term (GTerm r p t a)
instance functorGrammar ∷ Functor (Grammar r p t) where
map f (Ap a) = Ap (f <$> a)
map f (Alt a) = Alt (map f <$> a)
map f (Term a) = Ap (f <$> liftFreeAp (Term a))
map f a = Ap (f <$> liftFreeAp a)
instance applyGrammar ∷ Apply (Grammar r p t) where
apply (Ap f) (Ap a) = Ap (f <*> a)
apply f a = Ap (liftFreeAp f <*> liftFreeAp a)
instance applicativeGrammar ∷ Applicative (Grammar r p t) where
pure a = Ap (pure a)
instance altGrammar ∷ Alt (Grammar r p t) where
alt (Alt a) (Alt b) = Alt (a <> b)
alt (Alt a) b = Alt (a <> pure b)
alt a (Alt b) = Alt (pure a <> b)
alt a b = Alt (pure a <> pure b)
instance plusGrammar ∷ Plus (Grammar r p t) where
empty = Alt empty
instance alternativeGrammar ∷ Alternative (Grammar r p t)
interpretGrammar
∷ ∀ r p t f a
. (Grammar r p t ~> f)
→ (r ~> Grammar r p t)
→ r a
→ f a
interpretGrammar = (<<<)
lit ∷ ∀ r p t. t → Grammar r p t t
lit = Term <<< Terminal id
rule ∷ ∀ r p t a. r a → Grammar r p t a
rule = Term <<< NonTerminal
prim ∷ ∀ r p t a. p a → Grammar r p t a
prim = Term <<< Primitive
many ∷ ∀ r p t a. Grammar r p t a → Grammar r p t (List a)
many = Repeat <<< mkInj id
many1 ∷ ∀ r p t a. Grammar r p t a → Grammar r p t (List a)
many1 p = Cons <$> p <*> many p
sepBy ∷ ∀ r p t a b. Grammar r p t a → Grammar r p t b → Grammar r p t (List b)
sepBy p1 p2 = Cons <$> p2 <*> many (p1 *> p2) <|> pure Nil
define ∷ ∀ r p t a b. (a ~ b) → Grammar r p t b → Grammar r p t a
define _ = unsafeCoerce
infix 0 define as ::=
-------------------------------------------------------------------------------
--- | Definition | ---
-------------------------------------------------------------------------------
type Doc = List Line
type Line = List Cell
type Cell = List Char
data CSVRule a
= RCSV (a ~ Doc)
| RLine (a ~ Line)
| RCell (a ~ Cell)
data CSVPrim a
= PNotQuote (a ~ Char)
root ∷ ∀ p t. Grammar CSVRule p t Doc
root = rule (RCSV id)
line ∷ ∀ p t. Grammar CSVRule p t Line
line = rule (RLine id)
cell ∷ ∀ p t. Grammar CSVRule p t Cell
cell = rule (RCell id)
notQuote ∷ ∀ r t. Grammar r CSVPrim t Char
notQuote = prim (PNotQuote id)
csv ∷ CSVRule ~> Grammar CSVRule CSVPrim Char
csv = case _ of
RCSV p →
p ::= many line
RLine p →
p ::= commaSeparated cell <* newline
RCell p →
p ::= lit '"' *> many notQuote <* lit '"'
where
commaSeparated = sepBy (lit ',')
newline = lit '\n'
-------------------------------------------------------------------------------
--- | BNF Interpreter | ---
-------------------------------------------------------------------------------
csvBNF ∷ String
csvBNF =
String.joinWith "\n" $
map printDef
[ mkExists (RCSV id)
, mkExists (RLine id)
, mkExists (RCell id)
]
where
print ∷ ∀ a. CSVRule a → String
print = unwrap <<< interpretGrammar printer csv
printer ∷ Grammar CSVRule CSVPrim Char ~> Const String
printer = case _ of
Ap fa → wrap (joinWith ", " (unwrap (foldFreeAp (printAp <<< printer) fa)))
Alt gs → wrap (joinWith " | " (unwrap <<< printer <$> gs))
Repeat inj → runInj (\g → wrap ("{" <> unwrap (printer g) <> "}")) inj
Term t → printTerm t
printAp ∷ Const String ~> Const (List String)
printAp = wrap <<< pure <<< unwrap
printTerm ∷ GTerm CSVRule CSVPrim Char ~> Const String
printTerm = case _ of
Terminal _ t → wrap (show t)
NonTerminal r → wrap (printRule r)
Primitive p → wrap (printPrimitive p)
printRule ∷ ∀ a. CSVRule a → String
printRule = case _ of
RCSV _ → "root"
RLine _ → "line"
RCell _ → "cell"
printPrimitive ∷ ∀ a. CSVPrim a → String
printPrimitive = case _ of
PNotQuote _ → "NOT_QUOTE"
printDef ∷ Exists CSVRule → String
printDef = runExists \r → printRule r <> " = " <> print r
joinWith ∷ ∀ f. Foldable f ⇒ String → f String → String
joinWith w = foldl go ""
where
go a "" = a
go "" b = b
go a b = a <> w <> b
-------------------------------------------------------------------------------
--- | Parser Interpreter | ---
-------------------------------------------------------------------------------
type CSVParser = ParserT String Identity
csvParser ∷ CSVParser Doc
csvParser = parse (RCSV id)
where
parse ∷ CSVRule ~> CSVParser
parse a = interpretGrammar parser csv a
parser ∷ Grammar CSVRule CSVPrim Char ~> CSVParser
parser = case _ of
Ap fa → foldFreeAp parser fa
Alt gs → Parser.choice (parser <$> gs)
Repeat inj → runInj (List.many <<< parser) inj
Term t → parseTerm t
parseTerm ∷ GTerm CSVRule CSVPrim Char ~> CSVParser
parseTerm = case _ of
Terminal p t → coerceSymm p <$> Parser.char t
NonTerminal r → parse r -- TODO recursion/sharing
Primitive p → parsePrimitive p
parsePrimitive ∷ CSVPrim ~> CSVParser
parsePrimitive = case _ of
PNotQuote p →
coerceSymm p <$>
(Parser.notFollowedBy (Parser.char '"') *> Parser.anyChar)
parseCSV ∷ String → Either Parser.ParseError Doc
parseCSV = flip Parser.runParser csvParser
-------------------------------------------------------------------------------
--- | Main | ---
-------------------------------------------------------------------------------
testInput ∷ String
testInput = """"foo","bar","baz"
"a1","b2","c3"
"""
main ∷ Eff (console ∷ CONSOLE) Unit
main = do
log ""
log "Grammar:"
log ""
log csvBNF
log ""
log "Input:"
log ""
log testInput
log "Parse:"
log ""
log (show (parseCSV testInput))
log ""
<!DOCTYPE html>
<html>
<head>
<title>Interpreting Applicative Grammars</title>
<meta charset="utf-8">
<style>
@import url('https://fonts.googleapis.com/css?family=Bungee|Source+Code+Pro:400,700|Titillium+Web:400,400i,700');
body { font-family: 'Titillium Web'; }
h1 {
font-family: 'Bungee';
font-weight: normal;
}
.remark-code, .remark-inline-code { font-family: 'Source Code Pro'; }
.remark-slide-content { font-size: 32px; }
</style>
</head>
<body>
<script src="https://remarkjs.com/downloads/remark-latest.min.js"></script>
<script>
var slideshow = remark.create({
sourceUrl: 'presentation.md',
highlightLines: true
});
</script>
</body>
</html>

class: center, middle

Interpreting Applicative Grammars


name: parsers

Free Parsers?


template: parsers

Monadic Parsers

type Parser a = Free ParserF a

data ParserF a
  = Draw (Maybe Char  a)
  | Fail String
  | ...

template: parsers

Monadic Parsers

  • Simple, imperative primitives

--

  • Context-sensitive

--

  • Impossible to analyze

template: parsers

Applicative Parsers

type Parser a = Compose List (FreeAp ParserF) a

data ParserF a
  = Lit Char
  | ...

template: parsers

Applicative Parsers

  • Complex, declarative primitives

--

  • Context-free

--

  • Easy to analyze

name: grammars

Grammars


template: grammars

--

  • Stringly-typed

--

  • Awkward type class based interpreter

class: middle center

Keep it free

and well-typed


name: build

Build a Grammar


template: build

CSV

csv  = {line}
line = cell, {',', cell}, newline
cell = '"', {NOT_QUOTE} , '"'

template: build

data CSVRule
  = RCSV
  | RLine
  | RCell

--

csv  CSV -> Grammar ???

template: build name: rules

import Data.Leibniz (type (~))

type Doc  = List Line
type Line = List Cell
type Cell = List Char

data CSVRule a
  = RCSV  (a ~ Doc)
  | RLine (a ~ Line)
  | RCell (a ~ Cell)

template: rules

csv  CSVRule ~> Grammar ???

template: rules

csv  CSVRule ~> Grammar CSVRule ???

template: rules

csv  CSVRule ~> Grammar CSVRule Char ???

template: build

import Data.Leibniz (type (~))

type Doc  = List Line
type Line = List Cell
type Cell = List Char

data CSVRule a
  = RCSV  (a ~ Doc)
  | RLine (a ~ Line)
  | RCell (a ~ Cell)

data CSVPrim a
  = PNotQuote (a ~ Char)
csv  CSVRule ~> Grammar CSVRule CSVPrim Char

template: build name: gtype

data GTerm (r  *  *) (p  *  *) t a
  = Terminal (a ~ t) t
  | NonTerminal (r a)
  | Primitive (p a)

template: gtype

data Grammar (r  *  *) (p  *  *) t a
  = Term (GTerm r p t a)
  | ???

template: gtype

data Grammar (r  *  *) (p  *  *) t a
  = Term (GTerm r p t a)
  | Alt (List (Grammar r p t a))
  | ???

template: gtype

data Grammar (r  *  *) (p  *  *) t a
  = Term (GTerm r p t a)
  | Alt (List (Grammar r p t a))
  | Ap (FreeAp (Grammar r p t) a)
  | ???

class: center middle

Repetition?


template: build name: rep

many   r p t a. Grammar r p t a  Grammar r p t (List a)

template: gtype

data Grammar (r  *  *) (p  *  *) t a
  = Term (GTerm r p t a)
  | Alt (List (Grammar r p t a))
  | Ap (FreeAp (Grammar r p t) a)
* | Repeat (a ~ List a)

template: build

Repeat

  • Provide a contract for a → List a over some functor f
  • Interpreter must satisfy it

template: gtype

data Grammar (r  *  *) (p  *  *) t a
  = Term (GTerm r p t a)
  | Alt (List (Grammar r p t a))
  | Ap (FreeAp (Grammar r p t) a)
* | Repeat (Inj (Grammar r p t) List a)

template: build

data Inj (f  *  *) (g  *  *) a
  = Inj (a  g a) (Exists f)

mkInj
    f g a b
  . Applicative g
   (g a  b)
   f a
   Inj f g b
mkInj _ = Inj pure <<< mkExists

runInj
    f g h a
  . ( b. f b  h (g b))
   Inj f g a  h a
runInj f (Inj _ x) = runExists (unsafeCoerce <<< f) x

template: build

instance functorGrammar  Functor (Grammar r p t)
instance applyGrammar  Apply (Grammar r p t)
instance applicativeGrammar  Applicative (Grammar r p t)
instance altGrammar  Alt (Grammar r p t)
instance plusGrammar  Plus (Grammar r p t)
instance alternativeGrammar  Alternative (Grammar r p t)

template: build

lit   r p t. t  Grammar r p t t
lit = Term <<< Terminal id

rule   r p t a. r a  Grammar r p t a
rule = Term <<< NonTerminal

prim   r p t a. p a  Grammar r p t a
prim = Term <<< Primitive

many   r p t a. Grammar r p t a  Grammar r p t (List a)
many = Repeat <<< mkInj id

As well as many1, sepBy, etc


template: build

interpretGrammar
    r p t f a
  . (Grammar r p t ~> f)
   (r ~> Grammar r p t)
   r a
   f a
interpretGrammar = (<<<)

template: build

interpretGrammar
    r p t f a
  . (Grammar r p t ~> f)
   (r ~> Grammar r p t)
   r a
   f a
interpretGrammar = (<<<)

define
    r p t a b
  . (a ~ b)
   Grammar r p t b
   Grammar r p t a
define _ = unsafeCoerce

infix 0 define as ::=

template: build

type Doc  = List Line
type Line = List Cell
type Cell = List Char

data CSVRule a
  = RCSV  (a ~ Doc)
  | RLine (a ~ Line)
  | RCell (a ~ Cell)

data CSVPrim a
  = PNotQuote (a ~ Char)

template: build

root   p t. Grammar CSVRule p t Doc
root = rule (RCSV id)

line   p t. Grammar CSVRule p t Line
line = rule (RLine id)

cell   p t. Grammar CSVRule p t Cell
cell = rule (RCell id)

notQuote   r t. Grammar r CSVPrim t Char
notQuote = prim <<< PNotQuote id

template: build

csv  CSVRule ~> Grammar CSVRule CSVPrim Char

template: build

csv  CSVRule ~> Grammar CSVRule CSVPrim Char
csv = case _ of
  RCSV p 
    p ::= many line

template: build

csv  CSVRule ~> Grammar CSVRule CSVPrim Char
csv = case _ of
  RCSV p 
    p ::= many line

  RLine p 
    p ::= commaSeparated cell <* newline

template: build

csv  CSVRule ~> Grammar CSVRule CSVPrim Char
csv = case _ of
  RCSV p 
    p ::= many line

  RLine p 
    p ::= commaSeparated cell <* newline

  RCell p 
    p ::= lit '"' *> many notQuote <* lit '"'

template: build

csv  CSVRule ~> Grammar CSVRule CSVPrim Char
csv = case _ of
  RCSV p 
    p ::= many line

  RLine p 
    p ::= commaSeparated cell <* newline

  RCell p 
    p ::= lit '"' *> many notQuote <* lit '"'

  where
    commaSeparated = sepBy (lit ',')
    newline = lit '\n'

class: middle center

Examples


Related

--

Earley (Hackage)

--

  • Applicative interface

--

  • Embedded in a Grammar Monad

--

  • MonadFix used for sharing

Use Cases

--

  • Efficient parsers

--

  • Better error handling

--

  • Abstract over parsing strategy?

class: middle center

fin

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment