Last active
August 12, 2020 15:41
-
-
Save ivg/7c8f12d9e6ed9164c9c4fd7727529105 to your computer and use it in GitHub Desktop.
Lifting a toy bytecode using Core Theory
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
open Core_kernel | |
open Bap_core_theory | |
open Bap.Std | |
open KB.Syntax | |
include Self() | |
let package = "bytoy" | |
type name = string [@@deriving equal,sexp] | |
type oper = Reg of int | Imm of int [@@deriving equal,sexp] | |
type insn = name * int * oper [@@deriving equal,sexp] | |
type code = insn array [@@deriving equal,sexp] | |
type ctxt = {proc : int; pc: int} [@@deriving equal, sexp] | |
type proc = {name : string; code : code} [@@deriving equal, sexp] | |
type prog = proc array [@@deriving equal,sexp] | |
let byte = Theory.Bitv.define 8 | |
let word = Theory.Bitv.define 32 | |
let bool = Theory.Bool.t | |
let heap = Theory.Mem.define word byte (* global byte-addressable memory *) | |
let vars = Theory.Mem.define word word (* local word addressable variables *) | |
module Word = Bitvec.M32 | |
module Byte = Bitvec.M8 | |
let pp_insn ppf x = Sexp.pp_hum ppf (sexp_of_insn x) | |
let r = Array.init 32 ~f:(fun i -> | |
Theory.Var.define word (sprintf "R%d" i)) | |
let mem = Theory.Var.define heap "heap" | |
let local = Theory.Var.define vars "locals" | |
let fp = Theory.Var.define word "FP" | |
let frame_size = 128 | |
module Language(Core : Theory.Core) = struct | |
open Core | |
let reg i = var r.(i) | |
let imm x = int word x | |
let op : oper -> _ Theory.Bitv.t Theory.pure = function | |
| Reg x -> reg x | |
| Imm x -> imm (Word.int x) | |
let loads x = signed word @@ load (var mem) x | |
let loadb x = unsigned word @@ load (var mem) x | |
let loadw x = loadw word b1 (var mem) x | |
let saveb p x = store (var mem) p (low byte x) | |
let savew p x = storew b1 (var mem) p x | |
let get x = load (var local) (add (var fp) x) | |
let put p x = store (var local) (add (var fp) p) x | |
let case_binop name ~matches:ok no = match name with | |
| "add" -> ok add | |
| "sub" -> ok sub | |
| "mul" -> ok mul | |
| "div" -> ok div | |
| _ -> no () | |
let case_loadw name ~matches:ok no = match name with | |
| "lds" -> ok loads | |
| "ldb" -> ok loadb | |
| "ldw" -> ok loadw | |
| _ -> no () | |
let case_savew name ~matches:ok no = match name with | |
| "stb" -> ok saveb | |
| "stw" -> ok savew | |
| _ -> no () | |
let pass = perform Theory.Effect.Sort.bot | |
let skip = perform Theory.Effect.Sort.bot | |
let block seq data ctrl = | |
Theory.Label.for_addr (Word.int seq) >>= fun label -> | |
blk label data ctrl | |
let data seq data = block seq data skip | |
let ctrl seq ctrl = block seq pass ctrl | |
let const x = int word (Word.int x) | |
let new_frame pc = seq | |
(set fp (add (var fp) (const (frame_size+1)))) | |
(set local (put (const ~-1) (const pc))) | |
let del_frame = | |
set fp (sub (var fp) (const (frame_size+1))) | |
let jumps prog {proc; pc} name r x = match x with | |
| Reg _ -> failwith "ill-formed program: non-const jump" | |
| Imm dst -> | |
match name with | |
| "call" -> | |
Theory.Label.for_name prog.(dst).name >>= fun dst -> | |
KB.provide Theory.Label.is_subroutine dst (Some true) >>= | |
fun () -> | |
block pc (new_frame pc) (goto dst) | |
| "ret" -> | |
block pc del_frame (jmp (get (const frame_size))) | |
| "jmp" -> | |
Theory.Label.for_addr (Word.int (pc+dst)) >>= fun dst -> | |
ctrl pc @@ goto dst | |
| "beq" -> | |
Theory.Label.for_addr (Word.int (pc+dst)) >>= fun dst -> | |
ctrl pc @@ branch (non_zero (reg r)) | |
(goto dst) | |
skip | |
| opcode -> invalid_argf "unknown opcode %S" opcode () | |
let insn : prog -> ctxt -> insn -> _ = fun prog {proc;pc} (name,d,s) -> | |
case_binop name ~matches:(fun f -> | |
data pc @@ set r.(d) (f (reg d) (op s))) @@ fun () -> | |
case_loadw name ~matches:(fun load -> | |
data pc @@ set r.(d) (load (op s))) @@ fun () -> | |
case_savew name ~matches:(fun save -> | |
data pc @@ set mem (save (reg d) (op s))) @@ fun () -> | |
match name with | |
| "get" -> | |
data pc @@ set r.(d) (get (op s)) | |
| "set" -> | |
data pc @@ set local (put (reg d) (op s)) | |
| _ -> jumps prog {proc; pc} name d s | |
end | |
let def_only blk = Term.length jmp_t blk = 0 | |
let is_unconditional jmp = match Jmp.cond jmp with | |
| Bil.Int _ -> true | |
| _ -> false | |
let is_last_jump_unconditional blk = | |
match Term.last jmp_t blk with | |
| None -> false | |
| Some jmp -> is_unconditional jmp | |
let fall_if_possible dst blk = | |
if is_last_jump_unconditional blk | |
then blk | |
else | |
Term.append jmp_t blk @@ | |
Jmp.reify ~dst () | |
let localize_if_call pc blk = match Term.last jmp_t blk with | |
| None -> KB.return blk | |
| Some jmp -> match Jmp.alt jmp with | |
| None -> KB.return blk | |
| Some alt -> | |
Theory.Label.for_addr (Word.int (pc+1)) >>| fun dst -> | |
Term.update jmp_t blk @@ | |
Jmp.reify () | |
~tid:(Term.tid jmp) | |
?cnd:(Jmp.guard jmp) | |
~dst:(Jmp.resolved dst) | |
~alt | |
(* concat two def-only blocks *) | |
let append_def_only b1 b2 = | |
let b = Blk.Builder.init ~same_tid:true ~copy_defs:true b1 in | |
Term.enum def_t b2 |> Seq.iter ~f:(Blk.Builder.add_def b); | |
Term.enum jmp_t b2 |> Seq.iter ~f:(Blk.Builder.add_jmp b); | |
Blk.Builder.result b | |
(* [append xs ys] | |
pre: the first block of [xs] is the exit block; | |
pre: the first block of [ys] is the entry block; | |
pre: the last block of [ys] is the exit block; | |
post: the first block of [append xs ys] is the new exit block. *) | |
let append xs ys = match xs, ys with | |
| [],xs | xs,[] -> xs | |
| x :: xs, y :: ys when def_only x -> | |
List.rev_append ys (append_def_only x y :: xs) | |
| x::xs, y::_ -> | |
let fall = Jmp.resolved @@ Term.tid y in | |
let x = fall_if_possible fall x in | |
List.rev_append ys (x::xs) | |
let add_blks sub blks = List.fold blks ~init:sub ~f:(Term.append blk_t) | |
let clone ~tid blk = | |
let b = Blk.Builder.create ~tid () in | |
Term.enum phi_t blk |> Seq.iter ~f:(Blk.Builder.add_phi b); | |
Term.enum def_t blk |> Seq.iter ~f:(Blk.Builder.add_def b); | |
Term.enum jmp_t blk |> Seq.iter ~f:(Blk.Builder.add_jmp b); | |
Blk.Builder.result b | |
let mark_entry_blk pc blks = | |
Theory.Label.for_addr (Word.int pc) >>| fun tid -> | |
match blks with | |
| [] -> [Blk.create ~tid ()] | |
| blk :: blks -> clone ~tid blk :: blks | |
let update_call pc blks = | |
match blks with | |
| [] -> KB.return [] | |
| last :: blks -> | |
localize_if_call pc last >>| fun last -> | |
last :: blks | |
let build_program lift source = | |
let prog = Program.create () in | |
let ctxt = {proc=0; pc=0} in | |
Seq.of_array source |> | |
KB.Seq.fold ~init:(ctxt,prog) ~f:(fun (ctxt,prog) proc -> | |
Seq.of_array proc.code |> | |
KB.Seq.fold ~init:(ctxt,[]) ~f:(fun (ctxt,blks) insn -> | |
lift source ctxt insn >>= | |
mark_entry_blk ctxt.pc >>= fun blks' -> | |
update_call ctxt.pc (append blks blks') >>| fun blks -> | |
{ctxt with pc = ctxt.pc+1},blks) >>= fun (ctxt,blks) -> | |
Theory.Label.for_name proc.name >>| fun tid -> | |
let sub = Sub.create ~tid ~name:proc.name () in | |
let sub = add_blks sub (List.rev blks) in | |
let ctxt = {ctxt with proc = ctxt.proc + 1 } in | |
(ctxt,Term.append sub_t prog sub)) >>| snd | |
let load filename = | |
Sexp.load_sexps filename |> | |
Array.of_list_map ~f:(function | |
| Sexp.List (Atom "defun" :: Atom name :: _ :: code) -> | |
let code = code_of_sexp (Sexp.List code) in | |
{name; code} | |
| _ -> failwith "Parser error, expects (defun <name> () code)") | |
let lifter = KB.Class.declare "bytoy-lifter" () | |
let program_t = KB.Domain.optional "program_t" ~equal:Program.equal | |
let program = KB.Class.property lifter "program" | |
~public:true | |
~package | |
program_t | |
let bytecode_domain = KB.Domain.optional "bytecode" | |
~equal:[%equal: prog*ctxt*insn] | |
~inspect:(fun (_,_,insn) -> sexp_of_insn insn) | |
let bytecode = KB.Class.property Theory.Program.cls "bytecode" | |
~public:true | |
~package | |
bytecode_domain | |
let provide_semantics () = | |
KB.promise Theory.Program.Semantics.slot @@ fun insn -> | |
Theory.(instance>=>require) () >>= fun (module Core) -> | |
let module Lift = Language(Core) in | |
KB.collect bytecode insn >>= function | |
| None -> KB.return Insn.empty | |
| Some (prog,ctxt,code) -> Lift.insn prog ctxt code | |
let lift prog = | |
let collect_bir prog ctxt code = | |
Theory.Label.for_addr (Word.int ctxt.pc) >>= fun insn -> | |
KB.provide bytecode insn (Some (prog,ctxt,code)) >>= fun () -> | |
KB.collect Theory.Program.Semantics.slot insn >>| fun sema -> | |
KB.Value.get Term.slot sema in | |
build_program collect_bir prog | |
let run_lifter prog = | |
let obj = | |
KB.Object.create lifter >>= fun lifter -> | |
lift prog >>= fun prog -> | |
KB.provide program lifter (Some prog) >>| fun () -> | |
lifter in | |
match KB.run lifter obj (Toplevel.current ()) with | |
| Error conflict -> | |
error "ill-formed program: %a" KB.Conflict.pp conflict; | |
invalid_arg "ill-formed program" | |
| Ok (value,state) -> | |
Toplevel.set state; | |
match KB.Value.get program value with | |
| None -> assert false | |
| Some prog -> prog | |
let () = Project.Input.register_loader "bytoy" @@ fun filename -> | |
let empty = Memmap.empty in | |
let source = load filename in | |
provide_semantics (); | |
let prog = run_lifter source in | |
Project.Input.create `unknown filename ~code:empty ~data:empty | |
~finish:(fun proj -> Project.with_program proj prog) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Building
Put this file into a separate folder and issue the following command:
Using
Using the following example program (put it into the example.bytoy file):
We can get lift and gets its graph with the following commands:
You can then view the generated graph either using
xdot example.dot
or translate it to an image, e.g., withdot example.dot -Tpng -o example.png
, the sample output is provided below: