package bap-mc

  1. Overview
  2. Docs
Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source

Source file mc_main.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
let mc_man = {|
# DESCRIPTION

Disassembles a (human readable) string of bytes. This command is the
BAP machine code playground, which is intended to mimic a subset of
llvm-mc functionality, using the BAP disassembly backend. The main
use case is to explore various information associated with the
dissasembled instruction.

# EXAMPLES

The following input formats are supported:

```
    0x31 0xd2 0x48 0xf7 0xf3
    \\\\x31\\\\xd2\\\\x48\\\\xf7\\\\xf3
    31 d2 48 f7 f3
    31d248f7f3";
```

|}

let objdump_man = {|
# DESCRIPTION

Disassembles and prints a binary using the linear sweep algorithm.
This command is a sibling to the $(b,mc) command, except
that it takes a binary file as an input. If the binary contains only
raw code (i.e., no meta information), then use the $(b,raw) loader.

# EXAMPLES

```
  bap objdump /bin/ls --show-asm
  bap objdump ./code --loader=raw
```
|}

open Core_kernel
open Format
open Regular.Std
open Bap.Std
open Bap_plugins.Std
open Bap_core_theory
open Bap_main

open Result.Monad_infix

type error =
  | Bad_user_input
  | Bad_insn of mem * int * int
  | Create_mem of Error.t
  | No_input
  | Unknown_arch
  | Invalid_base of string
  | Trailing_data of int
  | Inconsistency of KB.conflict
  | Unknown_format of string * string * string list
  | No_formats_expected of string
  | Disassembler_failed of Error.t
  | Loader_failed of Error.t

type Extension.Error.t += Fail of error

type output = [
  | `insn
  | `bil
  | `bir
  | `sema
  | `kinds
  | `size
  | `invalid
] [@@deriving compare]

let outputs = [
  `insn;
  `bil;
  `bir;
  `sema;
  `kinds;
  `size;
  `invalid;
]

let fail err = Error (Fail err)

let enabled = "enabled"

module Spec = struct
  open Extension

  let input = Command.arguments Type.string
  let file = Command.argument Type.file

  let arch_type = Type.define
      ~parse:(fun s -> match Arch.of_string s with
          | None -> invalid_arg "unknown architecture"
          | Some arch -> arch)
      ~print:Arch.to_string
      `x86_64

  let arch =
    let doc = "Target architecture" in
    Command.parameter ~doc arch_type "arch"

  let outputs =
    let name_of_output = function
      | `insn -> "insn"
      | `bil  -> "bil"
      | `bir  -> "bir"
      | `sema -> "sema"
      | `kinds -> "kinds"
      | `size -> "size"
      | `invalid -> "invalid" in

    let as_flag = function
      | `insn | `bil | `bir -> ["pretty"]
      | `sema -> ["all-slots"]
      | `kinds | `size | `invalid -> [enabled] in

    let doc = function
      | `insn -> "Print the decoded instruction."
      | `bil -> "Print the BIL code."
      | `bir -> "Print the IR."
      | `sema -> "Print the knowledge base."
      | `kinds -> "Print semantics tags associated with instruction."
      | `size -> "Print the instruction size."
      | `invalid -> "Print invalid instructions." in

    let name s = "show-" ^ name_of_output s in
    Extension.Command.dictionary ~doc ~as_flag outputs
      Type.(list string) name

  let base =
    let doc = "Specify an address of first byte" in
    Command.parameter ~aliases:["b"] ~doc Type.string "address"

  let only_one =
    let doc = "Stop after the first instruction is decoded" in
    Command.flag ~doc "only-one"

  let stop_on_error = Command.flag "stop-on-errors"
      ~doc:"Stop disassembling on the first error and report it"

  let backend =
    let doc = "Specify the disassembler backend" in
    Command.parameter ~doc Type.(some string) "backend"

  let loader =
    Extension.Command.parameter
      ~doc:"Use the specified loader .
          Use the loader `raw' to load unstructured files"
      Extension.Type.(string =? "llvm") "loader"

end

module Dis = Disasm_expert.Basic

let bad_insn addr state start =
  let stop = Addr.(Dis.addr state - addr |> to_int |> ok_exn) in
  fail (Bad_insn (Dis.memory state, start, stop))

let escape_0x =
  String.substr_replace_all ~pattern:"0x" ~with_:"\\x"

let prepend_slash_x x = "\\x" ^ x

(** [to_binary ?escape s] make a binary string from ascii
    representation, (e.g., "\x01\x02..."). Apply optional
    escape function for each byte *)
let to_binary ?(map=ident) s =
  let seps = [' '; ','; ';'] in
  let separated = List.exists seps ~f:(String.mem s) in
  let bytes = if separated
    then String.split_on_chars ~on:seps s
    else List.init (String.length s / 2) ~f:(fun n ->
        String.slice s (n*2) (n*2+2)) in
  try List.map bytes ~f:map |>
      String.concat |>
      Scanf.unescaped |>
      Result.return
  with _ -> fail Bad_user_input

let read_input input =
  let input = match input with
    | [] | ["-"] -> In_channel.input_line In_channel.stdin
    | s -> Some (String.concat s) in
  match input with
  | None -> fail No_input
  | Some input -> match String.prefix input 2 with
    | "" | "\n" -> fail No_input
    | "\\x" -> to_binary input
    | "0x" ->  to_binary ~map:escape_0x input
    | _ -> to_binary ~map:prepend_slash_x input

let create_memory arch data base =
  let endian = Arch.endian arch in
  Memory.create endian base @@
  Bigstring.of_string data |> function
  | Ok r -> Ok r
  | Error e -> fail (Create_mem e)

let print_kinds formats insn =
  List.iter formats ~f:(fun _ ->
      Dis.Insn.kinds insn |>
      List.map ~f:sexp_of_kind |>
      List.iter ~f:(printf "%a@." Sexp.pp))

let new_insn arch mem insn =
  let open KB.Syntax in
  KB.Object.create Theory.Program.cls >>= fun code ->
  KB.provide Arch.slot code arch >>= fun () ->
  KB.provide Memory.slot code (Some mem) >>= fun () ->
  KB.provide Dis.Insn.slot code (Some insn) >>| fun () ->
  code

let lift arch mem insn =
  match KB.run Theory.Program.cls (new_insn arch mem insn) KB.empty with
  | Ok (code,_) -> Ok (KB.Value.get Theory.Program.Semantics.slot code)
  | Error conflict -> fail (Inconsistency conflict)

let print_insn_size formats mem =
  List.iter formats ~f:(fun _fmt ->
      printf "%#x@\n" (Memory.length mem))

let print_insn insn_formats insn =
  List.iter insn_formats ~f:(fun fmt ->
      Insn.with_printer fmt (fun () ->
          printf "%a@." Insn.pp insn))

let print_bil formats insn =
  let bil = Insn.bil insn in
  List.iter formats ~f:(fun fmt ->
      printf "%s@." (Bytes.to_string @@ Bil.to_bytes ~fmt bil))

let print_bir formats insn  =
  let bs = Blk.from_insn insn in
  List.iter formats ~f:(fun fmt ->
      printf "%s" @@ String.concat ~sep:"\n"
        (List.map bs ~f:(fun b -> Bytes.to_string @@ Blk.to_bytes ~fmt b)))

let print_sema formats sema = match formats with
  | [] -> ()
  | ["all-slots"] -> printf "%a@\n" KB.Value.pp sema
  | some_slots ->
    let pp = KB.Value.pp_slots some_slots in
    printf "%a@\n" pp sema

let equal_output = [%compare.equal: output]

let is_enabled = function
  | [opt] -> String.equal enabled opt
  | _ -> false

let formats outputs kind =
  match List.Assoc.find outputs kind ~equal:equal_output with
  | None -> []
  | Some fmts -> fmts

let print arch mem code formats =
  lift arch mem code >>| fun insn ->
  print_insn_size (formats `size) mem;
  print_insn (formats `insn) insn;
  print_bil (formats `bil) insn;
  print_bir (formats `bir) insn;
  print_sema (formats `sema) insn;
  print_kinds (formats `kinds) code

let parse_base arch base =
  Result.map_error ~f:(function
      | Invalid_argument str -> Fail (Invalid_base str)
      | exn -> Fail (Invalid_base (Exn.to_string exn))) @@
  Result.try_with @@ fun () ->
  Word.create (Bitvec.of_string base)
    (Size.in_bits (Arch.addr_size arch))


let create_disassembler ?(backend="llvm") arch =
  Dis.create ~backend (Arch.to_string arch) |>
  Result.map_error ~f:(fun err -> Fail (Disassembler_failed err))


let module_of_kind = function
  | `insn -> "Bap.Std.Insn"
  | `bil -> "Bap.Std.Bil"
  | `bir -> "Bap.Std.Blk"


let validate_module kind formats =
  let name = module_of_kind kind in
  Data.all_writers () |>
  List.find_map ~f:(fun (modname,fmts) ->
      Option.some_if (String.equal modname name) fmts) |> function
  | None ->
    failwithf "Unable to find printers for module %s" name ()
  | Some fmts ->
    let fmts = List.map fmts ~f:(fun (n,_,_) -> n) in
    let provided = Set.of_list (module String) fmts in
    Result.all_unit @@
    List.map formats ~f:(fun fmt ->
        if Set.mem provided fmt then Ok ()
        else Error (Unknown_format (name,fmt,fmts)))

let validate_formats formats =
  Result.map_error ~f:(fun err -> Fail err) @@
  Result.all_unit @@
  List.map formats ~f:(function
      | (`insn|`bil|`bir) as kind,fmts ->
        validate_module kind fmts
      | (`kinds|`size|`invalid),[] -> Ok ()
      | (`kinds|`size|`invalid),[opt]
        when String.equal enabled opt -> Ok ()
      | `kinds,_ -> Error (No_formats_expected "kinds")
      | `size,_ -> Error (No_formats_expected "size")
      | `invalid,_ -> Error (No_formats_expected "invalid")
      | `sema,_ ->
        (* no validation right now, since the knowledge introspection
           is not yet implemented *)
        Ok ())


let print_invalid _pos =

  Format.printf "<invalid>@\n"

let run ?(only_one=false) ?(stop_on_error=false) dis arch mem formats =
  let show_invalid = is_enabled (formats `invalid) in
  Dis.run dis mem
    ~init:0
    ~return:Result.return
    ~stop_on:[`Valid]
    ~invalid:(fun state _ pos ->
        if show_invalid then print_invalid pos;
        if stop_on_error
        then bad_insn (Memory.min_addr mem) state pos
        else Dis.step state pos)
    ~hit:(fun state mem insn bytes ->
        print arch mem insn formats >>= fun () ->
        if only_one then Dis.stop state bytes
        else Dis.step state (bytes + Memory.length mem))

let () = Extension.Command.(begin
    declare ~doc:mc_man "mc"
      Spec.(args $arch $base $backend $only_one $stop_on_error $input $outputs)
  end) @@ fun arch base backend only_one stop_on_error input outputs _ctxt ->
  validate_formats outputs >>= fun () ->
  parse_base arch base >>= fun base ->
  read_input input >>= fun data ->
  create_memory arch data base >>= fun mem ->
  create_disassembler ?backend arch >>= fun dis ->
  let formats = formats outputs in
  run ~only_one ~stop_on_error dis arch mem formats >>= fun bytes ->
  Dis.close dis;
  match String.length data - bytes with
  | 0 -> Ok ()
  | _ when only_one -> Ok ()
  | n -> fail (Trailing_data n)

let () = Extension.Command.(begin
    declare ~doc:objdump_man "objdump"
      Spec.(args $backend $loader $stop_on_error $file $outputs)
  end) @@ fun backend loader stop_on_error input outputs _ctxt ->
  validate_formats outputs >>= fun () ->
  let formats = formats outputs in
  match Image.create ~backend:loader input with
  | Error err -> Error (Fail (Loader_failed err))
  | Ok (img,_warns) ->
    let arch = Image.arch img in
    create_disassembler ?backend arch >>= fun dis ->
    Image.memory img |>
    Memmap.to_sequence |>
    Seq.filter_map ~f:(fun (mem,data) ->
        Option.some_if
          (Value.is Image.code_region data) mem) |>
    Seq.map ~f:(fun mem ->
        run ~stop_on_error dis arch mem formats >>= fun _bytes ->
        Ok ()) |>
    Seq.to_list |>
    Result.all_unit


let format_info get_fmts =
  get_fmts () |> List.map ~f:fst3 |> String.concat ~sep:", "


let string_of_failure = function
  | Inconsistency conflict ->
    Format.asprintf "Lifters failed with a conflict: %a"
      KB.Conflict.pp conflict
  | Bad_user_input -> "Could not parse: malformed input"
  | No_input -> "No input was received"
  | Unknown_arch ->
    sprintf "Unknown architecture. Supported architectures:\n%s" @@
    String.concat ~sep:"\n" @@ List.map Arch.all ~f:Arch.to_string
  | Trailing_data 1 -> "the last byte wasn't disassembled"
  | Trailing_data left ->
    sprintf "%d bytes were left non disassembled" left
  | Create_mem err ->
    Format.asprintf "Unable to create a memory: %a" Error.pp err
  | Invalid_base msg ->
    sprintf "Failed to parse the base address: %s" msg
  | Disassembler_failed err ->
    Format.asprintf "Failed to create the disassembler backend: %a"
      Error. pp err
  | Bad_insn (mem,boff,stop)->
    let dump = Memory.hexdump mem |> Bytes.of_string in
    let line = boff / 16 in
    let pos off = line * 77 + (off mod 16) * 3 + 9 in
    Bytes.set dump (pos boff) '(';
    Bytes.set dump (pos stop) ')';
    sprintf "Invalid instruction at offset %d:\n%s"
      boff (Bytes.to_string dump)
  | Unknown_format (mname,fmt,fmts) ->
    let pp_sep = Format.pp_print_newline in
    Format.asprintf "@[<v2>Unknown printer %s for %s, expecting: %a@]"
      fmt mname Format.(pp_print_list ~pp_sep pp_print_string) fmts
  | No_formats_expected name ->
    sprintf "--show-%s doesn't expect any formats yet" name
  | Loader_failed err ->
    Format.asprintf "Failed to unpack the file: %a" Error.pp err


let () = Extension.Error.register_printer @@ function
  | Fail err -> Some (string_of_failure err)
  | _ -> None
OCaml

Innovation. Community. Security.