package hardcaml_c

  1. Overview
  2. Docs

Source file simulator.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
open Core
open Hardcaml
module Unix = Core_unix

let signals_per_function = 1000

type t =
  { total_words : int
  ; offsets : [ `Global of int | `Local of int ] Signal.Type.Uid_map.t
  ; circuit : Circuit.t
  ; functions : string list ref
  }

let signal_allocated_width signal =
  match signal with
  | Signal.Type.Multiport_mem { size; _ } ->
    if Signal.width signal <= 8
    then (size + Codegen.word_bytes - 1) / Codegen.word_bytes
    else Codegen.width_to_word_count (Signal.width signal) * size
  | Const _ ->
    (* only optimize out small constants to simplify codegen *)
    if Signal.width signal <= Codegen.word_size
    then 0
    else Codegen.width_to_word_count (Signal.width signal)
  | Wire { driver; _ } ->
    (* empty wires are inputs, other wires can be always eliminated *)
    if Signal.is_empty !driver
    then Codegen.width_to_word_count (Signal.width signal)
    else 0
  | Reg _ ->
    (* registers need to keep a copy of the input signal from the previous cycle *)
    2 * Codegen.width_to_word_count (Signal.width signal)
  | _ -> Codegen.width_to_word_count (Signal.width signal)
;;

module C_scheduling_deps = Signal.Type.Make_deps (struct
  let fold (t : Signal.t) ~init ~f =
    match t with
    | Mem_read_port { memory; read_address; _ } ->
      let init = f init read_address in
      f init memory
    | Reg _ -> init
    | Multiport_mem _ -> init
    | Empty | Const _ | Op2 _ | Mux _ | Not _ | Cat _ | Wire _ | Select _ | Inst _ ->
      Signal.Type.Deps.fold t ~init ~f
  ;;
end)

let schedule_signals circuit =
  (* Topologically sort signals. This is similar to Signal_graph.topological_sort, but
     ensures that portions of the graph that are trees are scheduled in postorder DFS
     fashion (which probably improves cache behaviour). *)
  let queue = Queue.of_list (Circuit.outputs circuit) in
  let visited = Hash_set.create (module Signal.Uid) in
  let result = Queue.create () in
  let rec visit signal =
    if not (Hash_set.mem visited (Signal.uid signal))
    then (
      Hash_set.add visited (Signal.uid signal);
      Signal.Type.Deps.iter signal ~f:(Queue.enqueue queue);
      (C_scheduling_deps.iter signal) ~f:visit;
      Queue.enqueue result signal)
  in
  while not (Queue.is_empty queue) do
    visit (Queue.dequeue_exn queue)
  done;
  Queue.to_list result
;;

let rec unwrap_wire (s : Signal.t) =
  match s with
  | Wire { driver; _ } -> if Signal.is_empty !driver then s else unwrap_wire !driver
  | _ -> s
;;

let allocate_offsets interesting_signals circuit =
  let ordering = schedule_signals circuit in
  let section_numbers =
    List.mapi ordering ~f:(fun i signal ->
      let section =
        if Signal.Type.is_reg signal || Signal.Type.is_mem signal
        then -1 (* sequential elements are in separate functions *)
        else i / signals_per_function
      in
      Signal.uid signal, section)
    |> Map.of_alist_exn (module Signal.Uid)
  in
  let users =
    List.concat_map ordering ~f:(fun signal ->
      Signal.Type.Deps.rev_map signal ~f:(fun d -> Signal.uid (unwrap_wire d), signal))
    |> Map.of_alist_multi (module Signal.Uid)
  in
  (* can the signal be allocated as a local function variable? *)
  let is_local signal =
    let my_section = Map.find_exn section_numbers (Signal.uid signal) in
    let all_users_of_this_signal_are_in_same_section =
      Map.find_multi users (Signal.uid signal)
      |> List.for_all ~f:(fun user ->
           Map.find_exn section_numbers (Signal.uid user) = my_section)
    in
    all_users_of_this_signal_are_in_same_section
    && (not (Set.mem interesting_signals (Signal.uid signal)))
    && Signal.width signal <= Codegen.word_size
    && (not (Signal.Type.is_reg signal))
    && not (Signal.Type.is_mem signal)
  in
  let local_counter = ref 0 in
  List.fold
    ordering
    ~init:(0, Map.empty (module Signal.Uid))
    ~f:(fun (offset, acc) signal ->
      let word_count = signal_allocated_width signal in
      let is_local = is_local signal in
      if is_local then Int.incr local_counter;
      ( (if is_local then offset else offset + word_count)
      , Map.add_exn
          acc
          ~key:(Signal.uid signal)
          ~data:(if is_local then `Local !local_counter else `Global offset) ))
;;

let create ?(interesting_signals = []) circuit =
  let interesting_signals =
    Circuit.inputs circuit @ Circuit.outputs circuit @ interesting_signals
  in
  let interesting_signals =
    List.map ~f:(fun s -> Signal.uid (unwrap_wire s)) interesting_signals
    |> Set.of_list (module Signal.Uid)
  in
  let total_words, offsets = allocate_offsets interesting_signals circuit in
  { total_words = Int.max total_words 1; offsets; functions = ref []; circuit }
;;

let rec to_signal_info t signal =
  let index = Map.find_exn t.offsets (Signal.uid signal) in
  let normal = Codegen.Normal { Codegen.width = Signal.width signal; index } in
  match signal with
  | Signal.Type.Const { constant; _ } ->
    if Bits.width constant <= 64 then Codegen.Const constant else normal
  | Wire { driver; _ } ->
    if Signal.is_empty !driver
    then normal
    else (
      match to_signal_info t !driver with
      | Codegen.Normal i -> Virtual i
      | info -> info)
  | _ -> normal
;;

let cached_to_signal_info t =
  (* speedup signal info lookup for input/output ports *)
  let cache = Hashtbl.create (module Signal.Uid) in
  fun signal ->
    Hashtbl.find_or_add cache (Signal.uid signal) ~default:(fun () ->
      to_signal_info t signal)
;;

let make_comb_code t =
  schedule_signals t.circuit
  |> List.map ~f:(fun signal ->
       Codegen.compile_comb_signal ~to_signal_info:(to_signal_info t) signal)
;;

let last_layer_of_nodes circuit =
  (*
     This is a slightly simpler version of Circuit_graph.last_layer_of_nodes.
     As in Hardcaml_c registers also generate some code in combinatorial
     section, they need to be included in last layer of nodes.
  *)
  let in_last_layer = Hash_set.create (module Signal.Uid) in
  let rec visit_signal signal =
    if not (Hash_set.mem in_last_layer (Signal.uid signal))
    then (
      Hash_set.add in_last_layer (Signal.uid signal);
      if not
           (Signal.is_empty signal
            || Signal.Type.is_mem signal
            || Signal.Type.is_reg signal)
      then C_scheduling_deps.iter signal ~f:visit_signal)
  in
  List.iter (Circuit.outputs circuit) ~f:visit_signal;
  Hash_set.to_list in_last_layer |> Set.of_list (module Signal.Uid)
;;

let make_comb_last_layer_code t =
  let last_layer = last_layer_of_nodes t.circuit in
  schedule_signals t.circuit
  |> List.filter_map ~f:(fun signal ->
       if Set.mem last_layer (Signal.uid signal)
       then Some (Codegen.compile_comb_signal ~to_signal_info:(to_signal_info t) signal)
       else None)
;;

let make_reset_code t =
  schedule_signals t.circuit
  |> List.map ~f:(fun signal ->
       Codegen.compile_reset_signal ~to_signal_info:(to_signal_info t) signal)
  |> List.filter ~f:(fun l -> not (String.equal l ""))
;;

let make_seq_code t =
  schedule_signals t.circuit
  |> List.map ~f:(fun signal ->
       Codegen.compile_seq_signal ~to_signal_info:(to_signal_info t) signal)
  |> List.filter ~f:(fun l -> not (String.equal l ""))
;;

module Instance = struct
  type t =
    { to_signal_info : Signal.t -> Codegen.signal_info
    ; memory : char Ctypes.CArray.t
    ; memory_bigstring : Bigstring.t
    ; eval_library : Dl.library ref
    ; functions : (unit -> unit) array
    }

  external caml_bigstring_get64u : Bigstring.t -> int -> int64 = "%caml_bigstring_get64u"

  external caml_bigstring_set64u
    :  Bigstring.t
    -> int
    -> int64
    -> unit
    = "%caml_bigstring_set64u"

  external caml_bytes_get64u : Bytes.t -> int -> int64 = "%caml_bytes_get64u"
  external caml_bytes_set64u : Bytes.t -> int -> int64 -> unit = "%caml_bytes_set64u"

  let read t signal =
    let signal_info = t.to_signal_info signal in
    match signal_info with
    | Const c -> c
    | _ ->
      let pos = Codegen.word_offset signal_info in
      let size_words = Codegen.word_count signal_info in
      let dst = Bits.zero (Signal.width signal) in
      let dst_underlying_repr = Bits.Expert.unsafe_underlying_repr dst in
      let offset_for_data = Bits.Expert.offset_for_data in
      for i = 0 to size_words - 1 do
        caml_bytes_set64u
          dst_underlying_repr
          (offset_for_data + (i * 8))
          (caml_bigstring_get64u t.memory_bigstring ((pos + i) * 8))
      done;
      dst
  ;;

  let read_mutable t signal dst =
    let signal_info = t.to_signal_info signal in
    match signal_info with
    | Const c -> Bits.Mutable.copy_bits ~src:c ~dst
    | _ ->
      let pos = Codegen.word_offset signal_info in
      let size_words = Codegen.word_count signal_info in
      let dst_underlying_repr = (dst :> Bytes.t) in
      let offset_for_data = Bits.Expert.offset_for_data in
      for i = 0 to size_words - 1 do
        caml_bytes_set64u
          dst_underlying_repr
          (offset_for_data + (i * 8))
          (caml_bigstring_get64u t.memory_bigstring ((pos + i) * 8))
      done
  ;;

  let write t signal bits =
    let signal_info = t.to_signal_info signal in
    (* Hardcaml_c assumes unused bits are set to zero, while in Hardcaml they can have
       arbitrary value. Mask them out. *)
    let bits_underlying_repr = Bits.Expert.unsafe_underlying_repr bits in
    let offset_for_data = Bits.Expert.offset_for_data in
    let pos = Codegen.word_offset signal_info in
    let size_words = Codegen.word_count signal_info in
    let width = Codegen.width signal_info in
    for i = 0 to size_words - 1 do
      let width_left = width - (i * Codegen.word_size) in
      let value = caml_bytes_get64u bits_underlying_repr (offset_for_data + (i * 8)) in
      let value =
        if width_left < Codegen.word_size
        then Int64.(value land ((Int64.one lsl width_left) - Int64.one))
        else value
      in
      caml_bigstring_set64u t.memory_bigstring ((pos + i) * 8) value
    done
  ;;

  let run_function t id = t.functions.(id) ()
end

let format_single_function name lines =
  sprintf
    {|
static void %s(uint64_t* memory) {
  %s
}
    |}
    name
    (String.concat ~sep:"\n  " lines)
;;

let format_function name lines =
  let blocks = List.chunks_of ~length:signals_per_function lines in
  let bodies =
    List.mapi blocks ~f:(fun i block ->
      format_single_function (sprintf "%s_%d" name i) block)
    |> String.concat ~sep:"\n"
  in
  let footer =
    List.mapi blocks ~f:(fun i _ -> sprintf "%s_%d(memory);" name i)
    |> String.concat ~sep:"\n  "
  in
  sprintf {|
%s
void %s(uint64_t* memory) {
  %s
} |} bodies name footer
;;

let make_c_source t =
  let functions = !(t.functions) |> String.concat ~sep:"\n" in
  Simulate_c_header.header ^ functions
;;

let add_function t body =
  let function_id = List.length !(t.functions) in
  t.functions := format_function (sprintf "f%d" function_id) body :: !(t.functions);
  fun instance -> Instance.run_function instance function_id
;;

let start ?(compiler_command = "gcc -O0") t =
  let dir = Filename_unix.temp_dir "hardcaml-c" "" in
  (let c_file = Out_channel.create (dir ^ "/eval.c") in
   let source = make_c_source t in
   Core.fprintf c_file "%s" source;
   Out_channel.close c_file);
  (match
     Unix.system
       (sprintf
          !"%s -fno-strict-aliasing -g %s/eval.c -shared -o %s/eval.so -fPIC"
          compiler_command
          dir
          dir)
   with
   | Ok () -> ()
   | Error e -> raise_s [%message "could not compile" (e : Unix.Exit_or_signal.error)]);
  let eval_library =
    ref (Dl.dlopen ~filename:(sprintf "%s/eval.so" dir) ~flags:[ Dl.RTLD_NOW ])
  in
  Gc.Expert.add_finalizer_exn eval_library (fun lib -> Dl.dlclose ~handle:!lib);
  Core_unix.unlink (dir ^ "/eval.c");
  Core_unix.unlink (dir ^ "/eval.so");
  Core_unix.rmdir dir;
  let memory = Ctypes.CArray.make Ctypes.char (t.total_words * Codegen.word_bytes) in
  let open Ctypes in
  let functions =
    List.mapi !(t.functions) ~f:(fun id _ ->
      let f =
        Ctypes_foreign_flat.Foreign.foreign
          ~from:!eval_library
          (sprintf "f%d" id)
          (Ctypes.ptr Ctypes.char @-> returning void)
      in
      fun () -> f (Ctypes.CArray.start memory))
    |> Array.of_list
  in
  let memory_bigstring = Ctypes.bigarray_of_array Ctypes.array1 Bigarray.Char memory in
  let instance =
    { Instance.to_signal_info = cached_to_signal_info t
    ; memory
    ; memory_bigstring
    ; eval_library
    ; functions
    }
  in
  instance
;;
OCaml

Innovation. Community. Security.