package hack_parallel

  1. Overview
  2. Docs
Parallel and shared memory library

Install

Dune Dependency

Authors

Maintainers

Sources

1.0.1.tar.gz
md5=ba7c72bc207e326b72e294fc76f6ad2c
sha512=5020d47f97bea2f88e2a40411894d03232a7f2282606926c93c7d4c96d72e94a966be852897a9b16f7e0893ba376512045abb9d93020a7c03c3def4f3d918f8e

doc/src/hack_parallel.utils/measure.ml.html

Source file measure.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
(**
 * Copyright (c) 2015, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the "hack" directory of this source tree. An additional grant
 * of patent rights can be found in the PATENTS file in the same directory.
 *
*)

(**
 * The Measure module is primarily useful for debugging. It's particularly
 * useful for gathering stats about something that happens a lot. Let's say you
 * have some code like this
 *
 *   let number_bunnies = count_bunnies () in
 *
 * If you want to debug how many bunnies are being counted, you could do
 * something like
 *
 *   let number_bunnies = count_bunnies () in
 *   Utils.prerr_endlinef "Num bunnies: %d" number_bunnies;
 *
 * but what if this code is called 1000 times? Then you end up with log spew.
 * Using the Measure module helps with this. You can now do
 *
 *   let number_bunnies = count_bunnies () in
 *   Measure.sample "num_bunnies" number_bunnies;
 *
 * and then later you do
 *
 *   Measure.print_stats ();
 *
 * which will print the number of samples, the total, the average, the
 * standard deviation, the max and the min.
 *
 * Measure can keep track of the distribution of measurements if you give it a
 * bucket size. Before we collect our measurements, call
 *
 *   Measure.track_distribution "num_bunnies" ~bucket_size:10.0;
 *   ...do logging
 *   Measure.print_distributions ();
 *
 * And this will print how many samples fall in the 0-9 bucket, how many fall
 * into the 10-19 bucket, etc
 *
 * A common use case is timing, and there's an easy helper method. Let's say we
 * wanted to see how long our code takes
 *
 *   let number_bunnies = Measure.time "count_bunnies_time" (fun () ->
 *     count_bunnies ()
 *   ) in
 *
 * now when we call print_stats we'll see how fast count_bunnies is and how
 * much total time we spend counting bunnies.
 *
 * Measurements are stored in a stateful way in a record. You can either use a
 * global record or a local record.
 *
 * Using a global record:
 *   Measure.sample "num_bunnies" number_bunnies;
 *   Measure.print_stats ();
 *
 * You can push and pop the global record. This is useful if you want to reset
 * some counters without throwing away that data
 *
 *   Measure.push_global ();
 *   ...measure stuff
 *   let record = Measure.pop_global () in
 *   Measure.print_stats ~record ();
 *
 * Using a local record:
 *   let record = Measure.create () in
 *   Measure.sample ~record "num_bunnies" number_bunnies;
 *   Measure.print_stats ~record ();
 *
 * A record does not store the individual measurements, just the aggregate
 * stats, which are updated online. Records can be serialized in order to be
 * sent across pipes.
*)

module List = Hack_core.List
module FloatMap = MyMap.Make(struct type t = float let compare = compare end)

(* Histogram of the sampled values for a single measurement name. *)
type distribution = {
  (* Width of each bucket; samples are rounded down to a multiple of it. *)
  bucket_size: float;
  (* Number of samples per bucket, keyed by the rounded-down sample value
   * (see round_down and update_distribution). *)
  buckets: int FloatMap.t;
}

(* Aggregate statistics kept for one measurement name. Individual samples are
 * not stored; the fields are recomputed each time a sample is added or two
 * entries are merged. *)
type record_entry = {
  (* Number of samples folded in so far. *)
  count: int;
  (* Running mean of the samples. *)
  mean: float;
  (* Running sum maintained by the online variance algorithm in sample;
   * print_entry_stats divides it by count and takes the square root to report
   * the standard deviation. *)
  variance_sum: float;
  (* Largest sample value folded in so far (initial value comes from
   * new_entry). *)
  max: float;
  (* Smallest sample value folded in so far (initial value comes from
   * new_entry). *)
  min: float;
  (* Bucketed histogram, or None when distribution tracking has not been
   * turned on for this name. *)
  distribution: distribution option;
}
(* Immutable map from measurement name to its aggregate stats. This is the
 * value returned by serialize and accepted by deserialize. *)
type record_data = record_entry SMap.t
(* Mutable handle to a record_data; the sampling and merging functions update
 * it in place. *)
type record = record_data ref

(* Builds a fresh record holding no measurements. Each call returns a distinct
 * mutable cell. *)
let create () =
  let no_entries = SMap.empty in
  ref no_entries

(* Stack of global records; its head is the record used whenever a function is
 * called without ~record. It starts out holding one empty record. *)
let global: (record list) ref =
  let initial_record = create () in
  ref [initial_record]
(* Pushes a brand new empty record on top of the global stack, so that
 * subsequent record-less calls write into it. The argument is ignored. *)
let push_global _record =
  let fresh = create () in
  global := fresh :: !global
(* Removes the record on top of the global stack and returns it.
 * Fails (via failwith) when the stack is empty. *)
let pop_global () =
  match !global with
  | [] -> failwith "Measure.pop_global called with empty stack"
  | top :: rest ->
      global := rest;
      top


(* Extracts the current contents of a record, leaving the mutable cell
 * behind. *)
let serialize { contents } = contents
(* Wraps previously serialized data in a new mutable cell so it can be used as
 * a record again. *)
let deserialize data = { contents = data }

(* The entry used for a name that has no samples yet.
 *
 * max and min start at the identity elements of Pervasives.max and
 * Pervasives.min, so the first real sample always replaces them. Note that
 * min_float is NOT suitable as the starting max: it is the smallest positive
 * float, so a series made only of negative samples would never rise above
 * it. *)
let new_entry = {
  count = 0;
  mean = 0.0;
  variance_sum = 0.0;
  max = neg_infinity;
  min = infinity;
  distribution = None;
}

(* Builds an empty distribution with the given bucket width, already wrapped
 * in Some so it can be stored directly in a record_entry. *)
let new_distribution ~bucket_size =
  let no_buckets = FloatMap.empty in
  Some { bucket_size; buckets = no_buckets }

(* Resolves the optional ~record argument accepted throughout this module: an
 * explicitly supplied record wins, otherwise the record on top of the global
 * stack is used. Fails (via failwith) when no record was supplied and the
 * global stack is empty. *)
let get_record = function
  | Some explicit -> explicit
  | None ->
      begin match !global with
        | top :: _ -> top
        | [] ->
            failwith ("No global record available! " ^
                      "Did you forget to call Measure.push_global?")
      end


(* Distribution tracking is off by default. Calling this turns it on for
 * [name]: from then on every sample is also counted in a bucket of width
 * [bucket_size]. Any stats already gathered for [name] are kept, but the
 * distribution itself is (re)started empty. *)
let track_distribution ?record name ~bucket_size =
  let target = get_record record in
  let existing =
    match SMap.get name !target with
    | Some entry -> entry
    | None -> new_entry
  in
  let distribution = new_distribution ~bucket_size in
  target := SMap.add name { existing with distribution } !target

(* Returns the largest multiple of [bucket_size] that does not exceed [value];
 * this is the key of the bucket that [value] falls into. *)
let round_down ~bucket_size value =
  let whole_buckets = floor (value /. bucket_size) in
  bucket_size *. whole_buckets

(* Counts [value] in its bucket. When no distribution is being tracked (None)
 * there is nothing to update and None is returned. *)
let update_distribution value tracked =
  match tracked with
  | None -> None
  | Some { bucket_size; buckets } ->
      let key = round_down ~bucket_size value in
      let seen_before =
        match FloatMap.get key buckets with
        | Some n -> n
        | None -> 0
      in
      Some { bucket_size; buckets = FloatMap.add key (seen_before + 1) buckets }

(* Folds one more measurement [value] into the stats stored under [name],
 * creating the entry if this is the first sample. Uses the global record when
 * ~record is omitted. *)
let sample ?record name value =
  let target = get_record record in
  let previous =
    match SMap.get name !target with
    | Some entry -> entry
    | None -> new_entry
  in

  let count = previous.count + 1 in
  (* Online mean/variance update (Knuth): the variance term multiplies the
   * distance to the old mean by the distance to the new mean. *)
  let distance_to_old_mean = value -. previous.mean in
  let mean = previous.mean +. (distance_to_old_mean /. (float count)) in
  let variance_sum =
    previous.variance_sum +. distance_to_old_mean *. (value -. mean) in

  let entry = {
    count;
    mean;
    variance_sum;
    max = Pervasives.max previous.max value;
    min = Pervasives.min previous.min value;
    distribution = update_distribution value previous.distribution;
  } in
  target := SMap.add name entry !target

(* Combines the aggregate stats stored under [name] in two records; it has the
 * shape expected by SMap.merge. An entry present on only one side is returned
 * unchanged. When both sides are present, the counts, means, variance sums,
 * extrema and bucket counts are combined into a single entry.
 *
 * Fails (via failwith) if both entries track a distribution but with
 * different bucket sizes. *)
let merge_entries name from into = match (from, into) with
  | None, into -> into
  | from, None -> from
  | Some from, Some into ->
      let count = from.count + into.count in

      let mean, variance_sum =
        if count = 0
        then
          (* Neither side holds a sample yet (for example track_distribution
           * was called on both but nothing was measured). The weighted
           * formulas below would divide by zero and store nan, which would
           * then contaminate every later sample, so skip the weighting. *)
          (into.mean, from.variance_sum +. into.variance_sum)
        else begin
          (* Using this algorithm to combine the variance sums
           * https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm
          *)
          (* d = meanB - meanA *)
          let delta = from.mean -. into.mean in
          (* mean = meanA + delta * (countB/count) *)
          let mean =
            into.mean +. (delta *. (float from.count) /. (float count)) in
          (* VarSum = VarSumA + VarSumB + delta * delta * countA * countB / count *)
          let variance_sum = from.variance_sum +. into.variance_sum +.
                             delta *. delta *. (float into.count) *. (float from.count) /. (float count) in
          (mean, variance_sum)
        end in

      let max = Pervasives.max from.max into.max in
      let min = Pervasives.min from.min into.min in

      let distribution = match (from.distribution, into.distribution) with
        | None, only_side -> only_side
        | only_side, None -> only_side
        | Some from_dist, Some into_dist ->
            if from_dist.bucket_size <> into_dist.bucket_size
            then
              (* Buckets of different widths cannot be added together. *)
              Printf.kprintf failwith
                "Merging buckets for %s failed: bucket sizes %f, %f"
                name from_dist.bucket_size into_dist.bucket_size
            else begin
              (* Same width on both sides: add the counts bucket by bucket. *)
              let add_counts _bucket from_count into_count =
                match (from_count, into_count) with
                | None, only_count -> only_count
                | only_count, None -> only_count
                | Some from_count, Some into_count ->
                    Some (from_count + into_count) in
              let buckets =
                FloatMap.merge add_counts from_dist.buckets into_dist.buckets in
              Some { bucket_size = from_dist.bucket_size; buckets; }
            end in
      Some { count; mean; variance_sum; max; min; distribution; }

(* Folds every entry of "from" into "record", combining entries that exist on
 * both sides with merge_entries. The global record is the destination when
 * "record" is omitted; "from" itself is left untouched.
 * The trailing ignored argument lets the typechecker decide whether ?record
 * was supplied (see warning 16).
 *)
let merge ?record ~from _ =
  let destination = get_record record in
  let combined = SMap.merge merge_entries !from !destination in
  destination := combined

(* Runs [f], records how long it took (difference of two Unix.gettimeofday
 * readings) as a sample under [name], and returns the result of [f]. The
 * record is resolved before [f] runs. If [f] raises, the exception propagates
 * and no sample is recorded. *)
let time (type a) ?record name (f: unit -> a) : a =
  let target = get_record record in
  let started_at = Unix.gettimeofday () in
  let result = f () in
  let finished_at = Unix.gettimeofday () in
  let elapsed = finished_at -. started_at in
  sample ~record:target name elapsed;
  result

(* Returns the total of all samples recorded under [name], reconstructed as
 * count times mean, or None when [name] has no entry. *)
let get_sum ?record name =
  let target = get_record record in
  match SMap.get name !target with
  | Some { count = samples; mean = average; _ } ->
      Some (float_of_int samples *. average)
  | None -> None

(* Formats a number for the stats output. Values strictly above a thousand,
 * a million or a billion are scaled and suffixed with K, M or G (three
 * decimals). Other whole numbers print as integers, and everything else uses
 * the default %f format. *)
let pretty_num f =
  let giga = 1000000000.0 in
  let mega = 1000000.0 in
  let kilo = 1000.0 in
  if f > giga then Printf.sprintf "%.3fG" (f /. giga)
  else if f > mega then Printf.sprintf "%.3fM" (f /. mega)
  else if f > kilo then Printf.sprintf "%.3fK" (f /. kilo)
  else if floor f = f then Printf.sprintf "%d" (int_of_float f)
  else Printf.sprintf "%f" f

(* Prints one line of aggregate stats for [name] to stderr: sample count,
 * total, average, standard deviation, max and min, each formatted with
 * pretty_num. Prints NO DATA instead when [name] has no entry or no samples.
 * Uses the global record when ~record is omitted. *)
let print_entry_stats ?record name =
  let record = get_record record in
  Printf.eprintf "%s stats -- " name;
  match SMap.get name (!record) with
  | None
  | Some { count = 0; _; } -> prerr_endline "NO DATA"
  | Some { count; mean; variance_sum; max; min; distribution=_; } ->
      let samples = float count in
      (* The total is not stored; rebuild it from the count and the mean. *)
      let total = samples *. mean in
      (* Population standard deviation: the variance sum is divided by the
       * sample count, which the match above guarantees is non-zero. *)
      let std_dev = sqrt (variance_sum /. samples) in
      Utils.prerr_endlinef
        "samples: %s, total: %s, avg: %s, stddev: %s, max: %s, min: %s"
        (pretty_num samples) (pretty_num total) (pretty_num mean)
        (pretty_num std_dev) (pretty_num max) (pretty_num min)
(* Prints the stats line of every name stored in the record (the global one
 * when ~record is omitted), in the iteration order of SMap.iter. *)
let print_stats ?record () =
  let target = get_record record in
  let print_one name _entry = print_entry_stats ~record:target name in
  SMap.iter print_one !target

(* Prints to stderr the sample count of every bucket from [low] to [high]
 * inclusive, in increasing order; buckets with no samples print a count of 0.
 *
 * [low] and [high] are expected to be bucket keys, i.e. values produced by
 * round_down with the same [bucket_size].
 *
 * Bucket keys are stored as bucket_size *. k for a whole number k. Walking
 * from [low] by repeatedly adding [bucket_size] accumulates rounding error
 * (with a bucket size of 0.1 the sixth step yields 0.6 while the stored key is
 * 0.6000000000000001), so lookups would miss populated buckets. Instead the
 * bucket index k is recovered once for each bound and every key is recomputed
 * with the same multiplication round_down uses. This also guarantees
 * termination when [bucket_size] is not positive. *)
let print_buckets ~low ~high ~bucket_size buckets =
  (* Nearest whole number of buckets; the rounding absorbs the tiny error of
   * dividing a previously multiplied key. *)
  let index_of bound = floor ((bound /. bucket_size) +. 0.5) in
  let last_index = index_of high in
  let rec print_from index =
    if index <= last_index
    then begin
      let bucket = bucket_size *. index in
      let count = match FloatMap.get bucket buckets with
        | None -> 0
        | Some count -> count in
      Printf.eprintf "[%02f: %03d]  " bucket count;
      print_from (index +. 1.0)
    end in
  print_from (index_of low)

(* Prints the bucket counts for [name] to stderr, covering every bucket between
 * the one holding the smallest sample and the one holding the largest. Prints
 * a NO DATA message instead when [name] has no samples or when its
 * distribution is not being tracked. Uses the global record when ~record is
 * omitted. *)
let print_entry_distribution ?record name =
  let target = get_record record in
  Printf.eprintf "%s distribution -- " name;
  match SMap.get name !target with
  | None | Some { count = 0; _ } ->
      prerr_endline "NO DATA"
  | Some { distribution = None; _ } ->
      prerr_endline "NO DATA (did you forget to call track_distribution?)"
  | Some {
      min = smallest;
      max = largest;
      distribution = Some { bucket_size; buckets };
      _
    } ->
      let first_bucket = round_down ~bucket_size smallest in
      let last_bucket = round_down ~bucket_size largest in
      print_buckets ~low:first_bucket ~high:last_bucket ~bucket_size buckets;
      prerr_newline ()

(* Prints the distribution of every name in the record that has distribution
 * tracking turned on; names without it are skipped silently. Uses the global
 * record when ~record is omitted. *)
let print_distributions ?record () =
  let target = get_record record in
  let print_if_tracked name entry =
    match entry.distribution with
    | Some _ -> print_entry_distribution ~record:target name
    | None -> ()
  in
  SMap.iter print_if_tracked !target
OCaml

Innovation. Community. Security.