package hack_parallel
Parallel and shared memory library
Install
Dune Dependency
Authors
Maintainers
Sources
1.0.1.tar.gz
md5=ba7c72bc207e326b72e294fc76f6ad2c
sha512=5020d47f97bea2f88e2a40411894d03232a7f2282606926c93c7d4c96d72e94a966be852897a9b16f7e0893ba376512045abb9d93020a7c03c3def4f3d918f8e
doc/src/hack_parallel.utils/measure.ml.html
Source file measure.ml
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309
(**
 * Copyright (c) 2015, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under the BSD-style license found in the
 * LICENSE file in the "hack" directory of this source tree. An additional grant
 * of patent rights can be found in the PATENTS file in the same directory.
 *
 *)

(**
 * The Measure module is primarily useful for debugging. It's particularly
 * useful for gathering stats about something that happens a lot. Let's say you
 * have some code like this
 *
 *   let number_bunnies = count_bunnies () in
 *
 * If you want to debug how many bunnies are being counted, you could do
 * something like
 *
 *   let number_bunnies = count_bunnies () in
 *   Utils.prerr_endlinef "Num bunnies: %d" number_bunnies;
 *
 * but what if this code is called 1000 times? Then you end up with log spew.
 * Using the Measure module helps with this. Samples are floats, so you can
 * now do
 *
 *   let number_bunnies = count_bunnies () in
 *   Measure.sample "num_bunnies" (float number_bunnies);
 *
 * and then later you do
 *
 *   Measure.print_stats ();
 *
 * which will print the number of samples, the total, the average, the
 * standard deviation, the max and the min.
 *
 * Measure can keep track of the distribution of measurements if you give it a
 * (positive, float) bucket size. Before we collect our measurements, call
 *
 *   Measure.track_distribution "num_bunnies" ~bucket_size:10.0;
 *   ...do logging
 *   Measure.print_distributions ();
 *
 * And this will print how many samples fall in the 0-9 bucket, how many fall
 * into the 10-19 bucket, etc
 *
 * A common use case is timing, and there's an easy helper method. Let's say we
 * wanted to see how long our code takes
 *
 *   let number_bunnies = Measure.time "count_bunnies_time" (fun () ->
 *     count_bunnies ()
 *   ) in
 *
 * now when we call print_stats we'll see how fast count_bunnies is and how
 * much total time we spend counting bunnies.
 *
 * Measurements are stored in a stateful way in a record. You can either use a
 * global record or a local record.
 *
 * Using a global record:
 *   Measure.sample "num_bunnies" (float number_bunnies);
 *   Measure.print_stats ();
 *
 * You can push and pop the global record. This is useful if you want to reset
 * some counters without throwing away that data
 *
 *   Measure.push_global ();
 *   ...measure stuff
 *   let record = Measure.pop_global () in
 *   Measure.print_stats ~record ();
 *
 * Using a local record:
 *   let record = Measure.create () in
 *   Measure.sample ~record "num_bunnies" (float number_bunnies);
 *   Measure.print_stats ~record ();
 *
 * A record does not store the individual measurements, just the aggregate
 * stats, which are updated online. Records can be serialized in order to be
 * sent across pipes.
 *)

module List = Hack_core.List

module FloatMap = MyMap.Make(struct
  type t = float
  let compare = compare
end)

(* Histogram of samples: each key of [buckets] is the lower edge of a bucket
 * (a multiple of [bucket_size]) and the value is the number of samples that
 * fell into that bucket. *)
type distribution = {
  bucket_size: float;
  buckets: int FloatMap.t;
}

(* Aggregate statistics for one named measurement. [variance_sum] is the
 * running sum of squared deviations from the mean, so the variance is
 * [variance_sum /. float count]. *)
type record_entry = {
  count: int;
  mean: float;
  variance_sum: float;
  max: float;
  min: float;
  distribution: distribution option;
}

type record_data = record_entry SMap.t
type record = record_data ref

(* Float-specialised max/min. They are defined here, before any local binding
 * named [max] or [min] shadows the standard ones, and follow the same
 * comparison order as the standard polymorphic versions. *)
let float_max (a: float) (b: float) = if a >= b then a else b
let float_min (a: float) (b: float) = if a <= b then a else b

(* Creates a new empty record *)
let create () = ref SMap.empty

(* Stack of global records; the head is the one used when no ~record is
 * given. It starts with a single empty record. *)
let global: (record list) ref = ref [create ()]

(* Pushes a fresh empty record on top of the global stack. The argument is
 * ignored; it only exists so the function can be called as push_global (). *)
let push_global _record =
  global := (create ()) :: (!global)

(* Removes and returns the record on top of the global stack.
 * Fails with Failure if the stack is empty. *)
let pop_global () =
  match !global with
  | top :: rest ->
      global := rest;
      top
  | [] -> failwith "Measure.pop_global called with empty stack"

(* A record is a reference to an immutable map, so (de)serializing is just
 * taking the map out of, or putting it back into, a reference. *)
let serialize record = !record
let deserialize data = ref data

(* Entry with no samples. The extrema start at the identity elements of
 * max/min so that the first sample always replaces them. Note that
 * [min_float] would be wrong here: it is the smallest positive float, not
 * the most negative one, so negative-only series would report a bogus max. *)
let new_entry = {
  count = 0;
  mean = 0.0;
  variance_sum = 0.0;
  max = neg_infinity;
  min = infinity;
  distribution = None;
}

(* Empty distribution with the given bucket size, already wrapped in Some so
 * it can be stored directly in a record_entry. *)
let new_distribution ~bucket_size =
  Some { bucket_size; buckets = FloatMap.empty; }

(* Resolves an optional record argument: the given record if any, otherwise
 * the top of the global stack. Fails with Failure if the global stack is
 * empty. *)
let get_record = function
  | Some record -> record
  | None -> (match List.hd (!global) with
    | Some record -> record
    | None ->
        failwith ("No global record available! " ^
                  "Did you forget to call Measure.push_global?"))

(* Measure can track how the values are distributed by creating buckets and
 * keeping track of how many samples fall into each bucket. It will not track
 * distribution by default, so call this function to turn it on.
 *
 * Calling it for a name that already has a distribution replaces that
 * distribution with an empty one. bucket_size is expected to be positive. *)
let track_distribution ?record name ~bucket_size =
  let record = get_record record in
  let entry = match SMap.get name (!record) with
    | None -> new_entry
    | Some entry -> entry in
  let entry = { entry with distribution = new_distribution ~bucket_size; } in
  record := SMap.add name entry (!record)

(* Lower edge of the bucket that [value] falls into. *)
let round_down ~bucket_size value =
  bucket_size *. (floor (value /. bucket_size))

(* Counts [value] in its bucket. Does nothing when distribution tracking is
 * off (None). *)
let update_distribution value = function
  | None -> None
  | Some { bucket_size; buckets } ->
      let bucket = round_down ~bucket_size value in
      let bucket_count = match FloatMap.get bucket buckets with
        | None -> 1
        | Some count -> count + 1 in
      let buckets = FloatMap.add bucket bucket_count buckets in
      Some { bucket_size; buckets; }

(* Adds one sample for [name] to the record (the global one by default),
 * updating count, mean, variance sum, extrema and, if enabled, the
 * distribution. *)
let sample ?record name value =
  let record = get_record record in
  let {
    count;
    mean = old_mean;
    variance_sum;
    max;
    min;
    distribution;
  } = match SMap.get name (!record) with
    | None -> new_entry
    | Some entry -> entry in

  let count = count + 1 in
  let mean = old_mean +. ((value -. old_mean) /. (float count)) in
  (* Knuth's online variance approximation algorithm *)
  let variance_sum = variance_sum +. (value -. old_mean) *. (value -. mean) in

  let max = float_max max value in
  let min = float_min min value in

  let distribution = update_distribution value distribution in

  let entry = { count; mean; variance_sum; max; min; distribution; } in
  record := SMap.add name entry (!record)

(* Combines the stats of two entries for the same [name]; used as the merge
 * function for SMap.merge. Fails with Failure if both entries track a
 * distribution but with different bucket sizes. *)
let merge_entries name from into = match (from, into) with
  | None, into -> into
  | from, None -> from
  | Some from, Some into ->
      let count = from.count + into.count in

      (* Using this algorithm to combine the variance sums
       * https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm
       *)
      (* d = meanB - meanA *)
      let delta = from.mean -. into.mean in
      (* Both entries can be empty (e.g. only track_distribution was called
       * on them). Dividing by a zero count would then yield nan and poison
       * every later sample, so skip the weighted terms in that case. *)
      let mean, variance_sum =
        if count = 0 then
          into.mean, from.variance_sum +. into.variance_sum
        else
          (* mean = meanA + delta * (countB/count) *)
          let mean =
            into.mean +. (delta *. (float from.count) /. (float count)) in
          (* VarSum = VarSumA + VarSumB + delta * delta * countA * countB / count *)
          let variance_sum = from.variance_sum +. into.variance_sum +.
            delta *. delta *. (float into.count) *. (float from.count)
              /. (float count) in
          mean, variance_sum in

      let max = float_max from.max into.max in
      let min = float_min from.min into.min in

      let distribution = match (from.distribution, into.distribution) with
        | None, into -> into
        | from, None -> from
        | Some { bucket_size = from; _; }, Some { bucket_size = into; _; }
          when from <> into ->
            failwith (Printf.sprintf
              "Merging buckets for %s failed: bucket sizes %f, %f"
              name from into)
        | Some { bucket_size; buckets = from; }, Some { buckets = into; _; } ->
            let buckets = FloatMap.merge (fun _bucket from_count into_count ->
              match (from_count, into_count) with
              | None, into -> into
              | from, None -> from
              | Some from_count, Some into_count ->
                  Some (from_count + into_count))
              from into in
            Some { bucket_size; buckets; } in
      Some { count; mean; variance_sum; max; min; distribution; }

(* Merges all the samples from "from" into "record". If "record" is omitted
 * then it uses the global record
 *
 * The additional unit argument is to help the typechecker know if ?record
 * is to be expected or not (see warning 16) *)
let merge ?record ~from _ =
  let into = get_record record in
  into := SMap.merge merge_entries (!from) (!into)

(* Runs [f ()], records its wall-clock duration in seconds (as measured by
 * Unix.gettimeofday) as a sample for [name], and returns the result of [f].
 * Nothing is recorded if [f] raises. *)
let time (type a) ?record name (f: unit -> a) : a =
  let record = get_record record in
  let start_time = Unix.gettimeofday () in
  let ret = f () in
  let end_time = Unix.gettimeofday () in
  sample ~record name (end_time -. start_time);
  ret

(* Total of all samples recorded for [name] (count * mean), or None if the
 * name has no entry in the record. *)
let get_sum ?record name =
  let record = get_record record in
  match SMap.get name !record with
  | None -> None
  | Some { count; mean; _; } -> Some (float_of_int count *. mean)

(* Human-friendly rendering of a number, with K/M/G suffixes for large
 * magnitudes. The thresholds are compared against the absolute value so
 * that negative numbers are scaled too; this also guarantees that the
 * integer branch only ever converts values whose magnitude is at most 1000,
 * keeping int_of_float within its defined range. *)
let pretty_num f =
  let magnitude = abs_float f in
  if magnitude > 1000000000.0
  then Printf.sprintf "%.3fG" (f /. 1000000000.0)
  else if magnitude > 1000000.0
  then Printf.sprintf "%.3fM" (f /. 1000000.0)
  else if magnitude > 1000.0
  then Printf.sprintf "%.3fK" (f /. 1000.0)
  else if f = (floor f)
  then Printf.sprintf "%d" (int_of_float f)
  else Printf.sprintf "%f" f

(* Prints the aggregate stats of [name] to stderr, or "NO DATA" if there is
 * no entry or no sample for it. The reported stddev is the population
 * standard deviation, sqrt (variance_sum / count). *)
let print_entry_stats ?record name =
  let record = get_record record in
  Printf.eprintf "%s stats -- " name;
  match SMap.get name (!record) with
  | None
  | Some { count = 0; _; } -> prerr_endline "NO DATA"
  | Some { count; mean; variance_sum; max; min; distribution=_; } ->
      let total = (float count) *. mean in
      let std_dev = sqrt (variance_sum /. (float count)) in
      Utils.prerr_endlinef
        "samples: %s, total: %s, avg: %s, stddev: %s, max: %s, min: %s"
        (pretty_num (float count)) (pretty_num total) (pretty_num mean)
        (pretty_num std_dev) (pretty_num max) (pretty_num min)

(* Prints the stats of every entry in the record to stderr. *)
let print_stats ?record () =
  let record = get_record record in
  SMap.iter (fun name _ -> print_entry_stats ~record name) (!record)

(* Prints the sample count of every bucket from [low] to [high] inclusive to
 * stderr; buckets with no samples are printed with a count of 0. [low] and
 * [high] are expected to be bucket edges as produced by round_down.
 *
 * The walk is done over integer bucket indices, and each edge is recomputed
 * as bucket_size * index, exactly the way round_down builds the map keys.
 * Repeatedly adding bucket_size to [low] instead would accumulate rounding
 * error and miss keys for sizes such as 0.1. Non-positive bucket sizes print
 * nothing (stepping by them could never reach [high]). *)
let print_buckets ~low ~high ~bucket_size buckets =
  if bucket_size > 0.0 && low <= high then begin
    (* Recover the integer index of an edge; rounding to nearest absorbs the
     * error of the division. *)
    let index_of edge = floor (edge /. bucket_size +. 0.5) in
    let first = index_of low in
    let steps = int_of_float (index_of high -. first) in
    for i = 0 to steps do
      let edge = bucket_size *. (first +. float i) in
      let count = match FloatMap.get edge buckets with
        | None -> 0
        | Some count -> count in
      Printf.eprintf "[%02f: %03d]  " edge count
    done
  end

(* Prints the distribution of [name] to stderr as a single line, or a
 * "NO DATA" message if there are no samples or distribution tracking was
 * never turned on for it. *)
let print_entry_distribution ?record name =
  let record = get_record record in
  Printf.eprintf "%s distribution -- " name;
  match SMap.get name (!record) with
  | None
  | Some { count = 0; _; } -> prerr_endline "NO DATA"
  | Some { distribution = None; _; } ->
      prerr_endline "NO DATA (did you forget to call track_distribution?)"
  | Some { max; min; distribution = Some { bucket_size; buckets; }; _; } ->
      let low = round_down ~bucket_size min in
      let high = round_down ~bucket_size max in
      print_buckets ~low ~high ~bucket_size buckets;
      prerr_newline ()

(* Prints the distribution of every entry that tracks one; entries without
 * distribution tracking are skipped silently. *)
let print_distributions ?record () =
  let record = get_record record in
  SMap.iter (fun name { distribution; _; } -> match distribution with
    | None -> ()
    | Some _ -> print_entry_distribution ~record name) (!record)
sectionYPositions = computeSectionYPositions($el), 10)"
x-init="setTimeout(() => sectionYPositions = computeSectionYPositions($el), 10)"
>