Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source
Source file measure.ml
(**
* Copyright (c) 2015, Facebook, Inc.
* All rights reserved.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the "hack" directory of this source tree. An additional grant
* of patent rights can be found in the PATENTS file in the same directory.
*
*)

(**
* The Measure module is primarily useful for debugging. It's particularly
* useful for gathering stats about something that happens a lot. Let's say you
* have some code like this
*
* let number_bunnies = count_bunnies () in
*
* If you want to debug how many bunnies are being counted, you could do
* something like
*
* let number_bunnies = count_bunnies () in
* Utils.prerr_endlinef "Num bunnies: %d" number_bunnies;
*
* but what if this code is called 1000 times? Then you end up with log spew.
* Using the Measure module helps with this. You can now do
*
* let number_bunnies = count_bunnies () in
* Measure.sample "num_bunnies" number_bunnies;
*
* and then later you do
*
* Measure.print_stats ();
*
* which will print the number of samples, the total, the average, the
* variance, the max and the min.
*
* Measure can keep track of the distribution of measurements if you give it a
* bucket size. Before we collect our measurements, call
*
* Measure.track_distribution "num_bunnies" ~bucket_size:10.0;
* ...do logging
* Measure.print_distribution ();
*
* And this will print how many samples fall in the 0-9 bucket, how many fall
* into the 10-19 bucket, etc
*
* A common use case is timing, and there's an easy helper method. Let's say we
* wanted to see how long our code takes
*
* let number_bunnies = Measure.time "count_bunnies_time" (fun () ->
* count_bunnies ()
* ) in
*
* now when we call print_stats we'll see how fast count_bunnies is and how
* much total time we spend counting bunnies.
*
* Measurements are stored in a stateful way in a record. You can either use a
* global record or a local record.
*
* Using a global record:
* Measure.sample "num_bunnies" number_bunnies;
* Measure.print_stats ();
*
* You can push and pop the global record. This is useful if you want to reset
* some counters without throwing away that data
*
* Measure.push_global ();
* ...measure stuff
* let record = Measure.pop_global () in
* Measure.print_stats ~record ();
*
* Using a local record:
* let record = Measure.create () in
* Measure.sample ~record "num_bunnies" number_bunnies;
* Measure.print_stats ~record ();
*
* A record does not store the individual measurements, just the aggregate
* stats, which are updated online. Records can be serialized in order to be
* sent across pipes.
*)

module List = Hack_core.List

(* Map keyed by the lower bound of a distribution bucket. *)
module FloatMap = MyMap.Make (struct
  type t = float
  let compare = compare
end)

(* Histogram of samples: [buckets] maps the lower bound of a bucket (a
 * multiple of [bucket_size], see round_down) to the number of samples that
 * landed in it. *)
type distribution = {
  bucket_size: float;
  buckets: int FloatMap.t;
}

(* Aggregate statistics for one named measurement. Individual samples are not
 * kept; [variance_sum] is the running sum of squared deviations from the
 * mean, from which the variance is derived when printing. *)
type record_entry = {
  count: int;
  mean: float;
  variance_sum: float;
  max: float;
  min: float;
  distribution: distribution option;
}

(* The immutable payload of a record; this is what serialize returns. *)
type record_data = record_entry SMap.t

(* A record is a mutable cell holding the per-name statistics. *)
type record = record_data ref

(* Creates a new empty record *)
let create () = ref SMap.empty

(* Stack of global records; the head is the record used when no ~record
 * argument is given. It starts with one empty record. *)
let global: (record list) ref = ref [create ()]

(* Pushes a fresh empty record on top of the global stack.
 * This is a function taking unit, matching the documented call
 * Measure.push_global () and the hint printed by get_record, so that the push
 * happens on every call rather than once at module initialisation. *)
let push_global () =
  global := (create ()) :: (!global)

(* Removes and returns the record on top of the global stack.
 * Raises Failure if the stack is empty. *)
let pop_global () =
  match !global with
  | ret :: globals ->
      global := globals;
      ret
  | _ -> failwith "Measure.pop_global called with empty stack"

(* Extracts the immutable data of a record. *)
let serialize record = !record

(* Wraps previously serialized data in a fresh record. *)
let deserialize data = ref data

(* Statistics of a measurement that has no samples yet.
 * [max] and [min] start at the identities of Pervasives.max and
 * Pervasives.min. Note that min_float is NOT suitable as the initial maximum:
 * it is the smallest positive normalized float, so a measurement whose
 * samples are all negative would report a maximum of about 2.2e-308. *)
let new_entry = {
  count = 0;
  mean = 0.0;
  variance_sum = 0.0;
  max = neg_infinity;
  min = infinity;
  distribution = None;
}

(* An empty distribution with the given bucket size, already wrapped in Some
 * so that it can be stored directly in a record_entry. *)
let new_distribution ~bucket_size = Some {
  bucket_size;
  buckets = FloatMap.empty;
}

(* Resolves an optional record argument: the given record if there is one,
 * otherwise the record on top of the global stack.
 * Raises Failure if no record is given and the global stack is empty. *)
let get_record = function
  | Some record -> record
  | None ->
      (match List.hd (!global) with
       | Some record -> record
       | None ->
           failwith ("No global record available! " ^
                     "Did you forget to call Measure.push_global?"))

(* Measure can track how the values are distributed by creating buckets and
* keeping track of how many samples fall into each buckets. It will not track
* distribution by default, so call this function to turn it on *)
let track_distribution ?record name ~bucket_size =
  let record = get_record record in
  let entry = match SMap.get name (!record) with
    | None -> new_entry
    | Some entry -> entry in
  (* Any distribution already stored for this name is replaced by an empty
   * one; the other statistics of the entry are kept. *)
  let entry = { entry with distribution = new_distribution ~bucket_size; } in
  record := SMap.add name entry (!record)

(* Lower bound of the bucket that contains [value]: the largest multiple of
 * [bucket_size] that is not greater than [value]. *)
let round_down ~bucket_size value =
  bucket_size *. (floor (value /. bucket_size))

(* Counts [value] in its bucket. Entries that do not track a distribution
 * (None) are returned unchanged. *)
let update_distribution value = function
  | None -> None
  | Some { bucket_size; buckets } ->
      let bucket = round_down ~bucket_size value in
      let bucket_count = match FloatMap.get bucket buckets with
        | None -> 1
        | Some count -> count + 1 in
      let buckets = FloatMap.add bucket bucket_count buckets in
      Some { bucket_size; buckets; }

(* Records one sample for [name] in the given record (or in the global record
 * when ?record is omitted), updating count, mean, variance sum, max, min and,
 * if enabled, the distribution. *)
let sample ?record name value =
  let record = get_record record in
  let {
    count;
    mean = old_mean;
    variance_sum;
    max;
    min;
    distribution;
  } = match SMap.get name (!record) with
    | None -> new_entry
    | Some entry -> entry in

  let count = count + 1 in
  let mean = old_mean +. ((value -. old_mean) /. (float count)) in
  (* Knuth's online variance approximation algorithm *)
  let variance_sum = variance_sum +. (value -. old_mean) *. (value -. mean) in

  let max = Pervasives.max max value in
  let min = Pervasives.min min value in

  let distribution = update_distribution value distribution in

  let entry = { count; mean; variance_sum; max; min; distribution; } in
  record := SMap.add name entry (!record)

(* Combines the statistics stored under [name] in two records. Written in the
 * shape expected by a map merge function: either side may be absent, in which
 * case the other side is returned as is.
 * Raises Failure if both entries track a distribution but with different
 * bucket sizes. *)
let merge_entries name from into = match (from, into) with
  | None, into -> into
  | from, None -> from
  | Some from, Some into ->
      let count = from.count + into.count in

      let mean, variance_sum =
        if count = 0 then
          (* Neither side has any sample (for instance both only had
           * track_distribution called). The correction terms below would be
           * 0/0 = nan and would poison every later sample, so they are
           * dropped; their weight is zero anyway. *)
          into.mean, from.variance_sum +. into.variance_sum
        else begin
          (* Using this algorithm to combine the variance sums
           * https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm
           *)
          (* d = meanB - meanA *)
          let delta = from.mean -. into.mean in
          (* mean = meanA + delta * (countB/count) *)
          let mean =
            into.mean +. (delta *. (float from.count) /. (float count)) in
          (* VarSum = VarSumA + VarSumB + delta * delta * countA * countB / count *)
          let variance_sum = from.variance_sum +. into.variance_sum +.
            delta *. delta *. (float into.count) *. (float from.count)
              /. (float count) in
          mean, variance_sum
        end in

      let max = Pervasives.max from.max into.max in
      let min = Pervasives.min from.min into.min in

      let distribution = match (from.distribution, into.distribution) with
        | None, into -> into
        | from, None -> from
        | Some { bucket_size = from_size; _; },
          Some { bucket_size = into_size; _; } when from_size <> into_size ->
            (* Buckets of different widths cannot be added together. *)
            Printf.kprintf
              failwith
              "Merging buckets for %s failed: bucket sizes %f, %f"
              name
              from_size
              into_size
        | Some { bucket_size; buckets = from; },
          Some { buckets = into; _; } ->
            (* Same bucket size on both sides: add the counts bucket by
             * bucket. *)
            let buckets = FloatMap.merge (fun _bucket from_count into_count ->
              match (from_count, into_count) with
              | None, into -> into
              | from, None -> from
              | Some from_count, Some into_count ->
                  Some (from_count + into_count))
              from into in
            Some { bucket_size; buckets; } in

      Some { count; mean; variance_sum; max; min; distribution; }

(* Merges all the samples from "from" into "record". If "record" is omitted
* then it uses the global record
* The additional unit argument is to help the typechecker know if ?record
* is to be expected or not (see warning 16)
*)
let merge ?record ~from _ =
  let into = get_record record in
  into := SMap.merge merge_entries (!from) (!into)

(* Runs [f ()], records the elapsed wall-clock time in seconds (measured with
 * Unix.gettimeofday) as a sample for [name], and returns the result of [f].
 * If [f] raises, no sample is recorded and the exception propagates. *)
let time (type a) ?record name (f: unit -> a) : a =
  let record = get_record record in
  let start_time = Unix.gettimeofday () in
  let ret = f () in
  let end_time = Unix.gettimeofday () in
  sample ~record name (end_time -. start_time);
  ret

(* Total of all samples recorded for [name], reconstructed as count * mean.
 * Returns None if nothing is stored under [name]. *)
let get_sum ?record name =
  let record = get_record record in
  match SMap.get name !record with
  | None -> None
  | Some { count; mean; _; } -> Some (float_of_int count *. mean)

(* Formats a number for humans: a G/M/K suffix with three decimals above
 * 10^9 / 10^6 / 10^3, a plain integer when the value has no fractional part,
 * and the default %f rendering otherwise. *)
let pretty_num f =
  if f > 1000000000.0
  then Printf.sprintf "%.3fG" (f /. 1000000000.0)
  else if f > 1000000.0
  then Printf.sprintf "%.3fM" (f /. 1000000.0)
  else if f > 1000.0
  then Printf.sprintf "%.3fK" (f /. 1000.0)
  else if f = (floor f)
  then Printf.sprintf "%d" (int_of_float f)
  else Printf.sprintf "%f" f

(* Prints the statistics of one measurement to stderr, or "NO DATA" if the
 * measurement is unknown or has no samples. *)
let print_entry_stats ?record name =
  let record = get_record record in
  Printf.eprintf "%s stats -- " name;
  match SMap.get name (!record) with
  | None
  | Some { count = 0; _; } -> prerr_endline "NO DATA"
  | Some { count; mean; variance_sum; max; min; distribution = _; } ->
      let total = (float count) *. mean in
      let std_dev = sqrt (variance_sum /. (float count)) in
      (* The format string has no closing parenthesis: nothing in the line
       * opens one. *)
      Utils.prerr_endlinef
        "samples: %s, total: %s, avg: %s, stddev: %s, max: %s, min: %s"
        (pretty_num (float count)) (pretty_num total) (pretty_num mean)
        (pretty_num std_dev) (pretty_num max) (pretty_num min)

(* Prints the statistics of every measurement in the record to stderr. *)
let print_stats ?record () =
  let record = get_record record in
  SMap.iter (fun name _ -> print_entry_stats ~record name) (!record)

(* Prints "[lower bound: count]" for every bucket from the one starting at
 * [low] to the one starting at [high], both included; buckets without
 * samples are printed with a count of 0.
 *
 * Buckets are enumerated by integer index and each key is recomputed as
 * bucket_size * index, which is exactly how round_down builds the keys
 * stored in the map. Stepping with low + bucket_size instead accumulates
 * rounding error for fractional bucket sizes, so the lookup key eventually
 * differs from the stored key and populated buckets show up as empty. *)
let print_buckets ~low ~high ~bucket_size buckets =
  (* [low] and [high] are multiples of bucket_size up to rounding error, so
   * rounding to the nearest integer recovers their bucket index. *)
  let index_of bound = floor (bound /. bucket_size +. 0.5) in
  let last = index_of high in
  let rec loop index =
    if index <= last
    then begin
      let bucket = bucket_size *. index in
      let count = match FloatMap.get bucket buckets with
        | None -> 0
        | Some count -> count in
      Printf.eprintf "[%02f: %03d] " bucket count;
      loop (index +. 1.0)
    end in
  loop (index_of low)

(* Prints the distribution of one measurement to stderr, or a "NO DATA"
 * message if the measurement is unknown, has no samples, or does not track
 * its distribution. *)
let print_entry_distribution ?record name =
  let record = get_record record in
  Printf.eprintf "%s distribution -- " name;
  match SMap.get name (!record) with
  | None
  | Some { count = 0; _; } -> prerr_endline "NO DATA"
  | Some { distribution = None; _; } ->
      (* Kept on a single line so the hint is not split in the log. *)
      prerr_endline "NO DATA (did you forget to call track_distribution?)"
  | Some { max; min; distribution = Some { bucket_size; buckets; }; _; } ->
      let low = round_down ~bucket_size min in
      let high = round_down ~bucket_size max in
      print_buckets ~low ~high ~bucket_size buckets;
      prerr_newline ()

(* Prints the distribution of every measurement in the record that tracks
 * one; measurements without a distribution are skipped. *)
let print_distributions ?record () =
  let record = get_record record in
  SMap.iter (fun name { distribution; _; } ->
    match distribution with
    | None -> ()
    | Some _ -> print_entry_distribution ~record name)
    (!record)