Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source
Page
Library
Module
Module type
Parameter
Class
Class type
Source
bentov.ml
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304
type bin = { center : float; count : int } type histogram = { max_bins : int; num_bins : int; bins : bin list; range : (float * float) option; (* (min * max) *) total_count : int; } let max_bins h = h.max_bins let num_bins h = h.num_bins let bins h = h.bins let range h = h.range let total_count h = h.total_count (* not tail rec! *) let rec insert value = function | [] -> [{ center = value ; count = 1 }], true | h :: t -> if value < h.center then { center = value ; count = 1 } :: h :: t, true else if value = h.center then { h with count = h.count + 1 } :: t, false else let t, num_bins_is_incr = insert value t in h :: t, num_bins_is_incr let rec min_diff_index i index min_diff = function | a :: b :: t -> let diff = b.center -. a.center in assert ( diff > 0. ); if diff < min_diff then min_diff_index (i+1) i diff (b :: t) else (* no change *) min_diff_index (i+1) index min_diff (b :: t) | [ _ ] -> index | [] -> assert false let min_diff_index = function | a :: b :: t -> let diff = b.center -. a.center in assert ( diff > 0. ); min_diff_index 1 0 diff (b :: t) | [ _ ] | [] -> assert false let merge_bins lo hi = assert (lo.center < hi.center); let sum_count = lo.count + hi.count in let center = (* average of centers, weighted by their height *) (lo.center *. (float lo.count) +. hi.center *. (float hi.count)) /. (float sum_count) in { center; count = sum_count } (* not tail rec! *) let merge_bins_at_index = let rec loop i index = function | a :: b :: t -> if i = index then let bin = merge_bins a b in bin :: t else a :: (loop (i + 1) index (b :: t)) | [ _ ] | [] -> assert false in fun index bins -> loop 0 index bins let create max_bins = if max_bins < 2 then raise (Invalid_argument (Printf.sprintf "max_bins: %d" max_bins)) else { max_bins; num_bins = 0; bins = []; total_count = 0; range = None } let add value histogram = let range = match histogram.range with | Some (mn, mx) -> Some (min value mn, max value mx) | None -> Some (value, value) in let total_count = histogram.total_count + 1 in let bins, is_augmented = insert value histogram.bins in if histogram.num_bins = histogram.max_bins then if is_augmented then (* merge bins, so as to keep their number at [max_bins] *) let index = min_diff_index bins in let bins = merge_bins_at_index index bins in { histogram with bins; range; total_count } else { histogram with bins; range; total_count } else if is_augmented then { histogram with bins; range; total_count; num_bins = histogram.num_bins + 1; } else { histogram with bins; range; total_count } (* merge two sorted bin lists; not tail rec! *) let rec binary_merge a b = match a, b with | a_h :: a_t, b_h :: b_t -> if a_h.center < b_h.center then a_h :: (binary_merge a_t b) else if a_h.center > b_h.center then b_h :: (binary_merge a b_t) else (* a_h.center = b_h.center: merge the two cells into one *) let merged = { a_h with count = a_h.count + b_h.count } in merged :: (binary_merge a_t b_t) | [], _ :: _ -> b | _ :: _, [] -> a | [], [] -> [] let rec k_ary_merge_half accu = function | a :: b :: t -> let ab = binary_merge a b in k_ary_merge_half (ab :: accu) t | [a] -> (a :: accu) | [] -> accu let rec k_ary_merge t = match k_ary_merge_half [] t with | [a] -> a | t -> k_ary_merge t let rec reduce bins ~num_bins ~max_bins = if num_bins > max_bins then let index = min_diff_index bins in let bins = merge_bins_at_index index bins in reduce bins ~num_bins:(num_bins - 1) ~max_bins else bins let merge h_list max_bins = let bins, _, total_count, range = List.fold_left (fun (t_bins, t_num_bins, t_total_count, t_range) { bins; num_bins; total_count; range; _} -> let t_range = match t_range, range with | Some (t_mn, t_mx), Some (mn, mx) -> Some ((min t_mn mn), (max t_mx mx)) | None, Some _ -> range | Some _, None -> t_range | None, None -> None in bins :: t_bins, t_num_bins + num_bins, t_total_count + total_count, t_range ) ([], 0, 0, None) h_list in (* even if [num_bins <= output_max_bins], we have to apply [k_ary_merge] to combine indentical bin centers *) let merged_bins = k_ary_merge bins in let num_bins = List.length merged_bins in let bins = reduce merged_bins ~num_bins ~max_bins in let num_bins = List.length bins in { bins; num_bins; max_bins; total_count; range } (* add a value with a count; equivalent to calling [add value hist] [count] times *) let addc value count hist = let singleton = { bins = [{ center = value ; count }]; total_count = count; range = Some (value, value); num_bins = 1; max_bins = 1; (* benign *) } in merge [hist; singleton] hist.max_bins let pos_quadratic_root ~a ~b ~c = if a = 0.0 then -.c /. b else let discriminant = b *. b -. 4. *. a *. c in ((sqrt discriminant) -. b) /. (2. *. a) exception Empty let sum = let rec find_i b i sum = function | ({ center = p_i; count = m_i } as bin_i) :: ({ center = p_i1; _ } as bin_i1) :: t -> if p_i <= b && b < p_i1 then bin_i, bin_i1, sum else find_i b (i+1) (sum +. (float m_i)) (bin_i1 :: t) | _ -> raise Not_found in fun histogram b -> let {center = p_i; count = m_i}, {center = p_i1; count = m_i1 }, sum_i0 = find_i b 0 0.0 histogram.bins in let m_i = float m_i in let m_i1 = float m_i1 in let bpp = (b -. p_i) /. (p_i1 -. p_i) in let m_b = m_i +. (m_i1 -. m_i) *. bpp in let s = (m_i +. m_b) *. bpp /. 2. in s +. sum_i0 +. m_i /. 2. let uniform = let rec loop span cum_sum_at_centers j accu = let s = (float j) *. span in match cum_sum_at_centers with | (cum_sum_0, {center = p_0; count = m_0}) :: ((cum_sum_1, {center = p_1; count = m_1}) as bin_1) :: rest -> if s < cum_sum_0 then loop span cum_sum_at_centers (j + 1) accu else if cum_sum_0 <= s && s < cum_sum_1 then let d = s -. cum_sum_0 in let c = -2. *. d in let b = float (2 * m_0) in let a = float (m_1 - m_0) in let z = pos_quadratic_root ~a ~b ~c in let u = p_0 +. (p_1 -. p_0) *. z in loop span cum_sum_at_centers (j + 1) ((j, u) :: accu) else loop span (bin_1 :: rest) j accu | [ _ ] -> List.rev accu | [] -> assert false in let cum_sum_at_centers hist = let bin, hist_rest, cum_sum = match hist with | ({count; _} as bin) :: rest -> bin, rest, (float count) /. 2. | _ -> raise Empty in let _, _, cum_sum_at_centers = List.fold_left ( fun (cum_sum, prev_count, cum_sum_at_centers) ({count; _} as bin) -> let cum_sum = cum_sum +. (float (prev_count + count)) /. 2. in let cum_sum_at_centers = (cum_sum, bin) :: cum_sum_at_centers in cum_sum, count, cum_sum_at_centers ) (cum_sum, bin.count, [cum_sum, bin]) hist_rest in List.rev cum_sum_at_centers in fun hist b -> if b < 1 then raise (Invalid_argument "uniform") else let cum_sum_at_centers = cum_sum_at_centers hist.bins in let span = (float hist.total_count) /. (float b) in loop span cum_sum_at_centers 0 [] let mean { bins; total_count; _ } = if total_count = 0 then raise Empty else let m = List.fold_left ( fun sum { center; count } -> sum +. center *. (float count) ) 0.0 bins in m /. (float total_count) let mean_stdev histogram = if histogram.total_count = 0 then raise Empty else let mean = mean histogram in let v = List.fold_left ( fun sum { center; count } -> let diff = center -. mean in sum +. diff *. diff *. (float count) ) 0.0 histogram.bins in let stdev = sqrt (v /. (float histogram.total_count)) in mean, stdev