package bentov

  1. Overview
  2. Docs
Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source

Source file bentov.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
type bin = {
  center : float;
  count : int
}

type histogram = {
  max_bins : int;
  num_bins : int;
  bins : bin list;
  range : (float * float) option; (* (min * max) *)
  total_count : int;
}

let max_bins h =
  h.max_bins

let num_bins h =
  h.num_bins

let bins h =
  h.bins

let range h =
  h.range

let total_count h =
  h.total_count

(* not tail rec! *)
let rec insert value = function
  | [] -> [{ center = value ; count = 1 }], true
  | h :: t ->
    if value < h.center then
      { center = value ; count = 1 } :: h :: t, true
    else if value = h.center then
      { h with count = h.count + 1 } :: t, false
    else
      let t, num_bins_is_incr = insert value t in
      h :: t, num_bins_is_incr

let rec min_diff_index i index min_diff = function
  | a :: b :: t ->
    let diff = b.center -. a.center in
    assert ( diff > 0. );
    if diff < min_diff then
      min_diff_index (i+1) i diff (b :: t)
    else
      (* no change *)
      min_diff_index (i+1) index min_diff (b :: t)

  | [ _ ] -> index
  | [] -> assert false

let min_diff_index = function
  | a :: b :: t ->
    let diff = b.center -. a.center in
    assert ( diff > 0. );
    min_diff_index 1 0 diff (b :: t)

  | [ _ ]
  | [] -> assert false

let merge_bins lo hi =
  assert (lo.center < hi.center);
  let sum_count = lo.count + hi.count in
  let center =
    (* average of centers, weighted by their height *)
    (lo.center *. (float lo.count) +. hi.center *. (float hi.count)) /.
    (float sum_count)
  in
  { center; count = sum_count }

(* not tail rec! *)
let merge_bins_at_index =
  let rec loop i index = function
    | a :: b :: t ->
      if i = index then
        let bin = merge_bins a b in
        bin :: t
      else
        a :: (loop (i + 1) index (b :: t))

    | [ _ ]
    | [] -> assert false
  in
  fun index bins ->
    loop 0 index bins

let create max_bins =
  if max_bins < 2 then
    raise (Invalid_argument (Printf.sprintf "max_bins: %d" max_bins))
  else {
    max_bins;
    num_bins = 0;
    bins = [];
    total_count = 0;
    range = None
  }

let add value histogram =
  let range =
    match histogram.range with
      | Some (mn, mx) -> Some (min value mn, max value mx)
      | None -> Some (value, value)
  in
  let total_count = histogram.total_count + 1 in
  let bins, is_augmented = insert value histogram.bins in
  if histogram.num_bins = histogram.max_bins then
    if is_augmented then
      (* merge bins, so as to keep their number at [max_bins] *)
      let index = min_diff_index bins in
      let bins = merge_bins_at_index index bins in
      { histogram with bins; range; total_count }
    else
      { histogram with bins; range; total_count }
  else
    if is_augmented then
      { histogram with bins; range; total_count;
                       num_bins = histogram.num_bins + 1; }
    else
      { histogram with bins; range; total_count }

(* merge two sorted bin lists; not tail rec! *)
let rec binary_merge a b =
  match a, b with
    | a_h :: a_t, b_h :: b_t ->
      if a_h.center < b_h.center then
        a_h :: (binary_merge a_t b)
      else if a_h.center > b_h.center then
        b_h :: (binary_merge a b_t)
      else
        (* a_h.center = b_h.center: merge the two cells into one *)
        let merged = { a_h with count = a_h.count + b_h.count } in
        merged :: (binary_merge a_t b_t)

    | [], _ :: _ -> b
    | _ :: _, [] -> a
    | [], [] -> []

let rec k_ary_merge_half accu = function
  | a :: b :: t ->
    let ab = binary_merge a b in
    k_ary_merge_half  (ab :: accu) t

  | [a] -> (a :: accu)
  | [] -> accu

let rec k_ary_merge t =
  match k_ary_merge_half [] t with
    | [a] -> a
    | t -> k_ary_merge t


let rec reduce bins ~num_bins ~max_bins =
  if num_bins > max_bins then
    let index = min_diff_index bins in
    let bins = merge_bins_at_index index bins in
    reduce bins ~num_bins:(num_bins - 1) ~max_bins
  else
    bins



let merge h_list max_bins =
  let bins, _, total_count, range = List.fold_left
      (fun (t_bins, t_num_bins, t_total_count, t_range)
        { bins; num_bins; total_count; range; _} ->
        let t_range =
          match t_range, range with
            | Some (t_mn, t_mx), Some (mn, mx) ->
              Some ((min t_mn mn), (max t_mx mx))
            | None, Some _ -> range
            | Some _, None -> t_range
            | None, None -> None
        in
        bins :: t_bins,
        t_num_bins + num_bins,
        t_total_count + total_count,
        t_range
      ) ([], 0, 0, None) h_list in

  (* even if [num_bins <= output_max_bins], we have to apply
     [k_ary_merge] to combine indentical bin centers *)
  let merged_bins = k_ary_merge bins in
  let num_bins = List.length merged_bins in
  let bins = reduce merged_bins ~num_bins ~max_bins in
  let num_bins = List.length bins in
  { bins;
    num_bins;
    max_bins;
    total_count;
    range }

(* add a value with a count; equivalent to calling [add value hist]
   [count] times *)
let addc value count hist =
  let singleton = {
    bins = [{ center = value ; count }];
    total_count = count;
    range = Some (value, value);
    num_bins = 1;
    max_bins = 1; (* benign *)
  } in
  merge [hist; singleton] hist.max_bins

let pos_quadratic_root ~a ~b ~c =
  if a = 0.0 then
    -.c /. b
  else
    let discriminant = b *. b -. 4. *. a *. c in
    ((sqrt discriminant) -. b) /. (2. *. a)

exception Empty

let sum =
  let rec find_i b i sum = function
    | ({ center = p_i; count = m_i } as bin_i) ::
        ({ center = p_i1; _ } as bin_i1) :: t ->
      if p_i <= b && b < p_i1 then
        bin_i, bin_i1, sum
      else
        find_i b (i+1) (sum +. (float m_i)) (bin_i1 :: t)

    | _ -> raise Not_found
  in

  fun histogram b ->
    let {center = p_i; count = m_i}, {center = p_i1; count = m_i1 }, sum_i0 =
      find_i b 0 0.0 histogram.bins in
    let m_i = float m_i in
    let m_i1 = float m_i1 in
    let bpp = (b -. p_i) /. (p_i1 -. p_i) in
    let m_b = m_i +. (m_i1 -. m_i) *. bpp in
    let s = (m_i +. m_b) *. bpp /. 2. in
    s +. sum_i0 +. m_i /. 2.

let uniform =
  let rec loop span cum_sum_at_centers j accu =
    let s = (float j) *. span in
    match cum_sum_at_centers with
    | (cum_sum_0, {center = p_0; count = m_0}) ::
        ((cum_sum_1, {center = p_1; count = m_1}) as bin_1) :: rest ->
      if s < cum_sum_0 then
        loop span cum_sum_at_centers (j + 1) accu
      else if cum_sum_0 <= s && s < cum_sum_1 then
        let d = s -. cum_sum_0 in
        let c = -2. *. d in
        let b = float (2 * m_0) in
        let a = float (m_1 - m_0) in
        let z = pos_quadratic_root ~a ~b ~c in
        let u = p_0 +. (p_1 -. p_0) *. z in
        loop span cum_sum_at_centers (j + 1) ((j, u) :: accu)
      else
        loop span (bin_1 :: rest) j accu
    | [ _ ] -> List.rev accu
    | [] -> assert false
  in
  let cum_sum_at_centers hist =
    let bin, hist_rest, cum_sum =
      match hist with
      | ({count; _} as bin) :: rest -> bin, rest, (float count) /. 2.
      | _ -> raise Empty
    in
    let _, _, cum_sum_at_centers = List.fold_left (
      fun (cum_sum, prev_count, cum_sum_at_centers) ({count; _} as bin) ->
        let cum_sum = cum_sum +. (float (prev_count + count)) /. 2. in
        let cum_sum_at_centers = (cum_sum, bin) :: cum_sum_at_centers in
        cum_sum, count, cum_sum_at_centers
    ) (cum_sum, bin.count, [cum_sum, bin]) hist_rest in
    List.rev cum_sum_at_centers
  in
  fun hist b ->
    if b < 1 then
      raise (Invalid_argument "uniform")
    else
      let cum_sum_at_centers = cum_sum_at_centers hist.bins in
      let span = (float hist.total_count) /. (float b) in
      loop span cum_sum_at_centers 0 []


let mean { bins; total_count; _ } =
  if total_count = 0 then
    raise Empty
  else
    let m = List.fold_left (
      fun sum { center; count } ->
        sum +. center *. (float count)
    ) 0.0 bins in
    m /. (float total_count)

let mean_stdev histogram =
  if histogram.total_count = 0 then
    raise Empty
  else
    let mean = mean histogram in
    let v = List.fold_left (
        fun sum { center; count } ->
          let diff = center -. mean in
          sum +. diff *. diff *. (float count)
      ) 0.0 histogram.bins
    in
    let stdev = sqrt (v /. (float histogram.total_count)) in
    mean, stdev

OCaml

Innovation. Community. Security.