package picos

  1. Overview
  2. Docs

Source file picos_htbl.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
type 'k hashed_type = (module Stdlib.Hashtbl.HashedType with type t = 'k)

type ('k, 'v, _) tdt =
  | Nil : ('k, 'v, [> `Nil ]) tdt
  | Cons : {
      key : 'k;
      value : 'v;
      rest : ('k, 'v, [ `Nil | `Cons ]) tdt;
    }
      -> ('k, 'v, [> `Cons ]) tdt
  | Resize : {
      spine : ('k, 'v, [ `Nil | `Cons ]) tdt;
    }
      -> ('k, 'v, [> `Resize ]) tdt
      (** During resizing and snapshotting target buckets will be initialized
          with a physically unique [Resize] value and the source buckets will
          then be gradually updated to [Resize] values and the target buckets
          updated with data from the source buckets. *)

type ('k, 'v) bucket =
  | B : ('k, 'v, [< `Nil | `Cons | `Resize ]) tdt -> ('k, 'v) bucket
[@@unboxed]

type ('k, 'v) pending =
  | Nothing
  | Resize of { buckets : ('k, 'v) bucket Atomic.t array }

type ('k, 'v) state = {
  hash : 'k -> int;
  buckets : ('k, 'v) bucket Atomic.t array;
  equal : 'k -> 'k -> bool;
  non_linearizable_size : int Atomic.t array;
  pending : ('k, 'v) pending;
}

type ('k, 'v) t = ('k, 'v) state Atomic.t

let min_buckets = 16

let max_buckets =
  let n = Sys.max_array_length lsr 1 in
  let n = n lor (n lsr 1) in
  let n = n lor (n lsr 2) in
  let n = n lor (n lsr 4) in
  let n = n lor (n lsr 8) in
  let n = n lor (n lsr 16) in
  let n = if Sys.int_size <= 32 then n else n lor (n lsr 32) in
  let n = n + 1 in
  Int.min n (1 lsl 30 (* Limit of [hash] *))

let create (type k) ?hashed_type () =
  let equal, hash =
    match hashed_type with
    | None -> (( = ), Stdlib.Hashtbl.hash)
    | Some ((module Hashed_type) : k hashed_type) ->
        (Hashed_type.equal, Hashed_type.hash)
  in
  let buckets = Array.init min_buckets @@ fun _ -> Atomic.make (B Nil) in
  let non_linearizable_size =
    [| Atomic.make 0 |> Multicore_magic.copy_as_padded |]
  in
  let pending = Nothing in
  { hash; buckets; equal; non_linearizable_size; pending }
  |> Multicore_magic.copy_as_padded |> Atomic.make
  |> Multicore_magic.copy_as_padded

(* *)

let[@tail_mod_cons] rec filter t msk chk = function
  | Nil -> Nil
  | Cons r ->
      if t r.key land msk = chk then
        Cons { r with rest = filter t msk chk r.rest }
      else filter t msk chk r.rest

let split_hi r target i t spine =
  let high = Array.length r.buckets in
  let b = Array.unsafe_get target (i + high) in
  match Atomic.get b with
  | B (Resize _ as before) ->
      (* The [before] value is physically different for each resize and so
         checking that the resize has not finished is sufficient to ensure that
         the [compare_and_set] below does not disrupt the next resize. *)
      if Atomic.get t == r then
        let ((Nil | Cons _) as after) = filter r.hash high high spine in
        Atomic.compare_and_set b (B before) (B after) |> ignore
  | B (Nil | Cons _) -> ()

let split_lo r target i t spine =
  let b = Array.unsafe_get target i in
  match Atomic.get b with
  | B (Resize _ as before) ->
      (* The [before] value is physically different for each resize and so
         checking that the resize has not finished is sufficient to ensure that
         the [compare_and_set] below does not disrupt the next resize. *)
      if Atomic.get t == r then begin
        let ((Nil | Cons _) as after) =
          filter r.hash (Array.length r.buckets) 0 spine
        in
        Atomic.compare_and_set b (B before) (B after) |> ignore;
        split_hi r target i t spine
      end
  | B (Nil | Cons _) -> split_hi r target i t spine

let rec split_at r target i t backoff =
  let b = Array.unsafe_get r.buckets i in
  match Atomic.get b with
  | B ((Nil | Cons _) as spine) ->
      if Atomic.compare_and_set b (B spine) (B (Resize { spine })) then
        split_lo r target i t spine
      else split_at r target i t (Backoff.once backoff)
  | B (Resize spine_r) -> split_lo r target i t spine_r.spine

let rec split_all r target i t step =
  Atomic.get t == r
  &&
  let i = (i + step) land (Array.length r.buckets - 1) in
  split_at r target i t Backoff.default;
  i = 0 || split_all r target i t step

(* *)

let[@tail_mod_cons] rec merge rest = function
  | Nil -> rest
  | Cons r -> Cons { r with rest = merge rest r.rest }

let merge_at r target i t spine_lo spine_hi =
  let b = Array.unsafe_get target i in
  match Atomic.get b with
  | B (Resize _ as before) ->
      (* The [before] value is physically different for each resize and so
         checking that the resize has not finished is sufficient to ensure that
         the [compare_and_set] below does not disrupt the next resize. *)
      if Atomic.get t == r then
        let ((Nil | Cons _) as after) = merge spine_lo spine_hi in
        Atomic.compare_and_set b (B before) (B after) |> ignore
  | B (Nil | Cons _) -> ()

let rec merge_hi r target i t spine_lo backoff =
  let b = Array.unsafe_get r.buckets (i + Array.length target) in
  match Atomic.get b with
  | B ((Nil | Cons _) as spine) ->
      if Atomic.compare_and_set b (B spine) (B (Resize { spine })) then
        merge_at r target i t spine_lo spine
      else merge_hi r target i t spine_lo (Backoff.once backoff)
  | B (Resize spine_r) -> merge_at r target i t spine_lo spine_r.spine

let rec merge_lo r target i t backoff =
  let b = Array.unsafe_get r.buckets i in
  match Atomic.get b with
  | B ((Nil | Cons _) as spine) ->
      if Atomic.compare_and_set b (B spine) (B (Resize { spine })) then
        merge_hi r target i t spine Backoff.default
      else merge_lo r target i t (Backoff.once backoff)
  | B (Resize spine_r) -> merge_hi r target i t spine_r.spine Backoff.default

let rec merge_all r target i t step =
  Atomic.get t == r
  &&
  let i = (i + step) land (Array.length target - 1) in
  merge_lo r target i t Backoff.default;
  i = 0 || merge_all r target i t step

(* *)

let copy_to r target i t
    ((Nil | Cons _) as spine : (_, _, [ `Nil | `Cons ]) tdt) =
  let b = Array.unsafe_get target i in
  match Atomic.get b with
  | B (Resize _ as before) ->
      (* The [before] value is physically different for each resize and so
         checking that the resize has not finished is sufficient to ensure that
         the [compare_and_set] below does not disrupt the next resize. *)
      if Atomic.get t == r then
        Atomic.compare_and_set b (B before) (B spine) |> ignore
  | B (Nil | Cons _) -> ()

let rec copy_at r target i t backoff =
  let b = Array.unsafe_get r.buckets i in
  match Atomic.get b with
  | B ((Nil | Cons _) as spine) ->
      if Atomic.compare_and_set b (B spine) (B (Resize { spine })) then
        copy_to r target i t spine
      else copy_at r target i t (Backoff.once backoff)
  | B (Resize spine_r) -> copy_to r target i t spine_r.spine

let rec copy_all r target i t step =
  Atomic.get t == r
  &&
  let i = (i + step) land (Array.length target - 1) in
  copy_at r target i t Backoff.default;
  i = 0 || copy_all r target i t step

(* *)

let[@inline never] rec finish t r =
  match r.pending with
  | Nothing -> r
  | Resize { buckets } ->
      let high_source = Array.length r.buckets in
      let high_target = Array.length buckets in
      (* We step by random amount to better allow cores to work in parallel.
         The number of buckets is always a power of two, so any odd number is
         relatively prime or coprime. *)
      let step = Random.bits () lor 1 in
      if
        if high_source < high_target then begin
          (* We are growing the table. *)
          split_all r buckets 0 t step
        end
        else if high_target < high_source then begin
          (* We are shrinking the table. *)
          merge_all r buckets 0 t step
        end
        else begin
          (* We are snaphotting the table. *)
          copy_all r buckets 0 t step
        end
      then
        let new_r =
          { r with buckets; pending = Nothing }
          |> Multicore_magic.copy_as_padded
        in
        if Atomic.compare_and_set t r new_r then new_r
        else finish t (Atomic.get t)
      else finish t (Atomic.get t)

(* *)

let rec estimated_size cs n sum =
  let n = n - 1 in
  if 0 <= n then estimated_size cs n (sum + Atomic.get (Array.unsafe_get cs n))
  else sum

(** This only gives an "estimate" of the size, which can be off by one or more
    and even be negative, so this must be used with care. *)
let estimated_size r =
  let cs = r.non_linearizable_size in
  let n = Array.length cs - 1 in
  estimated_size cs n (Atomic.get (Array.unsafe_get cs n))

let[@inline never] try_resize t r new_capacity =
  (* We must make sure that on every resize we use a physically different
     [Resize _] value to indicate unprocessed target buckets.  The use of
     [Sys.opaque_identity] below ensures that a new value is allocated. *)
  let resize_avoid_aba = B (Resize { spine = Sys.opaque_identity Nil }) in
  let buckets =
    Array.init new_capacity @@ fun _ -> Atomic.make resize_avoid_aba
  in
  let new_r = { r with pending = Resize { buckets } } in
  Atomic.compare_and_set t r new_r
  && begin
       finish t new_r |> ignore;
       true
     end

let rec adjust_estimated_size t r mask delta =
  let i = Multicore_magic.instantaneous_domain_index () in
  let n = Array.length r.non_linearizable_size in
  if i < n then begin
    Atomic.fetch_and_add (Array.unsafe_get r.non_linearizable_size i) delta
    |> ignore;
    (* Reading the size is potentially expensive, so we only check it
       occasionally.  The bigger the table the less frequently we should need to
       resize. *)
    if Random.bits () land mask = 0 && Atomic.get t == r then begin
      let estimated_size = estimated_size r in
      let capacity = Array.length r.buckets in
      if capacity < estimated_size && capacity < max_buckets then
        try_resize t r (capacity + capacity) |> ignore
      else if
        min_buckets < capacity
        && estimated_size + estimated_size + estimated_size < capacity
      then try_resize t r (capacity lsr 1) |> ignore
    end;
    true
  end
  else
    let new_cs =
      (* We use [n + n + 1] as it keeps the length of the array as a power of 2
         minus 1 and so the size of the array/block including header word will
         be a power of 2. *)
      Array.init (n + n + 1) @@ fun i ->
      if i < n then Array.unsafe_get r.non_linearizable_size i
      else Atomic.make 0 |> Multicore_magic.copy_as_padded
    in
    let new_r =
      { r with non_linearizable_size = new_cs }
      |> Multicore_magic.copy_as_padded
    in
    let r = if Atomic.compare_and_set t r new_r then new_r else Atomic.get t in
    adjust_estimated_size t r mask delta

(* *)

(** [get] only returns with a state where [pending = Nothing]. *)
let[@inline] get t =
  let r = Atomic.get t in
  if r.pending == Nothing then r else finish t r

(* *)

let rec assoc_node t key = function
  | Nil -> (Nil : (_, _, [< `Nil | `Cons ]) tdt)
  | Cons r as cons -> if t r.key key then cons else assoc_node t key r.rest

let find_node t key =
  (* Reads can proceed in parallel with writes. *)
  let r = Atomic.get t in
  let h = r.hash key in
  let mask = Array.length r.buckets - 1 in
  let i = h land mask in
  match Atomic.get (Array.unsafe_get r.buckets i) with
  | B Nil -> Nil
  | B (Cons cons_r as cons) ->
      if r.equal cons_r.key key then cons
      else assoc_node r.equal key cons_r.rest
  | B (Resize resize_r) ->
      (* A resize is in progress.  The spine of the resize still holds what was
         in the bucket before resize reached that bucket. *)
      assoc_node r.equal key resize_r.spine

(* *)

let find_exn t key =
  match find_node t key with
  | Nil -> raise_notrace Not_found
  | Cons r -> r.value

let mem t key = find_node t key != Nil

(* *)

let rec try_add t key value backoff =
  let r = get t in
  let h = r.hash key in
  let mask = Array.length r.buckets - 1 in
  let i = h land mask in
  let b = Array.unsafe_get r.buckets i in
  match Atomic.get b with
  | B Nil ->
      let after = Cons { key; value; rest = Nil } in
      if Atomic.compare_and_set b (B Nil) (B after) then
        adjust_estimated_size t r mask 1
      else try_add t key value (Backoff.once backoff)
  | B (Cons _ as before) ->
      if assoc_node r.equal key before != Nil then false
      else
        let after = Cons { key; value; rest = before } in
        if Atomic.compare_and_set b (B before) (B after) then
          adjust_estimated_size t r mask 1
        else try_add t key value (Backoff.once backoff)
  | B (Resize _) -> try_add t key value Backoff.default

let try_add t key value = try_add t key value Backoff.default

(* *)

let[@tail_mod_cons] rec dissoc t key = function
  | Nil -> raise_notrace Not_found
  | Cons r ->
      if t key r.key then r.rest else Cons { r with rest = dissoc t key r.rest }

let rec try_remove t key backoff =
  let r = get t in
  let h = r.hash key in
  let mask = Array.length r.buckets - 1 in
  let i = h land mask in
  let b = Array.unsafe_get r.buckets i in
  match Atomic.get b with
  | B Nil -> false
  | B (Cons cons_r as before) -> begin
      if r.equal cons_r.key key then
        if Atomic.compare_and_set b (B before) (B cons_r.rest) then
          adjust_estimated_size t r mask (-1)
        else try_remove t key (Backoff.once backoff)
      else
        match dissoc r.equal key cons_r.rest with
        | (Nil | Cons _) as rest ->
            if
              Atomic.compare_and_set b (B before)
                (B (Cons { cons_r with rest }))
            then adjust_estimated_size t r mask (-1)
            else try_remove t key (Backoff.once backoff)
        | exception Not_found -> false
    end
  | B (Resize _) -> try_remove t key Backoff.default

let try_remove t key = try_remove t key Backoff.default

(* *)

let rec to_seq t backoff =
  let r = get t in
  if try_resize t r (Array.length r.buckets) then begin
    (* At this point the resize has been completed and a new array is used for
       buckets and [r.buckets] now has an immutable copy of what was in the hash
       table. *)
    let snapshot = r.buckets in
    let rec loop i kvs () =
      match kvs with
      | Nil ->
          if i = Array.length snapshot then Seq.Nil
          else
            loop (i + 1)
              (match Atomic.get (Array.unsafe_get snapshot i) with
              | B (Resize spine_r) -> spine_r.spine
              | B (Nil | Cons _) ->
                  (* After resize only [Resize] values should be left in the old
                     buckets. *)
                  assert false)
              ()
      | Cons r -> Seq.Cons ((r.key, r.value), loop i r.rest)
    in
    loop 0 Nil
  end
  else to_seq t (Backoff.once backoff)

let to_seq t = to_seq t Backoff.default
OCaml

Innovation. Community. Security.