package SZXX

  1. Overview
  2. Docs

Source file parsing.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
open! Core
open Angstrom

type storage = {
  add: Bigstring.t -> int -> unit;
  finalize: unit -> unit;
}

let slice_size = 1024

let slice_bits = 10

(* Boyer–Moore–Horspool algorithm *)
type bmh =
  | Found
  | Shift   of int
  | Restart

let make_table ~pattern len =
  let table = Array.create ~len:256 Restart in
  String.foldi pattern ~init:() ~f:(fun i () c ->
      if i < len - 1 then table.(Char.to_int c) <- Shift (len - (i + 1)));
  table

let same ~pattern table s =
  let rec loop s = function
    | -1 -> Found
    | i when Char.( = ) s.[i] pattern.[i] -> loop s (i - 1)
    | _ -> table.(Char.to_int s.[String.length pattern - 1])
  in
  loop s

let skip_until_pattern ~pattern =
  let len = String.length pattern in
  let table = make_table ~pattern len in
  let rec loop window =
    match same ~pattern table window (len - 1) with
    | Found -> return ()
    | Shift by -> take by >>= fun more -> (loop [@tailcall]) (String.drop_prefix window by ^ more)
    | Restart -> take len >>= fun s -> (loop [@tailcall]) s
  in
  take len >>= loop

let take_until_pattern ~pattern =
  let buf = Buffer.create 32 in
  let len = String.length pattern in
  let table = make_table ~pattern len in
  let rec loop window =
    match same ~pattern table window (len - 1) with
    | Found ->
      let s = Buffer.contents buf in
      if Buffer.length buf > slice_size then Buffer.reset buf else Buffer.clear buf;
      return s
    | Shift by ->
      take by >>= fun more ->
      Buffer.add_substring buf window ~pos:0 ~len:by;
      (loop [@tailcall]) (String.drop_prefix window by ^ more)
    | Restart ->
      take len >>= fun more ->
      Buffer.add_string buf window;
      (loop [@tailcall]) more
  in
  take len >>= loop

let bounded_file_reader ~pattern { add; finalize } =
  let len = String.length pattern in
  let buf = Bigstring.create (slice_size + len) in
  let partial = Bytes.create len in
  let pos = ref 0 in
  let table = make_table ~pattern len in
  let flush ~src_len src =
    Bigstring.From_string.unsafe_blit ~src ~src_pos:0 ~dst:buf ~dst_pos:!pos ~len:src_len;
    let len = !pos + src_len in
    pos := len;
    if len > slice_size
    then (
      add buf len;
      pos := 0)
  in
  let rec loop window =
    match same ~pattern table window (len - 1) with
    | Found ->
      if !pos > 0 then add buf !pos;
      pos := 0;
      finalize ();
      commit
    | Shift by ->
      take by >>= fun more ->
      flush ~src_len:by window;
      commit >>= fun () ->
      let diff = len - by in
      Bytes.From_string.unsafe_blit ~src:window ~src_pos:by ~dst:partial ~dst_pos:0 ~len:diff;
      Bytes.From_string.unsafe_blit ~src:more ~src_pos:0 ~dst:partial ~dst_pos:diff ~len:by;
      (loop [@tailcall]) (Bytes.unsafe_to_string ~no_mutation_while_string_reachable:partial)
    | Restart ->
      take len >>= fun more ->
      flush ~src_len:len window;
      commit >>= fun () -> (loop [@tailcall]) more
  in
  take len >>= loop
OCaml

Innovation. Community. Security.