package comby-kernel

  1. Overview
  2. Docs
Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source

Source file regexp.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
open Vangstrom

(* Debug tracing is on whenever the DEBUG_COMBY environment variable is set,
   whatever its value. *)
let debug =
  match Sys.getenv_opt "DEBUG_COMBY" with
  | Some _ -> true
  | None -> false

(** Minimal interface a regular-expression backend must provide to be used
    with [Make]. Two implementations appear in this file: [PCRE.Engine] and
    [RE.Engine]. *)
module type Regexp_engine_intf = sig
  (** Engine-specific regex value: produced by [make], consumed by [exec]. *)
  type t
  (** Engine-specific result of a successful [exec]; queried with
      [get_substring] and [get_all_substrings]. *)
  type substrings

  (** [make pattern] builds a regex from the pattern string. Both engines in
      this file compile the pattern with an anchored flag. *)
  val make: string -> t

  (** [get_substring substrings idx] is the substring at index [idx], or
      [None] if the engine cannot provide it. [Make.regexp] uses index 0 as
      the text of the match. *)
  val get_substring: substrings -> int -> string option

  (** [get_all_substrings substrings] is the substrings of a match result as
      an array. *)
  val get_all_substrings: substrings -> string array

  (** [exec ~rex ~pos bytes] runs [rex] against [bytes] starting at offset
      [pos]. The engines in this file return [None] when the underlying
      library signals no match by raising [Not_found]. *)
  val exec: rex:t -> pos:int -> Bytes.t -> substrings option
end

(* A byte buffer together with a position. [Make.match_regexp] subtracts
   [buffer_pos] from the position it is given to obtain the offset at which
   matching starts inside [buffer]. *)
type t =
  { buffer_pos : int (* subtracted from a caller position to index [buffer] *)
  ; buffer : bytes (* the bytes the regex is executed against *)
  }

(* Idea: implement the regex analog of [string_] (cf. angstrom.ml), using some bounded buffer size. *)

(** [Make (Regexp)] builds regex helpers on top of a concrete engine: a
    matcher over a positioned byte buffer, a pattern constructor, and a parser
    (written with the combinators opened at the top of this file) that
    consumes the text the regex matches. *)
module Make (Regexp: Regexp_engine_intf) = struct
  (* https://sourcegraph.com/github.com/comby-tools/mparser/-/blob/src/mParser_Char_Stream.ml#L231:8 *)
  (** [match_regexp s pos rex] runs [rex] over [s.buffer], starting at [pos]
      shifted down by [s.buffer_pos]. *)
  let match_regexp s pos rex =
    let relative_pos = pos - s.buffer_pos in
    Regexp.exec ~rex ~pos:relative_pos s.buffer

  (** [make_regexp pat] builds an engine regex from the pattern string. *)
  let make_regexp = Regexp.make

  (* TODO: tests, and revisit the blit below. *)

  (* FIXME: size. Ideally this would go through the internal unsafe_apply_opt
     (cf. string_ in angstrom.ml); for now the approach is "peek first, then
     advance/commit". *)
  (** [regexp rex] is a parser that runs [rex] on the input ahead of the
      current position, advances past the text of substring 0 and returns it.
      It fails with ["No match"] when the engine reports no match or
      substring 0 is unavailable. *)
  let regexp rex =
    (* Distance from the current offset to the end of the bigstring handed to
       the peek callback; used as the size of the second peek. *)
    let bytes_ahead buffer ~off ~len:_ = Bigstringaf.length buffer - off in
    (* Unsafe.peek is used instead of peek_string to avoid copying the buffer
       into a string, but converting the bigstring to bytes still allocates
       and copies. *)
    let attempt buffer ~off ~len =
      let window = Bytes.create len in
      Bigstringaf.unsafe_blit_to_bytes buffer ~src_off:off window ~dst_off:0 ~len;
      if debug then
        Format.printf "Matching regex against string: %S@." (Bytes.to_string window);
      match Regexp.exec ~rex ~pos:0 window with
      | None ->
        if debug then Format.printf "None (1)@.";
        None
      | Some groups ->
        (match Regexp.get_substring groups 0 with
         | None ->
           if debug then Format.printf "None (2)@.";
           None
         | Some matched ->
           if debug then Format.printf "Matchy Matchy (3)@.";
           Some (matched, String.length matched))
    in
    (* Consume exactly the matched width. An empty match (e.g. an optional
       like x?) deliberately advances by 0: forcing an advance of 1 so that
       parsing moves on would make the match context look at least 1 long
       when this hole is the only thing defining it. *)
    let commit = function
      | None ->
        fail "No match"
      | Some (matched, width) ->
        if debug then Format.printf "Result indeed: %S len %d@." matched width;
        advance width >>= fun () ->
        return matched
    in
    (* NOTE(review): the first peek asks for 1 byte, so this presumably fails
       at end of input even for patterns that can match the empty string;
       confirm against the parser library. *)
    Unsafe.peek 1 bytes_ahead >>= fun available ->
    Unsafe.peek available attempt >>= commit
end

(** Regex helpers from [Make], backed by the [Pcre] library. *)
module PCRE = struct
  (** Adapter exposing [Pcre] through [Regexp_engine_intf]; the signature
      constraint keeps [t] and [substrings] abstract. *)
  module Engine : Regexp_engine_intf = struct
    type t = Pcre.regexp
    type substrings = Pcre.substrings

    (* Every pattern is compiled with the `ANCHORED flag. *)
    let anchored = Pcre.cflags [ `ANCHORED ]

    let make pattern = Pcre.regexp ~iflags:anchored pattern

    (* [None] when Pcre raises [Not_found] or [Invalid_argument] for [idx]. *)
    let get_substring groups idx =
      try Some (Pcre.get_substring groups idx) with
      | Not_found | Invalid_argument _ -> None

    let get_all_substrings groups = Pcre.get_substrings groups

    (* The bytes are viewed as a string without copying. [Not_found] from
       Pcre means "no match" and becomes [None]. *)
    let exec ~rex ~pos b =
      let subject = Bytes.unsafe_to_string b in
      try Some (Pcre.exec ~pos ~rex subject) with
      | Not_found -> None
  end

  include Make(Engine)
end

(** Regex helpers from [Make], backed by the [Re] library, with patterns
    parsed by [Re.Perl]. *)
module RE = struct
  (** Adapter exposing [Re] through [Regexp_engine_intf]; the signature
      constraint keeps [t] and [substrings] abstract. *)
  module Engine : Regexp_engine_intf = struct
    type t = Re.re
    type substrings = Re.substrings

    (* Every pattern is parsed with the `Anchored option. *)
    let anchored = [ `Anchored ]

    let make pattern =
      let parsed = Re.Perl.re ~opts:anchored pattern in
      Re.Perl.compile parsed

    (* [None] when Re raises [Not_found] for [idx]. *)
    let get_substring groups idx =
      try Some (Re.get groups idx) with
      | Not_found -> None

    let get_all_substrings groups = Re.get_all groups

    (* The bytes are viewed as a string without copying. [Not_found] from Re
       means "no match" and becomes [None]. *)
    let exec ~rex ~pos b =
      let subject = Bytes.unsafe_to_string b in
      try Some (Re.exec ~pos rex subject) with
      | Not_found -> None
  end

  include Make(Engine)
end
OCaml

Innovation. Community. Security.