package earley
Parsing library based on Earley Algorithm
Install
Dune Dependency
Authors
Maintainers
Sources
3.0.0.tar.gz
md5=6b666c0392dc5b153f81c27d6ef49b12
sha512=a81d2bcf05088a3aaa5c3c0fb3a38306061a624ddf6d8bbefee1b4a17d7a5961ad1b12c0af9bd8dce86aa14b6f05f1956b3f7b5731f3c552bec7f4550182c398
doc/src/earley.core/regexp.ml.html
Source file regexp.ml
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297
(* ====================================================================== Copyright Christophe Raffalli & Rodolphe Lepigre LAMA, UMR 5127 CNRS, Université Savoie Mont Blanc christophe.raffalli@univ-savoie.fr rodolphe.lepigre@univ-savoie.fr This software contains a parser combinator library for the OCaml lang- uage. It is intended to be used in conjunction with pa_ocaml (an OCaml parser and syntax extention mechanism) to provide a fully-integrated way of building parsers using an extention of OCaml's syntax. This software is governed by the CeCILL-B license under French law and abiding by the rules of distribution of free software. You can use, modify and/or redistribute the software under the terms of the CeCILL- B license as circulated by CEA, CNRS and INRIA at the following URL. http://www.cecill.info As a counterpart to the access to the source code and rights to copy, modify and redistribute granted by the license, users are provided only with a limited warranty and the software's author, the holder of the economic rights, and the successive licensors have only limited liability. In this respect, the user's attention is drawn to the risks associated with loading, using, modifying and/or developing or reproducing the software by the user in light of its specific status of free software, that may mean that it is complicated to manipulate, and that also therefore means that it is reserved for developers and experienced professionals having in-depth computer knowledge. Users are therefore encouraged to load and test the software's suitability as regards their requirements in conditions enabling the security of their sys- tems and/or data to be ensured and, more generally, to use and operate it in the same conditions as regards security. The fact that you are presently reading this means that you have had knowledge of the CeCILL-B license and that you accept its terms. 
====================================================================== *)

(* Abstract syntax of regular expressions. *)
type regexp =
  | Chr of char                (* Exactly the given character.            *)
  | Set of Charset.t           (* One character taken from the charset.   *)
  | Seq of regexp list         (* The regexps one after the other.        *)
  | Alt of regexp list         (* Choice between the listed regexps.      *)
  | Opt of regexp              (* The regexp, or nothing.                 *)
  | Str of regexp              (* The regexp repeated zero or more times. *)
  | Pls of regexp              (* The regexp repeated one or more times.  *)
  | Sav of regexp * string ref (* The regexp, with a reference attached
                                  for storing what was read.              *)

(* [print_regexp ch re] writes a constructor-style rendering of [re] on the
   output channel [ch]. List elements are separated by semicolons and the
   reference carried by a [Sav] node is shown as <ref>. *)
let print_regexp ch re =
  let rec show oc re =
    match re with
    | Chr c       -> Printf.fprintf oc "Chr(%C)" c
    | Set s       -> Printf.fprintf oc "Set(%a)" Charset.print s
    | Seq res     -> Printf.fprintf oc "Seq([%a])" show_all res
    | Alt res     -> Printf.fprintf oc "Alt([%a])" show_all res
    | Opt r       -> Printf.fprintf oc "Opt(%a)" show r
    | Str r       -> Printf.fprintf oc "Str(%a)" show r
    | Pls r       -> Printf.fprintf oc "Pls(%a)" show r
    | Sav (r, _)  -> Printf.fprintf oc "Sav(%a,<ref>)" show r
  and show_all oc res =
    match res with
    | []            -> ()
    | [last]        -> show oc last
    | first :: rest ->
        show oc first;
        output_char oc ';';
        show_all oc rest
  in
  show ch re

(* [accept_empty re] tells whether [re] can succeed without reading any
   character. *)
let rec accept_empty re =
  match re with
  | Chr _ | Set _      -> false
  | Opt _ | Str _      -> true
  | Pls r | Sav (r, _) -> accept_empty r
  | Seq res            -> List.for_all accept_empty res
  | Alt res            -> List.exists accept_empty res

(* [accepted_first_chars re] is the set of characters with which a word
   read by [re] may start. In a sequence, the following elements only
   contribute while the elements before them accept the empty word. *)
let accepted_first_chars : regexp -> Charset.t =
  let rec first re =
    match re with
    | Chr c         -> Charset.singleton c
    | Set s         -> s
    | Seq []        -> Charset.empty
    | Seq (r :: rs) ->
        if accept_empty r then Charset.union (first r) (first (Seq rs))
        else first r
    | Alt res       ->
        List.fold_left (fun acc r -> Charset.union acc (first r))
          Charset.empty res
    | Opt r | Str r | Pls r | Sav (r, _) -> first r
  in
  first

(* State of a regexp under construction: [Acc l] is a plain list of items
   collected so far; [Par (alts, rest)] records that the next item pushed
   must be joined with [alts] into an [Alt] placed in front of [rest]. *)
type construction =
  | Acc of regexp list
  | Par of regexp list * regexp list

(* [push x c] adds the item [x] to the construction [c]. *)
let push x c =
  match c with
  | Par (alts, rest) -> Acc (Alt (x :: alts) :: rest)
  | Acc items        -> Acc (x :: items)

(* [pop c] extracts the items of [c]. Raises [Invalid_argument] when [c] is
   still waiting for the right-hand side of an alternative. *)
let pop c =
  match c with
  | Par _     -> invalid_arg "Regexp: final bar."
  | Acc items -> items
(* [regexp_from_string s] parses the textual regexp [s] and returns the
   corresponding [regexp] together with an array holding one string
   reference per group, in the order in which the groups are closed.

   Recognised syntax: a dot (any character), the postfix modifiers star,
   plus and question mark, groups written with backslash-escaped
   parentheses, alternation written as a backslash-escaped bar, bracketed
   character sets (optionally negated with a leading caret, with ranges
   written c1-c2), and backslash escapes for the special characters.
   Inside a set, a closing bracket in first position and a dash in first
   or last position stand for themselves.

   Raises [Invalid_argument] when [s] is not well-formed. *)
let regexp_from_string : string -> regexp * string ref array = fun s ->
  (* Characters of [s], in order. *)
  let cs =
    let rec explode i acc =
      if i < 0 then acc else explode (i - 1) (s.[i] :: acc)
    in
    explode (String.length s - 1) []
  in

  (* Reads the body of a character set up to (and including) the closing
     bracket. Returns the charset and the remaining characters. *)
  let read_range cs =
    let rec read_range acc = function
      | []                    -> invalid_arg "Regexp: open charset."
      | ']'::cs               -> (acc, cs)
      (* A dash right before the closing bracket is a literal dash, not the
         start of a range whose upper bound would be the bracket itself. *)
      | c::'-'::']'::cs       -> (Charset.add (Charset.add acc c) '-', cs)
      | c1::'-'::c2::cs       -> let r = Charset.range c1 c2 in
                                 read_range (Charset.union acc r) cs
      | c::cs                 -> read_range (Charset.add acc c) cs
    in
    read_range Charset.empty cs
  in

  (* Lexer: turns the character list into a list of tokens. *)
  let rec tokens cs =
    let is_spe c = List.mem c ['\\';'.';'*';'+';'?';'[';']'] in
    match cs with
    | '.' ::cs             -> `Set(Charset.full) :: tokens cs
    | '*' ::cs             -> `Str :: tokens cs
    | '+' ::cs             -> `Pls :: tokens cs
    | '?' ::cs             -> `Opt :: tokens cs
    | '\\'::'('::cs        -> `Opn :: tokens cs
    | '\\'::')'::cs        -> `Cls :: tokens cs
    | '\\'::'|'::cs        -> `Alt :: tokens cs
    | '\\'::c  ::cs        -> if is_spe c then `Chr(c) :: tokens cs
                              else invalid_arg "Regexp: invalid escape."
    | '\\'::[]             -> invalid_arg "Regexp: nothing to escape."
    (* A closing bracket in first position belongs to the set. *)
    | '[' ::'^':: ']'::cs  -> let (rng, cs) = read_range cs in
                              let rng = Charset.add rng ']' in
                              `Set(Charset.complement rng) :: tokens cs
    | '[' ::']':: cs       -> let (rng, cs) = read_range cs in
                              `Set(Charset.add rng ']') :: tokens cs
    | '[' ::'^'::cs        -> let (rng, cs) = read_range cs in
                              `Set(Charset.complement rng) :: tokens cs
    | '[' ::cs             -> let (rng, cs) = read_range cs in
                              `Set(rng) :: tokens cs
    | c   ::cs             -> `Chr(c) :: tokens cs
    | []                   -> []
  in
  let ts = tokens cs in

  (* References of the groups, most recently closed first. *)
  let refs = ref [] in

  (* Items are accumulated in reverse order; a single item stands for
     itself, anything else becomes a sequence. *)
  let regexp_of_items items =
    match List.rev items with
    | [re] -> re
    | l    -> Seq(l)
  in

  (* Parser. [stk] holds the constructions of the enclosing groups, [acc]
     is the construction of the current group. The enclosing construction
     is stacked as is (not popped), so that a group may directly follow an
     alternation bar: the closed group is then [push]ed like any other
     item. *)
  let rec build_re stk acc ts =
    match (stk, acc, ts) with
    | (_  , _  , `Chr(c)::ts) -> build_re stk (push (Chr c) acc) ts
    | (_  , _  , `Set(s)::ts) -> build_re stk (push (Set s) acc) ts

    (* After an alternative, a modifier applies to its last branch only. *)
    | (_  , Acc(Alt (re::l)::acc), `Str   ::ts) ->
        build_re stk (Acc(Alt(Str re::l) :: acc)) ts
    | (_  , Acc(Alt (re::l)::acc), `Pls   ::ts) ->
        build_re stk (Acc(Alt(Pls re::l) :: acc)) ts
    | (_  , Acc(Alt (re::l)::acc), `Opt   ::ts) ->
        build_re stk (Acc(Alt(Opt re::l) :: acc)) ts

    | (_  , Acc(re::acc), `Str   ::ts) -> build_re stk (Acc(Str re :: acc)) ts
    | (_  , Acc(re::acc), `Pls   ::ts) -> build_re stk (Acc(Pls re :: acc)) ts
    | (_  , Acc(re::acc), `Opt   ::ts) -> build_re stk (Acc(Opt re :: acc)) ts

    | (_  , _  , `Str   ::_ )
    | (_  , _  , `Pls   ::_ )
    | (_  , _  , `Opt   ::_ ) -> invalid_arg "Regexp: modifier error."

    | (_  , _  , `Opn   ::ts) -> build_re (acc::stk) (Acc []) ts
    | ([] , _  , `Cls   ::_ ) -> invalid_arg "Regexp: group not opened."
    | (outer::stk, _, `Cls ::ts) ->
        let re = regexp_of_items (pop acc) in
        let r = ref "" in
        refs := r :: !refs;
        build_re stk (push (Sav(re,r)) outer) ts

    | (_  , Acc(re::acc), `Alt   ::ts) -> build_re stk (Par([re],acc)) ts
    | (_  , Acc []      , `Alt   ::_ ) -> invalid_arg "Regexp: initial bar."
    | (_  , Par _       , `Alt   ::_ ) -> invalid_arg "Regexp: consecutive bar."

    | ([] , _  , []         ) -> regexp_of_items (pop acc)
    | (_  , _  , []         ) -> invalid_arg "Regexp: group error."
  in
  let re = build_re [] (Acc []) ts in
  (re, Array.of_list (List.rev !refs))

(* Exception raised by [regexp_error]; it carries the buffer and position
   that were handed to it. *)
exception Regexp_error of Input.buffer * int

(* [regexp_error buf pos] raises [Regexp_error(buf, pos)]; it never
   returns, hence the fully polymorphic result type. *)
let regexp_error : type a. Input.buffer -> int -> a = fun buf pos ->
  raise (Regexp_error(buf, pos))

(* [string_of_char_list cs] is the string made of the characters of [cs],
   in order. *)
let string_of_char_list : char list -> string = fun cs ->
  let b = Buffer.create 10 in
  List.iter (Buffer.add_char b) cs;
  Buffer.contents b

(* Input characters according to the given regexp.
*) let read_regexp : regexp -> Input.buffer -> int -> Input.buffer * int = fun re buf pos -> let rec sread_regexp re buf pos cs = match re with | Chr(ch) -> let (c, buf, pos) = Input.read buf pos in if c = ch then (c::cs, buf, pos) else regexp_error buf pos | Set(chs) -> let (c, buf, pos) = Input.read buf pos in if Charset.mem chs c then (c::cs, buf, pos) else regexp_error buf pos | Seq(r::rs) -> let (cs, buf, pos) = sread_regexp r buf pos cs in sread_regexp (Seq(rs)) buf pos cs | Seq([]) -> (cs, buf, pos) | Alt(r::rs) -> begin try sread_regexp r buf pos cs with Regexp_error(_,_) -> sread_regexp (Alt(rs)) buf pos cs end | Alt([]) -> regexp_error buf pos | Opt(r) -> begin try sread_regexp r buf pos cs with Regexp_error(_,_) -> (cs, buf, pos) end | Str(r) -> begin try let (cs, buf, pos) = sread_regexp r buf pos cs in sread_regexp re buf pos cs with Regexp_error(_,_) -> (cs, buf, pos) end | Pls(r) -> let (cs, buf, pos) = sread_regexp r buf pos cs in sread_regexp (Str(r)) buf pos cs | Sav(r,ptr) -> let cs0 = cs in let rec fn acc = function | cs when cs == cs0 -> string_of_char_list acc | c::cs -> fn (c::acc) cs | [] -> assert false in let (cs, _, _ as res) = sread_regexp r buf pos cs in ptr := fn [] cs; res in let rec read_regexp re buf pos = match re with | Chr(ch) -> let (c, buf, pos) = Input.read buf pos in if c = ch then (buf, pos) else regexp_error buf pos | Set(chs) -> let (c, buf, pos) = Input.read buf pos in if Charset.mem chs c then (buf, pos) else regexp_error buf pos | Seq(r::rs) -> let (buf, pos) = read_regexp r buf pos in read_regexp (Seq(rs)) buf pos | Seq([]) -> (buf, pos) | Alt(r::rs) -> begin try read_regexp r buf pos with Regexp_error(_,_) -> read_regexp (Alt(rs)) buf pos end | Alt([]) -> regexp_error buf pos | Opt(r) -> begin try read_regexp r buf pos with Regexp_error(_,_) -> (buf, pos) end | Str(r) -> begin try let (buf, pos) = read_regexp r buf pos in read_regexp re buf pos with Regexp_error(_,_) -> (buf, pos) end | Pls(r) -> let (buf, pos) = 
read_regexp r buf pos in read_regexp (Str(r)) buf pos | Sav(r,ptr) -> let (cs, buf, pos) = sread_regexp r buf pos [] in ptr := string_of_char_list (List.rev cs); (buf, pos) in read_regexp re buf pos
sectionYPositions = computeSectionYPositions($el), 10)"
x-init="setTimeout(() => sectionYPositions = computeSectionYPositions($el), 10)"
>