package earley

  1. Overview
  2. Docs
Parsing library based on Earley Algorithm

Install

Dune Dependency

Authors

Maintainers

Sources

3.0.0.tar.gz
md5=6b666c0392dc5b153f81c27d6ef49b12
sha512=a81d2bcf05088a3aaa5c3c0fb3a38306061a624ddf6d8bbefee1b4a17d7a5961ad1b12c0af9bd8dce86aa14b6f05f1956b3f7b5731f3c552bec7f4550182c398

doc/src/earley.core/earley.ml.html

Source file earley.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
(*
  ======================================================================
  Copyright Christophe Raffalli & Rodolphe Lepigre
  LAMA, UMR 5127 CNRS, Université Savoie Mont Blanc

  christophe.raffalli@univ-savoie.fr
  rodolphe.lepigre@univ-savoie.fr

  This software contains a parser combinator library for the OCaml lang-
  uage. It is intended to be used in conjunction with pa_ocaml (an OCaml
  parser and syntax extention mechanism) to provide  a  fully-integrated
  way of building parsers using an extention of OCaml's syntax.

  This software is governed by the CeCILL-B license under French law and
  abiding by the rules of distribution of free software.  You  can  use,
  modify and/or redistribute the software under the terms of the CeCILL-
  B license as circulated by CEA, CNRS and INRIA at the following URL.

      http://www.cecill.info

  As a counterpart to the access to the source code and  rights to copy,
  modify and redistribute granted by the  license,  users  are  provided
  only with a limited warranty  and the software's author, the holder of
  the economic rights, and the successive licensors  have  only  limited
  liability.

  In this respect, the user's attention is drawn to the risks associated
  with loading, using, modifying and/or developing  or  reproducing  the
  software by the user in light of its specific status of free software,
  that may mean that it is complicated  to  manipulate,  and  that  also
  therefore means that it is reserved  for  developers  and  experienced
  professionals having in-depth computer knowledge. Users are  therefore
  encouraged to load and test  the  software's  suitability  as  regards
  their requirements in conditions enabling the security of  their  sys-
  tems and/or data to be ensured and, more generally, to use and operate
  it in the same conditions as regards security.

  The fact that you are presently reading this means that you  have  had
  knowledge of the CeCILL-B license and that you accept its terms.
  ======================================================================
*)

open Utils
open Internals

(** a few values imported from Internals *)

type blank = Internals.blank
type 'a grammar = 'a Internals.grammar
type 'a fpos = 'a Internals.fpos
exception Parse_error = Internals.Parse_error
let warn_merge = Internals.warn_merge
let debug_lvl = Internals.debug_lvl
let keep_all_names = Internals.keep_all_names

(** The user visible function to reject a parsing rule from its action *)
let give_up () = raise Error

(** Three predefined blank functions *)

let no_blank : Input.buffer -> int -> Input.buffer * int =
  fun str pos -> (str, pos)

let blank_regexp : string -> blank =
  fun str ->
    let (re, _) = Regexp.regexp_from_string str in
    Regexp.read_regexp re

(** blank grammar take another blank function. This is usefull
    to define blank as list of comments separated by blanks *)
let blank_grammar : unit grammar -> blank -> blank
  = fun grammar blank buf pos ->
      let save_debug = !debug_lvl in
      debug_lvl := !debug_lvl / 10;
      let (_,buf,pos) =
        internal_parse_buffer ~blank_after:true blank grammar buf pos
      in
      debug_lvl := save_debug;
      (buf,pos)

(** Smart constructors for rules *)
let nonterm name info rules =
  NonTerm{info;rules;name;memo=Container.Ref.create ()}

let next_aux s r = mkrule (Next(compose_info s r, s, Arg r))
let next_pos_aux s r = mkrule (Next(compose_info s r, s, Pos r))
let next_ign_aux s r = mkrule (Next(compose_info s r, s, Ign r))

let next : type a c. a grammar -> (a -> c) rule -> c rule =
  fun ((i,rs) as g) r ->
    match rs with
    | [{rule = Next(_,s0, Arg {rule = Empty Idt; _}); _}] ->  next_aux s0 r
    | _                                                   ->
        next_aux (nonterm (grammar_name ~delim:true g) i rs) r

let next_pos : type a c. a grammar -> (a -> c) fpos rule -> c rule =
  fun (i,rs as g) r ->
    match rs with
    | [{rule = Next(_,s0, Arg {rule = Empty Idt; _}); _}] -> next_pos_aux s0 r
    | _                                                   ->
        next_pos_aux (nonterm (grammar_name ~delim:true g) i rs) r

let next_ign : type a c. a grammar -> c rule -> c rule = fun (i,rs as g) r ->
  match rs with
  | [{rule = Next(_,s0, Arg {rule = Empty Idt; _}); _}] -> next_ign_aux s0 r
  | _                                                   ->
      next_ign_aux (nonterm (grammar_name ~delim:true g) i rs) r

let emp f = mkrule (Empty f)
let ems f = emp (Simple f)

let mkterm name info input =
  Term{input;info;memo=Container.Ref.create ();name}

let mkter2 name info input =
  Ter2{input;info;memo=Container.Ref.create ();name}

let mktest name info input =
  Test{input;info;memo=Container.Ref.create ();name}

let mkgrammar s = (grammar_info s, s)

(** Helper to build a terminal symbol *)
let solo : string -> ?accept_empty:bool -> Charset.t -> 'a input
    -> 'a grammar = fun name ?(accept_empty=false) set s ->
  let j = Fixpoint.from_val (accept_empty,set) in
  (j, [mkrule (Next(j,mkterm name j s, Arg(idtEmpty ())))])

(** Function used to call a grammar as a terminal. Its input
    takes more arguments, in particular to record error position *)
let solo2 =
  fun name i s ->
    let s = fun errpos blank b p b' p' ->
      s errpos blank b p b' p'
    in
    (i, [mkrule (Next(i,mkter2 name i s, Arg (idtEmpty ())))])

(** Combinator for test at current position *)
let test : ?name:string -> Charset.t
           -> (Input.buffer -> int -> 'a * bool) -> 'a grammar =
  fun ?(name="") set f ->
    let j = Fixpoint.from_val (true,set) in
    (j, [mkrule (Next(j,mktest name j (fun _ _ -> f), Arg (idtEmpty ())))])

(** Combinator for test blank before the current position *)
let blank_test : ?name:string -> Charset.t -> 'a test -> 'a grammar =
  fun ?(name="") set f ->
  let j = Fixpoint.from_val (true,set) in
  (j, [mkrule (Next(j,mktest name j f, Arg(idtEmpty ())))])

(** A test that always pass *)
let success a = test ~name:"SUCCESS" Charset.full (fun _ _ -> (a, true))

(** A test that blank exists before the current position *)
let with_blank_test a = blank_test ~name:"BLANK" Charset.full
  (fun buf' pos' buf pos -> (a, not (Input.buffer_equal buf' buf) || pos' <> pos))

(** A test that blank do not exists before the current position *)
let no_blank_test a = blank_test ~name:"NOBLANK" Charset.full
  (fun buf' pos' buf pos -> (a, Input.buffer_equal buf' buf && pos' = pos))

(** Used for unset recursive grammars *)
let unset : string -> 'a grammar = fun msg ->
  let fn _ _ = failwith msg in
  solo msg Charset.empty fn (* make sure we have the message *)

(** Alternatives between many grammars *)
let alternatives : 'a grammar list -> 'a grammar = fun g ->
  mkgrammar (List.flatten (List.map snd g))

(** Declare a recusive grammar *)
let declare_grammar name =
  let g = snd (unset (name ^ " not set")) in
  let nt = nonterm name (Fixpoint.from_val (false, Charset.empty)) g in
  let j =
    Fixpoint.from_ref nt (
      function
      | NonTerm{rules; _} -> grammar_info rules
      | _                 -> assert false)
  in
  begin
    match nt with
    | NonTerm r -> r.info <- j
    | _ -> assert false
  end;
  mkgrammar [mkrule (Next(j,nt, Arg(idtEmpty ())))]

(** Set the value of a recursive grammar *)
let set_grammar : type a.a grammar -> a grammar -> unit = fun p1 (_,rules2) ->
      match snd p1 with
      | [{rule=Next(_,NonTerm({info; _} as r),Arg {rule=Empty Idt; _}); _}] ->
         r.rules <- rules2; Fixpoint.update info;
      (*Printf.eprintf "setting %s %b %a\n%!" name ae Charset.print set;*)
      | _ -> invalid_arg "set_grammar"

let grammar_family ?(param_to_string=(fun _ -> "<...>")) name =
  let tbl = EqHashtbl.create 8 in
  let is_set = ref None in
  (fun p ->
    try EqHashtbl.find tbl p
    with Not_found ->
      let g = declare_grammar (name^"_"^param_to_string p) in
      EqHashtbl.add tbl p g;
      (match !is_set with None -> ()
      | Some f ->
         set_grammar g (f p);
      );
      g),
  (fun f ->
    is_set := Some f;
    EqHashtbl.iter (fun p r ->
      set_grammar r (f p);
    ) tbl)

let grammar_prio ?(param_to_string=(fun _ -> "<...>")) name =
  let tbl = EqHashtbl.create 8 in
  let is_set = ref None in
  (fun p ->
    try EqHashtbl.find tbl p
    with Not_found ->
      let g = declare_grammar (name^"_"^param_to_string p) in
      EqHashtbl.add tbl p g;
      (match !is_set with None -> ()
      | Some f ->
         set_grammar g (f p);
      );
      g),
  (fun (gs,gp) ->
    let f = fun p ->
      alternatives (List.map snd (List.filter (fun (f,_) -> f p) gs) @ (gp p))
    in
    is_set := Some f;
    EqHashtbl.iter (fun p r ->
      set_grammar r (f p);
    ) tbl)

let grammar_prio_family ?(param_to_string=(fun _ -> "<...>")) name =
  let tbl = EqHashtbl.create 8 in
  let tbl2 = EqHashtbl.create 8 in
  let is_set = ref None in
  (fun args p ->
    try EqHashtbl.find tbl (args,p)
    with Not_found ->
      let g = declare_grammar (name^"_"^param_to_string (args,p)) in
      EqHashtbl.add tbl (args, p) g;
      (match !is_set with None -> ()
      | Some f ->
         set_grammar g (f args p);
      );
      g),
  (fun f ->
    let f = fun args ->
      (* NOTE: to make sure the tbl2 is filled soon enough *)
      let (gs, gp) = f args in
      try
        EqHashtbl.find tbl2 args
      with Not_found ->
        let g = fun p ->
            alternatives (List.map snd (List.filter (fun (f,_) -> f p) gs) @ gp p)
        in
        EqHashtbl.add tbl2 args g;
        g
    in
    is_set := Some f;
    EqHashtbl.iter (fun (args,p) r ->
      set_grammar r (f args p);
    ) tbl)

(** Parse the end of file *)
let eof : 'a -> 'a grammar
  = fun a ->
    let fn buf pos =
      if Input.is_empty buf pos then (a,buf,pos) else raise Error
    in
    solo "EOF" (Charset.singleton '\255') fn

(** Give a name to a grammar *)
let give_name name (i,_ as g) =
  (i, [grammar_to_rule ~name g])

(** Change the action of the grammar by applying a function *)
let apply : type a b. (a -> b) -> a grammar -> b grammar =
  fun f g -> mkgrammar [next g (emp (Simple f))]

(** Idem, with positions *)
let apply_position : type a b. (a -> b) fpos
                          -> a grammar -> b grammar =
  fun f g ->
    mkgrammar [next g (emp (WithPos f))]

(** Build a tuple with positions *)
let position g =
  apply_position (fun buf pos buf' pos' a ->
    (Input.filename buf, Input.line_num buf, pos, Input.line_num buf', pos', a)) g


(** An always failing grammar *)
let fail : unit -> 'a grammar = fun () ->
  let fn _ _= raise Error in
  solo "FAIL" Charset.empty fn

(** Accept only one char *)
let char : ?name:string -> char -> 'a -> 'a grammar
  = fun ?name c a ->
    let msg = Printf.sprintf "%C" c in
    let name = match name with None -> msg | Some n -> n in
    let fn buf pos =
      let c', buf', pos' = Input.read buf pos in
      if c = c' then (a,buf',pos') else give_up ()
    in
    solo name (Charset.singleton c) fn

(** Accept any char in a given char set *)
let in_charset : ?name:string -> Charset.t -> char grammar
  = fun ?name cs ->
    let msg = Printf.sprintf "[%s]" (Charset.show cs) in
    let name = match name with None -> msg | Some n -> n in
    let fn buf pos =
      let c, buf', pos' = Input.read buf pos in
      if Charset.mem cs c then (c,buf',pos') else give_up ()
    in
    solo name cs fn

(** Test that the current char is not in a given char set (do not parse it) *)
let not_in_charset : ?name:string -> Charset.t -> unit grammar
  = fun ?name cs ->
    let msg = Printf.sprintf "^[%s]" (Charset.show cs) in
    let name = match name with None -> msg | Some n -> n in
    let fn buf pos =
      let c = Input.get buf pos in
      if Charset.mem cs c then ((), false) else ((), true)
    in
    test ~name (Charset.complement cs) fn

(** Test the charactere at the beginning of the blank.
    TODO: should not it test all blank char ? *)
let blank_not_in_charset : ?name:string -> Charset.t -> unit grammar
  = fun ?name cs ->
    let msg = Printf.sprintf "^[%s]" (Charset.show cs) in
    let name = match name with None -> msg | Some n -> n in
    let fn buf pos _ _ =
      let c = Input.get buf pos in
      if Charset.mem cs c then ((), false) else ((), true)
    in
    blank_test ~name (Charset.complement cs) fn

(** Accept exactly one char *)
let any : char grammar
  = let fn buf pos =
      let c, buf', pos' = Input.read buf pos in
      if c = '\255' then give_up ();
      (c,buf',pos')
    in
    solo "ANY" Charset.(del full '\255') fn

(** Print a debugging message, with the position *)
let debug msg : unit grammar
    = let fn buf pos =
        log "%s file:%s line:%d col:%d\n%!"
            msg (Input.filename buf) (Input.line_num buf) pos;
        ((), true)
      in
      test ~name:msg Charset.empty fn

(** Accept a string *)
let string : ?name:string -> string -> 'a -> 'a grammar
  = fun ?name s a ->
    let name = match name with None -> s | Some n -> n in
    let fn buf pos =
      let buf = ref buf in
      let pos = ref pos in
      let len_s = String.length s in
      for i = 0 to len_s - 1 do
        let c, buf', pos' = Input.read !buf !pos in
        if c <> s.[i] then give_up ();
        buf := buf'; pos := pos'
      done;
      (a,!buf,!pos)
    in
    solo name ~accept_empty:(s="") (Charset.singleton s.[0]) fn

(** Accept a keyword: the charter after the parsed string should
    return false for the given function *)
let keyword : ?name:string -> string -> (char -> bool) -> 'a -> 'a grammar
  = fun ?name s test a ->
    let name = match name with None -> s | Some n -> n in
    let fn buf pos =
      let buf = ref buf in
      let pos = ref pos in
      let len_s = String.length s in
      for i = 0 to len_s - 1 do
        let c, buf', pos' = Input.read !buf !pos in
        if c <> s.[i] then give_up ();
        buf := buf'; pos := pos'
      done;
      let c, _, _ = Input.read !buf !pos in
      if test c then give_up ();
      (a,!buf,!pos)
    in
    solo name ~accept_empty:(s="") (Charset.singleton s.[0]) fn

(** option combinator *)
let option : 'a -> 'a grammar -> 'a grammar
  = fun a (_,l) -> mkgrammar (mkrule (Empty (Simple a))::l)

(** Regexp (use our own regexp, look at [Earley_str] for Str regexp support *)
let regexp : ?name:string -> string -> string array grammar =
  fun ?name str ->
    let name = match name with None -> String.escaped str | Some n -> n in
    let (re, grps) = Regexp.regexp_from_string str in
    let fn buf pos =
      let (buf, pos) =
        try Regexp.read_regexp re buf pos
        with Regexp.Regexp_error(_,_) -> give_up ()
      in
      (Array.map (!) grps, buf, pos)
    in
    let accept_empty = Regexp.accept_empty re in
    let charset = Regexp.accepted_first_chars re in
    solo name ~accept_empty charset fn

(** Allow to write any terminal, by supplying a function *)
let black_box : (Input.buffer -> int -> 'a * Input.buffer * int) -> Charset.t -> bool
                  -> string -> 'a grammar
  = fun fn set accept_empty name -> solo name ~accept_empty set fn

(** Parse the empty string *)
let empty : 'a -> 'a grammar = fun a -> (iempty,[ems a])

let empty_pos : 'a fpos -> 'a grammar
  = fun f -> (iempty,[emp (WithPos f)])

(** Various wy to make sequence of parsing *)
let sequence : 'a grammar -> 'b grammar -> ('a -> 'b -> 'c) -> 'c grammar
  = fun l1 l2 f ->
    mkgrammar [next l1 (next l2 (ems (fun b a -> f a b)))]

let sequence_position : 'a grammar -> 'b grammar
                        -> ('a -> 'b -> 'c) fpos -> 'c grammar
  = fun l1 l2 f ->
  mkgrammar [next l1 (next l2
    (emp (WithPos (fun b p b' p' a a' -> f b p b' p' a' a))))]

let sequence3 : 'a grammar -> 'b grammar -> 'c grammar
                -> ('a -> 'b -> 'c -> 'd) -> 'd grammar
  = fun l1 l2 l3 f ->
    sequence l1 (sequence l2 l3 (fun x y z -> f z x y)) (fun z f -> f z)

let fsequence : 'a grammar -> ('a -> 'b) grammar -> 'b grammar
  = fun l1 l2 -> mkgrammar [next l1 (grammar_to_rule l2)]

let fsequence_position : 'a grammar -> ('a -> 'b) fpos grammar -> 'b grammar
  = fun l1 l2 -> mkgrammar [next_pos l1 (grammar_to_rule l2)]

let fsequence_ignore : 'a grammar -> 'b grammar -> 'b grammar
  = fun l1 l2 -> mkgrammar [next_ign l1 (grammar_to_rule l2)]

let simple_dependent_sequence
    : 'a grammar -> ('a -> 'b grammar) -> 'b grammar
  = fun l1 f2 ->
      mkgrammar [next l1 (mkrule (Dep (fun a -> grammar_to_rule (f2 a))))]

let dependent_sequence
    : ('a * 'b) grammar -> ('a -> ('b -> 'c) grammar) -> 'c grammar
  = fun f1 f2 ->
        simple_dependent_sequence f1 (fun (a,b) -> apply (fun g -> g b) (f2 a))

(** A nice one !*)
let iter : 'a grammar grammar -> 'a grammar
  = fun g -> simple_dependent_sequence g (fun f -> f)

(** Various fixpoints *)
let fixpoint :  'a -> ('a -> 'a) grammar -> 'a grammar
  = fun a f1 ->
    let name = grammar_delim_name f1 ^ "*" in
    let res = declare_grammar name in
    let _ = set_grammar res
      (mkgrammar [ems a; next res (next f1 (idtEmpty ()))]) in
    res

let fixpoint' :  type a b.a -> b grammar -> (b -> a -> a) -> a grammar
  = fun a f1 f ->
    let name = grammar_delim_name f1 ^ "*" in
    let res = declare_grammar name in
    let _ = set_grammar res
      (mkgrammar [ems a; next res (next f1 (ems f))]) in
    res

let fixpoint1 :  'a -> ('a -> 'a) grammar -> 'a grammar
  = fun a f1 ->
    let name = grammar_delim_name f1 ^ "+" in
    let res = declare_grammar name in
    let _ = set_grammar res
      (mkgrammar [next f1 (ems (fun f -> f a));
       next res (next f1 (idtEmpty ()))]) in
    res

let fixpoint1' :  'a -> 'b grammar -> ('b -> 'a -> 'a) -> 'a grammar
  = fun a f1 f ->
    let name = grammar_delim_name f1 ^ "+" in
    let res = declare_grammar name in
    let _ = set_grammar res
      (mkgrammar [next f1 (ems (fun b -> f b a));
       next res (next f1 (ems f))]) in
    res


(** General lists with seprator *)
let list1 g sep =
  fsequence g
    (apply (fun xs x -> x :: xs [])
       (fixpoint' (fun l -> l)
                  (fsequence_ignore sep g)
                  (fun x f l -> f (x::l))))

let list0 g sep =
  option [] (list1 g sep)

let list2 g sep =
  fsequence g
    (apply (fun xs x -> x :: xs [])
       (fixpoint1' (fun l -> l)
                   (fsequence_ignore sep g)
                   (fun x f l -> f (x::l))))

(** A combinator to change the notion of blank *)
let change_layout : ?old_blank_before:bool -> ?new_blank_after:bool
                      -> 'a grammar -> blank -> 'a grammar
  = fun ?(old_blank_before=true) ?(new_blank_after=true) l1 blank1 ->
    let i = Fixpoint.from_val (false, Charset.full) in
    (* compose with a test with a full charset to pass the final charset test in
       internal_parse_buffer *)
    let l1 = mkgrammar [next l1 (next (success ()) (ems (fun _ a -> a)))] in
    let fn errpos _ buf pos buf' pos' =
      let buf,pos = if old_blank_before then buf', pos' else buf, pos in
      let (a,buf,pos) = internal_parse_buffer ~errpos
        ~blank_after:new_blank_after blank1 l1 buf pos in
      (a,buf,pos)
    in
    let name = grammar_name l1 in
    solo2 name i fn

(** A combinator to parse with no blank at all *)
let no_blank_layout : 'a grammar -> 'a grammar
  = fun l1 ->
    (* compose with a test with a full charset to pass the final charset test in
       internal_parse_buffer *)
    let l1 = mkgrammar [next l1 (next (success ()) (ems (fun _ a -> a)))] in
    let fn errpos _ _ _ buf pos =
      let (a,buf,pos) = internal_parse_buffer ~errpos
        ~blank_after:false no_blank l1 buf pos in
      (a,buf,pos)
    in
    let name = grammar_name l1 in
    solo2 name (fst l1) fn

(** Calls a grammar "greedy": retains only the longuest match *)
let greedy : 'a grammar -> 'a grammar
  = fun l1 ->
    (* compose with a test with a full charset to pass the final charset test in
       internal_parse_buffer *)
    let l1 = mkgrammar [next l1 (next (success ()) (ems (fun _ a -> a)))] in
    (* FIXME: blank are parsed twice. internal_parse_buffer should have one
              more argument *)
    let fn errpos blank buf pos _ _ =
      let (a,buf,pos) = internal_parse_buffer ~errpos blank l1 buf pos in
      (a,buf,pos)
    in
    let name = grammar_delim_name l1 ^ "$" in
    solo2 name (fst l1) fn

(** How to call the parser *)

let partial_parse_buffer
    : type a.a grammar -> blank -> ?blank_after:bool -> Input.buffer -> int
           -> a * Input.buffer * int
   = fun g bl ?(blank_after=false) buf pos ->
       parse_buffer_aux blank_after bl g buf pos

let parse_buffer : 'a grammar -> blank -> Input.buffer -> 'a =
  fun g blank buf ->
    let g = sequence g (eof ()) (fun x _ -> x) in
    let (a, _, _) = partial_parse_buffer g blank buf 0 in
    a

let parse_string ?(filename="") grammar blank str =
  let str = Input.from_string ~filename str in
  parse_buffer grammar blank str

let parse_channel ?(filename="") grammar blank ic  =
  let str = Input.from_channel ~filename ic in
  parse_buffer grammar blank str

let parse_file grammar blank filename  =
  let str = Input.from_file filename in
  parse_buffer grammar blank str

(** A helper to hangle exceptions *)
let fail_no_parse () = exit 1

let handle_exception ?(error=fail_no_parse) f a =
  try f a with Parse_error(buf, pos) ->
    let red fmt = "\027[31m" ^^ fmt ^^ "\027[0m%!" in
    Printf.eprintf (red "Parse error: file %S, line %d, character %d.\n")
      (Input.filename buf) (Input.line_num buf) (Input.utf8_col_num buf pos);
    error ()

(** A module to call a parser with a preprocessor (see Input) *)
module WithPP(PP : Input.Preprocessor) =
  struct
    module InPP = Input.WithPP(PP)

    let parse_string ?(filename="") grammar blank str =
      let str = InPP.from_string ~filename str in
      parse_buffer grammar blank str

    let parse_channel ?(filename="") grammar blank ic  =
      let str = InPP.from_channel ~filename ic in
      parse_buffer grammar blank str

    let parse_file grammar blank filename  =
      let str = InPP.from_file filename in
      parse_buffer grammar blank str
  end

(** Collect info a bout grammars *)
let grammar_info : type a. a grammar -> bool * Charset.t
  = fun g -> (force (fst g))

(** A test on grammar *)
let accept_empty : 'a grammar -> bool
  = fun grammar -> fst (grammar_info grammar)
OCaml

Innovation. Community. Security.