Source file blanks.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
let none : Earley.blank = fun buf pos ->
(buf, pos)
let from_charset : Charset.t -> Earley.blank = fun cs ->
let rec blank_cs buf pos =
let (c, buf', pos') = Input.read buf pos in
if Charset.mem cs c then blank_cs buf' pos' else (buf, pos)
in blank_cs
let default_charset : Charset.t =
List.fold_left Charset.add Charset.empty [' '; '\t'; '\n'; '\r']
let default : Earley.blank = from_charset default_charset
let list_of_string : string -> char list = fun s ->
let l = ref [] in
String.iter (fun c -> l := c :: !l) s;
List.rev !l
let : ?blanks:Charset.t -> string -> Earley.blank =
fun ?(blanks=default_charset) s ->
let c1 =
let blanks = Charset.add blanks c1 in
let rec buf pos =
let (c, buf', pos') = Input.read buf pos in
if Charset.mem blanks c then line_comments1 buf' pos' else (buf, pos)
in line_comments1
in
let c1 c2 =
let rec buf pos =
let (c, buf', pos') = Input.read buf pos in
if Charset.mem blanks c then line_comments2 buf' pos' else
if c = c1 && Input.get buf' pos' = c2 then
let (buf, pos) = Input.normalize buf' (Input.line_length buf') in
line_comments2 buf pos
else
(buf, pos)
in line_comments2
in
let cc ccs =
let rec state buf pos =
let (c, buf', pos') = Input.read buf pos in
match state with
| None when Charset.mem blanks c ->
line_commentsn None buf' pos'
| None when c = cc ->
line_commentsn (Some((buf,pos),ccs)) buf' pos'
| None ->
(buf, pos)
| Some(_,[]) ->
let (buf, pos) = Input.normalize buf' (Input.line_length buf') in
line_commentsn None buf pos
| Some(p,d::cs) when c = d ->
line_commentsn (Some(p,cs)) buf' pos'
| Some(p,_) ->
p
in line_commentsn None
in
match list_of_string s with
| [] -> invalid_arg "empty delimiter"
| c::_ when Charset.mem blanks c -> invalid_arg "invalid delimiter"
| [c1] -> line_comments1 c1
| [c1;c2] -> line_comments2 c1 c2
| c::cs -> line_commentsn c cs
let ocaml_blank : Earley.blank = fun buf pos ->
let p =
raise (Bad_ocaml_comment("unclosed string", p))
in
let p =
raise (Bad_ocaml_comment("unclosed comment", p))
in
let rec fn state stack prev curr =
let (buf, pos) = curr in
let (c, buf', pos') = Input.read buf pos in
let next = (buf', pos') in
match (state, stack, c) with
| (`Ini , [] , ' ' )
| (`Ini , [] , '\t' )
| (`Ini , [] , '\r' )
| (`Ini , [] , '\n' ) -> fn `Ini stack curr next
| (`Ini , _ , '(' ) -> fn (`Opn(curr)) stack curr next
| (`Ini , [] , _ ) -> curr
| (`Opn(p) , [] , '*' ) ->
let (c, buf', pos') = Input.read buf' pos' in
let (c',_,_) = Input.read buf' pos' in
if c = '*' && c' <> '*' then fn `Cls (p::stack) curr (buf',pos')
else fn `Ini (p::stack) curr next
| (`Opn(p) , _ , '*' ) -> fn `Ini (p::stack) curr next
| (`Opn(_) , _::_, '"' ) -> fn (`Str(curr)) stack curr next
| (`Opn(_) , _::_, '{' ) -> fn (`SOp([],curr)) stack curr next
| (`Opn(_) , _::_, '(' ) -> fn (`Opn(curr)) stack curr next
| (`Opn(_) , [] , _ ) -> prev
| (`Opn(_) , _ , _ ) -> fn `Ini stack curr next
| (`Ini , _::_, '"' ) -> fn (`Str(curr)) stack curr next
| (`Str(_) , _::_, '"' ) -> fn `Ini stack curr next
| (`Str(p) , _::_, '\\' ) -> fn (`Esc(p)) stack curr next
| (`Esc(p) , _::_, _ ) -> fn (`Str(p)) stack curr next
| (`Str(p) , _::_, '\255' ) -> unclosed_comment_string p
| (`Str(_) , _::_, _ ) -> fn state stack curr next
| (`Str(_) , [] , _ ) -> assert false
| (`Esc(_) , [] , _ ) -> assert false
| (`Ini , _::_, '{' ) -> fn (`SOp([],curr)) stack curr next
| (`SOp(l,p) , _::_, 'a'..'z')
| (`SOp(l,p) , _::_, '_' ) -> fn (`SOp(c::l,p)) stack curr next
| (`SOp(_,_) , p::_, '\255' ) -> unclosed_comment p
| (`SOp(l,p) , _::_, '|' ) -> fn (`SIn(List.rev l,p)) stack curr next
| (`SOp(_,_) , _::_, _ ) -> fn `Ini stack curr next
| (`SIn(l,p) , _::_, '|' ) -> fn (`SCl(l,(l,p))) stack curr next
| (`SIn(_,p) , _::_, '\255' ) -> unclosed_comment_string p
| (`SIn(_,_) , _::_, _ ) -> fn state stack curr next
| (`SCl([],b), _::_, '}' ) -> fn `Ini stack curr next
| (`SCl([],b), _::_, '\255' ) -> unclosed_comment_string (snd b)
| (`SCl([],b), _::_, _ ) -> fn (`SIn(b)) stack curr next
| (`SCl(l,b) , _::_, c ) ->
if c = List.hd l then fn (`SCl(List.tl l, b)) stack curr next
else fn (`SIn(b)) stack curr next
| (`SOp(_,_) , [] , _ ) -> assert false
| (`SIn(_,_) , [] , _ ) -> assert false
| (`SCl(_,_) , [] , _ ) -> assert false
| (`Ini , _::_, '*' ) -> fn `Cls stack curr next
| (`Cls , _::_, '*' ) -> fn `Cls stack curr next
| (`Cls , _::_, '"' ) -> fn (`Str(curr)) stack curr next
| (`Cls , _::_, '{' ) -> fn (`SOp([],curr)) stack curr next
| (`Cls , p::s, ')' ) -> fn `Ini s curr next
| (`Cls , _::_, _ ) -> fn `Ini stack curr next
| (`Cls , [] , _ ) -> assert false
| (`Ini , p::_, '\255' ) -> unclosed_comment p
| (`Ini , _::_, _ ) -> fn `Ini stack curr next
in
fn `Ini [] (buf, pos) (buf, pos)