Source file Lexer.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
module Location = struct
include Location
let pp = print_loc
end
type token =
| WHILE
| VSWITCH
| VPORT
| VLINK
| VLANPCP
| VLAN
| VFABRIC
| VAR
| TRUE
| THEN
| TCPSRCPORT
| TCPDSTPORT
| SWITCH
| STRING of (string)
| STAR
| SLASH
| SEMICOLON
| RPAR
| QUERY
| PORT
| PLUS
| PIPE
| OR
| NOT
| METAID of (string)
| MAC of (string)
| LPAR
| LINK
| LET
| IVERSON of (string * Location.t)
| IPPROTO
| IP4SRC
| IP4DST
| IP4ADDR of (string)
| INT of (string)
| IN
| IF
| ID
| FROM
| FILTER
| FALSE
| ETHTYPE
| ETHSRC
| ETHDST
| EQUALS
| EOF
| END
| ELSE
| DUP
| DROP
| DO
| BEGIN
| AT
| ASSIGN
| ANTIQ of (string * Location.t)
| AND
| ABSTRACTLOC
[@@deriving show]
module Sedlexing = LexBuffer
open LexBuffer
(** Signals a lexing error at the provided source location. *)
exception LexError of (Lexing.position * string)
(** Signals a parsing error at the provided token and its start and end locations. *)
exception ParseError of (token * Lexing.position * Lexing.position)
(** Register exceptions for pretty printing *)
let _ =
Location.register_error_of_exn (function
| LexError (pos, msg) ->
let loc = Location.{ loc_start = pos; loc_end = pos; loc_ghost = false} in
let main = Location.mkloc (fun fmt -> Format.fprintf fmt "%s" msg) loc in
let err = Location.{ kind = Report_error; main = main; sub=[] } in
Some err
| ParseError (token, loc_start, loc_end) ->
let loc = Location.{ loc_start; loc_end; loc_ghost = false} in
let msg =
show_token token
|> Printf.sprintf "parse error while reading token '%s'" in
let main = Location.mkloc (fun fmt -> Format.fprintf fmt "%s" msg) loc in
let err = Location.{ kind = Report_error; main = main; sub=[] } in
Some err
| _ -> None)
let failwith buf s = raise (LexError (buf.pos, s))
let illegal buf c =
Char.escaped c
|> Printf.sprintf "unexpected character in NetKAT expression: '%s'"
|> failwith buf
(** regular expressions *)
let letter = [%sedlex.regexp? 'A'..'Z' | 'a'..'z']
let digit = [%sedlex.regexp? '0'..'9']
let id_init = [%sedlex.regexp? letter | '_']
let id_cont = [%sedlex.regexp? id_init | Chars ".\'" | digit ]
let id = [%sedlex.regexp? id_init, Star id_cont ]
let hex = [%sedlex.regexp? digit | 'a'..'f' | 'A'..'F' ]
let hexnum = [%sedlex.regexp? '0', 'x', Plus hex ]
let decnum = [%sedlex.regexp? Plus digit]
let decbyte = [%sedlex.regexp? (digit,digit,digit) | (digit,digit) | digit ]
let hexbyte = [%sedlex.regexp? hex,hex ]
let blank = [%sedlex.regexp? ' ' | '\t' ]
let newline = [%sedlex.regexp? '\r' | '\n' | "\r\n" ]
(** swallows whitespace and comments *)
let rec garbage buf =
match%sedlex buf with
| newline -> garbage buf
| Plus blank -> garbage buf
| "(*" -> comment 1 buf
| _ -> ()
and depth buf =
if depth = 0 then garbage buf else
match%sedlex buf with
| eof -> failwith buf "Unterminated comment at EOF"
| "(*" -> comment (depth + 1) buf
| "*)" -> comment (depth - 1) buf
| any -> comment depth buf
| _ -> assert false
(** anitquotation brackets *)
let antiq ~loc_start buf =
match%sedlex buf with
| Star (Compl '}') ->
let code = ascii buf in
begin match%sedlex buf with
| '}' -> ()
| _ -> failwith buf "unterminated anitquotation brace"
end;
let loc_end = next_loc buf in
let loc = Location.{ loc_start; loc_end; loc_ghost = false } in
ANTIQ (code, loc)
| _ -> assert false
(** iverson brackets *)
let iverson ~loc_start buf =
match%sedlex buf with
| Star (Compl ']') ->
let code = ascii buf in
begin match%sedlex buf with
| ']' -> ()
| _ -> failwith buf "unterminated iverson bracket"
end;
let loc_end = next_loc buf in
let loc = Location.{ loc_start; loc_end; loc_ghost = false } in
IVERSON (code, loc)
| _ -> assert false
(** returns the next token *)
let token ~ppx ~loc_start buf =
garbage buf;
match%sedlex buf with
| eof -> EOF
| decbyte,'.',decbyte,'.',decbyte,'.',decbyte ->
IP4ADDR (ascii buf)
| hexbyte,':',hexbyte,':',hexbyte,':',hexbyte,':',hexbyte,':',hexbyte ->
MAC (ascii buf)
| (hexnum | decnum) -> INT (ascii buf)
| (hexnum | decnum), 'l' -> INT (ascii buf)
| (hexnum | decnum), 'L' -> INT (ascii buf)
| "pipe" -> PIPE
| "query" -> QUERY
| '"', Star (Compl '"'), '"' -> STRING (ascii ~skip:1 ~drop:1 buf)
| "dup" -> DUP
| '{' ->
if not ppx then
illegal buf '{'
else
antiq ~loc_start buf
| '[' ->
if not ppx then
illegal buf '['
else
iverson ~loc_start buf
| "true" -> TRUE
| "false" -> FALSE
| "and" -> AND
| "or" -> OR
| "not" -> NOT
| '=' -> EQUALS
| "id" -> ID
| "drop" -> DROP
| "filter" -> FILTER
| ":=" -> ASSIGN
| ';' -> SEMICOLON
| '+' -> PLUS
| '*' -> STAR
| "=>" -> LINK
| "=>>" -> VLINK
| "@" -> AT
| '/' -> SLASH
| "switch" -> SWITCH
| "port" -> PORT
| "vswitch" -> VSWITCH
| "vport" -> VPORT
| "vfabric" -> VFABRIC
| "ethSrc" -> ETHSRC
| "ethDst" -> ETHDST
| "vlanId" -> VLAN
| "vlanPcp" -> VLANPCP
| "ethTyp" -> ETHTYPE
| "ipProto" -> IPPROTO
| "ip4Src" -> IP4SRC
| "ip4Dst" -> IP4DST
| "tcpSrcPort" -> TCPSRCPORT
| "tcpDstPort" -> TCPDSTPORT
| "from" -> FROM
| "loc" -> ABSTRACTLOC
| "if" -> IF
| "then" -> THEN
| "else" -> ELSE
| "while" -> WHILE
| "do" -> DO
| '(' -> LPAR
| ')' -> RPAR
| "begin" -> BEGIN
| "end" -> END
| "let" -> LET
| "var" -> VAR
| "in" -> IN
| '`', id -> METAID (ascii buf ~skip:1)
| _ -> illegal buf (next buf |> Base.Option.value_exn |> Uchar.to_char)
(** wrapper around `token` that records start and end locations *)
let loc_token ~ppx buf =
let () = garbage buf in
let loc_start = next_loc buf in
let t = token ~ppx ~loc_start buf in
let loc_end = next_loc buf in
(t, loc_start, loc_end)
(** menhir interface *)
type ('token, 'a) parser = ('token, 'a) MenhirLib.Convert.traditional
let parse ?(ppx=false) buf p =
let last_token = ref Lexing.(EOF, dummy_pos, dummy_pos) in
let next_token () = last_token := loc_token ~ppx buf; !last_token in
try MenhirLib.Convert.Simplified.traditional2revised p next_token with
| e ->
begin match e with
| LexError _ | Syntaxerr.Error _ -> raise e
| _ -> raise (ParseError (!last_token))
end
let parse_string ?ppx ?pos s p =
parse ?ppx (LexBuffer.of_ascii_string ?pos s) p
let parse_file ?ppx ~file p =
parse ?ppx (LexBuffer.of_ascii_file file) p