Source file sexp.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
type t = Atom of string | Node of t list
type parsing_error =
| Nonterminated_node of int
| Nonterminated_atom of int
| Expected_number_or_colon of char * int
| Expected_number of char * int
| Unexepected_character of char * int
| Premature_end_of_atom of int * int
type invalid = Invalid_sexp of t * string
let atom x = Atom x
let node x = Node x
let rec equal a b =
match (a, b) with
| Atom a, Atom b -> String.equal a b
| Node a, Node b -> List.equal equal a b
| _ -> false
let rec pp ppf = function
| Atom x -> Format.fprintf ppf {|Atom "%s"|} x
| Node x -> Format.fprintf ppf {|Node [@[%a@]]|} (Format.pp_print_list pp) x
let rec pp_pretty ppf = function
| Atom x -> Format.fprintf ppf "%s" x
| Node x -> Format.fprintf ppf "@[<hov 1>(%a)@]" pp_pretty_list x
and pp_pretty_list ppf = function
| x :: (_ :: _ as xs) ->
let () = Format.fprintf ppf "%a@ " pp_pretty x in
pp_pretty_list ppf xs
| x :: xs ->
let () = Format.fprintf ppf "%a" pp_pretty x in
pp_pretty_list ppf xs
| [] -> ()
let to_string = Format.asprintf "%a" pp_pretty
let char_to_int c = int_of_char c - int_of_char '0'
module Canonical = struct
let length sexp =
let rec aux acc = function
| Node x -> 2 + List.fold_left aux acc x
| Atom x ->
let len = String.length x in
let ilen = String.length (Int.to_string len) in
acc + ilen + 1 + len
in
aux 0 sexp
let to_buffer buf sexp =
let rec aux = function
| Atom x ->
let len = String.length x |> Int.to_string in
let () = Buffer.add_string buf len in
let () = Buffer.add_char buf ':' in
Buffer.add_string buf x
| Node x ->
let () = Buffer.add_char buf '(' in
let () = List.iter aux x in
Buffer.add_char buf ')'
in
aux sexp
let to_string sexp =
let len = length sexp in
let buf = Buffer.create len in
let () = to_buffer buf sexp in
Buffer.contents buf
let collect_string len seq =
let buf = Buffer.create len in
let rec aux i seq =
if i = len then Ok (atom @@ Buffer.contents buf, seq)
else
match Seq.uncons seq with
| None -> Error (Premature_end_of_atom (len, i))
| Some (c, xs) ->
let () = Buffer.add_char buf c in
aux (i + 1) xs
in
aux 0 seq
let parse_atom lex_pos seq =
let rec aux lex_pos acc seq =
match (Seq.uncons seq, acc) with
| None, _ -> Error (Nonterminated_atom lex_pos)
| Some (':', xs), Some x ->
Result.map (fun (a, xs) -> (a, lex_pos + x, xs)) (collect_string x xs)
| Some (('0' .. '9' as c), xs), acc ->
let acc = (Option.value ~default:0 acc * 10) + char_to_int c in
aux (lex_pos + 1) (Some acc) xs
| Some (c, _), Some _ -> Error (Expected_number_or_colon (c, lex_pos))
| Some (c, _), None -> Error (Expected_number (c, lex_pos))
in
aux lex_pos None seq
let from_seq seq =
let rec aux level lex_pos acc seq =
match Seq.uncons seq with
| None ->
if level = 0 then Ok (List.rev acc, lex_pos, Seq.empty)
else Error (Nonterminated_node lex_pos)
| Some (('0' .. '9' as c), xs) ->
Result.bind
(parse_atom lex_pos (Seq.cons c xs))
(fun (a, lex_pos, xs) -> aux level (lex_pos + 1) (a :: acc) xs)
| Some (')', xs) -> Ok (List.rev acc, lex_pos + 1, xs)
| Some ('(', xs) ->
Result.bind
(aux (level + 1) lex_pos [] xs)
(fun (n, lex_pos, xs) -> aux level (lex_pos + 1) (node n :: acc) xs)
| Some (c, _) -> Error (Unexepected_character (c, lex_pos))
in
Result.map
(fun (r, _, _) -> match r with [ e ] -> e | _ -> node r)
(aux 0 0 [] seq)
let from_string str = str |> String.to_seq |> from_seq
end
let from_seq seq =
let parse_atom lex_pos seq =
let buf = Buffer.create 1 in
let rec aux escaped lex_pos seq =
match Seq.uncons seq with
| None ->
(buf |> Buffer.to_bytes |> Bytes.to_string, lex_pos + 1, Seq.empty)
| Some ('\\', xs) -> aux true (lex_pos + 1) xs
| Some (((' ' | '\t' | '\n' | ')' | '(') as c), xs) when not escaped ->
(buf |> Buffer.to_bytes |> Bytes.to_string, lex_pos, Seq.cons c xs)
| Some (c, xs) ->
let () = Buffer.add_char buf c in
aux false (lex_pos + 1) xs
in
aux false lex_pos seq
in
let rec aux level lex_pos acc seq =
match Seq.uncons seq with
| None ->
if level = 0 then Ok (List.rev acc, lex_pos, Seq.empty)
else Error (Nonterminated_node lex_pos)
| Some (('\t' | ' ' | '\n'), xs) -> aux level (lex_pos + 1) acc xs
| Some (')', xs) -> Ok (List.rev acc, lex_pos + 1, xs)
| Some ('(', xs) ->
Result.bind
(aux (level + 1) lex_pos [] xs)
(fun (n, lex_pos, xs) -> aux level (lex_pos + 1) (node n :: acc) xs)
| Some (c, xs) ->
let atm, lex_pos, xs = parse_atom lex_pos (Seq.cons c xs) in
aux level lex_pos (atom atm :: acc) xs
in
Result.map
(fun (r, _, _) -> match r with [ e ] -> e | _ -> node r)
(aux 0 0 [] seq)
let from_string str = str |> String.to_seq |> from_seq