package yocaml

  1. Overview
  2. Docs

Source file sexp.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
(* YOCaml a static blog generator.
   Copyright (C) 2024 The Funkyworkers and The YOCaml's developers

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>. *)

type t = Atom of string | Node of t list

type parsing_error =
  | Nonterminated_node of int
  | Nonterminated_atom of int
  | Expected_number_or_colon of char * int
  | Expected_number of char * int
  | Unexepected_character of char * int
  | Premature_end_of_atom of int * int

type invalid = Invalid_sexp of t * string

let atom x = Atom x
let node x = Node x

let rec equal a b =
  match (a, b) with
  | Atom a, Atom b -> String.equal a b
  | Node a, Node b -> List.equal equal a b
  | _ -> false

let rec pp ppf = function
  | Atom x -> Format.fprintf ppf {|Atom "%s"|} x
  | Node x -> Format.fprintf ppf {|Node [@[%a@]]|} (Format.pp_print_list pp) x

let rec pp_pretty ppf = function
  | Atom x -> Format.fprintf ppf "%s" x
  | Node x -> Format.fprintf ppf "@[<hov 1>(%a)@]" pp_pretty_list x

and pp_pretty_list ppf = function
  | x :: (_ :: _ as xs) ->
      let () = Format.fprintf ppf "%a@ " pp_pretty x in
      pp_pretty_list ppf xs
  | x :: xs ->
      let () = Format.fprintf ppf "%a" pp_pretty x in
      pp_pretty_list ppf xs
  | [] -> ()

let to_string = Format.asprintf "%a" pp_pretty
let char_to_int c = int_of_char c - int_of_char '0'

module Canonical = struct
  let length sexp =
    let rec aux acc = function
      | Node x -> 2 + List.fold_left aux acc x
      | Atom x ->
          let len = String.length x in
          let ilen = String.length (Int.to_string len) in
          acc + ilen + 1 + len
    in
    aux 0 sexp

  let to_buffer buf sexp =
    let rec aux = function
      | Atom x ->
          let len = String.length x |> Int.to_string in
          let () = Buffer.add_string buf len in
          let () = Buffer.add_char buf ':' in
          Buffer.add_string buf x
      | Node x ->
          let () = Buffer.add_char buf '(' in
          let () = List.iter aux x in
          Buffer.add_char buf ')'
    in
    aux sexp

  let to_string sexp =
    let len = length sexp in
    let buf = Buffer.create len in
    let () = to_buffer buf sexp in
    Buffer.contents buf

  let collect_string len seq =
    let buf = Buffer.create len in
    let rec aux i seq =
      if i = len then Ok (atom @@ Buffer.contents buf, seq)
      else
        match Seq.uncons seq with
        | None -> Error (Premature_end_of_atom (len, i))
        | Some (c, xs) ->
            let () = Buffer.add_char buf c in
            aux (i + 1) xs
    in
    aux 0 seq

  let parse_atom lex_pos seq =
    let rec aux lex_pos acc seq =
      match (Seq.uncons seq, acc) with
      | None, _ -> Error (Nonterminated_atom lex_pos)
      | Some (':', xs), Some x ->
          Result.map (fun (a, xs) -> (a, lex_pos + x, xs)) (collect_string x xs)
      | Some (('0' .. '9' as c), xs), acc ->
          let acc = (Option.value ~default:0 acc * 10) + char_to_int c in
          aux (lex_pos + 1) (Some acc) xs
      | Some (c, _), Some _ -> Error (Expected_number_or_colon (c, lex_pos))
      | Some (c, _), None -> Error (Expected_number (c, lex_pos))
    in
    aux lex_pos None seq

  let from_seq seq =
    let rec aux level lex_pos acc seq =
      match Seq.uncons seq with
      | None ->
          if level = 0 then Ok (List.rev acc, lex_pos, Seq.empty)
          else Error (Nonterminated_node lex_pos)
      | Some (('0' .. '9' as c), xs) ->
          Result.bind
            (parse_atom lex_pos (Seq.cons c xs))
            (fun (a, lex_pos, xs) -> aux level (lex_pos + 1) (a :: acc) xs)
      | Some (')', xs) -> Ok (List.rev acc, lex_pos + 1, xs)
      | Some ('(', xs) ->
          Result.bind
            (aux (level + 1) lex_pos [] xs)
            (fun (n, lex_pos, xs) -> aux level (lex_pos + 1) (node n :: acc) xs)
      | Some (c, _) -> Error (Unexepected_character (c, lex_pos))
    in
    Result.map
      (fun (r, _, _) -> match r with [ e ] -> e | _ -> node r)
      (aux 0 0 [] seq)

  let from_string str = str |> String.to_seq |> from_seq
end

let from_seq seq =
  let parse_atom lex_pos seq =
    let buf = Buffer.create 1 in
    let rec aux escaped lex_pos seq =
      match Seq.uncons seq with
      | None ->
          (buf |> Buffer.to_bytes |> Bytes.to_string, lex_pos + 1, Seq.empty)
      | Some ('\\', xs) -> aux true (lex_pos + 1) xs
      | Some (((' ' | '\t' | '\n' | ')' | '(') as c), xs) when not escaped ->
          (buf |> Buffer.to_bytes |> Bytes.to_string, lex_pos, Seq.cons c xs)
      | Some (c, xs) ->
          let () = Buffer.add_char buf c in
          aux false (lex_pos + 1) xs
    in
    aux false lex_pos seq
  in

  let rec aux level lex_pos acc seq =
    match Seq.uncons seq with
    | None ->
        if level = 0 then Ok (List.rev acc, lex_pos, Seq.empty)
        else Error (Nonterminated_node lex_pos)
    | Some (('\t' | ' ' | '\n'), xs) -> aux level (lex_pos + 1) acc xs
    | Some (')', xs) -> Ok (List.rev acc, lex_pos + 1, xs)
    | Some ('(', xs) ->
        Result.bind
          (aux (level + 1) lex_pos [] xs)
          (fun (n, lex_pos, xs) -> aux level (lex_pos + 1) (node n :: acc) xs)
    | Some (c, xs) ->
        let atm, lex_pos, xs = parse_atom lex_pos (Seq.cons c xs) in
        aux level lex_pos (atom atm :: acc) xs
  in
  Result.map
    (fun (r, _, _) -> match r with [ e ] -> e | _ -> node r)
    (aux 0 0 [] seq)

let from_string str = str |> String.to_seq |> from_seq
OCaml

Innovation. Community. Security.