package ldp

  1. Overview
  2. Docs

Source file ct.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
(*********************************************************************************)
(*                OCaml-LDP                                                      *)
(*                                                                               *)
(*    Copyright (C) 2016-2024 Institut National de Recherche en Informatique     *)
(*    et en Automatique. All rights reserved.                                    *)
(*                                                                               *)
(*    This program is free software; you can redistribute it and/or modify       *)
(*    it under the terms of the GNU Lesser General Public License version        *)
(*    3 as published by the Free Software Foundation.                            *)
(*                                                                               *)
(*    This program is distributed in the hope that it will be useful,            *)
(*    but WITHOUT ANY WARRANTY; without even the implied warranty of             *)
(*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *)
(*    GNU General Public License for more details.                               *)
(*                                                                               *)
(*    You should have received a copy of the GNU General Public License          *)
(*    along with this program; if not, write to the Free Software                *)
(*    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA                   *)
(*    02111-1307  USA                                                            *)
(*                                                                               *)
(*    Contact: Maxence.Guesdon@inria.fr                                          *)
(*                                                                               *)
(*********************************************************************************)

type token = string

type ty =
  | Application
  | Audio
  | Ietf of token
  | Image
  | Message
  | Multipart
  | Text
  | Video
  | X of token

type subty = token (** subtypes are stored in lowercase ascii *)
type value = T of token | Quoted of string
type parameter = token * value (** name are stored in lowercase ascii *)

type t = { ty : ty ; subty : subty ; parameters : parameter list }
type mime = ty * subty

let to_mime t = (t.ty, t.subty)
let of_mime (ty, subty) = { ty ; subty ; parameters = [] }

let create ?(parameters=[]) ty subty = { ty ; subty ; parameters }
let default = { ty = Text ; subty = "plain"; parameters = [ "charset", T "us-ascii" ] }

(* Parsing using grammar from
  https://www.w3.org/Protocols/rfc1341/4_Content-Type.html *)

type error = string * Lexing.position

let string_of_pos pos =
  let open Lexing in
  Printf.sprintf "%sline %d, character %d"
    (match pos.pos_fname with
     | "" -> ""
     | _ -> Printf.sprintf "File %S, " pos.pos_fname)
    pos.pos_lnum
    pos.pos_cnum

let string_of_error (str, pos) =
  Printf.sprintf "%s: Invalid character in %s" (string_of_pos pos) str

let nl_char = Uchar.of_char '\n'

exception Lex_error of Lexing.position

let update_pos pos str =
  let open Lexing in
  let f pos i = function
  | `Malformed _ -> raise (Lex_error pos)
  | `Uchar c when Uchar.equal c nl_char ->
      let bol = pos.pos_cnum in
      { pos with
        pos_lnum = pos.pos_lnum + 1;
        pos_bol = bol ;
        pos_cnum = pos.pos_cnum + 1 ;
      }
  | _ -> { pos with pos_cnum = pos.pos_cnum + 1}
  in
  Uutf.String.fold_utf_8 f pos str

let lexeme pos lexbuf =
  try Sedlexing.Utf8.lexeme lexbuf
  with Sedlexing.MalFormed -> raise (Lex_error pos)

let upd pos lexbuf = update_pos pos (lexeme pos lexbuf)

let tspecials = [%sedlex.regexp?
   '(' | ')' | '<' | '>' | '@'  |  ',' | ';' | ':' | '\\'
  | '"' | '/' | '[' | ']' | '?' | '.' |  '=' ]

let ctls = [%sedlex.regexp? 0x00 .. 0x1F | 0x7F] (* controls *)
let htab = [%sedlex.regexp? 0x09] (* horizontal tab *)
let sp = [%sedlex.regexp? ' '] (* space, \x20 *)
let space = [%sedlex.regexp? sp | htab] (* white space *)
let token = [%sedlex.regexp? Plus(Compl(ctls|space|tspecials))]

let ascii_char = [%sedlex.regexp? 0x00 .. 0xFF]

let rec quoted_string b pos lb =
  match%sedlex lb with
  | '"' -> (upd pos lb, Buffer.contents b)
  | '\\', ascii_char ->
      let str = lexeme pos lb in
      let len = String.length str in
      let pos = upd pos lb in
      Buffer.add_substring b str 1 (len - 1) ;
      quoted_string b pos lb
  | '\n' -> raise (Lex_error pos)
  | ascii_char ->
      let str = lexeme pos lb in
      let pos = upd pos lb in
      Buffer.add_string b str ;
      quoted_string b pos lb
  | eof -> raise (Lex_error pos)
  | _ -> raise (Lex_error pos)

let rec lex_params acc pos lb =
  match%sedlex lb with
  | Star(space),';',Star(space) ->
     begin
       let pos = upd pos lb in
       match%sedlex lb with
       | token,'=' ->
           begin
             let str = lexeme pos lb in
              let len = String.length str in
              let pos = upd pos lb in
             let name = String.lowercase_ascii (String.sub str 0 (len - 1)) in
             match%sedlex lb with
             | token ->
                 let v = lexeme pos lb in
                 let pos = upd pos lb in
                 lex_params ((name,T v)::acc) pos lb
             | '"' ->
                 let (pos, v) = quoted_string (Buffer.create 256) (upd pos lb) lb in
                 lex_params ((name,Quoted v)::acc) pos lb
             | _ -> raise (Lex_error pos)
           end
       | _ -> raise (Lex_error pos)
     end
  | Star(space),eof -> List.rev acc
  | _ -> raise (Lex_error pos)


let params pos ty subty lb =
 let parameters = lex_params [] pos lb in
 { ty ; subty ; parameters }

let subty pos ty lb =
  match%sedlex lb with
  | '/', token ->
      let str = String.lowercase_ascii (lexeme pos lb) in
      let len = String.length str in
      let pos = upd pos lb in
      let t = String.sub str 1 (len - 1) in
      params pos ty t lb
  | _ ->
      (*prerr_endline (Printf.sprintf "lexeme=%S" (lexeme pos lb));*)
      raise (Lex_error pos)

let rec ty pos lb =
  match%sedlex lb with
  | ('a'|'A'), ('p'|'P'), ('p'|'P'), ('l'|'L'), ('i'|'I'), ('c'|'C'), ('a'|'A'),
      ('t'|'T'), ('i'|'I'), ('o'|'O'), ('n'|'N')  -> subty (upd pos lb) Application lb

  | ('a'|'A'), ('u'|'U'), ('d'|'D'), ('i'|'I'), ('o'|'O') -> subty (upd pos lb) Audio lb
  | ('i'|'I'), ('m'|'M'), ('a'|'A'), ('g'|'G'), ('e'|'E') -> subty (upd pos lb) Image lb

  | ('m'|'M'), ('e'|'E'), ('s'|'S'), ('s'|'S'), ('a'|'A'),
      ('g'|'G'), ('e'|'E') -> subty (upd pos lb) Message lb

  | ('m'|'M'), ('u'|'U'), ('l'|'L'), ('t'|'T'), ('i'|'I'), ('p'|'P'),
      ('a'|'A'), ('r'|'R'), ('t'|'T') -> subty (upd pos lb) Multipart lb

  | ('t'|'T'), ('e'|'E'), ('x'|'X'), ('t'|'T') -> subty (upd pos lb) Text lb
  | ('v'|'V'), ('i'|'I'), ('d'|'D'), ('e'|'E'), ('o'|'O') -> subty (upd pos lb) Video lb
  | "X-", token ->
      let str = lexeme pos lb in
      let len = String.length str in
      let pos = upd pos lb in
      let t = String.sub str 2 (len - 2) in
      subty pos (X t) lb
  | token ->
      let str = lexeme pos lb in
      let pos = upd pos lb in
      subty pos (Ietf str) lb
  | Plus(space) ->
      ty (upd pos lb) lb
  | _ -> raise (Lex_error pos)

let of_string str =
  let pos = Lexing.{ pos_fname = ""; pos_lnum = 1; pos_bol = 1; pos_cnum = 1; } in
  let lexbuf = Sedlexing.Utf8.from_string str in
  try Result.ok (ty pos lexbuf)
  with Lex_error pos -> Result.error (str, pos)

let string_of_ty = function
| Application -> "application"
| Audio -> "audio"
| Ietf s -> s
| Image -> "image"
| Message -> "message"
| Multipart -> "multipart"
| Text -> "text"
| Video -> "video"
| X s -> Printf.sprintf "X-%s" s

let quoted_char = function
  | '\r' | '\n' | '"' | '\\' -> true
  | _ -> false

let to_string =
  let print_param b (name,value) =
    Buffer.add_string b "; ";
    Buffer.add_string b name ;
    Buffer.add_string b "=" ;
    match value with
    | T s -> Buffer.add_string b s
    | Quoted s ->
        Buffer.add_char b '"';
        String.iter
          (fun c ->
             if quoted_char c then
               Buffer.add_char b '\\';
             Buffer.add_char b c;
          )
          s;
        Buffer.add_char b '"';
  in
  fun t ->
  let b = Buffer.create 256 in
  let ty = string_of_ty t.ty in
  Printf.bprintf b "%s/%s" ty t.subty ;
  List.iter (print_param b) t.parameters ;
  Buffer.contents b

let mime_to_string (ty, subty) =
  let ty = string_of_ty ty in
  Printf.sprintf "%s/%s" ty subty

let mime_of_string s =
  match of_string s with
  | Ok t -> Result.ok (to_mime t)
  | Error x -> Result.error x

let value param =
  let param = String.lowercase_ascii param in
  fun t ->
    match List.assoc param t.parameters with
    | exception Not_found -> None
    | (T s | Quoted s) -> Some s

let value_def v param t =
  match value param t with
  | None -> v
  | Some v -> v

let charset = value "charset"
let charset_def v = value_def v "charset"

let has_mime ct (ty, subty) = ct.ty = ty && ct.subty = subty

(*
let () =
  match of_string "apPlIcation/PostScript; Charset=iso-8859-1; toto=\"xx\\\"hello world!\"; foo=bar" with
  | Result.Ok ct ->
      begin
        prerr_endline (to_string ct);
        match charset ct with
        | None -> assert false
        | Some s -> prerr_endline (Printf.sprintf "charset=%s" s)
      end
  | Error e -> prerr_endline (string_of_error e)
*)

let mk_ct s =
  match of_string s with
  | Ok t -> t
  | Error _ -> assert false
let mk_mime s =
  match mime_of_string s with
  | Ok t -> t
  | Error _ -> assert false
let mime_turtle = mk_mime "text/turtle"
let ct_turtle = mk_ct "text/turtle"
let mime_xmlrdf = mk_mime "application/rdf+xml"
let ct_xmlrdf = mk_ct "application/rdf+xml"
let mime_sparql_update = mk_mime "application/sparql-update"
let ct_sparql_update = mk_ct "application/sparql-update"
let mime_text = mk_mime "text/plain"
let ct_text = mk_ct "text/plain"
let mime_xhtml = mk_mime "application/xhtml+xml"
let ct_xhtml = mk_ct "application/xhtml+xml"


OCaml

Innovation. Community. Security.