package rdf

  1. Overview
  2. Docs
OCaml library to manipulate RDF graphs; implements SPARQL

Install

Dune Dependency

Authors

Maintainers

Sources

ocaml-rdf-1.1.0.tar.bz2
md5=fdca8ab06da34d9d76fe273f654ec6a1
sha512=fff3ad6cb5978e43ac3c509cc25a01d16be6e21b04df607e0595ec0e7226ba7b6e4e2ec86bbeae4aa3d6a181fa399c7c00a4b1c788ddc98486f5c8badf8867f7

doc/src/rdf/xml.ml.html

Source file xml.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
(*********************************************************************************)
(*                OCaml-RDF                                                      *)
(*                                                                               *)
(*    Copyright (C) 2012-2024 Institut National de Recherche en Informatique     *)
(*    et en Automatique. All rights reserved.                                    *)
(*                                                                               *)
(*    This program is free software; you can redistribute it and/or modify       *)
(*    it under the terms of the GNU Lesser General Public License version        *)
(*    3 as published by the Free Software Foundation.                            *)
(*                                                                               *)
(*    This program is distributed in the hope that it will be useful,            *)
(*    but WITHOUT ANY WARRANTY; without even the implied warranty of             *)
(*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *)
(*    GNU General Public License for more details.                               *)
(*                                                                               *)
(*    You should have received a copy of the GNU General Public License          *)
(*    along with this program; if not, write to the Free Software                *)
(*    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA                   *)
(*    02111-1307  USA                                                            *)
(*                                                                               *)
(*    Contact: Maxence.Guesdon@inria.fr                                          *)
(*                                                                               *)
(*********************************************************************************)

(** *)

open Graph;;
open Term;;

(** {2 Using trees for XML docs}
  Code taken from Xmlm examples.
  Thanks to Xmlm, namespaces are already handled by the parser :-)
*)

type tree = E of Xmlm.tag * tree list | D of string

let in_tree i =
  let el tag childs = E (tag, childs)  in
  let data d = D d in
  try
    Xmlm.input_doc_tree ~el ~data i
  with
    Xmlm.Error ((line, col), error) ->
      let msg = Printf.sprintf "Line %d, column %d: %s"
        line col (Xmlm.error_message error)
      in
      failwith msg

let out_tree o t =
  let frag = function
  | E (tag, childs) -> `El (tag, childs)
  | D d -> `Data d
  in
  Xmlm.output_doc_tree frag o t

let apply_namespaces = Dot.apply_namespaces;;

let output_doc_tree ns ?(decl=true) dest tree =
  let map (pref, s) =
    match pref with
      "" -> apply_namespaces ns s
    | _ -> (pref, s)
  in
  let tree =
    match tree with
      D _ -> tree
    | E ((tag,atts),subs) ->
        let atts = List.fold_left
          (fun acc (((pref,s),v) as att) ->
             if pref = Xmlm.ns_xmlns then acc else att :: acc
          )
          []
          atts
        in
        let ns_atts = List.map (fun (pref,iri) -> ((Xmlm.ns_xmlns, pref), iri)) ns in
        E ((tag, ns_atts @ atts), subs)
  in
  let ns_prefix s = Some s in
  let output = Xmlm.make_output ~ns_prefix ~decl dest in
  let frag = function
  | D d -> `Data d
  | E (((pref,s),atts), childs) ->
      let (pref, s) = map (pref, s) in
      let atts = List.map
        (fun ((pref,s),v) -> (map (pref, s), v)) atts
      in
      `El (((pref,s),atts), childs)
  in
  Xmlm.output_doc_tree frag output (None, tree);
;;

let string_of_xmls namespaces trees =
  try
    let b = Buffer.create 256 in
    List.iter (output_doc_tree namespaces ~decl: false (`Buffer b)) trees;
    Buffer.contents b
  with
    Xmlm.Error ((line, col), error) ->
      let msg = Printf.sprintf "Line %d, column %d: %s"
        line col (Xmlm.error_message error)
      in
      failwith msg
;;

let xml_of_string str =
  try
    let i = Xmlm.make_input ~strip: true (`String (0, str)) in
    let (_,tree) = in_tree i in
    (*prerr_endline "parse ok";*)
    tree
  with
    Xmlm.Error ((line, col), error) ->
      let msg = Printf.sprintf "Line %d, column %d: %s\n%s"
        line col (Xmlm.error_message error) str
      in
      failwith msg
;;

let xmls_of_string str =
  (*prerr_endline "xmls_of_string";*)
  let str = "<foo__>"^str^"</foo__>" in
  match xml_of_string str with
    E ((("","foo__"),_),subs) -> subs
  | _ -> assert false
;;

let get_first_child xml tag =
  match xml with
    D _ -> None
  | E ((_,_),subs) ->
      try Some (List.find (function E ((t,_),_) -> t = tag | _ -> false) subs)
      with Not_found -> None
;;

let is_element iri (pref,loc) =
  let iri2 = Iri.of_string (pref^loc) in
  let b = Iri.compare iri iri2 = 0 in
  (*prerr_endline (Printf.sprintf "is_element %s %s: %b"
    (Iri.to_string iri) (Iri.to_string iri2) b);*)
   b
;;


(** {2 Input} *)

module SMap = Types.SMap;;
module Irimap = Iri.Map

type state =
  { subject : Term.term option ;
    predicate : Iri.t option ;
    xml_base : Iri.t ;
    xml_lang : string option ;
    datatype : Iri.t option ;
    namespaces : string Irimap.t ;
  }

type global_state =
  {
    blanks : Term.blank_id SMap.t ;
    gnamespaces : string Irimap.t ;
  }

exception Invalid_rdf of string
let error s = raise (Invalid_rdf s);;

let () = Printexc.register_printer
  (function
   | Invalid_rdf str ->
       Some (Printf.sprintf "Invalid RDF: %s" str)
   | _ -> None)


let get_att att l = try Some (List.assoc att l) with Not_found -> None;;
let get_att_iri =
  let rec iter pred = function
    [] -> None
  | (x,v) :: q ->
    if pred x then Some v else iter pred q
  in
  fun iri l -> iter (is_element iri) l
;;

(*
let abs_iri state iri =
  prerr_endline (Printf.sprintf "resolve base=%s  iri=%s"
    (Iri.to_string state.xml_base) (Iri.ref_to_string iri));

  let iri = Iri.resolve ~base: state.xml_base iri in
  prerr_endline (Printf.sprintf "=> %s" (Iri.to_string iri));
  iri
*)

let abs_iri state iri =
  (*prerr_endline (Printf.sprintf "resolve base=%s  iri=%s"
   (Iri.to_string state.xml_base) (Iri.to_string iri));*)
  let iri =
     match Iri.is_relative iri with
       false -> iri
     | true ->
       Iri.resolve ~base: state.xml_base iri
   in
   (*prerr_endline (Printf.sprintf "=> %s" (Iri.to_string iri));*)
   iri

let set_xml_base state = function
  D _ -> state
| E ((_,atts),_) ->
    match get_att (Xmlm.ns_xml, "base") atts with
      None -> state
    | Some s ->
        let r = Iri.of_string s in
        let xml_base = abs_iri state r in
        { state with xml_base }
;;
let set_xml_lang state = function
  D _ -> state
| E ((_,atts),_) ->
    match get_att (Xmlm.ns_xml, "lang") atts with
      None -> state
    | Some s ->
        (*prerr_endline ("setting lang to "^s);*)
        { state with xml_lang = Some s }
;;
let set_namespaces gstate state = function
  D _ -> (gstate, state)
| E ((_,atts),_) ->
    let f (gstate, state) ((pref,s),v) =
      if pref = Xmlm.ns_xmlns then
        begin
          let iri = Iri.of_string v in
          let gstate = { gstate with gnamespaces = Irimap.add iri s gstate.gnamespaces } in
          let state = { state with namespaces = Irimap.add iri s state.namespaces } in
          (gstate, state)
        end
      else
        (gstate, state)
    in
    List.fold_left f (gstate, state) atts
;;

let update_state gstate state t =
  set_namespaces gstate (set_xml_lang (set_xml_base state t) t) t;;

let get_blank_node g gstate id =
  try (Blank_ (SMap.find id gstate.blanks), gstate)
  with Not_found ->
    (*prerr_endline (Printf.sprintf "blank_id for %s not found, forging one" id);*)
    let bid = g.new_blank_id () in
    let gstate = { gstate with blanks = SMap.add id bid gstate.blanks } in
    (Blank_ bid, gstate)

let rec input_node g state gstate t =
  let (gstate, state) = update_state gstate state t in
  match t with
    D s when state.predicate = None ->
      let msg = Printf.sprintf "Found (Data %S) with no current predicate." s in
      error msg
  | D s ->
      let obj = Term.term_of_literal_string ?typ: state.datatype ?lang: state.xml_lang s in
      let sub = match state.subject with None -> assert false | Some s -> s in
      let pred = match state.predicate with None -> assert false | Some u -> u in
      g.add_triple ~sub ~pred ~obj;
      gstate
  | E (((pref,s), atts), children) ->
      let (node, gstate) =
        match get_att_iri Rdf_.about atts with
          Some s -> (Iri (abs_iri state (Iri.of_string s)), gstate)
        | None ->
            match get_att_iri Rdf_.id atts with
              Some id -> (Iri (Iri.of_string ((Iri.to_string state.xml_base)^"#"^id)), gstate)
            | None ->
                match get_att_iri Rdf_.nodeID atts with
                  Some id -> get_blank_node g gstate id
                | None -> (Blank_ (g.new_blank_id ()), gstate)
      in
      begin
        match state.subject, state.predicate with
          Some sub, Some pred ->
            g.add_triple ~sub ~pred ~obj: node
        | _ -> ()
      end;
      let state = { state with subject = Some node ; predicate = None } in
      (* add a type arc if the node is not introduced with rdf:Description *)
      if not (is_element Rdf_.description (pref,s)) then
        begin
          let type_iri = Iri.of_string (pref^s) in
          g.add_triple ~sub: node ~pred: Rdf_.type_ ~obj: (Iri type_iri)
        end;
      (* all remaining attributes define triples with literal object values *)
      let f ((pref, s), v) =
        if pref <> Xmlm.ns_xml && pref <> Xmlm.ns_xmlns then
          begin
            let iri_prop = Iri.of_string (pref^s) in
            if not (List.exists (Iri.equal iri_prop) [ Rdf_.about ; Rdf_.id; Rdf_.nodeID ]) then
              begin
                let obj = Term.term_of_literal_string ?lang: state.xml_lang v in
                g.add_triple ~sub: node ~pred: iri_prop ~obj
              end
          end
      in
      List.iter f atts;
      let (gstate, _) = List.fold_left (input_prop g state) (gstate, 1) children in
      gstate

(* FIXME: handle rdf:ID *)
and input_prop g state (gstate, li) t =
  let (gstate, state) = update_state gstate state t in
  match t with
    D s ->
      let msg = Printf.sprintf "Found (Data %S) but expected a property node." s in
      error msg
  | E (((pref,s),atts),children) ->
      let sub = match state.subject with None -> assert false | Some sub -> sub in
      let prop_iri = Iri.of_string (pref^s) in
      let (prop_iri, li) =
        if Iri.equal prop_iri Rdf_.li then
          (Rdf_.n li, li + 1)
        else
          (prop_iri, li)
      in
      match get_att_iri Rdf_.resource atts with
        Some s ->
          let iri = Iri.of_string s in
          let obj = Iri (abs_iri state iri) in
          g.add_triple ~sub ~pred: prop_iri ~obj ;
          (gstate, li)
      | None ->
          match get_att_iri Rdf_.nodeID atts with
            Some id ->
              let (obj, gstate) = get_blank_node g gstate id in
              g.add_triple ~sub ~pred: prop_iri ~obj ;
              (gstate, li)
          | None ->
          match get_att_iri Rdf_.parseType atts with
            Some "Literal" ->
              let xml = string_of_xmls
                (Irimap.fold (fun iri s acc -> (s, Iri.to_string iri) :: acc) state.namespaces [])
                children
              in
              let obj = Term.term_of_literal_string ~typ: Rdf_.dt_XMLLiteral xml in
              g.add_triple ~sub ~pred: prop_iri ~obj;
              (gstate, li)
          | Some "Resource" ->
              begin
                 let node = Blank_ (g.new_blank_id ()) in
                 g.add_triple ~sub ~pred: prop_iri ~obj: node ;
                 let state = { state with subject = Some node ; predicate = None } in
                 List.fold_left (input_prop g state) (gstate, 1) children
              end
          | Some "Collection" ->
              begin
                let rec f (gstate, previous) = function
                  [] -> assert false
                | first :: rest ->
                   let state = { state with
                     subject = Some previous ;
                     predicate = Some Rdf_.first }
                   in
                   let gstate = input_node g state gstate first in
                   match rest with
                     [] -> g.add_triple ~sub: previous
                        ~pred: Rdf_.rest ~obj: (Iri Rdf_.nil);
                        (gstate, previous)
                   | _ ->
                      let blank = Term.Blank_ (g.new_blank_id ()) in
                      g.add_triple ~sub: previous ~pred: Rdf_.rest ~obj: blank;
                      f (gstate, blank) rest
                in
                let gstate =
                  match children with
                    [] -> gstate
                  | _ ->
                    let blank = Term.Blank_ (g.new_blank_id ()) in
                    g.add_triple ~sub ~pred: prop_iri ~obj: blank;
                    fst (f (gstate, blank) children)
                in
                (gstate, li)
              end
          | Some s -> error (Printf.sprintf "Unknown parseType %S" s)
          | None ->
              match get_att_iri Rdf_.datatype atts, children with
              | Some s, [D lit] ->
                  let typ = abs_iri state (Iri.of_string s) in
                  let obj = Term.term_of_literal_string ~typ ?lang: state.xml_lang lit in
                  g.add_triple ~sub ~pred: prop_iri ~obj;
                  (gstate, li)
              | Some s, _ ->
                  let msg = Printf.sprintf "Property %S with datatype %S but no data"
                    (Iri.to_string prop_iri) s
                  in
                  error msg
              | None, _ ->
                  (* if we have other attributes than the ones filtered above, they
                    are property relations, with ommited blank nodes *)
                  let pred ((pref,s),v) =
                    pref <> Xmlm.ns_xml && pref <> Xmlm.ns_xmlns &&
                    (let iri = Iri.of_string (pref^s) in not (Iri.equal iri Rdf_.id))
                  in
                  match List.filter pred atts with
                    [] ->
                      let state = { state with predicate = Some prop_iri } in
                      let gstate = List.fold_left (input_node g state) gstate children in
                      (gstate, li)
                  | l ->
                      let node = Blank_ (g.new_blank_id ()) in
                      g.add_triple ~sub ~pred: prop_iri ~obj: node ;
                      let f ((pref,s),lit) =
                        let obj = Term.term_of_literal_string ?lang: state.xml_lang lit in
                        let iri_prop = Iri.of_string (pref^s) in
                        g.add_triple ~sub: node ~pred: iri_prop ~obj
                      in
                      List.iter f l;
                      (gstate, li)
;;

let input_tree g ?(base=g.Graph.name()) t =
  let state = {
      subject = None ; predicate = None ;
      xml_base = base ; xml_lang = None ;
      datatype = None ; namespaces = Irimap.empty ;
    }
  in
  let gstate = { gnamespaces = Irimap.empty ; blanks = SMap.empty } in
  let (gstate, state) = update_state gstate state t in
  let gstate =
    match t with
      D _ -> assert false
    | E ((e,_),children) when is_element Rdf_._RDF e ->
        List.fold_left (input_node g state) gstate children
    | t -> input_node g state gstate t
  in
  (* add namespaces *)
  let add_ns iri prefix = g.add_namespace iri prefix in
  Irimap.iter add_ns gstate.gnamespaces
;;

let from_input g ?base i =
  let (_, tree) = in_tree i in
  input_tree g ?base tree
;;

let from_xml = input_tree;;

let from_string g ?base s =
  let i = Xmlm.make_input ~strip: true (`String (0, s)) in
  from_input g ?base i
;;

let from_file g ?base file =
  let ic = open_in file in
  let i = Xmlm.make_input ~strip: true (`Channel ic) in
  let (_,tree) =
    try let t = in_tree i in close_in ic; t
    with e -> close_in ic; raise e
  in
  input_tree g ?base tree
;;

(** {2 Output} *)

let output ?(compact=true) g =
  let xml_prop pred_iri obj =
    let (atts, children) =
      match obj with
      | Iri iri -> ([("", Iri.to_string Rdf_.resource), Iri.to_string iri], [])
      | Blank_ id -> ([("", Iri.to_string Rdf_.nodeID), Term.string_of_blank_id id], [])
      | Blank -> assert false
      | Literal lit ->
          let (atts, subs) =
            match lit.lit_type with
              None -> ([], [D lit.lit_value])
            | Some iri when Iri.equal iri Rdf_.dt_XMLLiteral ->
                let subs = xmls_of_string lit.lit_value in
                (
                 [("",Iri.to_string Rdf_.parseType), "Literal"],
                 subs
                )
            | Some iri ->
                (
                 [("",Iri.to_string Rdf_.datatype), Iri.to_string iri],
                 [D lit.lit_value]
                )
          in
          let atts = atts @
            (match lit.lit_language with
               None -> []
             | Some lang -> [(Xmlm.ns_xml, "lang"), lang])
          in
          (atts, subs)
    in
    E ((("",Iri.to_string pred_iri),atts),children)
  in
  let subject_atts = function
  | Iri iri -> [("", Iri.to_string Rdf_.about), Iri.to_string iri]
  | Blank_ id -> [("", Iri.to_string Rdf_.nodeID), Term.string_of_blank_id id]
  | Blank -> assert false
  | Literal _ -> assert false
  in
  let fold_props map =
    let f iri set acc =
      let fo obj acc =
        let n = xml_prop iri obj in
        n :: acc
      in
      Term.TSet.fold fo set acc
    in
    Iri.Map.fold f map []
  in
  let xmls =
    match g.folder () with
    | Some map when compact ->
        let f sub map acc =
          let xml_props = fold_props map in
          let atts = subject_atts sub in
          let node = E ((("",Iri.to_string Rdf_.description), atts), xml_props) in
          node :: acc
        in
        Term.TMap.fold f map []
    | _ ->
        let f_triple acc (sub, pred, obj) =
          let atts = subject_atts sub in
          let xml_prop = xml_prop pred obj in
          let node = E ((("",Iri.to_string Rdf_.description), atts), [xml_prop]) in
          node :: acc
        in
        List.fold_left f_triple [] (g.find ())
  in
  E ((("", Iri.to_string Rdf_._RDF),[]), xmls)


let to_ ?compact ?namespaces g dest =
  let namespaces = Dot.build_namespaces ?namespaces g in
  try
    let tree = output ?compact g in
    output_doc_tree namespaces ~decl: true dest tree
  with
    Xmlm.Error ((line, col), error) ->
      let msg = Printf.sprintf "Line %d, column %d: %s"
        line col (Xmlm.error_message error)
      in
      failwith msg
;;

let to_string ?compact ?namespaces g =
  let buf = Buffer.create 256 in
  let dest = `Buffer buf in
  to_ ?compact ?namespaces g dest;
  Buffer.contents buf
;;

let to_file ?compact ?namespaces g file =
  let oc = open_out file in
  try
    to_ ?compact ?namespaces g (`Channel oc);
    close_out oc
  with e ->
      close_out oc ; raise e
;;


OCaml

Innovation. Community. Security.