package caisar

  1. Overview
  2. Docs

Source file input.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
(**************************************************************************)
(*                                                                        *)
(*  This file is part of CAISAR.                                          *)
(*                                                                        *)
(*  Copyright (C) 2025                                                    *)
(*    CEA (Commissariat à l'énergie atomique et aux énergies              *)
(*         alternatives)                                                  *)
(*                                                                        *)
(*  You can redistribute it and/or modify it under the terms of the GNU   *)
(*  Lesser General Public License as published by the Free Software       *)
(*  Foundation, version 2.1.                                              *)
(*                                                                        *)
(*  It is distributed in the hope that it will be useful,                 *)
(*  but WITHOUT ANY WARRANTY; without even the implied warranty of        *)
(*  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the          *)
(*  GNU Lesser General Public License for more details.                   *)
(*                                                                        *)
(*  See the GNU Lesser General Public License version 2.1                 *)
(*  for more details (enclosed in the file licenses/LGPLv2.1).            *)
(*                                                                        *)
(**************************************************************************)

type t = Float.Array.t

let get t n =
  let x = Float.Array.get t n in
  if Float.is_infinite x then None else Some x

let of_feature_values (t : Parser.t) features : t =
  let n = int_of_string t.learner.learner_model_param.num_feature in
  let a = Float.Array.make n Float.infinity in
  let h = Hashtbl.create 10 in
  Array.iteri (fun i s -> Hashtbl.add h s i) t.learner.feature_names;
  List.iter
    (fun (s, f) ->
      let i =
        match Hashtbl.find_opt h s with
        | None -> invalid_arg (Printf.sprintf "Feature %s not found" s)
        | Some i -> i
      in
      Float.Array.set a i f)
    features;
  a

let of_csv_file t filename =
  let cin = open_in filename in
  Fun.protect
    ~finally:(fun () -> close_in cin)
    (fun () ->
      let cin = Csv.of_channel ~has_header:true cin in
      let acc =
        Csv.Rows.fold_left
          ~f:(fun acc row ->
            let row = Csv.Row.to_assoc row in
            let row = List.tl row in
            let row = List.map (fun (s, f) -> (s, Float.of_string f)) row in
            of_feature_values t row :: acc)
          ~init:[] cin
      in
      List.rev acc)

let of_csv_file_no_header filename =
  let cin = open_in filename in
  Fun.protect
    ~finally:(fun () -> close_in cin)
    (fun () ->
      let cin = Csv.of_channel ~has_header:false cin in
      let acc =
        Csv.Rows.fold_left
          ~f:(fun acc row ->
            let row = Csv.Row.to_list row in
            let row = List.map Float.of_string row in
            Float.Array.of_list row :: acc)
          ~init:[] cin
      in
      List.rev acc)

let of_svm_file (t : Parser.t) filename =
  let cin = open_in filename in
  let n = int_of_string t.learner.learner_model_param.num_feature in
  let rec aux acc =
    match input_line cin with
    | exception End_of_file -> acc
    | s -> (
      match String.split_on_char ' ' s with
      | _output :: features ->
        let a = Float.Array.make n Float.infinity in
        List.iter
          (fun feature ->
            match String.split_on_char ':' feature with
            | [ id; v ] ->
              (* Indices seems to start at 1 but corresponds to feature 0*)
              let id = int_of_string id - 1 in
              Float.Array.set a id (Float.of_string v)
            | _ ->
              invalid_arg
                (Printf.sprintf "invalid svm in file %s: %s" filename feature))
          features;
        aux (a :: acc)
      | [] -> invalid_arg ("invalid svm file " ^ filename))
  in
  List.rev (aux [])

let of_filename (t : Parser.t) filename =
  match Filename.extension filename with
  | ".csv" ->
    if Array.length t.learner.feature_names = 0
    then of_csv_file_no_header filename
    else of_csv_file t filename
  | ".svm" -> of_svm_file t filename
  | s ->
    invalid_arg
      (Printf.sprintf "Unknown extension %s known are  .csv and .svm" s)
OCaml

Innovation. Community. Security.