package ocaml-r

  1. Overview
  2. Docs

Source file OCamlR_base.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
open OCamlR

module Stubs2 = OCamlR_base_stubs2

let subset_symbol = symbol "["
let subset2_symbol = symbol ~generic:true "[["
let missing_arg = (Symsxp.missing_arg () :> sexp)

let gen_raw_subset2 label_dec x label =
  call subset2_symbol [
      arg Enc.sexp x ;
      arg label_dec label ;
  ]

let raw_subset2 = gen_raw_subset2 Enc.string
let raw_subset2_i = gen_raw_subset2 Enc.int

let inherits_symbol = symbol "inherits"
let inherits x s =
  call inherits_symbol [
    arg (fun x -> x) x ;
    arg Enc.string s
  ]
  |> Dec.bool

module Environment = struct
  include Envsxp

  let new_env_symbol = symbol "new.env"
  let create () =
    call new_env_symbol []
    |> unsafe_of_sexp

  let get env ~class_ x =
    let y = raw_subset2 (to_sexp env) x in
    let cls = Sexp._class_ y in
    if List.mem class_ cls then Some y
    else None
end

module type Matrix = sig
  include Atomic_vector
  type vector
  val dim : t -> int * int
  val as_vector : t -> vector
  val of_arrays : repr array array -> t
  val get2 : t -> int -> int -> repr
  val get_row : t -> int -> vector
  val get_col : t -> int -> vector
end

module Make_matrix(V : Atomic_vector) = struct
  include V

  let dim (x : t) =
    match Stubs2.dim (to_sexp x) |> Dec.ints with
    | [| i ; j |] -> (i, j)
    | _ -> assert false

  let as_vector x = x

  let matrix_symbol = symbol "matrix"

  let of_arrays m =
    let data =
      Array.to_list m
      |> Array.concat
      |> V.of_array
      |> to_sexp
    in
    call matrix_symbol Enc.[
      arg Fun.id data ;
      arg int ~name:"nrow" (Array.length m) ;
      arg bool ~name:"byrow" true ;
    ]
    |> unsafe_of_sexp

  let get_row m i =
    call subset_symbol [
      arg V.to_sexp m  ;
      arg Enc.int i ;
      arg Enc.sexp missing_arg ;
    ]
    |> V.unsafe_of_sexp

  let get_col m j =
    call subset_symbol [
      arg V.to_sexp m  ;
      arg Enc.sexp missing_arg ;
      arg Enc.int j ;
    ]
    |> V.unsafe_of_sexp
end

module type Vector = sig
  include Atomic_vector
  val c : t list -> t
  module Matrix : Matrix with type repr := repr
                          and type vector := t
end

module Make_vector(V : Atomic_vector) = struct
  include V
  let c_symbol = symbol "c"
  let c xs =
    call c_symbol (List.map (arg to_sexp) xs)
    |> unsafe_of_sexp
  module Matrix = Make_matrix(V)
end

module Numeric = Make_vector(Realsxp)
module Logical = Make_vector(Lglsxp)
module Integer = Make_vector(Intsxp)
module Character = Make_vector(Strsxp)

module Factor = struct
  include Integer

  let factor_fun = symbol "factor"
  let of_integer xs =
    call factor_fun [ arg Integer.to_sexp xs ]
    |> unsafe_of_sexp
  let of_character xs =
    call factor_fun [ arg Character.to_sexp xs ]
    |> unsafe_of_sexp

  let of_array xs = of_integer (of_array xs)
  let of_list xs = of_integer (of_list xs)
  let of_array_opt xs = of_integer (of_array_opt xs)

  let levels x =
    attr x "levels"
    |> Character.unsafe_of_sexp
end

type matrix = [
  | `Numeric   of Numeric.Matrix.t
  | `Logical   of Logical.Matrix.t
  | `Integer   of Integer.Matrix.t
  | `Factor    of Factor.Matrix.t
  | `Character of Character.Matrix.t
]

let classify_atomic_data x =
  match Sexptype.of_sexp x with
  | IntSxp ->
    if inherits x "factor"
    then Some (`Factor (Factor.unsafe_of_sexp x))
    else Some (`Integer (Integer.unsafe_of_sexp x))
  | RealSxp -> Some (`Numeric (Numeric.unsafe_of_sexp x))
  | StrSxp -> Some (`Character (Character.unsafe_of_sexp x))
  | LglSxp -> Some (`Logical (Logical.unsafe_of_sexp x))
  | _ -> None

module List_ = struct
  include Vecsxp

  let as_vecsxp x = x

  let gen_subset2 subset2 x field dec =
    subset2 (to_sexp x) field
    |> Sexp.nil_map ~f:dec

  let subset2 x field dec = gen_subset2 raw_subset2 x field dec
  let subset2_i x field dec = gen_subset2 raw_subset2_i x field dec

  let gen_subset2_exn f label x field dec =
    match f x field dec with
    | None -> failwith label
    | Some y -> y

  let subset2_exn x field dec = gen_subset2_exn subset2 "subset2_exn" x field dec
  let subset2_i_exn x field dec = gen_subset2_exn subset2_i "subset2_i_exn" x field dec

  let list_symbol = symbol "list"

  let create xs =
    List.map (fun (maybe_label, sexp) -> arg Sexp.to_sexp ?name:maybe_label sexp) xs
    |> call list_symbol
    |> unsafe_of_sexp
end

module Dataframe = struct
  include List_
  let as_list x = x

  let dim_symbol = symbol "dim.data.frame"
  let dim x =
    call dim_symbol [
      arg to_sexp ~name:"x" x ;
    ]
    |> Dec.ints
    |> function
    | [| i ; j |] -> (i, j)
    | _ -> assert false

  let of_env (env : Environment.t) x =
    Environment.get env ~class_:"data.frame" x
    |> Option.map unsafe_of_sexp

  type column = [
      `Numeric of Numeric.t
    | `Integer of Integer.t
    | `Logical of Logical.t
    | `Character of Character.t
    | `Factor of Factor.t
  ]

  let rarg_of_column_data name =
    let f g x = arg g ~name x in
    function
    | `Numeric x -> f Numeric.to_sexp x
    | `Logical x -> f Logical.to_sexp x
    | `Character x -> f Character.to_sexp x
    | `Integer x -> f Integer.to_sexp x
    | `Factor x -> f Factor.to_sexp x

  let create cols =
    List.map
      (fun (label, col) -> rarg_of_column_data label col)
      cols
    |> call (symbol "data.frame")
    |> unsafe_of_sexp

  let rbind_symbol = symbol "rbind"

  let rbind x y =
    call rbind_symbol [
      arg to_sexp x ;
      arg to_sexp y
    ]
    |> unsafe_of_sexp

  let cbind_symbol = symbol "cbind"

  let cbind x y =
    call cbind_symbol [
      arg to_sexp x ;
      arg to_sexp y ;
    ]
    |> unsafe_of_sexp

  let get_row m i =
    call subset_symbol [
      arg to_sexp m  ;
      arg Enc.int i ;
      arg Enc.sexp missing_arg ;
    ]
    |> unsafe_of_sexp

  let classify_column x =
    match classify_atomic_data x with
    | Some x -> x
    | None ->
      let msg =
        Printf.sprintf
          "OCamlR_base.Dataframe.classify_column: unsupported %s sexp"
          (Sexptype.to_string (Sexptype.of_sexp x))
    in
    invalid_arg msg

  let get_col m j =
    call subset_symbol [
      arg to_sexp m  ;
      arg Enc.sexp missing_arg ;
      arg Enc.int j ;
    ]
    |> classify_column

  let as_matrix_symbol = symbol "as.matrix.data.frame"

  let as'matrix df =
    call as_matrix_symbol [
      arg to_sexp df ;
    ]
    |> classify_atomic_data
    |> Option.get
end

let sample_symbol = symbol "sample"

let sample ?replace ?prob ~size x =
  call sample_symbol Enc.[
    arg floats x ;
    arg ~name:"size" int size ;
    opt_arg bool "replace" replace ;
    opt_arg floats "prob" prob ;
  ]
  |> Dec.floats

let readRDS_symbol = symbol "readRDS"

let readRDS fn =
  call readRDS_symbol Enc.[
    arg ~name:"file" string fn ;
  ]

let saveRDS_symbol = symbol "saveRDS"

let saveRDS ?ascii ?compress ~file obj =
  call saveRDS_symbol Enc.[
    arg ~name:"object" Fun.id obj ;
    arg ~name:"file" string file ;
    opt_arg bool "ascii" ascii ;
    opt_arg bool "compress" compress ;
  ]
  |> ignore

let table_symbol = symbol "table"

let table (type s) (module Vector : Vector with type t = s) (x : s) =
  call table_symbol [ arg Vector.to_sexp x ]
  |> Integer.unsafe_of_sexp

module Formula = struct
  include Langsxp
end
OCaml

Innovation. Community. Security.