(* Package pythonlib -- source file import.ml *)
open Base
open Poly
include Ppx_python_runtime


(** Python object handle; an alias for [Pytypes.pyobject]. *)
type pyobject = Pytypes.pyobject

(* Conversions between [pyobject] and "python" are the identity in both
   directions: the argument is handed back unchanged. *)
let python_of_pyobject obj = obj
let pyobject_of_python obj = obj

(** Builds python converters for [Conv.t] by going through [Pythonable.t]:
    [Conv.to_pythonable] / [Conv.of_pythonable] translate between the two OCaml
    types and [Pythonable]'s own converters handle the python side. *)
module Of_pythonable (Pythonable : sig
    type t [@@deriving python]
  end)
    (Conv : sig
       type pythonable
       type t

       val to_pythonable : t -> pythonable
       val of_pythonable : pythonable -> t
     end
     with type pythonable := Pythonable.t) : sig
  type t [@@deriving python]
end
with type t := Conv.t = struct
  (* OCaml -> python: first to the intermediate type, then to python. *)
  let python_of_t value = Pythonable.python_of_t (Conv.to_pythonable value)

  (* python -> OCaml: decode the intermediate type, then convert it back. *)
  let t_of_python obj = Conv.of_pythonable (Pythonable.t_of_python obj)
end

(** Python converters for any [Stringable.S] type: values cross the boundary as
    python strings, using [M.to_string] and [M.of_string]. *)
module Convert_as_string (M : Stringable.S) = struct
  let python_of_t value = python_of_string (M.to_string value)
  let t_of_python obj = M.of_string (string_of_python obj)
end

(** [get_class p] looks up [p.__class__.__name__] and returns it as an OCaml
    string, or [None] if either attribute lookup yields nothing. *)
let get_class p =
  match Py.Object.get_attr_string p "__class__" with
  | None -> None
  | Some cls ->
    (match Py.Object.get_attr_string cls "__name__" with
     | None -> None
     | Some name -> Some (Py.String.to_string name))
;;

module One_or_tuple = struct
  (* A list that is represented on the python side as [None] (empty list), a
     bare value (exactly one element) or a tuple (any other length).

     Decoding tries the bare-value interpretation first, so
     'a should not be encoded as a python tuple or none! *)
  type 'a t = 'a list

  (* [python_of_t python_of_a t] encodes [t]: the empty list becomes [Py.none],
     a singleton becomes the encoding of its only element, and any other list
     becomes a python tuple of the encoded elements. *)
  let python_of_t python_of_a t =
    match t with
    | [] -> Py.none
    | [ v ] -> python_of_a v
    | vs -> Py.Tuple.of_list_map python_of_a vs
  ;;

  (* [t_of_python a_of_python p] decodes [p]:
     - if [a_of_python p] succeeds, the result is that single element;
     - otherwise (on any exception) python's [None] decodes to [[]] and a tuple
       is decoded element by element with [a_of_python].

     Raises [Failure "incorrect python type"] when none of the above applies.
     Exceptions raised by [a_of_python] on tuple elements are not caught. *)
  let t_of_python a_of_python p =
    match a_of_python p with
    | v -> [ v ]
    | exception _ ->
      (* [Py.is_none] asks directly whether [p] is python's [None]. The
         previous [p = Py.none] went through OCaml polymorphic equality (this
         file opens [Poly]) on opaque python object handles. *)
      if Py.is_none p
      then []
      else if Py.Tuple.check p
      then Py.Tuple.to_list_map a_of_python p
      else failwith "incorrect python type"
  ;;
end

(** [to_iterable p] returns the python object that callers should iterate
    over, if any:
    - [Some p] when [p] passes [Py.List.check];
    - [Some (p.tolist ())] when the class name of [p] is ["Series"];
    - [Some p] when [p] passes [Py.Iter.check];
    - [None] otherwise. *)
let to_iterable p =
  match Py.List.check p with
  | true -> Some p
  | false ->
    (match get_class p with
     | Some "Series" ->
       (* Why a pandas Series is converted with [tolist] first:

          [Py.List.to_list] assumes that the python object follows the
          PySequence[1] protocol. Most importantly, it expects the object to
          implement the PySequence_GetItem[2] C function, which is used to
          access individual elements.

          A pandas Series does not follow the PySequence protocol, so the
          [Py.List] module cannot be used to access its elements or to iterate
          over it. In particular, [Py.List.to_list] fails for a Series that has
          been sliced into a non-contiguous layout (e.g. [series[::2]] in
          python). Even in python, [series[i]] (with [i] an index) is not the
          way to reach individual elements; the recommended way is the [iloc]
          or [at] attributes.

          Iterating over a sequence in python with [for ... in ...] works by
          first obtaining an iterator via the builtin [iter]. In OCaml we could
          similarly obtain an iterator with [Py.Object.get_iter] and walk it
          with the [Py.Iter] module. Other alternatives are:

          1. converting to a python list (the [Series.tolist] method)
          2. converting to a numpy array (the [Series.to_numpy] method)
          3. converting to a pandas array (the [Series.array] attribute)

          Our profiling results (as of Dec 4, 2019) indicate that converting to
          a python list is ~60% faster than converting to a numpy array (the
          next best alternative).

          Interestingly, [Py.List.to_list] does work for a Series that is not
          sliced and only fails for sliced Series with non-contiguous elements.
          For correctness guarantees and performance reasons we nevertheless
          always convert the Series to a python list before
          [Py.List.to_list_map] is called on it.

          References:

          [1] https://docs.python.org/3.6/c-api/sequence.html
          [2] https://docs.python.org/3.6/c-api/sequence.html#c.PySequence_GetItem
          [3] https://github.com/pandas-dev/pandas/issues/30042
       *)
       Some (Py.Module.get_function_with_keywords p "tolist" [||] [])
     | Some _ | None -> if Py.Iter.check p then Some p else None)
;;

module One_or_tuple_or_list = struct
  (* Like [One_or_tuple], but decoding also accepts anything [to_iterable]
     recognises. 'a should not be encoded as a python tuple, list or none! *)
  type 'a t = 'a list

  (* Encoding is shared with [One_or_tuple]. *)
  let python_of_t = One_or_tuple.python_of_t

  (* [t_of_python a_of_python p] first tries [One_or_tuple.t_of_python]. If
     that raises (any exception), [p] is passed through [to_iterable] and the
     result is decoded element by element.

     Raises [Failure "incorrect python type"] when [to_iterable] returns
     [None]. *)
  let t_of_python a_of_python p =
    match One_or_tuple.t_of_python a_of_python p with
    | decoded -> decoded
    | exception _ ->
      (match to_iterable p with
       | None -> failwith "incorrect python type"
       | Some iterable -> Py.List.to_list_map a_of_python iterable)
  ;;
end

module Or_error_python = struct
  (* An [Or_error.t] whose error case is represented on the python side by an
     exception object (returned as a value, not raised). *)
  type 'a t = 'a Or_error.t

  (* [value_error_obj str] calls the python builtin [ValueError] with [str] as
     its single argument and returns the resulting object. *)
  let value_error_obj str =
    let builtins = Py.Module.builtins () in
    let value_error = Py.Module.get builtins "ValueError" in
    Py.Object.call_function_obj_args value_error [| python_of_string str |]
  ;;

  (* [of_error pyobject] returns [Some message] when [pyobject] is an instance
     of the python builtin [Exception], where [message] is the string form of
     each entry of its [args] attribute joined with [", "]. Returns [None] for
     any other object. Raises (via [Option.value_exn]) if the exception has no
     [args] attribute. *)
  let of_error pyobject =
    let pyexception = Py.Module.get (Py.Module.builtins ()) "Exception" in
    match Py.Object.is_instance pyobject pyexception with
    | false -> None
    | true ->
      let args =
        Option.value_exn
          ~message:"no args field on python exception"
          (Py.Object.get_attr_string pyobject "args")
      in
      let parts = list_of_python Py.Object.to_string args in
      Some (String.concat ~sep:", " parts)
  ;;

  (* [t_of_python ok_of_python p] yields an [Error] built from the exception
     message when [p] is a python exception object. Otherwise it decodes [p]
     with [ok_of_python], turning any exception raised by the decoder into an
     [Error] as well. *)
  let t_of_python ok_of_python p =
    match of_error p with
    | Some message -> Or_error.error_string message
    | None ->
      (try Ok (ok_of_python p) with
       | exn -> Or_error.of_exn exn)
  ;;

  (* [python_of_t python_of_a t] encodes [Ok a] with [python_of_a] and
     [Error err] as a python [ValueError] object carrying the human-readable
     rendering of [err]. *)
  let python_of_t python_of_a = function
    | Ok a -> python_of_a a
    | Error err -> value_error_obj (Error.to_string_hum err)
  ;;
end

(** [value_errorf fmt args...] formats a message printf-style and raises
    [Py.Err (ValueError, message)]. *)
let value_errorf fmt =
  let raise_value_error msg = raise (Py.Err (ValueError, msg)) in
  Printf.ksprintf raise_value_error fmt
;;

module One_or_tuple_or_list_or_error = struct
  (* A list whose elements are each either a decoded value or an error. *)
  type 'a t = 'a Or_error_python.t list

  (* Encoding is shared with [One_or_tuple_or_list]. *)
  let python_of_t = One_or_tuple_or_list.python_of_t

  (* [t_of_python a_of_python p ~type_name] first tries
     [One_or_tuple.t_of_python]; on success every element is wrapped in [Ok].
     If that raises (any exception), [p] is passed through [to_iterable] and
     each element is decoded individually with [Or_error_python.t_of_python],
     so a bad element becomes an [Error] tagged with
     ["trying to parse as " ^ type_name] instead of aborting the whole decode.

     Raises [Failure "incorrect python type"] when [to_iterable] returns
     [None]. *)
  let t_of_python a_of_python p ~type_name =
    let parse_element elt =
      Or_error.tag
        (Or_error_python.t_of_python a_of_python elt)
        ~tag:("trying to parse as " ^ type_name)
    in
    match One_or_tuple.t_of_python a_of_python p with
    | values -> List.map values ~f:(fun value -> Ok value)
    | exception _ ->
      (match to_iterable p with
       | None -> failwith "incorrect python type"
       | Some iterable -> Py.List.to_list_map parse_element iterable)
  ;;
end

(** [python_printf fmt args...] formats a string printf-style and passes it to
    the python builtin [print]; the value returned by [print] is discarded. *)
let python_printf fmt =
  let emit str =
    let print = Py.Module.get (Py.Module.builtins ()) "print" in
    let (_ : pyobject) =
      Py.Object.call_function_obj_args print [| Py.String.of_string str |]
    in
    ()
  in
  Printf.ksprintf emit fmt
;;

(** [python_eprintf fmt args...] formats a string printf-style and passes it to
    the python builtin [print] with the keyword argument [file] set to
    [sys.stderr]; the value returned by [print] is discarded. *)
let python_eprintf fmt =
  let emit str =
    let print = Py.Module.get (Py.Module.builtins ()) "print" in
    let stderr = Py.Module.get (Py.import "sys") "stderr" in
    let (_ : pyobject) =
      Py.Callable.to_function_with_keywords
        print
        [| Py.String.of_string str |]
        [ "file", stderr ]
    in
    ()
  in
  Printf.ksprintf emit fmt
;;
(* OCaml -- Innovation. Community. Security. *)