package janestreet_csv

  1. Overview
  2. Docs
Tools for working with CSVs on the command line

Install

Dune Dependency

Authors

Maintainers

Sources

v0.17.0.tar.gz
sha256=8940b4aa979a3bd5993b52d36cd768fd3bd5d1fb11c36c9c269ee646c7511a41

doc/src/janestreet_csv.csv_tool_lib/csv_sort.ml.html

Source file csv_sort.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
open Core
open Csv_common
module Time = Time_float_unix

(** A CSV row paired with the key it should be sorted by. The key is
    polymorphic so it can start out as the raw cell text and later be replaced
    by a parsed value. *)
module Line_with_sort_key = struct
  type 'a t =
    { key : 'a
    ; line : string list
    }

  (* Orders two rows by their keys alone, using [cmp]; the row contents are
     never inspected. *)
  let compare cmp left right = cmp left.key right.key

  (* Returns the original row, dropping the key. *)
  let line { line; key = _ } = line

  (* Replaces the key with [f key], leaving the row untouched. The key type may
     change. *)
  let map_key { key; line } ~f = { key = f key; line }
end

(** What a key type must provide to be used for sorting a column: a total
    order and a parser from the raw cell text. *)
module type Sortable = sig
  (* The parsed key type; the derived [compare] defines the sort order. *)
  type t [@@deriving compare]

  (* Parses a raw cell into a key. [Converted.create_inferred] treats an
     exception raised here as "this type does not fit the column". *)
  val of_string : string -> t
end

(** Rows whose keys have been parsed into some comparable type, bundled with
    the comparison function for that type. *)
module Converted = struct
  (* Keys are decoded once, up front, and carried alongside their rows so that
     sorting never has to re-parse a cell:
     https://en.wikipedia.org/wiki/Schwartzian_transform *)

  (* The key type ['a] is existential: a consumer of [t] can only apply
     [compare] to keys taken from [lines]. *)
  type t =
    | T :
        { compare : 'a -> 'a -> int
        ; lines : 'a Line_with_sort_key.t Array.t
        }
        -> t

  (* Packs rows whose keys are already in their final form. *)
  let create_id compare lines = T { lines; compare }

  (* Parses every key with [M.of_string] and orders rows with [M.compare]. Any
     exception raised by [M.of_string] propagates to the caller. *)
  let create (module M : Sortable) lines =
    lines
    |> Array.map ~f:(fun row -> Line_with_sort_key.map_key row ~f:M.of_string)
    |> create_id M.compare
  ;;

  (* Candidate key types for inference, tried first to last.

     The order is significant in at least one place: Int has to come before
     Float, because 2^63 parses as a float but loses information when it does.

     The remaining typed values cannot be mistaken for each other (a
     time/span/byte value is identified unambiguously by its suffix). *)
  let infer_choices : (module Sortable) list =
    [ (module Time_ns.Span)
    ; (module Byte_units)
    ; (module Time)
    ; (module Int)
    ; (module Float)
    ]
  ;;

  (* Uses the first entry of [infer_choices] that parses every key without
     raising. If none does, keys stay as strings and are ordered with
     [String.compare]. *)
  let create_inferred lines =
    let attempt choice = Option.try_with (fun () -> create choice lines) in
    match List.find_map infer_choices ~f:attempt with
    | Some converted -> converted
    | None ->
      (* Falling back to natsort here would change existing behavior, so plain
         string comparison it is. *)
      create_id String.compare lines
  ;;
end

(** How the cells of a sort column are interpreted before being compared. *)
module Sort_type = struct
  module T = struct
    (* Constructor order is left untouched: the derived [compare] and
       [enumerate] both depend on it. *)
    type t =
      | Bytes
      | Float
      | Infer
      | Int
      | Natsort
      | Span
      | String
      | Time
    [@@deriving compare, enumerate, sexp_of]
  end

  include T

  (* Command-line flag [-field-types] (alias [--field-types]) yielding a list
     of sort types. The default is the empty list; the doc string advertises
     that as "infer". *)
  let param =
    Enum.make_param_optional_comma_separated_with_default_doc
      "-field-types"
      (module T)
      ~aliases:[ "--field-types" ]
      ~default:[]
      ~doc:"field type for sorting (default: infer)"
      ~represent_choice_with:"_"
  ;;

  (* Turns string-keyed rows into a [Converted.t] according to [sort_type].
     Typed variants parse each key with the matching module's [of_string];
     [String] and [Natsort] keep the raw text and differ only in the comparison
     used; [Infer] defers to [Converted.create_inferred]. *)
  let convert sort_type (lines : string Line_with_sort_key.t Array.t) =
    let parsed_with (key_type : (module Sortable)) =
      Converted.create key_type lines
    in
    let unparsed_with cmp = Converted.create_id cmp lines in
    match sort_type with
    | Infer -> Converted.create_inferred lines
    | String -> unparsed_with String.compare
    | Natsort -> unparsed_with Numeric_string.compare
    | Int -> parsed_with (module Int)
    | Float -> parsed_with (module Float)
    | Bytes -> parsed_with (module Byte_units)
    | Span -> parsed_with (module Time_ns.Span)
    | Time -> parsed_with (module Time)
  ;;
end

(** Direction in which a single sort column is ordered. *)
module Order = struct
  (* [sort_on_field] uses the key comparison as-is for [Ascending] and the
     reversed comparison for [Descending]. *)
  type t =
    | Ascending
    | Descending
  [@@deriving compare, enumerate, sexp_of]
end

(** One column to sort by: which field, in which direction, and how its cells
    are interpreted. *)
module Sort_column = struct
  type t =
    { field : string
    ; order : Order.t
    ; sort_type : Sort_type.t
    }
  [@@deriving sexp_of]

  (* Command-line parameter producing one [t] per requested sort field, in the
     order the fields were given. Raises (via [failwith]) when the flags are
     inconsistent with each other. *)
  let param : t list Command.Param.t =
    let%map_open.Command () = return ()
    and fields = Csv_param.fields_backward_compat
    and sort_types = Sort_type.param
    and reverse = Csv_param.reverse
    and reverse_fields = Csv_param.reverse_fields in
    (* One copy of [value] per sort field. *)
    let for_every_field value = List.map fields ~f:(fun _ -> value) in
    (* No explicit types means "infer each column"; otherwise there must be
       exactly one type per field. *)
    let sort_types =
      if List.is_empty sort_types
      then for_every_field Sort_type.Infer
      else if List.length sort_types = List.length fields
      then sort_types
      else
        failwith
          "when specifying sort types you must specify one per sort field, in the \
           same order as the sort fields"
    in
    let orders =
      match reverse_fields, reverse with
      | [], true ->
        (* -reverse with no per-field list reverses every sort field *)
        for_every_field Order.Descending
      | [], false -> for_every_field Order.Ascending
      | _ :: _, true -> failwith "may not specify both -reverse and -reverse-fields"
      | _ :: _, false ->
        let descending = String.Set.of_list reverse_fields in
        if Set.is_subset descending ~of_:(String.Set.of_list fields)
        then
          List.map fields ~f:(fun name ->
            if Set.mem descending name then Order.Descending else Order.Ascending)
        else failwith "-reverse-fields must list a subset of the sort fields"
    in
    (* Re-check the lengths before combining, so a mismatch is reported with a
       descriptive message rather than by the combining function. *)
    let num_fields = List.length fields in
    let num_orders = List.length orders in
    let num_sort_types = List.length sort_types in
    if num_fields <> num_orders
    then raise_s [%message "BUG" (num_fields : int) "<>" (num_orders : int)]
    else if num_fields <> num_sort_types
    then
      failwith
        [%string
          "Unequal number of fields (%{num_fields#Int}) and sort_types \
           (%{num_sort_types#Int})"]
    else
      List.map3_exn fields sort_types orders ~f:(fun field sort_type order ->
        { field; order; sort_type })
  ;;
end

(** Stably sorts the rows of [csv] by a single column.

    Raises (via [failwithf]) when [field] is not present in the header.
    [List.nth_exn] raises when a row has no cell at the column's index; so
    does the key parser when an explicitly typed column contains a cell it
    cannot parse. *)
let sort_on_field ({ field; order; sort_type } : Sort_column.t) csv =
  match List.findi csv.header ~f:(fun _ name -> String.equal name field) with
  | None -> failwithf "unable to find csv field %s" field ()
  | Some (column, _) ->
    (* Pull each row's sort cell out once, before any comparison happens. *)
    let keyed =
      Array.of_list_map csv.lines ~f:(fun line ->
        { Line_with_sort_key.key = List.nth_exn line column; line })
    in
    (match Sort_type.convert sort_type keyed with
     | Converted.T { compare = compare_keys; lines = rows } ->
       let compare_keys =
         match order with
         | Order.Ascending -> compare_keys
         | Order.Descending -> fun a b -> compare_keys b a
       in
       (* In-place and stable: rows with equal keys keep their relative
          order. *)
       Array.stable_sort rows ~compare:(Line_with_sort_key.compare compare_keys);
       { header = csv.header
       ; lines = Array.to_list (Array.map rows ~f:Line_with_sort_key.line)
       })
;;

(** Sorts [csv] by every column in [ts]. Columns are applied last to first;
    because each pass is stable, the first column ends up as the primary
    key. *)
let sort_on_fields ts csv =
  List.fold (List.rev ts) ~init:csv ~f:(fun sorted column ->
    sort_on_field column sorted)
;;

(** Reads [file] via [Or_file.with_all], sorts it by the columns in [ts], and
    prints the result with [print_csv]. [separator], when given, is forwarded
    to both the reader and the printer. *)
let run ?separator ts file =
  Or_file.with_all file ?separator ~f:(fun csv ->
    let sorted = sort_on_fields ts csv in
    print_csv ?separator sorted)
;;
OCaml

Innovation. Community. Security.