package janestreet_csv

  1. Overview
  2. Docs
Tools for working with CSVs on the command line

Install

Dune Dependency

Authors

Maintainers

Sources

janestreet_csv-v0.16.0.tar.gz
sha256=c039eeef15bc68460984e74a003c3068da2e4854c4dc2cdcfec6be946f2b0a65

doc/src/janestreet_csv.csv_tool_lib/csv_sort.ml.html

Source file csv_sort.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
open Core
open Csv_common
module Time = Time_float_unix

module Line_with_sort_key = struct
  (* A CSV row ([line]) together with the value it should be ordered by ([key]).
     The key type is a parameter so the same row can carry the raw cell text first
     and a parsed value later. *)
  type 'a t =
    { key : 'a
    ; line : string list
    }

  (* Orders two rows by applying [cmp] to their keys; [line] is not consulted. *)
  let compare cmp left right = cmp left.key right.key

  (* The row itself, without its key. *)
  let line { line; _ } = line

  (* Replaces the key with [f key], leaving the row untouched. The key type may
     change. *)
  let map_key { key; line } ~f = { key = f key; line }
end

(* What a module must provide to be usable as a typed sort key: a type [t] with a
   comparison function (from [@@deriving compare]) and a parser from raw cell
   text. *)
module type Sortable = sig
  type t [@@deriving compare]

  (* Parses one cell. [Converted.create_inferred] runs the conversion under
     [Option.try_with], so a cell that cannot be parsed is detected there as an
     exception raised from this function. *)
  val of_string : string -> t
end

module Converted = struct
  (* https://en.wikipedia.org/wiki/Schwartzian_transform *)

  (* Rows whose keys have already been converted, packed together with the
     comparison for those keys. The key type is hidden, so the only thing a
     consumer can do with the keys is compare them with [compare]. *)
  type t =
    | T :
        { compare : 'a -> 'a -> int
        ; lines : 'a Line_with_sort_key.t Array.t
        }
        -> t

  (* Packs [lines] as-is: the keys are left unconverted and ordered by [compare]. *)
  let create_id compare lines = T { compare; lines }

  (* Parses every key with [M.of_string] and orders the results by [M.compare].
     Whatever [M.of_string] raises on a bad cell propagates to the caller. *)
  let create (module M : Sortable) lines =
    lines
    |> Array.map ~f:(fun row -> Line_with_sort_key.map_key row ~f:M.of_string)
    |> create_id M.compare
  ;;

  (* Candidate key types for inference, tried from first to last.

     The order matters at least in that Int has to come before Float: 2^63 parses
     as a float, but loses information in the process.

     The remaining typed values (time / span / bytes) cannot be mistaken for one
     another, since their suffixes are unambiguous. *)
  let infer_choices : (module Sortable) list =
    [ (module Time_ns.Span)
    ; (module Byte_units)
    ; (module Time)
    ; (module Int)
    ; (module Float)
    ]
  ;;

  (* Uses the first entry of [infer_choices] that converts every key without
     raising. If none does, the keys stay as strings and are ordered by
     [String.compare]. *)
  let create_inferred lines =
    let attempt choice = Option.try_with (fun () -> create choice lines) in
    match List.find_map infer_choices ~f:attempt with
    | Some converted -> converted
    | None ->
      (* Can't default to natsort because it would change behavior. Sad. *)
      create_id String.compare lines
  ;;
end

module Sort_type = struct
  module T = struct
    (* How the cells of the sort column are to be interpreted. *)
    type t =
      | Bytes
      | Float
      | Infer
      | Int
      | Natsort
      | Span
      | String
      | Time
    [@@deriving compare, enumerate, sexp_of]
  end

  include T

  (* Command-line flag selecting the sort type; [Infer] when the flag is absent. *)
  let param =
    Enum.make_param_optional_with_default_doc
      "-field-type"
      (module T)
      ~aliases:[ "--field-type" ]
      ~default:Infer
      ~doc:"field type for sorting"
      ~represent_choice_with:"_"
  ;;

  (* Turns rows keyed by raw cell text into a [Converted.t] for [sort_type]:
     typed choices parse each key, [Natsort] and [String] keep the text and only
     pick a comparison, and [Infer] defers to [Converted.create_inferred]. *)
  let convert sort_type (lines : string Line_with_sort_key.t Array.t) =
    let parsed_as sortable = Converted.create sortable lines in
    let compared_with compare = Converted.create_id compare lines in
    match sort_type with
    | Infer -> Converted.create_inferred lines
    | String -> compared_with String.compare
    | Natsort -> compared_with Numeric_string.compare
    | Int -> parsed_as (module Int)
    | Float -> parsed_as (module Float)
    | Bytes -> parsed_as (module Byte_units)
    | Span -> parsed_as (module Time_ns.Span)
    | Time -> parsed_as (module Time)
  ;;
end

(* Returns [csv] with its rows stably sorted on the column whose header equals
   [field], with cells interpreted according to [sort_type]. The header is
   unchanged.

   With [reverse] the key comparison is flipped. The sort stays stable, so rows
   with equal keys keep their input order in both directions.

   Raises (via [failwithf]) if no header cell equals [field]. [List.nth_exn]
   raises if a row has no cell at that column, and any exception from
   [Sort_type.convert] propagates. *)
let sort_on_field ~sort_type ~field ~reverse csv =
  let column =
    match List.findi csv.header ~f:(fun _idx name -> String.( = ) name field) with
    | Some (idx, _) -> idx
    | None -> failwithf "unable to find csv field %s" field ()
  in
  (* Pair every row with the raw text of its sort cell. *)
  let keyed =
    Array.of_list_map csv.lines ~f:(fun line ->
      { Line_with_sort_key.key = List.nth_exn line column; line })
  in
  match Sort_type.convert sort_type keyed with
  | T { compare = compare_keys; lines = rows } ->
    let compare_keys =
      if reverse then Comparable.reverse compare_keys else compare_keys
    in
    (* Sorts [rows] in place. *)
    Array.stable_sort rows ~compare:(Line_with_sort_key.compare compare_keys);
    let sorted = rows |> Array.to_list |> List.map ~f:Line_with_sort_key.line in
    { header = csv.header; lines = sorted }
;;

(* Entry point: hands [file] to [Or_file.with_all], and sorts and prints each csv
   it passes to the callback. [separator] is forwarded both to the reader and to
   [print_csv]; [reverse] defaults to [false]. *)
let run ?separator ?(reverse = false) ~sort_type ~field file =
  let sort_and_print csv =
    let sorted = sort_on_field ~sort_type ~field ~reverse csv in
    print_csv ?separator sorted
  in
  Or_file.with_all file ?separator ~f:sort_and_print
;;
OCaml

Innovation. Community. Security.