package janestreet_csv
Tools for working with CSVs on the command line
Install
Dune Dependency
Authors
Maintainers
Sources
v0.17.0.tar.gz
sha256=8940b4aa979a3bd5993b52d36cd768fd3bd5d1fb11c36c9c269ee646c7511a41
doc/src/janestreet_csv.csv_tool_lib/cut.ml.html
Source file cut.ml
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158
open Core exception Not_enough_rows (*exception Invalid_range of string list*) type row_processor = Csv_common.Or_file.t -> skip_header:bool -> sep:char -> f:(string array -> unit) -> [ `Limit_to of string list | `All_but of string list ] -> unit let load_rows ?on_invalid_row file ~sep ~f = let process ic = protectx ic ~f:(fun ic -> Delimited_kernel.Read.fold_lines ?on_invalid_row ~sep ~header:`No Delimited_kernel.Read.Row.builder ic ~init:() ~f:(fun () row -> f (Delimited_kernel.Read.Row.to_array row))) ~finally:In_channel.close in match (file : Csv_common.Or_file.t) with | Csv { header; lines } -> List.iter (header :: lines) ~f:(fun row -> Array.of_list row |> f) | Stdin | File "-" -> process In_channel.stdin | File x -> process (In_channel.create x) ;; let get_positions header_row headers_wanted = match headers_wanted with | `Limit_to headers -> let hmap = Map.of_iteri_exn (module String) ~iteri:(fun ~f -> Array.iteri header_row ~f:(fun i h -> f ~key:h ~data:i) [@nontail]) in Array.of_list headers |> Array.map ~f:(fun header -> match Map.find hmap header with | Some position -> position | None -> (try Int.of_string header with | _ -> raise_s [%message "Unknown header" ~_:(header : string)])) | `All_but headers -> let headers = String.Set.of_list headers in Array.filter_mapi header_row ~f:(fun pos header -> if Set.mem headers header then None else Some pos) ;; let index_positions header_row headers_wanted = match headers_wanted with | `Limit_to headers -> Array.of_list_map headers ~f:Int.of_string | `All_but headers -> let headers = List.map headers ~f:Int.of_string |> Int.Set.of_list in Array.filter_mapi header_row ~f:(fun pos _ -> if Set.mem headers pos then None else Some pos) ;; let cut_by_fields file ~consume_header_names ~skip_header ~sep ~f headers_wanted = let handle_row = ref (fun _ -> assert false) in (handle_row := fun row -> let positions = match consume_header_names with | true -> get_positions row headers_wanted | false -> index_positions row headers_wanted in let grab row = Array.map positions ~f:(fun i -> if Array.length row < i then "" else row.(i)) in if not skip_header then f (grab row); handle_row := fun row -> f (grab row)); load_rows file ~sep ~f:(fun row -> !handle_row row) ;; let cut_by_field_names = cut_by_fields ~consume_header_names:true let cut_by_field_indices = cut_by_fields ~consume_header_names:false exception First_row of string list let field_names ~sep file = let throw_row row = raise (First_row (Array.to_list row)) in try load_rows ~on_invalid_row: (Delimited_kernel.Read.On_invalid_row.create (fun ~line_number:_ _ _ exn -> match exn with | First_row _ -> `Raise exn | _ -> `Fallback Delimited_kernel.Read.On_invalid_row.raise)) file ~sep ~f:throw_row; (* should throw First_row before this! *) raise Not_enough_rows with | First_row row -> row ;; let regex_match file ~sep ~f ~regexp = let handle_row rows = let grab rows = Array.map rows ~f:(fun row -> if Pcre.pmatch ~rex:regexp row then row else "") in f (grab rows) in load_rows file ~sep ~f:handle_row ;; (* apply [f_populated] to rows that have all fields corresponding to [headers] populated apply [f_unpopulated] to all other rows *) let split_populated_rows file ~skip_header ~sep ~f_populated ~f_unpopulated headers_wanted = let handle_row = ref (fun _ -> assert false) in (handle_row := fun row -> let positions = get_positions row headers_wanted in let populated row = let n = Array.length row in Array.for_all positions ~f:(fun i -> i < n && String.length row.(i) > 0) in if not skip_header then ( f_populated row; f_unpopulated row); handle_row := fun row -> if populated row then f_populated row else f_unpopulated row); load_rows file ~sep ~f:(fun row -> !handle_row row) ;; let fully_populated_rows file ~skip_header ~sep ~f headers_wanted = split_populated_rows file ~skip_header ~sep ~f_populated:f ~f_unpopulated:ignore headers_wanted ;; let not_fully_populated_rows file ~skip_header ~sep ~f headers_wanted = split_populated_rows file ~skip_header ~sep ~f_populated:ignore ~f_unpopulated:f headers_wanted ;;
sectionYPositions = computeSectionYPositions($el), 10)"
x-init="setTimeout(() => sectionYPositions = computeSectionYPositions($el), 10)"
>