package SZXX
Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source
Page
Library
Module
Module type
Parameter
Class
Class type
Source
Source file zip.ml
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382
(* Streaming ZIP archive reader.
   Parsing is done incrementally with Angstrom; Deflate decompression uses De;
   CRC-32 checking uses Checkseum; async streaming uses Lwt.
   NOTE(review): `Parsing` (slice_size, storage, skip_until_pattern,
   bounded_file_reader) is a sibling module of this library, not visible here. *)
open! Core_kernel

open Lwt.Syntax

open Lwt.Infix

(* Compression method of an entry. Only these two are supported; see
   [flags_methd_parser] below, which rejects anything else. *)
type methd =
  | Stored
  | Deflated
[@@deriving sexp_of]

(* "version needed to extract" from the local file header: 20 = ZIP 2.0, 45 = ZIP64. *)
type version =
  | Zip_2_0
  | Zip_4_5
[@@deriving sexp_of]

(* CRC-32 and sizes as recorded in the local file header (or, when the entry
   uses a trailing data descriptor, in that descriptor). *)
type descriptor = {
  crc: Int32.t;
  compressed_size: Int64.t;
  uncompressed_size: Int64.t;
}
[@@deriving sexp_of]

(* One raw "extra field" record (id/size/data); id 1 carries ZIP64 sizes. *)
type extra_field = {
  id: int;
  size: int;
  data: string;
}
[@@deriving sexp_of]

(* Everything parsed from one local file header. *)
type entry = {
  version_needed: version;
  flags: int;
  (* True when bit 3 of [flags] is set, or when either size field is zero —
     in those cases sizes/CRC must be read from the descriptor after the data. *)
  trailing_descriptor_present: bool;
  methd: methd;
  descriptor: descriptor;
  filename: string;
  extra_fields: extra_field list;
}
[@@deriving sexp_of]

(* What the caller wants done with each entry's data, chosen per-entry by the
   callback passed to [stream_files]. *)
module Action = struct
  type 'a t =
    | Skip
    | String
    | Fold_string of {
        init: 'a;
        f: entry -> string -> 'a -> 'a;
      }
    | Fold_bigstring of {
        init: 'a;
        f: entry -> Bigstring.t -> len:int -> 'a -> 'a;
      }
    | Parse of 'a Angstrom.t
end

(* Result of processing one entry; constructors mirror [Action.t]. *)
module Data = struct
  type 'a t =
    | Skip
    | String of string
    | Fold_string of 'a
    | Fold_bigstring of 'a
    | Parse of ('a, string) result
end

let slice_size = Parsing.slice_size

(* Inflate window bits. NOTE(review): 10 bits = 1 KiB window, which is smaller
   than zlib's default 15 — presumably matches [slice_size]; confirm against
   the Parsing module. *)
let slice_bits = 10

module Storage = struct
  (* Sink for decompressed output: [add] receives a bigstring and a byte
     count, [finalize] flushes any pending state. *)
  type t = Parsing.storage = {
    add: Bigstring.t -> int -> unit;
    finalize: unit -> unit;
  }

  (* Wrap [flush] with a streaming inflate decoder: compressed slices passed
     to [add] are decompressed into [outbs] and flushed downstream. *)
  let deflated flush =
    (* [finished] tracks whether the decoder has reached `End; [finalize]
       pumps one empty input through if it hasn't. *)
    let finished = ref true in
    let w = De.make_window ~bits:slice_bits in
    let outbs = Bigstring.create slice_size in
    let decoder = De.Inf.decoder `Manual ~o:outbs ~w in
    (* Drain the decoder; returns true iff the deflate stream ended. *)
    let rec do_uncompress () =
      match De.Inf.decode decoder with
      | `Await -> false
      | `End ->
        (* dst_rem = free space left in [outbs]; flush whatever was written. *)
        let len = slice_size - De.Inf.dst_rem decoder in
        if len > 0 then flush outbs len;
        true
      | `Flush ->
        let len = slice_size - De.Inf.dst_rem decoder in
        flush outbs len;
        De.Inf.flush decoder;
        do_uncompress ()
      | `Malformed err -> failwith err
    in
    let uncompress bs len =
      De.Inf.src decoder bs 0 len;
      do_uncompress ()
    in
    let add bs len = finished := uncompress bs len in
    let finalize () = if not !finished then ignore (uncompress outbs 0) in
    { add; finalize }

  (* Stored (uncompressed) entries: pass bytes straight through. *)
  let stored flush =
    let add = flush in
    let finalize () = () in
    { add; finalize }
end

module Mode = struct
  (* A per-entry consumer: [flush] is fed decompressed slices; [complete]
     returns the final data, total byte count, and running CRC-32. *)
  type 'a t = {
    flush: Bigstring.t -> int -> unit;
    complete: unit -> 'a Data.t * int * Optint.t;
  }

  (* Discard the data but still track length and CRC for validation. *)
  let skip () =
    let crc = ref Optint.zero in
    let bytes_processed = ref 0 in
    let flush bs len =
      bytes_processed := !bytes_processed + len;
      crc := Checkseum.Crc32.digest_bigstring bs 0 len !crc
    in
    let complete () = Data.Skip, !bytes_processed, !crc in
    { flush; complete }

  (* Accumulate the whole entry into one string; CRC is computed once at the
     end over the full contents rather than incrementally. *)
  let string ~buffer_size =
    let res = Bigbuffer.create buffer_size in
    let flush bs len = Bigbuffer.add_bigstring res (Bigstring.sub_shared bs ~pos:0 ~len) in
    let complete () =
      let str = Bigbuffer.contents res in
      let len = String.length str in
      Data.String str, len, Checkseum.Crc32.digest_string str 0 len Optint.zero
    in
    { flush; complete }

  (* Fold over the data slice-by-slice as strings (copies each slice). *)
  let fold_string ~init ~f entry =
    let crc = ref Optint.zero in
    let bytes_processed = ref 0 in
    let acc = ref init in
    let flush bs len =
      let s = Bigstring.to_string bs ~pos:0 ~len in
      bytes_processed := !bytes_processed + len;
      crc := Checkseum.Crc32.digest_string s 0 len !crc;
      acc := f entry s !acc
    in
    let complete () = Data.Fold_string !acc, !bytes_processed, !crc in
    { flush; complete }

  (* Fold over the data slice-by-slice as bigstrings. NOTE(review): [bs] is the
     decoder's internal buffer, so [f] must not retain it past the call. *)
  let fold_bigstring ~init ~f entry =
    let crc = ref Optint.zero in
    let bytes_processed = ref 0 in
    let acc = ref init in
    let flush bs len =
      bytes_processed := !bytes_processed + len;
      crc := Checkseum.Crc32.digest_bigstring bs 0 len !crc;
      acc := f entry bs ~len !acc
    in
    let complete () = Data.Fold_bigstring !acc, !bytes_processed, !crc in
    { flush; complete }

  (* Run a nested Angstrom parser over the decompressed bytes using the
     Buffered interface; [complete] feeds `Eof and converts the final state. *)
  let parse angstrom =
    let crc = ref Optint.zero in
    let bytes_processed = ref 0 in
    let open Angstrom.Buffered in
    let state = ref (parse angstrom) in
    let flush bs len =
      bytes_processed := !bytes_processed + len;
      crc := Checkseum.Crc32.digest_bigstring bs 0 len !crc;
      match !state with
      (* Once the nested parser is done or failed, keep consuming (for CRC)
         but stop feeding it. *)
      | Done _
       |Fail _ ->
        ()
      | Partial feed -> state := feed (`Bigstring (Bigstring.sub_shared bs ~pos:0 ~len))
    in
    let complete () =
      let final_state =
        match !state with
        | (Done _ as x)
         |(Fail _ as x) ->
          x
        | Partial feed -> feed `Eof
      in
      Data.Parse (state_to_result final_state), !bytes_processed, !crc
    in
    { flush; complete }
end

(* Angstrom parser that consumes exactly [size] bytes, delivering them to the
   storage sink in [slice_size] chunks, then finalizes the sink. *)
let fixed_size_reader size Storage.{ add; finalize } =
  let open Angstrom in
  let rec loop = function
    | n when n > slice_size ->
      take_bigstring slice_size >>= fun bs ->
      add bs slice_size;
      (loop [@tailcall]) (n - slice_size)
    | 0 ->
      finalize ();
      commit
    | n ->
      (* Final partial chunk; commit to release Angstrom's internal buffer. *)
      take_bigstring n <* commit >>= fun bs ->
      add bs n;
      (loop [@tailcall]) 0
  in
  loop size

(* Int64 shift/or operators and byte indexing used by [parse_le_uint64]. *)
let ( << ) = Int64.( lsl )

let ( ||* ) = Int64.( lor )

let ( .*[] ) s i = s.[i] |> Char.to_int |> Int64.of_int_exn

(* Read 8 bytes of [s] starting at [offset] as a little-endian unsigned 64-bit
   integer (used for ZIP64 extra-field sizes). *)
let parse_le_uint64 ?(offset = 0) s =
  (1, s.*[offset])
  |> Fn.apply_n_times ~n:7 (fun (i, x) -> i + 1, s.*[i + offset] << i * 8 ||* x)
  |> snd

(* Parser for one complete archive member: local file header, file data
   (handled per the [Action.t] returned by [cb]), and optional trailing
   descriptor. Yields [(entry, data)] after validating size and CRC. *)
let parser cb =
  let open Angstrom in
  let local_file_header_signature = string "PK\003\004" in
  (* crc (int32) + compressed/uncompressed sizes (int32, widened to int64). *)
  let descriptor_parser =
    lift3
      (fun crc compressed_size uncompressed_size -> { crc; compressed_size; uncompressed_size })
      LE.any_int32
      (LE.any_int32 >>| Int32.to_int64)
      (LE.any_int32 >>| Int32.to_int64)
  in
  (* Parse id/size/data records until exactly [left] bytes are consumed;
     each record costs size + 4 header bytes. *)
  let rec extra_fields_parser acc = function
    | 0 -> return acc
    | left when left < 0 -> failwith "Mismatch between reported and actual extra fields size"
    | left ->
      lift2 Tuple2.create LE.any_uint16 LE.any_uint16 >>= fun (id, size) ->
      take size >>= fun data ->
      (extra_fields_parser [@tailcall]) ({ id; size; data } :: acc) (left - (size + 4))
  in
  (* filename length + extra-fields length, then the two variable fields. *)
  let dynamic_len_fields_parser =
    lift2 Tuple2.create LE.any_uint16 LE.any_uint16 >>= fun (filename_len, extra_fields_len) ->
    lift2 Tuple2.create (take filename_len) (extra_fields_parser [] extra_fields_len)
  in
  (* General-purpose flags + compression method; rejects encryption (bit 0)
     and any method other than stored (0) / deflated (8). *)
  let flags_methd_parser =
    lift2
      (fun flags methd ->
        if flags land 0x001 <> 0 then failwith "Encrypted entries not supported";
        let methd =
          match methd with
          | 0 -> Stored
          | 8 -> Deflated
          | x -> failwithf "Unsupported compression method %d" x ()
        in
        flags, methd)
      LE.any_uint16 LE.any_uint16
  in
  (* Look up the ZIP64 extra field (id 1, 16 bytes) and build the real
     descriptor from it, keeping the header CRC. *)
  let get_zip64_descriptor ~crc extra_fields =
    List.find_map extra_fields ~f:(function
      | { id = 1; size = 16; data } ->
        Some
          {
            crc;
            uncompressed_size = parse_le_uint64 data;
            compressed_size = parse_le_uint64 ~offset:8 data;
          }
      | { id = 1; size; _ } ->
        failwithf "Expected 16 bytes for ZIP64 extra field length but found %d" size ()
      | _ -> None)
  in
  let entry_parser =
    (* Scan forward to the next local file header signature. *)
    Parsing.skip_until_pattern ~pattern:"PK\003\004"
    *> (LE.any_uint16 >>| function
        | 20 -> Zip_2_0
        | 45 -> Zip_4_5
        | x -> failwithf "Unsupported version: %d. Please report this bug." x ())
    >>= fun version_needed ->
    lift3
      (fun (flags, methd) descriptor (filename, extra_fields) ->
        let descriptor =
          match version_needed with
          | Zip_2_0 -> descriptor
          | Zip_4_5 ->
            Option.value_exn ~message:"Missing ZIP64 extra field"
              (get_zip64_descriptor ~crc:descriptor.crc extra_fields)
        in
        {
          version_needed;
          flags;
          trailing_descriptor_present =
            (flags land 0x008 <> 0
            || Int64.(descriptor.compressed_size = 0L)
            || Int64.(descriptor.uncompressed_size = 0L));
          methd;
          descriptor;
          filename;
          extra_fields;
        })
      (flags_methd_parser
      <* LE.any_uint16 (* last modified time *)
      <* LE.any_uint16 (* last modified date *))
      descriptor_parser dynamic_len_fields_parser
  in
  (* Commit after the header so buffered input can be released. *)
  lift2 const entry_parser commit >>= fun entry ->
  let reader ~buffer_size () =
    let Mode.{ flush; complete } =
      match cb entry with
      | Action.Skip -> Mode.skip ()
      | Action.String -> Mode.string ~buffer_size
      | Action.Fold_string { init; f } -> Mode.fold_string ~init ~f entry
      | Action.Fold_bigstring { init; f } -> Mode.fold_bigstring ~init ~f entry
      | Action.Parse angstrom -> Mode.parse angstrom
    in
    (* The on-wire length is the uncompressed size for stored entries and the
       compressed size for deflated ones. *)
    let storage_method, zipped_length =
      match entry.methd with
      | Stored -> Storage.stored flush, entry.descriptor.uncompressed_size
      | Deflated -> Storage.deflated flush, entry.descriptor.compressed_size
    in
    (* With unknown sizes (trailing descriptor), read until the data
       descriptor signature PK\007\008; otherwise read exactly the length. *)
    let file_reader =
      if Int64.(entry.descriptor.compressed_size = 0L)
         || Int64.(entry.descriptor.uncompressed_size = 0L)
      then Parsing.bounded_file_reader ~pattern:"PK\007\008"
      else fixed_size_reader (Int64.to_int_exn zipped_length)
    in
    file_reader storage_method >>| complete
  in
  let file =
    match entry.trailing_descriptor_present with
    | false ->
      reader ~buffer_size:(Int64.to_int_exn entry.descriptor.uncompressed_size) () >>| fun x ->
      x, entry
    | true ->
      (* Read data, then the (optionally signature-prefixed) trailing
         descriptor, and substitute it into the entry. *)
      lift3
        (fun data_size_crc _header descriptor -> data_size_crc, { entry with descriptor })
        (reader ~buffer_size:(slice_size * 4) ())
        (option "" local_file_header_signature)
        descriptor_parser
  in
  (* Validate observed size and CRC-32 against the (possibly updated) descriptor. *)
  file >>| fun data ->
  match data with
  | (_data, size, _crc), entry when Int64.(entry.descriptor.uncompressed_size <> of_int size) ->
    failwithf "%s: Size mismatch: %d but expected %Ld" entry.filename size
      entry.descriptor.uncompressed_size ()
  | (_data, _size, crc), entry when Int32.(entry.descriptor.crc <> Optint.to_int32 crc) ->
    failwithf "%s: CRC mismatch" entry.filename ()
  | (data, _size, _crc), entry -> entry, data

(* A window into a caller-owned buffer. *)
type 'a slice = {
  buf: 'a;
  pos: int;
  len: int;
}

(* Input source for [stream_files]: a pull function returning None at EOF. *)
type feed =
  | String of (unit -> string option Lwt.t)
  | Bigstring of (unit -> Bigstring.t slice option Lwt.t)

(* Drive [parser cb] over chunks pulled from [feed], pushing each completed
   (entry, data) pair into a bounded Lwt stream of capacity 1 (providing
   backpressure). Returns the stream and the promise running the loop. *)
let stream_files ~feed:read cb =
  (* Normalize both feed shapes to unit -> [`String|`Bigstring] option. *)
  let read =
    let open Lwt.Infix in
    match read with
    | String f -> (fun () -> f () >|= Option.map ~f:(fun s -> `String s))
    | Bigstring f ->
      fun () ->
        f ()
        >|= Option.map ~f:(fun { buf; pos; len } -> `Bigstring (Bigstring.sub_shared buf ~pos ~len))
  in
  let stream, bounded = Lwt_stream.create_bounded 1 in
  let mutex = Lwt_mutex.create () in
  let open Angstrom.Buffered in
  let rec loop = function
    | Fail (_, [], err) -> failwith err
    | Fail (_, marks, err) -> failwithf "%s: %s" (String.concat ~sep:" > " marks) err ()
    | Done ({ buf; off = pos; len }, pair) -> (
      (* One entry finished: push it, then restart the parser on the
         unconsumed remainder of the buffer. *)
      let* () = Lwt_mutex.with_lock mutex (fun () -> bounded#push pair) in
      match parse (parser cb) with
      | Partial feed -> (loop [@tailcall]) (feed (`Bigstring (Bigstring.sub_shared buf ~pos ~len)))
      | state -> (loop [@tailcall]) state)
    | Partial feed -> (
      read () >>= function
      | None -> (
        (* EOF: a final Done still yields one last entry; anything else ends
           the loop quietly. *)
        match feed `Eof with
        | Done (_, pair) -> Lwt_mutex.with_lock mutex (fun () -> bounded#push pair)
        | _ -> Lwt.return_unit)
      | Some chunk -> (loop [@tailcall]) (feed chunk))
  in
  let p =
    Lwt.finalize
      (fun () -> loop (parse (parser cb)))
      (fun () ->
        bounded#close;
        Lwt.return_unit)
  in
  stream, p