Source file string_extended.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
open Core_kernel
open Poly
let collate s1 s2 =
let pos1 = ref 0
and pos2 = ref 0 in
let next ~ok s pos =
if !pos = String.length s
then None
else (
let c = s.[!pos] in
if ok c
then (
incr pos;
Some c)
else None)
in
let compare_non_numerical () =
let ok c = not (Char.is_digit c) in
let rec loop () =
match next ~ok s1 pos1, next ~ok s2 pos2 with
| Some _, None -> 1
| None, Some _ -> -1
| None, None -> 0
| Some c1, Some c2 when c1 = c2 -> loop ()
| Some c1, Some c2 -> Char.compare c1 c2
in
loop ()
in
let compare_numerical () =
let rec consume0 s pos =
match next ~ok:(( = ) '0') s pos with
| Some _ -> consume0 s pos
| None -> ()
in
let ok = Char.is_digit in
let bias = ref 0 in
let rec loop () =
match next ~ok s1 pos1, next ~ok s2 pos2 with
| Some _, None -> 1
| None, Some _ -> -1
| None, None when !bias <> 0 -> !bias
| None, None ->
!pos1 - !pos2
| Some c1, Some c2 when !bias = 0 ->
bias := Char.compare c1 c2;
loop ()
| Some _, Some _ -> loop ()
in
consume0 s1 pos1;
consume0 s2 pos2;
loop ()
in
let s1_length = String.length s1 in
let s2_length = String.length s2 in
let rec loop () =
let r = compare_non_numerical () in
let r' = compare_numerical () in
match r, r' with
| 0, 0 when !pos1 = s1_length && !pos2 = s2_length -> 0
| 0, 0 -> loop ()
| 0, i | i, _ -> i
in
loop ()
;;
let%test_module "collate" =
(module struct
let ( <! ) s s' = collate s s' < 0
let%test _ = "a2b" <! "a10b"
let%test _ = "a2b" <! "a02b"
let%test _ = "a010b" <! "a20b"
end)
;;
(**
Inverse operation of [String.escaped]
*)
exception Unescape_error of bool * int * string
let unescaped' ?(strict = true) s =
let len = String.length s in
let pos = ref 0 in
let error ?(fatal = false) message = raise (Unescape_error (fatal, !pos, message)) in
let consume () =
let i = !pos in
if i = len then error "unexpectedly reached end of string";
let c = s.[i] in
pos := i + 1;
c
in
let res = Buffer.create len in
let emit c = Buffer.add_char res c in
let emit_code code =
match Char.of_int code with
| Some c -> emit c
| None -> error ~fatal:true (Printf.sprintf "got invalid escape code %d" code)
in
let rec loop () =
if !pos < len
then (
let c = consume () in
if c <> '\\'
then emit c
else (
let mark = !pos in
try
let c = consume () in
match c with
| '\\' | '\"' -> emit c
| 'b' -> emit '\b'
| 'n' -> emit '\n'
| 'r' -> emit '\r'
| 't' -> emit '\t'
| '\n' ->
let rec consume_blank () =
if !pos < len
then (
match consume () with
| ' ' | '\t' -> consume_blank ()
| _ -> decr pos)
in
consume_blank ()
| 'x' ->
let c2hex c =
if c >= 'A' && c <= 'F'
then Char.to_int c + 10 - Char.to_int 'A'
else if c >= 'a' && c <= 'f'
then Char.to_int c + 10 - Char.to_int 'a'
else if c >= '0' && c <= '9'
then Char.to_int c - Char.to_int '0'
else error (Printf.sprintf "expected hex digit, got: %c" c)
in
let c1 = consume () in
let c2 = consume () in
emit_code ((16 * c2hex c1) + c2hex c2)
| c when Char.is_digit c ->
let char_to_num c =
match Char.get_digit c with
| None -> error (Printf.sprintf "expected digit,got: %c" c)
| Some i -> i
in
let i1 = char_to_num c in
let i2 = char_to_num (consume ()) in
let i3 = char_to_num (consume ()) in
emit_code ((100 * i1) + (10 * i2) + i3)
| c -> error (Printf.sprintf "got invalid escape character: %c" c)
with
| Unescape_error (false, _, _) when not strict ->
emit '\\';
pos := mark);
loop ())
else Buffer.contents res
in
loop ()
;;
let unescaped ?strict s =
try unescaped' ?strict s with
| Unescape_error (_, pos, message) ->
invalid_argf
"String_extended.unescaped error at position %d of %s: %s"
pos
s
message
()
;;
let unescaped_res ?strict s =
try Result.Ok (unescaped' ?strict s) with
| Unescape_error (_, pos, message) -> Result.Error (pos, message)
;;
let squeeze str =
let len = String.length str in
let buf = Buffer.create len in
let rec skip_spaces i =
if i >= len
then Buffer.contents buf
else (
let c = str.[i] in
if c = ' ' || c = '\n' || c = '\t' || c = '\r'
then skip_spaces (i + 1)
else (
Buffer.add_char buf c;
copy_chars (i + 1)))
and copy_chars i =
if i >= len
then Buffer.contents buf
else (
let c = str.[i] in
if c = ' ' || c = '\n' || c = '\t' || c = '\r'
then (
Buffer.add_char buf ' ';
skip_spaces (i + 1))
else (
Buffer.add_char buf c;
copy_chars (i + 1)))
in
copy_chars 0
;;
let pad_right ?(char = ' ') s l =
let src_len = String.length s in
if src_len >= l
then s
else (
let res = Bytes.create l in
Bytes.From_string.blit ~src:s ~dst:res ~src_pos:0 ~dst_pos:0 ~len:src_len;
Bytes.fill ~pos:src_len ~len:(l - src_len) res char;
Bytes.unsafe_to_string ~no_mutation_while_string_reachable:res)
;;
let pad_left ?(char = ' ') s l =
let src_len = String.length s in
if src_len >= l
then s
else (
let res = Bytes.create l in
Bytes.From_string.blit ~src:s ~dst:res ~src_pos:0 ~dst_pos:(l - src_len) ~len:src_len;
Bytes.fill ~pos:0 ~len:(l - src_len) res char;
Bytes.unsafe_to_string ~no_mutation_while_string_reachable:res)
;;
let line_break ~len s =
let buf = Buffer.create len in
let flush_buf () =
let res = Buffer.contents buf in
Buffer.reset buf;
res
in
let rec loop acc = function
| [] ->
let acc =
if Buffer.length buf <> 0
then flush_buf () :: acc
else if acc = []
then [ "" ]
else acc
in
List.rev acc
| h :: t when Buffer.length buf = 0 ->
Buffer.add_string buf h;
loop acc t
| h :: t when Buffer.length buf + 1 + String.length h < len ->
Buffer.add_char buf ' ';
Buffer.add_string buf h;
loop acc t
| l -> loop (flush_buf () :: acc) l
in
List.concat_map (String.split ~on:'\n' s) ~f:(fun s ->
loop [] (String.split ~on:' ' s))
;;
let rec word_wrap__break_one ~hard_limit ~soft_limit ~previous_match s ~pos ~len =
if pos = String.length s
then len, pos
else if previous_match > 0 && len >= soft_limit
then previous_match, pos - len + previous_match + 1
else if len >= hard_limit
then len, pos
else (
match s.[pos] with
| '\r' when pos < String.length s - 1 && s.[pos + 1] = '\n' -> len, pos + 2
| '\r' | '\n' -> len, pos + 1
| ' ' | '\t' ->
word_wrap__break_one
s
~hard_limit
~soft_limit
~previous_match:len
~pos:(pos + 1)
~len:(len + 1)
| _ ->
word_wrap__break_one
s
~previous_match
~hard_limit
~soft_limit
~pos:(pos + 1)
~len:(len + 1))
;;
let rec word_wrap__find_substrings ~hard_limit ~soft_limit s acc pos =
if pos < String.length s
then (
let len, new_pos =
word_wrap__break_one s ~hard_limit ~soft_limit ~previous_match:0 ~pos ~len:0
in
word_wrap__find_substrings ~hard_limit ~soft_limit s ((pos, len) :: acc) new_pos)
else acc
;;
let word_wrap
?(trailing_nl = false)
?(soft_limit = 80)
?(hard_limit = Int.max_value)
?(nl = "\n")
s
=
let soft_limit = min soft_limit hard_limit in
let lines = word_wrap__find_substrings ~soft_limit ~hard_limit s [] 0 in
match lines with
| [] | [ _ ] -> if trailing_nl then s ^ nl else s
| (hpos, hlen) :: t ->
let nl_len = String.length nl in
let body_len =
List.fold_left t ~f:(fun acc (_, len) -> acc + nl_len + len) ~init:0
in
let res_len = if trailing_nl then body_len + hlen + nl_len else body_len + hlen in
let res = Bytes.create res_len in
if trailing_nl
then
Bytes.From_string.blit
~src:nl
~dst:res
~len:nl_len
~src_pos:0
~dst_pos:(body_len + hlen);
Bytes.From_string.blit ~src:s ~dst:res ~len:hlen ~src_pos:hpos ~dst_pos:body_len;
let rec blit_loop dst_end_pos = function
| [] -> ()
| (src_pos, len) :: rest ->
let dst_pos = dst_end_pos - len - nl_len in
Bytes.From_string.blit ~src:s ~dst:res ~len ~src_pos ~dst_pos;
Bytes.From_string.blit
~src:nl
~dst:res
~len:nl_len
~src_pos:0
~dst_pos:(dst_pos + len);
blit_loop dst_pos rest
in
blit_loop body_len t;
Bytes.unsafe_to_string ~no_mutation_while_string_reachable:res
;;
let is_substring_deprecated ~substring:needle haystack =
if String.length needle = 0
then if String.length haystack = 0 then false else invalid_arg "index out of bounds"
else Core_kernel.String.is_substring ~substring:needle haystack
;;
let%test _ = is_substring_deprecated ~substring:"foo" "foo"
let%test _ = not (is_substring_deprecated ~substring:"" "")
let%test _ =
try
ignore (is_substring_deprecated ~substring:"" "foo");
assert false
with
| Invalid_argument _ -> true
;;
let%test _ = not (is_substring_deprecated ~substring:"foo" "")
let%test _ = is_substring_deprecated ~substring:"bar" "foobarbaz"
let%test _ = not (is_substring_deprecated ~substring:"Z" "z")
let%test _ = not (is_substring_deprecated ~substring:"store" "video stapler")
let%test _ = not (is_substring_deprecated ~substring:"sandwich" "apple")
let%test _ = is_substring_deprecated ~substring:"z" "abc\x00z"
let edit_distance_matrix ?transpose s1 s2 =
let transpose = Option.is_some transpose in
let l1, l2 = String.length s1, String.length s2 in
let d = Array.make_matrix 0 ~dimx:(l1 + 1) ~dimy:(l2 + 1) in
for x = 0 to l1 do
d.(x).(0) <- x
done;
for y = 0 to l2 do
d.(0).(y) <- y
done;
for y = 1 to l2 do
for x = 1 to l1 do
let min_d =
if s1.[x - 1] = s2.[y - 1]
then d.(x - 1).(y - 1)
else
List.reduce_exn
~f:min
[ d.(x - 1).(y) + 1; d.(x).(y - 1) + 1; d.(x - 1).(y - 1) + 1 ]
in
let min_d =
if transpose
&& x > 1
&& y > 1
&& s1.[x - 1] = s2.[y - 2]
&& s1.[x - 2] = s2.[y - 1]
then min min_d (d.(x - 2).(y - 2) + 1)
else min_d
in
d.(x).(y) <- min_d
done
done;
d
;;
let edit_distance ?transpose s1 s2 =
(edit_distance_matrix ?transpose s1 s2).(String.length s1).(String.length s2)
;;
let%test _ = edit_distance "" "" = 0
let%test _ = edit_distance "stringStringString" "stringStringString" = 0
let%test _ = edit_distance "ocaml" "coaml" = 2
let%test _ = edit_distance ~transpose:() "ocaml" "coaml" = 1
let%test _ = edit_distance "sitting" "kitten" = 3
let%test _ = edit_distance ~transpose:() "sitting" "kitten" = 3
let%test _ = edit_distance "abcdef" "1234567890" = 10
let%test _ = edit_distance "foobar" "fubahr" = 3
let%test _ = edit_distance "hylomorphism" "zylomorphism" = 1