Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source
Source file string_extended.ml
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467openCore_kernel(* Natural ordering like found in gnome nautilus, the mac finder etc...
Refer to Mli for more documentation
*)letcollates1s2=letpos1=ref0andpos2=ref0inletnext~okspos=if!pos=String.lengthsthenNoneelse(letc=s.[!pos]inifokcthen(incrpos;Somec)elseNone)inletcompare_non_numerical()=letokc=not(Char.is_digitc)inletrecloop()=matchnext~oks1pos1,next~oks2pos2with|Some_,None->1|None,Some_->-1|None,None->0|Somec1,Somec2whenc1=c2->loop()|Somec1,Somec2->Char.comparec1c2inloop()inletcompare_numerical()=letrecconsume0spos=matchnext~ok:((=)'0')sposwith|Some_->consume0spos|None->()in(* Our main loop works on string representation of ints where all the
trailing zeros have been chopped of. Their magnitude is given by the
length of their representation. If they have the same magnitude the
lexical order is correct. Bias is used to save that information.
*)letok=Char.is_digitinletbias=ref0inletrecloop()=matchnext~oks1pos1,next~oks2pos2with|Some_,None->1|None,Some_->-1|None,Nonewhen!bias<>0->!bias|None,None->(* Both ints have the same value, The one with the shortest
representation (i.e. the least trailing zeroes) is
considered to be the smallest*)!pos1-!pos2|Somec1,Somec2when!bias=0->bias:=Char.comparec1c2;loop()|Some_,Some_->loop()inconsume0s1pos1;consume0s2pos2;loop()inlets1_length=String.lengths1inlets2_length=String.lengths2inletrecloop()=letr=compare_non_numerical()inletr'=compare_numerical()inmatchr,r'with|0,0when!pos1=s1_length&&!pos2=s2_length->0|0,0->loop()|0,i|i,_->iinloop();;let%test_module"collate"=(modulestructlet(<!)ss'=collatess'<0(*
let (>!) s s' = collate s s' > 0
let basic_tests = (fun (s,s') ->
"invertible" @? ((s' <! s) = (s >! s'));
"total" @? (definitive_clause [s<!s'; s=s'; s>!s']))
*)(* repeat 50 basic_tests (pg sg sg);
repeat 2 basic_tests (dup sg);
repeat 50 (fun (s,s',s'') ->
let (s1,s2,s3) =
match List.sort ~compare:String_extended.collate [s;s';s''] with
| [s1;s2;s3] -> s1,s2,s3
| _ -> assert false
in
"transitive" @?
(((s1 <! s2) || (s2 <! s3)) = (s1 <! s3)))
(tg sg sg sg); *)let%test_="a2b"<!"a10b"let%test_="a2b"<!"a02b"let%test_="a010b"<!"a20b"end);;(**
Inverse operation of [String.escaped]
*)exceptionUnescape_errorofbool*int*string(* The stdlib's escaped does a lot of fancy wazoo magic to avoid
using a buffer:
It works in two passes, the first one calculates the length of the string to
allocate and the second one does the actual escaping.
This would be more cumbersome to do here but might be worth the hassle if
performance ever gets to be an issue *)letunescaped'?(strict=true)s=letlen=String.lengthsinletpos=ref0inleterror?(fatal=false)message=raise(Unescape_error(fatal,!pos,message))inletconsume()=leti=!posinifi=lenthenerror"unexpectedly reached end of string";letc=s.[i]inpos:=i+1;cinletres=Buffer.createleninletemitc=Buffer.add_charrescinletemit_codecode=matchChar.of_intcodewith|Somec->emitc|None->error~fatal:true(Printf.sprintf"got invalid escape code %d"code)inletrecloop()=if!pos<lenthen(letc=consume()inifc<>'\\'thenemitcelse(letmark=!posintryletc=consume()inmatchcwith|'\\'|'\"'->emitc|'b'->emit'\b'|'n'->emit'\n'|'r'->emit'\r'|'t'->emit'\t'|'\n'->letrecconsume_blank()=if!pos<lenthen(matchconsume()with|' '|'\t'->consume_blank()|_->decrpos)inconsume_blank()|'x'->letc2hexc=ifc>='A'&&c<='F'thenChar.to_intc+10-Char.to_int'A'elseifc>='a'&&c<='f'thenChar.to_intc+10-Char.to_int'a'elseifc>='0'&&c<='9'thenChar.to_intc-Char.to_int'0'elseerror(Printf.sprintf"expected hex digit, got: %c"c)inletc1=consume()inletc2=consume()inemit_code((16*c2hexc1)+c2hexc2)|cwhenChar.is_digitc->letchar_to_numc=matchChar.get_digitcwith|None->error(Printf.sprintf"expected digit,got: %c"c)|Somei->iinleti1=char_to_numcinleti2=char_to_num(consume())inleti3=char_to_num(consume())inemit_code((100*i1)+(10*i2)+i3)|c->error(Printf.sprintf"got invalid escape character: %c"c)with|Unescape_error(false,_,_)whennotstrict->emit'\\';pos:=mark);loop())elseBuffer.contentsresinloop();;letunescaped?stricts=tryunescaped'?strictswith|Unescape_error(_,pos,message)->invalid_argf"String_extended.unescaped error at position %d of %s: %s"possmessage();;letunescaped_res?stricts=tryResult.Ok(unescaped'?stricts)with|Unescape_error(_,pos,message)->Result.Error(pos,message);;letsqueezestr=letlen=String.lengthstrinletbuf=Buffer.createleninletrecskip_spacesi=ifi>=lenthenBuffer.contentsbufelse(letc=str.[i]inifc=' '||c='\n'||c='\t'||c='\r'thenskip_spaces(i+1)else(Buffer.add_charbufc;copy_chars(i+1)))andcopy_charsi=ifi>=lenthenBuffer.contentsbufelse(letc=str.[i]inifc=' '||c='\n'||c='\t'||c='\r'then(Buffer.add_charbuf' ';skip_spaces(i+1))else(Buffer.add_charbufc;copy_chars(i+1)))incopy_chars0;;letpad_right?(char=' ')sl=letsrc_len=String.lengthsinifsrc_len>=lthenselse(letres=Bytes.createlinBytes.From_string.blit~src:s~dst:res~src_pos:0~dst_pos:0~len:src_len;Bytes.fill~pos:src_len~len:(l-src_len)reschar;Bytes.unsafe_to_string~no_mutation_while_string_reachable:res);;letpad_left?(char=' ')sl=letsrc_len=String.lengthsinifsrc_len>=lthenselse(letres=Bytes.createlinBytes.From_string.blit~src:s~dst:res~src_pos:0~dst_pos:(l-src_len)~len:src_len;Bytes.fill~pos:0~len:(l-src_len)reschar;Bytes.unsafe_to_string~no_mutation_while_string_reachable:res);;letline_break~lens=letbuf=Buffer.createleninletflush_buf()=letres=Buffer.contentsbufinBuffer.resetbuf;resinletrecloopacc=function|[]->letacc=ifBuffer.lengthbuf<>0thenflush_buf()::accelseifacc=[]then[""]elseaccinList.revacc|h::twhenBuffer.lengthbuf=0->Buffer.add_stringbufh;loopacct|h::twhenBuffer.lengthbuf+1+String.lengthh<len->Buffer.add_charbuf' ';Buffer.add_stringbufh;loopacct|l->loop(flush_buf()::acc)linList.concat_map(String.split~on:'\n's)~f:(funs->loop[](String.split~on:' 's));;(* Finds out where to break a given line; returns the len of the line to break
and the staring position of the next line.*)letrecword_wrap__break_one~hard_limit~soft_limit~previous_matchs~pos~len=ifpos=String.lengthsthenlen,poselseifprevious_match>0&&len>=soft_limitthenprevious_match,pos-len+previous_match+1elseiflen>=hard_limitthenlen,poselse(matchs.[pos]with(* Detect \r\n as one newline and not two... *)|'\r'whenpos<String.lengths-1&&s.[pos+1]='\n'->len,pos+2|'\r'|'\n'->len,pos+1|' '|'\t'->word_wrap__break_ones~hard_limit~soft_limit~previous_match:len~pos:(pos+1)~len:(len+1)|_->word_wrap__break_ones~previous_match~hard_limit~soft_limit~pos:(pos+1)~len:(len+1));;(* Returns an pos*length list of all the lines (as substrings of the argument
passed in) *)letrecword_wrap__find_substrings~hard_limit~soft_limitsaccpos=ifpos<String.lengthsthen(letlen,new_pos=word_wrap__break_ones~hard_limit~soft_limit~previous_match:0~pos~len:0inword_wrap__find_substrings~hard_limit~soft_limits((pos,len)::acc)new_pos)elseacc;;letword_wrap?(trailing_nl=false)?(soft_limit=80)?(hard_limit=Int.max_value)?(nl="\n")s=letsoft_limit=minsoft_limithard_limitinletlines=word_wrap__find_substrings~soft_limit~hard_limits[]0inmatchlineswith|[]|[_]->iftrailing_nlthens^nlelses|(hpos,hlen)::t->letnl_len=String.lengthnlinletbody_len=List.fold_leftt~f:(funacc(_,len)->acc+nl_len+len)~init:0inletres_len=iftrailing_nlthenbody_len+hlen+nl_lenelsebody_len+hleninletres=Bytes.createres_leniniftrailing_nlthenBytes.From_string.blit~src:nl~dst:res~len:nl_len~src_pos:0~dst_pos:(body_len+hlen);Bytes.From_string.blit~src:s~dst:res~len:hlen~src_pos:hpos~dst_pos:body_len;letrecblit_loopdst_end_pos=function|[]->()|(src_pos,len)::rest->letdst_pos=dst_end_pos-len-nl_leninBytes.From_string.blit~src:s~dst:res~len~src_pos~dst_pos;Bytes.From_string.blit~src:nl~dst:res~len:nl_len~src_pos:0~dst_pos:(dst_pos+len);blit_loopdst_posrestinblit_loopbody_lent;Bytes.unsafe_to_string~no_mutation_while_string_reachable:res;;letis_substring_deprecated~substring:needlehaystack=(* 2014-10-29 mbac: a recent release of Core introduced a fast and less surprising
version of KMP. Everyone should use that. This function is simply here to maintain
bug compatibiltiy with the original pure-ML version of f is_substring that used
to be here. *)ifString.lengthneedle=0thenifString.lengthhaystack=0thenfalseelseinvalid_arg"index out of bounds"elseCore_kernel.String.is_substring~substring:needlehaystack;;let%test_=is_substring_deprecated~substring:"foo""foo"let%test_=not(is_substring_deprecated~substring:"""")let%test_=(* For bug compatibility with the ML version that used to be here *)tryignore(is_substring_deprecated~substring:"""foo");assertfalse(* should not be reachable *)with|Invalid_argument_->true;;let%test_=not(is_substring_deprecated~substring:"foo""")let%test_=is_substring_deprecated~substring:"bar""foobarbaz"let%test_=not(is_substring_deprecated~substring:"Z""z")let%test_=not(is_substring_deprecated~substring:"store""video stapler")let%test_=not(is_substring_deprecated~substring:"sandwich""apple")let%test_=is_substring_deprecated~substring:"z""abc\x00z"letedit_distance_matrix?transposes1s2=lettranspose=Option.is_sometransposeinletl1,l2=String.lengths1,String.lengths2inletd=Array.make_matrix0~dimx:(l1+1)~dimy:(l2+1)inforx=0tol1dod.(x).(0)<-xdone;fory=0tol2dod.(0).(y)<-ydone;fory=1tol2doforx=1tol1doletmin_d=ifs1.[x-1]=s2.[y-1]thend.(x-1).(y-1)elseList.reduce_exn~f:min[d.(x-1).(y)+1;d.(x).(y-1)+1;d.(x-1).(y-1)+1]inletmin_d=iftranspose&&x>1&&y>1&&s1.[x-1]=s2.[y-2]&&s1.[x-2]=s2.[y-1]thenminmin_d(d.(x-2).(y-2)+1)elsemin_dind.(x).(y)<-min_ddonedone;d;;letedit_distance?transposes1s2=(edit_distance_matrix?transposes1s2).(String.lengths1).(String.lengths2);;let%test_=edit_distance""""=0let%test_=edit_distance"stringStringString""stringStringString"=0let%test_=edit_distance"ocaml""coaml"=2let%test_=edit_distance~transpose:()"ocaml""coaml"=1let%test_=edit_distance"sitting""kitten"=3let%test_=edit_distance~transpose:()"sitting""kitten"=3let%test_=edit_distance"abcdef""1234567890"=10let%test_=edit_distance"foobar""fubahr"=3let%test_=edit_distance"hylomorphism""zylomorphism"=1