Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source
Source file CCParse.ml
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535(* This file is free software. See file "license" for more details. *)(** {1 Very Simple Parser Combinators} *)openCCShims_(*$inject
module T = struct
type tree = L of int | N of tree * tree
end
open T
open Result
let mk_leaf x = L x
let mk_node x y = N(x,y)
let ptree = fix @@ fun self ->
skip_space *>
( (try_ (char '(') *> (pure mk_node <*> self <*> self) <* char ')')
<|>
(U.int >|= mk_leaf) )
let ptree' = fix_memo @@ fun self ->
skip_space *>
( (try_ (char '(') *> (pure mk_node <*> self <*> self) <* char ')')
<|>
(U.int >|= mk_leaf) )
let rec pptree = function
| N (a,b) -> Printf.sprintf "N (%s, %s)" (pptree a) (pptree b)
| L x -> Printf.sprintf "L %d" x
let errpptree = function
| Ok x -> "Ok " ^ pptree x
| Error s -> "Error " ^ s
*)(*$= & ~printer:errpptree
(Ok (N (L 1, N (L 2, L 3)))) \
(parse_string ptree "(1 (2 3))" )
(Ok (N (N (L 1, L 2), N (L 3, N (L 4, L 5))))) \
(parse_string ptree "((1 2) (3 (4 5)))" )
(Ok (N (L 1, N (L 2, L 3)))) \
(parse_string ptree' "(1 (2 3))" )
(Ok (N (N (L 1, L 2), N (L 3, N (L 4, L 5))))) \
(parse_string ptree' "((1 2) (3 (4 5)))" )
*)(*$R
let p = U.list ~sep:"," U.word in
let printer = function
| Ok l -> "Ok " ^ CCFormat.(to_string (list string)) l
| Error s -> "Error " ^ s
in
assert_equal ~printer
(Ok ["abc"; "de"; "hello"; "world"])
(parse_string p "[abc , de, hello ,world ]");
*)(*$R
let test n =
let p = CCParse.(U.list ~sep:"," U.int) in
let l = CCList.(1 -- n) in
let l_printed =
CCFormat.(to_string (within "[" "]" (list ~sep:(return ",") int))) l in
let l' = CCParse.parse_string_exn p l_printed in
assert_equal ~printer:Q.Print.(list int) l l'
in
test 300_000;
*)(*$R
let open CCParse.Infix in
let module P = CCParse in
let parens p = P.try_ (P.char '(') *> p <* P.char ')' in
let add = P.char '+' *> P.return (+) in
let sub = P.char '-' *> P.return (-) in
let mul = P.char '*' *> P.return ( * ) in
let div = P.char '/' *> P.return ( / ) in
let integer =
P.chars1_if (function '0'..'9'->true|_->false) >|= int_of_string in
let chainl1 e op =
P.fix (fun r ->
e >>= fun x -> P.try_ (op <*> P.return x <*> r) <|> P.return x) in
let expr : int P.t =
P.fix (fun expr ->
let factor = parens expr <|> integer in
let term = chainl1 factor (mul <|> div) in
chainl1 term (add <|> sub)) in
assert_equal (Ok 6) (P.parse_string expr "4*1+2");
assert_equal (Ok 12) (P.parse_string expr "4*(1+2)");
()
*)type'aor_error=('a,string)resulttypeline_num=inttypecol_num=intmoduleMemoTbl=structmoduleH=Hashtbl.Make(structtypet=int*int(* id of parser, position *)letequal((a,b):t)(c,d)=a=c&&b=dlethash=Hashtbl.hashend)(* table of closures, used to implement universal type *)typet=(unit->unit)H.tlazy_tletcreaten=lazy(H.createn)(* unique ID for each parser *)letid_=ref0type'ares=|Failofexn|Okof'aendtypeposition=int*int*int(* pos, line, column *)typeparse_branch=(line_num*col_num*stringoption)listtypestate={str:string;(* the input *)mutablei:int;(* offset *)mutablelnum:line_num;(* Line number *)mutablecnum:col_num;(* Column number *)mutablebranch:parse_branch;memo:MemoTbl.t;(* Memoization table, if any *)}exceptionParseErrorofparse_branch*(unit->string)letchar_equal(a:char)b=Stdlib.(=)abletstring_equal(a:string)b=Stdlib.(=)abletrecstring_of_branchl=letpp_s()=function|None->""|Somes->Format.sprintf"while parsing %s, "sinmatchlwith|[]->""|[l,c,s]->Format.sprintf"@[%aat line %d, col %d@]"pp_sslc|(l,c,s)::tail->Format.sprintf"@[%aat line %d, col %d@]@,%s"pp_sslc(string_of_branchtail)let()=Printexc.register_printer(function|ParseError(b,msg)->Some(Format.sprintf"@[<v>%s@ %s@]"(msg())(string_of_branchb))|_->None)letconst_x()=xletstate_of_stringstr=lets={str;i=0;lnum=1;cnum=1;branch=[];memo=MemoTbl.create32;}insletis_donest=st.i=String.lengthst.strletcurst=st.str.[st.i]letfail_~errstmsg=letb=(st.lnum,st.cnum,None)::st.branchinerr(ParseError(b,msg))letnextst~ok~err=ifst.i=String.lengthst.strthenfail_~errst(const_"unexpected end of input")else(letc=st.str.[st.i]inst.i<-st.i+1;ifchar_equalc'\n'then(st.lnum<-st.lnum+1;st.cnum<-1)elsest.cnum<-st.cnum+1;okc)letposst=st.i,st.lnum,st.cnumletbacktrackst(i',l',c')=assert(0<=i'&&i'<=st.i);st.i<-i';st.lnum<-l';st.cnum<-c';()type'at=state->ok:('a->unit)->err:(exn->unit)->unitletreturn:'a->'at=funx_st~ok~err:_->okxletpure=returnlet(>|=):'at->('a->'b)->'bt=funpfst~ok~err->pst~ok:(funx->ok(fx))~errlet(>>=):'at->('a->'bt)->'bt=funpfst~ok~err->pst~ok:(funx->fxst~ok~err)~errlet(<*>):('a->'b)t->'at->'bt=funfxst~ok~err->fst~ok:(funf'->xst~ok:(funx'->ok(f'x'))~err)~errlet(<*):'at->_t->'at=funxyst~ok~err->xst~ok:(funres->yst~ok:(fun_->okres)~err)~errlet(*>):_t->'at->'at=funxyst~ok~err->xst~ok:(fun_->yst~ok~err)~errletmapfx=x>|=fletmap2fxy=puref<*>x<*>yletmap3fxyz=puref<*>x<*>y<*>zletjunk_st=nextst~ok:ignore~err:(fun_->assertfalse)leteoist~ok~err=ifis_donestthenok()elsefail_~errst(const_"expected EOI")letfailmsgst~ok:_~err=fail_~errst(const_msg)letfailfmsg=Printf.ksprintffailmsgletparsingspst~ok~err=st.branch<-(st.lnum,st.cnum,Somes)::st.branch;pst~ok:(funx->st.branch<-List.tlst.branch;okx)~err:(fune->st.branch<-List.tlst.branch;erre)letnop_~ok~err:_=ok()letcharc=letmsg=Printf.sprintf"expected '%c'"cinfunst~ok~err->nextst~ok:(func'->ifchar_equalcc'thenokcelsefail_~errst(const_msg))~errletchar_ifpst~ok~err=nextst~ok:(func->ifpcthenokcelsefail_~errst(fun()->Printf.sprintf"unexpected char '%c'"c))~errletchars_ifpst~ok~err:_=leti=st.iinletlen=ref0inwhilenot(is_donest)&&p(curst)dojunk_st;incrlendone;ok(String.subst.stri!len)letchars1_ifpst~ok~err=chars_ifpst~ok:(funs->ifstring_equals""thenfail_~errst(const_"unexpected sequence of chars")elseoks)~errletrecskip_charspst~ok~err=ifnot(is_donest)&&p(curst)then(junk_st;skip_charspst~ok~err)elseok()letis_alpha=function|'a'..'z'|'A'..'Z'->true|_->falseletis_num=function'0'..'9'->true|_->falseletis_alpha_num=function|'a'..'z'|'A'..'Z'|'0'..'9'->true|_->falseletis_space=function' '|'\t'->true|_->falseletis_white=function' '|'\t'|'\n'->true|_->falseletspace=char_ifis_spaceletwhite=char_ifis_whiteletendlinest~ok~err=nextst~ok:(function|'\n'asc->okc|_->fail_~errst(const_"expected end-of-line"))~errletskip_space=skip_charsis_spaceletskip_white=skip_charsis_whitelet(<|>):'at->'at->'at=funxyst~ok~err->leti=st.iinxst~ok~err:(fune->letj=st.iinifi=jthenyst~ok~err(* try [y] *)elseerre(* fail *))lettry_:'at->'at=funpst~ok~err->leti=posstinpst~ok~err:(fune->backtracksti;erre)letsuspendfst~ok~err=f()st~ok~errlet(<?>):'at->string->'at=funxmsgst~ok~err->leti=st.iinxst~ok~err:(fune->ifst.i=ithenfail_~errst(fun()->msg)elseerre)letstringsst~ok~err=letrecchecki=ifi=String.lengthsthenokselsenextst~ok:(func->ifchar_equalcs.[i]thencheck(i+1)elsefail_~errst(fun()->Printf.sprintf"expected \"%s\""s))~errincheck0letrecmany_rec:'at->'alist->'alistt=funpaccst~ok~err->ifis_donestthenok(List.revacc)elsepst~ok:(funx->leti=posstinletacc=x::accinmany_recpaccst~ok~err:(fun_->backtracksti;ok(List.revacc)))~errletmany:'at->'alistt=funpst~ok~err->many_recp[]st~ok~err(*$R
let p0 = skip_white *> U.int in
let p = (skip_white *> char '(' *> many p0) <* try_ (skip_white <* char ')') in
let printer = CCFormat.(to_string @@ Dump.result @@ Dump.list int) in
assert_equal ~printer
(Ok [1;2;3]) (parse_string p "(1 2 3)");
assert_equal ~printer
(Ok [1;2; -30; 4]) (parse_string p "( 1 2 -30 4 )")
*)letmany1:'at->'alistt=funpst~ok~err->pst~ok:(funx->many_recp[x]st~ok~err)~errletrecskippst~ok~err=leti=posstinpst~ok:(fun_->skippst~ok~err)~err:(fun_->backtracksti;ok())(* by (sep1 ~by p) *)letrecsep_rec~byp=try_by*>sep1~bypandsep1~byp=p>>=funx->(sep_rec~byp>|=funtl->x::tl)<|>return[x]letsep~byp=(try_p>>=funx->(sep_rec~byp>|=funtl->x::tl)<|>return[x])<|>return[]letfixf=letrecpst~ok~err=fpst~ok~errinpletmemo(typea)(p:at):at=letid=!MemoTbl.id_inincrMemoTbl.id_;letr=refNonein(* used for universal encoding *)funst~ok~err->leti=st.iinlet(lazytbl)=st.memointryletf=MemoTbl.H.findtbl(i,id)in(* extract hidden value *)r:=None;f();beginmatch!rwith|None->assertfalse|Some(MemoTbl.Okx)->okx|Some(MemoTbl.Faile)->erreendwithNot_found->(* parse, and save *)pst~ok:(funx->MemoTbl.H.replacetbl(i,id)(fun()->r:=Some(MemoTbl.Okx));okx)~err:(fune->MemoTbl.H.replacetbl(i,id)(fun()->r:=Some(MemoTbl.Faile));erre)letfix_memof=letrecp=letp'=lazy(memop)infunst~ok~err->f(Lazy.forcep')st~ok~errinpletget_lnum=funst~ok~err:_->okst.lnumletget_cnum=funst~ok~err:_->okst.cnumletget_pos=funst~ok~err:_->ok(st.lnum,st.cnum)letparse_exnpst=letres=refNoneinpst~ok:(funx->res:=Somex)~err:(fune->raisee);match!reswith|None->assertfalse|Somex->xletexn_to_erre=Error(Printexc.to_stringe)letparsepst=tryOk(parse_exnpst)withe->exn_to_erreletparse_string_exnps=parse_exnp(state_of_strings)letparse_stringps=parsep(state_of_strings)letread_all_ic=letbuf=Buffer.create1024inbegintrywhiletruedoletline=input_lineicinBuffer.add_stringbufline;Buffer.add_charbuf'\n';done;assertfalsewithEnd_of_file->()end;Buffer.contentsbufletparse_file_exnpfile=letic=open_infileinletst=state_of_string(read_all_ic)intryletres=parse_exnpstinclose_inic;reswithe->close_in_noerric;raiseeletparse_filepfile=tryOk(parse_file_exnpfile)withe->exn_to_erremoduleInfix=structlet(>|=)=(>|=)let(>>=)=(>>=)let(<*>)=(<*>)let(<*)=(<*)let(*>)=(*>)let(<|>)=(<|>)let(<?>)=(<?>)endmoduleU=structletsep_=sepletlist?(start="[")?(stop="]")?(sep=";")p=stringstart*>skip_white*>sep_~by:(skip_white*>stringsep*>skip_white)p<*skip_white<*stringstopletint=chars1_if(func->is_numc||char_equalc'-')>>=funs->tryreturn(int_of_strings)withFailure_->fail"expected an int"letprepend_strcs=String.make1c^sletword=map2prepend_str(char_ifis_alpha)(chars_ifis_alpha_num)letpair?(start="(")?(stop=")")?(sep=",")p1p2=stringstart*>skip_white*>p1>>=funx1->skip_white*>stringsep*>skip_white*>p2>>=funx2->stringstop*>return(x1,x2)lettriple?(start="(")?(stop=")")?(sep=",")p1p2p3=stringstart*>skip_white*>p1>>=funx1->skip_white*>stringsep*>skip_white*>p2>>=funx2->skip_white*>stringsep*>skip_white*>p3>>=funx3->stringstop*>return(x1,x2,x3)endincludeCCShimsMkLet_.Make(structtypenonrec'at='atincludeInfixletmonoid_producta1a2=pure(funxy->x,y)<*>a1<*>a2end)