Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source
Source file parse_js.ml
(* Yoann Padioleau
*
* Copyright (C) 2010, 2013 Facebook
* Copyright (C) 2019 Yoann Padioleau
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* version 2.1 as published by the Free Software Foundation, with the
* special exception on linking described in file license.txt.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the file
* license.txt for more details.
*)openCommonmoduleFlag=Flag_parsingmoduleFlag_js=Flag_parsing_jsmoduleAst=Cst_jsmoduleTH=Token_helpers_jsmodulePI=Parse_info(*****************************************************************************)(* Prelude *)(*****************************************************************************)(* Lots of copy paste with my other parsers (e.g. PHP, C, ML) but
* copy paste is sometimes ok.
*)(*****************************************************************************)(* Types *)(*****************************************************************************)(* the token list contains also the comment-tokens *)typeprogram_and_tokens=Cst_js.programoption*Parser_js.tokenlist(*****************************************************************************)(* Error diagnostic *)(*****************************************************************************)leterror_msg_toktok=Parse_info.error_message_info(TH.info_of_toktok)(*****************************************************************************)(* Helpers *)(*****************************************************************************)(* Now that we parse individual items separately, and that we do not
* rely on EOF as a final marker, we need to take care when
* running the parser on the next item. Indeed, certain
* items such as if and try have optional trailing element
* (an else branch, a finally stmt) that forces the parser to
* lookahead and consume an extra token in lexer_function to
* decide to reduce or not the current item.
* Before running the parser on the next item we need to put
* back this consumed token back in the stream!
*
* alt:
* - use ii_of_any and check if tr.current is in it
* WARNING!: this requires that the last token of what is
* parsed is in the CST! otherwise this will reintroduce in
* the stream an extra token so take care!
* - match for If without else and Try without Finally in AST
* (only cases?)
* - less: ask on caml list if can access parser state?
* but Parsing.parser_env is abstract and no much API around it.
*
* see also top comment in tests/js/items.js
*)letput_back_lookahead_token_if_neededtritem_opt=matchitem_optwith|None->()|Someitem->letiis=Lib_parsing_js.ii_of_any(Ast.Program[item])inletcurrent=tr.PI.currentinletinfo=TH.info_of_tokcurrentin(* bugfix: without test on is_origintok, the parser timeout
* TODO: why?
*)ifnot(PI.is_origintokinfo)||List.meminfoiisthen()elsebeginif!Flag.debug_lexerthenpr2(spf"putting back %s"(Common.dumpcurrent));tr.PI.rest<-current::tr.PI.rest;tr.PI.passed<-List.tltr.PI.passed;end(*****************************************************************************)(* ASI (Automatic Semicolon Insertion) part 2 *)(*****************************************************************************)(* To get the right to do an ASI, the parse error needs to be
* on a new line. In some cases though the current offending token might
* not be the first token on the line. Indeed in
* if(true) continue
* x = y;
* the parser does not generate a parse error at 'x' but at '=' because
* 'continue' can accept an identifier.
* In fact, the situation is worse, because for
* if(true) continue
* x;
* we must generate two independent statements, even though it does
* look like a parse error. To handle those cases we
* need a parsing_hack phase that inserts semicolon after the
* continue if there is a newline after.
*)letrecline_previous_tokxs=matchxswith|[]->None|x::xs->ifTH.is_commentxthenline_previous_tokxselseSome(TH.line_of_tokx)letasi_opportunitycharposlast_charpos_errorcurtr=matchtr.PI.passedwith|_whencharpos<=!last_charpos_error->None|[]->None(* see tests/js/parsing/asi_incr_bis.js *)|offending::((Parser_js.T_INCR_|Parser_js.T_DECR_)asreal_offender)::xs->(matchline_previous_tokxs,offendingwith|Someline,_whenTH.line_of_tokreal_offender>line->Some([offending],real_offender,xs)|_,(Parser_js.T_RCURLY_|Parser_js.EOF_)->Some([],offending,(real_offender::xs))|_->None)|offending::xs->(matchline_previous_tokxs,curwith|Someline,_whenTH.line_of_tokcur>line->Some([],offending,xs)|_,(Parser_js.T_RCURLY_|Parser_js.EOF_)->Some([],offending,xs)|_->None)letasi_insertcharposlast_charpos_errortr(passed_before,passed_offending,passed_after)=letinfo=TH.info_of_tokpassed_offendinginletvirtual_semi=Parser_js.T_VIRTUAL_SEMICOLON(Ast.fakeInfoAttachinfo)inif!Flag_js.debug_asithenpr2(spf"insertion fake ';' at %s"(PI.string_of_infoinfo));lettoks=List.revpassed_after@[virtual_semi;passed_offending]@passed_before@tr.PI.restin(* like in Parse_info.mk_tokens_state *)tr.PI.rest<-toks;tr.PI.current<-List.hdtoks;tr.PI.passed<-[];(* try again!
* This significantly slow-down parsing, especially on minimized
* files. Indeed, minimizers put all the code inside a giant
* function, which means no incremental parsing, and leverage ASI
* before right curly brace to save one character (hmmm). This means
* that we parse again and again the same series of tokens, just
* progressing a bit more everytime, and restarting from scratch.
* This is quadratic behavior.
*)last_charpos_error:=charpos(*****************************************************************************)(* Lexing only *)(*****************************************************************************)lettokens2file=lettable=Parse_info.full_charpos_to_pos_largefileinCommon.with_open_infilefile(funchan->letlexbuf=Lexing.from_channelchaninLexer_js.reset();letjstokenlexbuf=matchLexer_js.current_mode()with|Lexer_js.ST_IN_CODE->Lexer_js.initiallexbuf|Lexer_js.ST_IN_XHP_TAGcurrent_tag->Lexer_js.st_in_xhp_tagcurrent_taglexbuf|Lexer_js.ST_IN_XHP_TEXTcurrent_tag->Lexer_js.st_in_xhp_textcurrent_taglexbuf|Lexer_js.ST_IN_BACKQUOTE->Lexer_js.backquotelexbufinletrectokens_auxacc=lettok=jstokenlexbufinif!Flag.debug_lexerthenCommon.pr2_gentok;ifnot(TH.is_commenttok)thenLexer_js._last_non_whitespace_like_token:=Sometok;lettok=tok+>TH.visitor_info_of_tok(funii->{iiwithPI.token=(* could assert pinfo.filename = file ? *)matchii.PI.tokenwith|PI.OriginTokpi->PI.OriginTok(PI.complete_token_location_largefiletablepi)|PI.FakeTokStr_|PI.Ab|PI.ExpandedTok_->raiseImpossible})inifTH.is_eoftokthenList.rev(tok::acc)elsetokens_aux(tok::acc)intokens_aux[])lettokensa=Common.profile_code"Parse_js.tokens"(fun()->tokens2a)(*****************************************************************************)(* Helper for main entry point *)(*****************************************************************************)(* Hacked lex. This function use refs passed by parse.
* 'tr' means 'token refs'.
*)letreclexer_functiontr=funlexbuf->matchtr.PI.restwith|[]->(pr2"LEXER: ALREADY AT END";tr.PI.current)|v::xs->tr.PI.rest<-xs;tr.PI.current<-v;tr.PI.passed<-v::tr.PI.passed;ifTH.is_commentv(* || other condition to pass tokens ? *)thenlexer_function(*~pass*)trlexbufelsev(*****************************************************************************)(* Main entry point *)(*****************************************************************************)exceptionParse_errorofParse_info.infoletparse2filename=letstat=PI.default_statfilenameinlettoks=tokensfilenameinlettoks=Parsing_hacks_js.fix_tokenstoksinlettoks=Parsing_hacks_js.fix_tokens_ASItoksinlettr=PI.mk_tokens_statetoksinletlast_charpos_error=ref0inletlexbuf_fake=Lexing.from_function(fun_buf_n->raiseImpossible)inSys.set_signalSys.sigalrm(Sys.Signal_handle(fun_->raiseTimeout));(* todo: minimized files abusing ASI before '}' requires a very long time
* to parse
*)ignore(Unix.alarm5);letrecparse_module_item_or_eoftr=tryletitem=(* -------------------------------------------------- *)(* Call parser *)(* -------------------------------------------------- *)Common.profile_code"Parser_js.module_item"(fun()->Parser_js.module_item_or_eof(lexer_functiontr)lexbuf_fake)in(* this seems optional *)Parsing.clear_parser();put_back_lookahead_token_if_neededtritem;Leftitemwith|Lexer_js.Lexical_error(s,_)->letcur=tr.PI.currentinif!Flag.show_parsing_errorthenpr2("lexical error "^s^"\n ="^error_msg_tokcur);Rightcur|Parsing.Parse_error->letcur=tr.PI.currentinletinfo=TH.info_of_tokcurinletcharpos=Parse_info.pos_of_infoinfoin(* try Automatic Semicolon Insertion *)(matchasi_opportunitycharposlast_charpos_errorcurtrwith|None->if!Flag.show_parsing_errorthenpr2("parse error \n = "^error_msg_tokcur);Rightcur|Some(passed_before,passed_offending,passed_after)->asi_insertcharposlast_charpos_errortr(passed_before,passed_offending,passed_after);parse_module_item_or_eoftr)inletrecauxtr=letline_start=TH.line_of_toktr.PI.currentinletres=parse_module_item_or_eoftrinletpassed=tr.PI.passedintr.PI.passed<-[];letlines=trylet(head,_middle,last)=Common2.head_middle_tailpassedinletline1=TH.line_of_toklastinletline2=TH.line_of_tokheadinline2-line1(* +1? *)with_->1inmatchreswith(* EOF *)|LeftNone->stat.PI.correct<-stat.PI.correct+lines;[]|Left(Somex)->stat.PI.correct<-stat.PI.correct+lines;if!Flag_js.debug_asithenpr2(spf"parsed: %s"(Ast.Program[x]|>Meta_cst_js.vof_any|>Ocaml.string_of_v));x::auxtr|Righterr_tok->letmax_line=Common.catfilename+>List.lengthinif!Flag.show_parsing_errorthenbeginletfilelines=Common2.cat_arrayfilenameinletcur=tr.PI.currentinletline_error=TH.line_of_tokcurinPI.print_badline_error(line_start,minmax_line(line_error+10))filelines;end;if!Flag.error_recoverythenbegin(* todo? try to recover? call 'aux tr'? 
but then can be really slow *)stat.PI.bad<-stat.PI.bad+(max_line-line_start);[]endelseraise(Parse_error(TH.info_of_tokerr_tok))inletitems=tryauxtrwithTimeout->ignore(Unix.alarm0);if!Flag.show_parsing_errorthenpr2(spf"TIMEOUT on %s"filename);stat.PI.bad<-Common.catfilename|>List.length;stat.PI.have_timeout<-true;stat.PI.correct<-0;[]inignore(Unix.alarm0);(* the correct count is accurate because items do not fall always
* on clean line boundaries so we may count multiple times the same line
*)ifstat.PI.bad=0thenstat.PI.correct<-Common.catfilename|>List.length;(Someitems,toks),statletparsea=Common.profile_code"Parse_js.parse"(fun()->parse2a)letparse_programfile=let((astopt,_toks),_stat)=parsefileinCommon2.someastoptletparse_string(w:string):Ast.program=Common2.with_tmp_file~str:w~ext:"js"parse_program(*****************************************************************************)(* Sub parsers *)(*****************************************************************************)let(program_of_string:string->Cst_js.program)=funs->Common2.with_tmp_file~str:s~ext:"js"(funfile->parse_programfile)(* for sgrep/spatch *)letany_of_strings=Common2.with_tmp_file~str:s~ext:"js"(funfile->lettoks=tokensfileinlettr=PI.mk_tokens_statetoksinletlexbuf_fake=Lexing.from_function(fun_buf_n->raiseImpossible)in(* -------------------------------------------------- *)(* Call parser *)(* -------------------------------------------------- *)Parser_js.sgrep_spatch_pattern(lexer_functiontr)lexbuf_fake)(*****************************************************************************)(* Fuzzy parsing *)(*****************************************************************************)letparse_fuzzyfile=lettoks=tokensfileinlettrees=Parse_fuzzy.mk_trees{Parse_fuzzy.tokf=TH.info_of_tok;kind=TH.token_kind_of_tok;}toksintrees,toks