Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source
Source file parse_php.ml
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511(* Yoann Padioleau
*
* Copyright (C) 2009-2011 Facebook
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* version 2.1 as published by the Free Software Foundation, with the
* special exception on linking described in file license.txt.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the file
* license.txt for more details.
*)openCommonmoduleAst=Cst_phpmoduleFlag=Flag_parsingmoduleFlag_php=Flag_parsing_phpmoduleTH=Token_helpers_phpmodulePI=Parse_info(*****************************************************************************)(* Prelude *)(*****************************************************************************)(* A PHP parser.
*
* related work:
* - miamide, also in ocaml, but didn't support all of PHP
* - https://github.com/sfindeisen/phphard, also written in ocaml, but
* seems pretty rudimentary
*)

(*****************************************************************************)
(* Types *)
(*****************************************************************************)

(* A program together with the token list it was parsed from. The lexing
 * functions below do not filter anything out of that list, so comment
 * tokens are still present in it.
 *)
type program_with_comments = Cst_php.program * Parser_php.token list

(*****************************************************************************)
(* Error diagnostic *)
(*****************************************************************************)

(* Error message text for the position information carried by [tok]. *)
let error_msg_tok tok =
  let info = TH.info_of_tok tok in
  PI.error_message_info info

(*****************************************************************************)
(* Lexing only *)
(*****************************************************************************)

(* Lex the whole channel produced by [changen] and return every token in
 * source order; the token recognized by TH.is_eof is the last element.
 *
 * - init_state: lexer mode installed as the only element of the mode stack
 *   before lexing starts (defaults to Lexer_php.INITIAL).
 * - changen: generator returning (channel, _, file name); the channel is
 *   closed through Common.finalize when lexing is over.
 *
 * A Lexer_php.Lexical exception is turned into a Failure whose message
 * carries the error position; any other exception is re-raised unchanged.
 *)
let tokens_from_changen ?(init_state = Lexer_php.INITIAL) changen =
  let table = PI.full_charpos_to_pos_large_from_changen changen in
  let (chan, _, file) = changen () in

  let lex_all () =
    let lexbuf = Lexing.from_channel chan in
    Lexer_php.reset ();
    Lexer_php._mode_stack := [init_state];

    (* Return the next raw token: first drain the tokens pushed back by the
     * lexer (yyless emulation), otherwise run the lexer rule matching the
     * current mode.
     *)
    let next_raw_token lexbuf =
      match !Lexer_php._pending_tokens with
      | pending :: others ->
          Lexer_php._pending_tokens := others;
          pending
      | [] ->
          begin match Lexer_php.current_mode () with
          | Lexer_php.INITIAL ->
              Lexer_php.initial lexbuf
          | Lexer_php.ST_IN_SCRIPTING ->
              Lexer_php.st_in_scripting lexbuf
          | Lexer_php.ST_IN_SCRIPTING2 ->
              Lexer_php.st_in_scripting lexbuf
          | Lexer_php.ST_DOUBLE_QUOTES ->
              Lexer_php.st_double_quotes lexbuf
          | Lexer_php.ST_BACKQUOTE ->
              Lexer_php.st_backquote lexbuf
          | Lexer_php.ST_LOOKING_FOR_PROPERTY ->
              Lexer_php.st_looking_for_property lexbuf
          | Lexer_php.ST_LOOKING_FOR_VARNAME ->
              Lexer_php.st_looking_for_varname lexbuf
          | Lexer_php.ST_VAR_OFFSET ->
              Lexer_php.st_var_offset lexbuf
          | Lexer_php.ST_START_HEREDOC s ->
              Lexer_php.st_start_heredoc s lexbuf
          | Lexer_php.ST_START_NOWDOC s ->
              Lexer_php.st_start_nowdoc s lexbuf

          (* xhp: these modes are only legal when XHP support is enabled *)
          | Lexer_php.ST_IN_XHP_TAG current_tag ->
              if not !Flag_php.xhp_builtin
              then raise Impossible;
              Lexer_php.st_in_xhp_tag current_tag lexbuf
          | Lexer_php.ST_IN_XHP_TEXT current_tag ->
              if not !Flag_php.xhp_builtin
              then raise Impossible;
              Lexer_php.st_in_xhp_text current_tag lexbuf
          end
    in

    (* Complete the location of a token info using [file] and [table].
     * Only original tokens are expected to come out of the lexer.
     *)
    let relocate ii =
      (* could assert pinfo.filename = file ? *)
      let located =
        match ii.PI.token with
        | PI.OriginTok pi ->
            PI.OriginTok (PI.complete_token_location_large file table pi)
        | PI.FakeTokStr _
        | PI.Ab
        | PI.ExpandedTok _ ->
            raise Impossible
      in
      { ii with PI.token = located }
    in

    (* Accumulate tokens in reverse order until the EOF token shows up. *)
    let rec collect acc =
      let raw = next_raw_token lexbuf in
      if !Flag.debug_lexer then Common.pr2_gen raw;
      if not (TH.is_comment raw)
      then Lexer_php._last_non_whitespace_like_token := Some raw;
      let tok = TH.visitor_info_of_tok relocate raw in
      if TH.is_eof tok
      then List.rev (tok :: acc)
      else collect (tok :: acc)
    in

    try collect []
    with
    | Lexer_php.Lexical s ->
        failwith ("lexical error " ^ s ^ "\n =" ^
                  (PI.error_message file (PI.lexbuf_to_strpos lexbuf)))
    | e -> raise e
  in
  Common.finalize lex_all (fun () -> close_in chan)

(* Same as tokens_from_changen, but wrapped with PI.file_wrap_changen so that
 * it is driven from a file name instead of a channel generator.
 *)
let tokens2 ?init_state =
  PI.file_wrap_changen (tokens_from_changen ?init_state)

(* Profiled entry point for lexing: runs tokens2 under the
 * Parse_php.tokens profiling label.
 *)
let tokens ?init_state file =
  Common.profile_code "Parse_php.tokens" (fun () -> tokens2 ?init_state file)

(*****************************************************************************)
(* Helper for main entry point *)
(*****************************************************************************)

(* Hacked lex. This function use refs passed by parse.
* 'tr' means 'token refs'.
*)letreclexer_functiontr=funlexbuf->matchtr.PI.restwith|[]->(pr2"LEXER: ALREADY AT END";tr.PI.current)|v::xs->tr.PI.rest<-xs;tr.PI.current<-v;tr.PI.passed<-v::tr.PI.passed;ifTH.is_commentv||(* TODO a little bit specific to FB ? *)(matchvwith|Parser_php.T_OPEN_TAG_->true|Parser_php.T_CLOSE_TAG_->true|_->false)thenlexer_function(*~pass*)trlexbufelsev(*****************************************************************************)(* Main entry point *)(*****************************************************************************)(* could move that in h_program-lang/, but maybe clearer to put it closer
* to the parsing function.
*)exceptionParse_errorofPI.tletparse2?(pp=(!Flag_php.pp_default))filename=letorig_filename=filenameinletfilename=(* note that now that pfff support XHP constructs directly,
* this code is not that needed.
*)matchppwith|None->orig_filename|Somecmd->Common.profile_code"Parse_php.pp_maybe"(fun()->letpp_flag=if!Flag_php.verbose_ppthen"-v"else""in(* The following requires the preprocessor command to
* support the -q command line flag.
*
* Maybe a little bit specific to XHP and xhpize ... But
* because I use as a convention that 0 means no_need_pp, if
* the preprocessor does not support -q, it should return an
* error code, in which case we will fall back to the regular
* case. *)letcmd_need_pp=spf"%s -q %s %s"cmdpp_flagfilenameinif!Flag_php.verbose_ppthenpr2(spf"executing %s"cmd_need_pp);letret=Sys.commandcmd_need_ppinifret=0thenorig_filenameelsebeginCommon.profile_code"Parse_php.pp"(fun()->lettmpfile=Common.new_temp_file"pp"".pphp"inletfullcmd=spf"%s %s %s > %s"cmdpp_flagfilenametmpfileinif!Flag_php.verbose_ppthenpr2(spf"executing %s"fullcmd);letret=Sys.commandfullcmdinifret<>0thenfailwith"The preprocessor command returned an error code";tmpfile)end)inletstat=PI.default_statfilenameinletfilelines=Common2.cat_arrayfilenameinlettoks=tokensfilenamein(* note that now that pfff support XHP constructs directly,
* this code is not that needed.
*)lettoks=iffilename=orig_filenamethentokselsePp_php.adapt_tokens_pp~tokenizer:tokens~orig_filenametoksinlettoks=Parsing_hacks_php.fix_tokenstoksinlettr=PI.mk_tokens_statetoksinletcheckpoint=TH.line_of_toktr.PI.currentinletlexbuf_fake=Lexing.from_function(fun_buf_n->raiseImpossible)inletelems=try((* -------------------------------------------------- *)(* Call parser *)(* -------------------------------------------------- *)Left(Common.profile_code"Parser_php.main"(fun()->(Parser_php.main(lexer_functiontr)lexbuf_fake))))withe->letline_error=TH.line_of_toktr.PI.currentinlet_passed_before_error=tr.PI.passedinletcurrent=tr.PI.currentin(* no error recovery, the whole file is discarded *)tr.PI.passed<-List.revtoks;letinfo_of_bads=Common2.map_eff_revTH.info_of_toktr.PI.passedinRight(info_of_bads,line_error,current,e)inmatchelemswith|Leftxs->stat.PI.correct<-(Common.catfilename|>List.length);(xs,toks),stat|Right(info_of_bads,line_error,cur,exn)->ifnot!Flag.error_recoverythenraise(Parse_error(TH.info_of_tokcur));(matchexnwith|Lexer_php.Lexical_|Parsing.Parse_error(*| Semantic_c.Semantic _ *)->()|e->raisee);if!Flag.show_parsing_errorthen(matchexnwith(* Lexical is not anymore launched I think *)|Lexer_php.Lexicals->pr2("lexical error "^s^"\n ="^error_msg_tokcur)|Parsing.Parse_error->pr2("parse error \n = "^error_msg_tokcur)(* | Semantic_java.Semantic (s, i) ->
pr2 ("semantic error " ^s^ "\n ="^ error_msg_tok tr.current)
*)|_e->raiseImpossible);letcheckpoint2=Common.catfilename|>List.lengthinif!Flag_php.show_parsing_error_fullthenPI.print_badline_error(checkpoint,checkpoint2)filelines;stat.PI.bad<-Common.catfilename|>List.length;letinfo_item=(List.revtr.PI.passed)in([Ast.NotParsedCorrectlyinfo_of_bads],info_item),statlet_hmemo_parse_php=Hashtbl.create101letparse_memo?ppfile=ifnot!Flag_php.caching_parsingthenparse2?ppfileelseCommon.memoized_hmemo_parse_phpfile(fun()->Common.profile_code"Parse_php.parse_no_memo"(fun()->parse2?ppfile))letparse?ppa=Common.profile_code"Parse_php.parse"(fun()->parse_memo?ppa)letparse_program?ppfile=let((ast,_toks),_stat)=parse?ppfileinastletast_and_tokensfile=let((ast,toks),_stat)=parsefilein(ast,toks)(*****************************************************************************)(* Sub parsers *)(*****************************************************************************)letparse_any_from_changen(changen:PI.changen)=lettoks=tokens_from_changen~init_state:Lexer_php.ST_IN_SCRIPTINGchangeninlettr=PI.mk_tokens_statetoksinletlexbuf_fake=Lexing.from_function(fun_buf_n->raiseImpossible)intryParser_php.sgrep_spatch_pattern(lexer_functiontr)lexbuf_fakewithexn->letcur=tr.PI.currentinif!Flag.show_parsing_errorthen(matchexnwith(* Lexical is not anymore launched I think *)|Lexer_php.Lexicals->pr2("lexical error "^s^"\n ="^error_msg_tokcur)|Parsing.Parse_error->pr2("parse error \n = "^error_msg_tokcur)(* | Semantic_java.Semantic (s, i) ->
pr2 ("semantic error " ^s^ "\n ="^ error_msg_tok tr.current)
*)|_->raiseexn);raiseexnletparse_any=PI.file_wrap_changenparse_any_from_changen(* any_of_string() allows small chunks of PHP to be parsed without
* having to use the filesystem by leveraging the changen mechanism.
* In order to supply a string as a channel we must create a socket
* pair and write our string to it. This is not ideal and may fail if
* we try to parse too many short strings without closing the channel,
* or if the string is so large that the OS blocks our socket.
*)letany_of_strings=letlen=String.lengthsinletchangen=(fun()->let(socket_a,socket_b)=Unix.(socketpairPF_UNIXSOCK_STREAM0)inletfake_filename=""inlet(data_in,data_out)=Unix.(in_channel_of_descrsocket_a,out_channel_of_descrsocket_b)inoutput_stringdata_outs;flushdata_out;close_outdata_out;(data_in,len,fake_filename))in(* disable showing parsing errors as there is no filename and
* error_msg_tok() would throw a Sys_error exception
*)Common.save_excursionFlag.show_parsing_errorfalse(fun()->parse_any_from_changenchangen)(*
* todo: obsolete now with parse_any ? just redirect to parse_any ?
*
* This function is useful not only to test but also in our own code
* as a shortcut to build complex expressions
*)let(expr_of_string:string->Cst_php.expr)=funs->lettmpfile=Common.new_temp_file"pfff_expr_of_s""php"inCommon.write_filetmpfile("<?php \n"^s^";\n");letast=parse_programtmpfileinletres=(matchastwith|[Ast.StmtList[Ast.ExprStmt(e,_tok)];Ast.FinalDef_]->e|_->failwith"only expr pattern are supported for now")inCommon.erase_this_temp_filetmpfile;res(* It is clearer for our testing code to programmatically build source files
* so that all the information about a test is in the same
* file. You don't have to open extra files to understand the test
* data. This function is useful mostly for our unit tests
*)let(program_of_string:string->Cst_php.program)=funs->lettmpfile=Common.new_temp_file"pfff_expr_of_s""php"inCommon.write_filetmpfile("<?php \n"^s^"\n");letast=parse_programtmpfileinCommon.erase_this_temp_filetmpfile;ast(* use program_of_string when you can *)lettmp_php_file_from_string?(header="<?php\n")s=lettmp_file=Common.new_temp_file"test"".php"inCommon.write_file~file:tmp_file(header^s);tmp_file(* this function is useful mostly for our unit tests *)let(tokens_of_string:string->Parser_php.tokenlist)=funs->lettmpfile=Common.new_temp_file"pfff_tokens_of_s""php"inCommon.write_filetmpfile("<?php \n"^s^"\n");lettoks=tokenstmpfileinCommon.erase_this_temp_filetmpfile;toks(* A fast-path parser of xdebug expressions in xdebug dumpfiles.
* See xdebug.ml *)let(xdebug_expr_of_string:string->Cst_php.expr)=fun_s->(*
let lexbuf = Lexing.from_string s in
let expr = Parser_php.expr basic_lexer_skip_comments lexbuf in
expr
*)raiseTodo(* The default PHP parser function stores position information for all tokens,
* builds some Parse_php.info_items for each toplevel entity, and
* does other things which are most of the time useful for some analysis
* but which start to really slow down parsing for huge (generated) PHP files.
* Enter parse_fast(), which disables most of those things.
* Note that it may not parse correctly all PHP code, so use with
* caution.
*)letparse_fastfile=letchan=open_infileinletlexbuf=Lexing.from_channelchaninLexer_php.reset();Lexer_php._mode_stack:=[Lexer_php.INITIAL];letrecphp_next_tokenlexbuf=lettok=(* for yyless emulation *)match!Lexer_php._pending_tokenswith|x::xs->Lexer_php._pending_tokens:=xs;x|[]->(matchLexer_php.current_mode()with|Lexer_php.INITIAL->Lexer_php.initiallexbuf|Lexer_php.ST_IN_SCRIPTING->Lexer_php.st_in_scriptinglexbuf|Lexer_php.ST_IN_SCRIPTING2->Lexer_php.st_in_scriptinglexbuf|Lexer_php.ST_DOUBLE_QUOTES->Lexer_php.st_double_quoteslexbuf|Lexer_php.ST_BACKQUOTE->Lexer_php.st_backquotelexbuf|Lexer_php.ST_LOOKING_FOR_PROPERTY->Lexer_php.st_looking_for_propertylexbuf|Lexer_php.ST_LOOKING_FOR_VARNAME->Lexer_php.st_looking_for_varnamelexbuf|Lexer_php.ST_VAR_OFFSET->Lexer_php.st_var_offsetlexbuf|Lexer_php.ST_START_HEREDOCs->Lexer_php.st_start_heredocslexbuf|Lexer_php.ST_START_NOWDOCs->Lexer_php.st_start_nowdocslexbuf|Lexer_php.ST_IN_XHP_TAGcurrent_tag->Lexer_php.st_in_xhp_tagcurrent_taglexbuf|Lexer_php.ST_IN_XHP_TEXTcurrent_tag->Lexer_php.st_in_xhp_textcurrent_taglexbuf)inmatchtokwith|Parser_php.T_COMMENT_|Parser_php.T_DOC_COMMENT_|Parser_php.TSpaces_|Parser_php.TNewline_|Parser_php.TCommentPP_|Parser_php.T_OPEN_TAG_|Parser_php.T_CLOSE_TAG_->php_next_tokenlexbuf|_->tokintryletres=Parser_php.mainphp_next_tokenlexbufinclose_inchan;reswithParsing.Parse_error->pr2(spf"parsing error in php fast parser: %s"(Lexing.lexemelexbuf));raiseParsing.Parse_error