Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source
Source file parse_cpp.ml
(* Yoann Padioleau
*
* Copyright (C) 2002-2013 Yoann Padioleau
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License (GPL)
* version 2 as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* file license.txt for more details.
*)openCommonmoduleFlag=Flag_parsingmodulePI=Parse_infomoduleStat=Parse_infomoduleFT=File_typemoduleAst=Ast_cppmoduleFlag_cpp=Flag_parsing_cppmoduleT=Parser_cppmoduleTH=Token_helpers_cppmoduleLexer=Lexer_cppmoduleSemantic=Parser_cpp_mly_helper(*****************************************************************************)(* Prelude *)(*****************************************************************************)(*
* A heuristic based C/cpp/C++ parser.
*
* See "Parsing C/C++ Code without Pre-Preprocessing - Yoann Padioleau, CC'09"
 * available at http://padator.org/papers/yacfe-cc09.pdf
*)(*****************************************************************************)(* Types *)(*****************************************************************************)typetoplevels_and_tokens=(Ast.toplevel*Parser_cpp.tokenlist)listletprogram_of_program2xs=xs+>List.mapfstexceptionParse_errorofParse_info.info(*****************************************************************************)(* Wrappers *)(*****************************************************************************)letpr2,_pr2_once=Common2.mk_pr2_wrappersFlag.verbose_parsing(*****************************************************************************)(* Error diagnostic *)(*****************************************************************************)leterror_msg_toktok=Parse_info.error_message_info(TH.info_of_toktok)(*****************************************************************************)(* Stats on what was passed/commentized *)(*****************************************************************************)letcommentizedxs=xs+>Common.map_filter(function|T.TComment_Pp(cppkind,ii)->if!Flag_cpp.filter_classic_passedthen(matchcppkindwith|Token_cpp.CppOther->lets=PI.str_of_infoiiin(matchswith|swhens=~"KERN_.*"->None|swhens=~"__.*"->None|_->Some(ii.PI.token))|Token_cpp.CppDirective|Token_cpp.CppAttr|Token_cpp.CppMacro->None|Token_cpp.CppMacroExpanded|Token_cpp.CppPassingNormal|Token_cpp.CppPassingCosWouldGetError->raiseTodo)elseSome(ii.PI.token)|T.TAny_Actionii->Some(ii.PI.token)|_->None)letcount_lines_commentizedxs=letline=ref(-1)inletcount=ref0incommentizedxs+>List.iter(function|PI.OriginTokpinfo|PI.ExpandedTok(_,pinfo,_)->letnewline=pinfo.PI.lineinifnewline<>!linethenbeginline:=newline;incrcountend|_->());!count(* See also problematic_lines and parsing_stat.ml *)(* for most problematic tokens *)letis_same_line_or_closelinetok=TH.line_of_toktok=|=line||TH.line_of_toktok=|=line-1||TH.line_of_toktok=|=line-2(*****************************************************************************)(* Lexing only 
*)(*****************************************************************************)(* called by parse below *)lettokens2file=lettable=Parse_info.full_charpos_to_pos_largefileinCommon.with_open_infilefile(funchan->letlexbuf=Lexing.from_channelchanintryletrectokens_aux()=lettok=Lexer.tokenlexbufin(* fill in the line and col information *)lettok=tok+>TH.visitor_info_of_tok(funii->{iiwithPI.token=(* could assert pinfo.filename = file ? *)matchii.PI.tokenwith|PI.OriginTokpi->PI.OriginTok(Parse_info.complete_token_location_largefiletablepi)|PI.ExpandedTok(pi,vpi,off)->PI.ExpandedTok((Parse_info.complete_token_location_largefiletablepi),vpi,off)|PI.FakeTokStr(s,vpi_opt)->PI.FakeTokStr(s,vpi_opt)|PI.Ab->raiseImpossible})inifTH.is_eoftokthen[tok]elsetok::(tokens_aux())intokens_aux()with|Lexer.Lexicals->failwith(spf"lexical error %s \n = %s"s(PI.error_messagefile(PI.lexbuf_to_strposlexbuf)))|e->raisee)lettokensa=Common.profile_code"Parse_cpp.tokens"(fun()->tokens2a)(*****************************************************************************)(* Fuzzy parsing *)(*****************************************************************************)letrecmulti_grouped_listxs=xs+>List.mapmulti_groupedandmulti_grouped=function|Token_views_cpp.Braces(tok1,xs,(Sometok2))->Ast_fuzzy.Braces(tokexttok1,multi_grouped_listxs,tokexttok2)|Token_views_cpp.Parens(tok1,xs,(Sometok2))->Ast_fuzzy.Parens(tokexttok1,multi_grouped_list_commaxs,tokexttok2)|Token_views_cpp.Angle(tok1,xs,(Sometok2))->Ast_fuzzy.Angle(tokexttok1,multi_grouped_listxs,tokexttok2)|Token_views_cpp.Tok(tok)->(matchPI.str_of_info(tokexttok)with|"..."->Ast_fuzzy.Dots(tokexttok)|swhenAst_fuzzy.is_metavars->Ast_fuzzy.Metavar(s,tokexttok)|s->Ast_fuzzy.Tok(s,tokexttok))|_->failwith"could not find closing 
brace/parens/angle"andtokexttok_extended=TH.info_of_toktok_extended.Token_views_cpp.tandmulti_grouped_list_commaxs=letrecauxaccxs=matchxswith|[]->ifnullaccthen[]else[Left(acc+>List.rev+>multi_grouped_list)]|(x::xs)->(matchxwith|Token_views_cpp.ToktokwhenPI.str_of_info(tokexttok)=","->letbefore=acc+>List.rev+>multi_grouped_listinifnullbeforethenaux[]xselse(Leftbefore)::(Right(tokexttok))::aux[]xs|_->aux(x::acc)xs)inaux[]xs(* This is similar to what I did for OPA. This is also similar
* to what I do for parsing hacks, but this fuzzy AST can be useful
* on its own, e.g. for a not too bad sgrep/spatch.
*
* note: this is similar to what cpplint/fblint of andrei does?
*)letparse_fuzzyfile=Common.save_excursionFlag_cpp.sgrep_modetrue(fun()->lettoks_orig=tokensfileinlettoks=toks_orig+>Common.exclude(funx->Token_helpers_cpp.is_commentx||Token_helpers_cpp.is_eofx)inletextended=toks+>List.mapToken_views_cpp.mk_token_extendedinParsing_hacks_cpp.find_template_inf_supextended;letgroups=Token_views_cpp.mk_multiextendedinmulti_grouped_listgroups,toks_orig)(*****************************************************************************)(* Extract macros *)(*****************************************************************************)(* It can be used to to parse the macros defined in a macro.h file. It
* can also be used to try to extract the macros defined in the file
* that we try to parse *)letextract_macros2file=Common.save_excursionFlag.verbose_lexingfalse(fun()->lettoks=tokens(* todo: ~profile:false *)fileinlettoks=Parsing_hacks_define.fix_tokens_definetoksinPp_token.extract_macrostoks)letextract_macrosa=Common.profile_code_exclusif"Parse_cpp.extract_macros"(fun()->extract_macros2a)(* less: pass it as a parameter to parse_program instead ?
* old: was a ref, but a hashtbl.t is actually already a kind of ref
*)let(_defs:(string,Pp_token.define_body)Hashtbl.t)=Hashtbl.create101(* We used to have also a init_defs_builtins() so that we could use a
* standard.h containing macros that were always useful, and a macros.h
* that the user could customize for his own project.
* But this was adding complexity so now we just have _defs and people
* can call add_defs to add local macro definitions.
*)letadd_defsfile=ifnot(Sys.file_existsfile)thenfailwith(spf"Could not find %s, have you set PFFF_HOME correctly?"file);pr2(spf"Using %s macro file"file);letxs=extract_macrosfileinxs+>List.iter(fun(k,v)->Hashtbl.add_defskv)letinit_defsfile=Hashtbl.clear_defs;add_defsfile(*****************************************************************************)(* Error recovery *)(*****************************************************************************)(* see parsing_recovery_cpp.ml *)(*****************************************************************************)(* Consistency checking *)(*****************************************************************************)(* todo: a parsing_consistency_cpp.ml *)(*****************************************************************************)(* Helper for main entry point *)(*****************************************************************************)(* Hacked lex. This function use refs passed by parse.
* 'tr' means 'token refs'. This is used mostly to enable
* error recovery (This used to do lots of stuff, such as
* calling some lookahead heuristics to reclassify
* tokens such as TIdent into TIdent_Typeded but this is
* now done in a fix_tokens style in parsing_hacks_typedef.ml.
*)letreclexer_functiontr=funlexbuf->matchtr.PI.restwith|[]->(pr2"LEXER: ALREADY AT END";tr.PI.current)|v::xs->tr.PI.rest<-xs;tr.PI.current<-v;tr.PI.passed<-v::tr.PI.passed;if!Flag.debug_lexerthenpr2_genv;ifTH.is_commentvthenlexer_function(*~pass*)trlexbufelsev(* was a define ? *)letpassed_a_definetr=letxs=tr.PI.passed+>List.rev+>Common.excludeTH.is_commentinifList.lengthxs>=2then(matchCommon2.head_middle_tailxswith|T.TDefine_,_,T.TCommentNewline_DefineEndOfMacro_->true|_->false)elsebeginpr2"WIERD: length list of error recovery tokens < 2 ";falseend(*****************************************************************************)(* Main entry point *)(*****************************************************************************)(*
* note: as now we go in two passes, there is first all the error message of
* the lexer, and then the error of the parser. It is not anymore
 * intertwined.
*
 * !!!This function uses refs, and is not reentrant !!! so take care.
* It uses the _defs global defined above!!!!
*)letparse_with_lang?(lang=Flag_parsing_cpp.Cplusplus)file=letstat=Parse_info.default_statfileinletfilelines=Common2.cat_arrayfilein(* -------------------------------------------------- *)(* call lexer and get all the tokens *)(* -------------------------------------------------- *)lettoks_orig=tokensfileinlettoks=tryParsing_hacks.fix_tokens~macro_defs:_defslangtoks_origwithToken_views_cpp.UnclosedSymbols->pr2s;if!Flag_cpp.debug_cplusplusthenraise(Token_views_cpp.UnclosedSymbols)elsetoks_originlettr=Parse_info.mk_tokens_statetoksinletlexbuf_fake=Lexing.from_function(fun_buf_n->raiseImpossible)inletrecloop()=letinfo=TH.info_of_toktr.PI.currentin(* todo?: I am not sure that it represents current_line, cos maybe
* tr.current partipated in the previous parsing phase, so maybe tr.current
* is not the first token of the next parsing phase. Same with checkpoint2.
* It would be better to record when we have a } or ; in parser.mly,
* cos we know that they are the last symbols of external_declaration2.
*)letcheckpoint=PI.line_of_infoinfoin(* bugfix: may not be equal to 'file' as after macro expansions we can
* start to parse a new entity from the body of a macro, for instance
* when parsing a define_machine() body, cf standard.h
*)letcheckpoint_file=PI.file_of_infoinfointr.PI.passed<-[];(* for some statistics *)letwas_define=reffalseinletelem=(try(* -------------------------------------------------- *)(* Call parser *)(* -------------------------------------------------- *)Parser_cpp.toplevel(lexer_functiontr)lexbuf_fakewithe->ifnot!Flag.error_recoverythenraise(Parse_error(TH.info_of_toktr.PI.current));if!Flag.show_parsing_errorthen(matchewith(* Lexical is not anymore launched I think *)|Lexer.Lexicals->pr2("lexical error "^s^"\n ="^error_msg_toktr.PI.current)|Parsing.Parse_error->pr2("parse error \n = "^error_msg_toktr.PI.current)|Semantic.Semantic(s,_i)->pr2("semantic error "^s^"\n ="^error_msg_toktr.PI.current)|e->raisee);letline_error=TH.line_of_toktr.PI.currentinletpbline=tr.PI.passed+>List.filter(is_same_line_or_closeline_error)+>List.filterTH.is_ident_likeinleterror_info=(pbline+>List.map(funtok->PI.str_of_info(TH.info_of_toktok))),line_errorinstat.Stat.problematic_lines<-error_info::stat.Stat.problematic_lines;(* error recovery, go to next synchro point *)let(passed',rest')=Parsing_recovery_cpp.find_next_synchrotr.PI.resttr.PI.passedintr.PI.rest<-rest';tr.PI.passed<-passed';tr.PI.current<-List.hdpassed';(* <> line_error *)letinfo=TH.info_of_toktr.PI.currentinletcheckpoint2=PI.line_of_infoinfoinletcheckpoint2_file=PI.file_of_infoinfoinwas_define:=passed_a_definetr;(if!was_define&&!Flag_cpp.filter_define_errorthen()else(* bugfix: *)(if(checkpoint_file=checkpoint2_file)&&checkpoint_file=filethenPI.print_badline_error(checkpoint,checkpoint2)filelineselsepr2"PB: bad: but on tokens not from original file"));letinfo_of_bads=Common2.map_eff_revTH.info_of_toktr.PI.passedinSome(Ast.NotParsedCorrectlyinfo_of_bads))in(* again not sure if checkpoint2 corresponds to end of bad region 
*)letinfo=TH.info_of_toktr.PI.currentinletcheckpoint2=PI.line_of_infoinfoinletcheckpoint2_file=PI.file_of_infoinfoinletdiffline=if(checkpoint_file=checkpoint2_file)&&(checkpoint_file=file)then(checkpoint2-checkpoint)else0(* TODO? so if error come in middle of something ? where the
* start token was from original file but synchro found in body
* of macro ? then can have wrong number of lines stat.
* Maybe simpler just to look at tr.passed and count
* the lines in the token from the correct file ?
*)inletinfo=List.revtr.PI.passedin(* some stat updates *)stat.Stat.commentized<-stat.Stat.commentized+count_lines_commentizedinfo;(matchelemwith|Some(Ast.NotParsedCorrectly_xs)->if!was_define&&!Flag_cpp.filter_define_errorthenstat.Stat.commentized<-stat.Stat.commentized+difflineelsestat.Stat.bad<-stat.Stat.bad+diffline|_->stat.Stat.correct<-stat.Stat.correct+diffline);(matchelemwith|None->[]|Somexs->(xs,info)::loop()(* recurse *))inletv=loop()in(v,stat)letparse2file=matchFile_type.file_type_of_filefilewith|FT.PL(FT.C_)->(tryparse_with_lang~lang:Flag_cpp.Cfilewith_exn->parse_with_lang~lang:Flag_cpp.Cplusplusfile)|FT.PL(FT.Cplusplus_)->parse_with_lang~lang:Flag_cpp.Cplusplusfile|_->failwith(spf"not a C/C++ file: %s"file)letparsefile=Common.profile_code"Parse_cpp.parse"(fun()->tryparse2filewithStack_overflow->pr2(spf"PB stack overflow in %s"file);[(Ast.NotParsedCorrectly[],([]))],{Stat.correct=0;bad=Common2.nblines_with_wcfile;filename=file;have_timeout=true;commentized=0;problematic_lines=[];})letparse_programfile=let(ast2,_stat)=parsefileinprogram_of_program2ast2