Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source
Source file lib_unparser.ml
(* Yoann Padioleau
*
* Copyright (C) 2013 Facebook
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* version 2.1 as published by the Free Software Foundation, with the
* special exception on linking described in file license.txt.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the file
* license.txt for more details.
*)openCommonmodulePI=Parse_infoopenParse_info(*****************************************************************************)(* Prelude *)(*****************************************************************************)(*
* There are multiple ways to unparse/pretty-print code:
* - one can iterate over an AST (or better CST), and print its leaves, but
* comments and spaces are usually not in the CST (and for a good reason)
* so you need some extra code that also visits the tokens and tries
* to "sync" the visit of the CST with the tokens
* - one can use a real pretty printer with a boxing or backtracking model
* working on an AST extended with comments (see julien's ast_pretty_print/)
* - one can iterate over the tokens, where comments and spaces are normal
* citizens, but this can be too low level
*
* Right now the preferred method for spatch is the last one. The pretty
* printer currently is too different from our coding conventions
* (also because we don't have precise coding conventions).
* This token-based unparser handles transformation annotations (Add/Remove).
* This was also the approach used in Coccinelle.
*)(*****************************************************************************)(* Types *)(*****************************************************************************)(* Intermediate representations easier to work on; more convenient to
* program heuristics which try to maintain some good indentation
* and style.
*)
type elt =
  | OrigElt of string
  | Removed of string
  | Added of string
  (* an esthetic token (comment, newline or space) together with its text *)
  | Esthet2 of (Parse_info.esthet * string)
  (* with tarzan *)

(*****************************************************************************)
(* Globals *)
(*****************************************************************************)

(* when set, string_of_toks_using_transfo dumps every elt through pr2 *)
let debug = ref false

(*****************************************************************************)
(* Vof *)
(*****************************************************************************)

(* autogenerated by ocamltarzan.
 * The Esthet2 case is dumped under its real constructor name so that the
 * debug output matches the elt type above.
 *)
let rec vof_elt =
  function
  | OrigElt v1 ->
      let v1 = Ocaml.vof_string v1 in
      Ocaml.VSum (("OrigElt", [ v1 ]))
  | Removed v1 ->
      let v1 = Ocaml.vof_string v1 in
      Ocaml.VSum (("Removed", [ v1 ]))
  | Added v1 ->
      let v1 = Ocaml.vof_string v1 in
      Ocaml.VSum (("Added", [ v1 ]))
  | Esthet2 (v1, v2) ->
      let v1 = vof_esthet v1 in
      let v2 = Ocaml.vof_string v2 in
      Ocaml.VSum (("Esthet2", [ v1; v2 ]))

and vof_esthet =
  function
  | Comment -> Ocaml.VSum (("Comment", []))
  | Newline -> Ocaml.VSum (("Newline", []))
  | Space -> Ocaml.VSum (("Space", []))

(*****************************************************************************)
(* Helpers *)
(*****************************************************************************)

(* Text carried by an 'add' annotation. Only AddStr is supported;
 * AddNewlineAndIdent raises Todo.
 *)
let s_of_add = function
  | AddStr s -> s
  | AddNewlineAndIdent -> raise Todo

(*****************************************************************************)
(* AddArgsBefore helpers *)
(*****************************************************************************)

(* rh = reversed head, tl = tail
 *
 * Walks tl (the reversed accumulator built by elts_of_any, so going
 * backward in token order), skipping Removed and Esthet2 elements, until
 * the first OrigElt:
 *  - if that element is "," or "(", nothing is added;
 *  - otherwise (Added add_str) is consed in front of it, which places it
 *    right after that element in token order.
 * The elements skipped on the way are put back with List.rev_append.
 * Raises Failure on an empty list or when an Added element is met.
 *)
let rec add_if_need_comma add_str rh tl =
  match tl with
  (* Because this token is a right parenthesis, there must be
     something before *)
  | [] -> failwith "Error with need_comma"
  | (OrigElt str)::_t when ((str = ",") || (str = "(")) ->
      List.rev_append rh tl
  | ((OrigElt _str) as h)::t ->
      List.rev_append rh ((Added add_str)::h::t)
  | ((Removed _str) as h)::t ->
      add_if_need_comma add_str (h::rh) t
  (* Added is very arbitrary, I'd rather not handle them.
   * This can be avoided by using AddArgsBefore only
   *)
  | (Added _str)::_t -> failwith "need comma: cannot handle this case!"
  | ((Esthet2 _) as h)::t ->
      add_if_need_comma add_str (h::rh) t

(* Looks backward in the reversed accumulator for the first OrigElt or
 * Newline element and returns it, paired with the total length of the
 * Space elements skipped on the way (starting from the optional ws).
 * Removed and Comment elements are skipped without being counted.
 * Raises Failure on an empty list or when an Added element is met.
 *)
let rec search_prev_elt ?(ws=0) acc =
  match acc with
  (* Because this token is a right parenthesis, there must be
     something before *)
  | [] -> failwith "Error with search_prev_real_elt"
  | (OrigElt str)::_t -> (OrigElt str, ws)
  | (Removed _str)::t -> search_prev_elt ~ws t
  | (Added _str)::_t ->
      failwith "search_prev_real_elt: cannot handle this case"
  | (Esthet2 (Comment, _str))::t -> search_prev_elt ~ws t
  | (Esthet2 (Newline, str))::_t -> (Esthet2 (Newline, str), ws)
  | (Esthet2 (Space, str))::t ->
      search_prev_elt ~ws:(ws + String.length str) t

(* This function decides how to add arguments.
* factors considered:
* prepend/append comma around arguments?
* new line for each argument?
* heuristic:
* if previous (real) token is '(' or ',', do not prepend comma
* if this token (right parenthesis) follows a newline and some space, add a newline for
* each argument, and append a comma
*)
(* acc is the reversed list of elements seen before the current token;
 * xs is the list of argument strings to insert. Returns the new reversed
 * accumulator (the current token itself is consed by the caller).
 *)
let elts_of_add_args_before acc xs =
  match xs with
  (* Nothing to add: leave acc untouched, otherwise add_if_need_comma
   * would still insert a separator comma with no argument after it.
   *)
  | [] -> acc
  | _ ->
      let (elt, ws) = search_prev_elt acc in
      (* search_prev_elt will fail if meet Added, which may be inserted
         during add_if_need_comma.
       *)
      (match elt with
      | Esthet2 (Newline, _) ->
          (* new line for each argument; ws spaces are re-emitted after
           * each newline *)
          let acc = add_if_need_comma "," [] acc in
          let sep =
            xs +> List.map (fun s -> " "^s^",\n"^String.make ws ' ') in
          let add_str = join "" sep in
          (Added add_str)::acc
      | _ ->
          let acc = add_if_need_comma ", " [] acc in
          let add_str = join ", " xs in
          (Added add_str)::acc
      )

(*****************************************************************************)
(* Elts of any *)
(*****************************************************************************)

(* Converts one token to an elt: esthetic tokens (PI.Esthet) become
 * Esthet2, everything else becomes OrigElt. The token info is returned
 * alongside so the caller can inspect its transfo annotation.
 *)
let elt_and_info_of_tok ~kind_and_info_of_tok tok =
  let (kind, info) = kind_and_info_of_tok tok in
  let str = PI.str_of_info info in
  let elt =
    match kind with
    | PI.Esthet x -> Esthet2 (x, str)
    | _ -> OrigElt (str)
  in
  elt, info

(* Turns the token list into a list of elt, in token order, expanding the
 * transfo annotation of each token into Removed/Added elements.
 * Raises Impossible on any token that is not an OriginTok.
 *)
let elts_of_any ~kind_and_info_of_tok toks =
  let rec aux acc toks =
    match toks with
    | [] -> List.rev acc
    | tok::t ->
        let elt, info = elt_and_info_of_tok ~kind_and_info_of_tok tok in
        (match info.token with
        | Ab | FakeTokStr _ | ExpandedTok _ -> raise Impossible
        | OriginTok _ ->
            (match info.transfo with
            (* acc is reversed! *)
            | NoTransfo ->
                aux (elt::acc) t
            | Remove ->
                aux (Removed (PI.str_of_info info)::acc) t
            | Replace toadd ->
                (* could also be Removed::Added::_, now that we have
                 * drop_useless_space(), this should not matter anymore
                 *)
                aux (Added (s_of_add toadd)::
                     Removed (PI.str_of_info info)::acc) t
            | AddAfter toadd ->
                aux (Added (s_of_add toadd)::elt::acc) t
            | AddBefore toadd ->
                aux (elt::Added (s_of_add toadd)::acc) t
            | AddArgsBefore xs ->
                let elt_list = elts_of_add_args_before acc xs in
                let acc = elt::elt_list in
                aux acc t
            )
        )
  in
  aux [] toks

(*****************************************************************************)
(* Heuristics *)
(*****************************************************************************)

(* but needs to keep the Removed, otherwise drop_whole_line_if_only_removed()
* can not know which new empty lines it has to remove
*)
(* Drops the Esthet2 elements that sit between two Removed elements.
 * Every Removed is kept, and esthetic elements that follow a Removed but
 * are followed by an Added, an OrigElt, or the end of the list are kept too.
 *)
let drop_esthet_between_removed xs =
  (* held = None: the last significant element was not a Removed.
   * held = Some l: we are after a Removed, and l is the reversed list of
   * esthetic elements met since then, not yet emitted.
   *)
  let rec walk held elts =
    match held, elts with
    | None, [] -> []
    | Some pending, [] -> List.rev pending
    (* a new Removed: whatever was pending lies between two Removed *)
    | _, Removed s :: rest -> Removed s :: walk (Some []) rest
    | None, other :: rest -> other :: walk None rest
    | Some pending, (Esthet2 _ as e) :: rest ->
        walk (Some (e :: pending)) rest
    (* real content: flush the pending esthetic elements before it *)
    | Some pending, ((Added _ | OrigElt _) as real) :: rest ->
        List.rev_append (real :: pending) (walk None rest)
  in
  walk None xs

(* note that it will also remove comments in the line if everything else
* was removed, which is what we want most of the time
*)
(* Removes every line (a Newline element plus the elements that follow it)
 * that contains at least one Removed and otherwise only esthetic elements.
 * The elements before the first Newline are always kept as is.
 * NOTE(review): relies on Common2.group_by_pre returning the elements
 * before the first match, then (matching element, following elements)
 * groups -- confirm against Common2.
 *)
let drop_whole_line_if_only_removed xs =
  let is_newline = function
    | Esthet2 (Newline, _) -> true
    | _ -> false
  in
  let is_removed = function
    | Removed _ -> true
    | _ -> false
  in
  let is_removed_or_esthet = function
    | Removed _ | Esthet2 _ -> true
    | OrigElt _ | Added _ -> false
  in
  let line_is_fully_removed (_newline, rest_of_line) =
    List.exists is_removed rest_of_line
    && List.for_all is_removed_or_esthet rest_of_line
  in
  let (first_line, later_lines) = Common2.group_by_pre is_newline xs in
  let kept_lines = Common.exclude line_is_fully_removed later_lines in
  first_line
  @ List.concat (List.map (fun (nl, elts) -> nl :: elts) kept_lines)

(* people often write s/foo(X,Y)/.../ but some calls to foo may have
* a trailing comma that we also want to remove automatically
*)
(* Turns an original "," into a Removed one when it is immediately
 * preceded by a Removed element and immediately followed by a
 * Removed ")". Scanning resumes after the ")".
 *)
let drop_trailing_comma_between_removed xs =
  let rec scan = function
    | [] -> []
    | Removed before :: OrigElt "," :: Removed ")" :: after ->
        Removed before :: Removed "," :: Removed ")" :: scan after
    | head :: tail -> head :: scan tail
  in
  scan xs

(* Filters out every Removed element. *)
let drop_removed xs =
  let is_removed = function
    | Removed _ -> true
    | _ -> false
  in
  Common.exclude is_removed xs

(* When removing code, it's quite common as a result to have double
* spacing. For instance when in 'class X implements I {' we remove
* the interface 'I', as a result we naively get 'class X {'.
* The function below then detects those cases and removes the double spacing.
*
* We can have double space only as a result of a transformation on that line.
* Otherwise the spacing will have been agglomerated by the parser, so we
* don't risk removing too many spaces here.
*)
let rec drop_useless_space = function
  | [] -> []
  (* two consecutive spaces: keep only the first one and look again, so
   * longer runs of spaces collapse to a single element *)
  | (Esthet2 (Space, _) as first) :: Esthet2 (Space, _) :: rest ->
      drop_useless_space (first :: rest)
  (* see tests/php/spatch/distr_plus.spatch, just like we can have
   * double spaces, we can also have space before comma that are
   * useless
   *)
  | Esthet2 (Space, _) :: (OrigElt "," :: _ as from_comma) ->
      drop_useless_space from_comma
  | head :: tail -> head :: drop_useless_space tail

(*****************************************************************************)
(* Main entry point *)
(*****************************************************************************)
(*
* The idea of the algorithm below is to iterate over all the tokens
* and depending on the token 'transfo' annotation to print or not
* the token as well as the comments/spaces associated with the token.
* Note that if two tokens were annotated with a Remove, we
* also want to remove the spaces between so we need a few heuristics
* to maintain some good style.
*)
(* Converts the tokens to elts, applies the clean-up heuristics in order,
 * then writes the surviving elements to the string buffer handed over by
 * Common2.with_open_stringbuf.
 * NOTE(review): the result is presumably the buffer contents as a string;
 * confirm against Common2.with_open_stringbuf.
 *)
let string_of_toks_using_transfo ~kind_and_info_of_tok toks =
  Common2.with_open_stringbuf (fun (_pr_with_nl, buf) ->
    let emit = Buffer.add_string buf in
    let print_elt = function
      (* a newline element is always written as "\n", whatever its text *)
      | Esthet2 (Newline, _) -> emit "\n"
      | Removed _ -> raise Impossible (* see drop_removed *)
      | OrigElt s | Added s | Esthet2 ((Comment | Space), s) -> emit s
    in
    let initial = elts_of_any ~kind_and_info_of_tok toks in
    if !debug then
      List.iter (fun e -> pr2 (Ocaml.string_of_v (vof_elt e))) initial;
    (* drop_removed must run after drop_whole_line_if_only_removed, which
     * needs the Removed elements to recognize the lines to delete *)
    let cleaned =
      drop_useless_space
        (drop_removed
          (drop_whole_line_if_only_removed
            (drop_trailing_comma_between_removed
              (drop_esthet_between_removed initial))))
    in
    List.iter print_elt cleaned
  )