Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source
Source file cst_php.ml
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005(* Yoann Padioleau
*
* Copyright (C) 2009-2013 Facebook
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* version 2.1 as published by the Free Software Foundation, with the
* special exception on linking described in file license.txt.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the file
* license.txt for more details.
*)openCommon(*****************************************************************************)(* Prelude *)(*****************************************************************************)(*
* This module defines a Concrete Syntax Tree (CST) for PHP 5.2 with
* a few extensions from PHP 5.3 (e.g. closures, namespace, const) and
* PHP 5.4 (e.g. traits) as well as support for many Facebook
* extensions (XHP, generators, annotations, generics, collections,
* type definitions, implicit fields via constructor parameters).
*
* a CST is convenient in a refactoring context or code visualization
* context, but if you need to do some heavy static analysis, consider
* instead lang_php/analyze/foundation/pil.ml which defines a
* PHP Intermediate Language a la CIL.
*
* todo:
* - unify toplevel statement vs statements? hmmm maybe not
* - maybe even in a refactoring context a PIL+comment
* (see pretty/ast_pp.ml) would make more sense.
*
* NOTE: data from this type are often marshalled in berkeley DB tables
* which means that if you add a new constructor or field in the types below,
* you must erase the berkeley DB databases otherwise pfff
* will probably ends with a segfault (OCaml serialization is not
* type-safe). A hacky solution is to add new constructors only at the end
* of a type definition.
*
* COUPLING: some programs in other languages (e.g. Python) may
* use some of the pfff binding, or json/sexp exporters, so if you
* change the name of constructors in this file, don't forget
* to regenerate the json/sexp exporters, but also to modify the
* dependent programs !!!! An easier solution is to not change this
* file, or to only add new constructors.
*)(*****************************************************************************)(* The AST related types *)(*****************************************************************************)(* ------------------------------------------------------------------------- *)(* Token/info *)(* ------------------------------------------------------------------------- *)(* Contains among other things the position of the token through
* the Parse_info.parse_info embedded inside it, as well as the
* transformation field that makes possible spatch.
*)typetok=Parse_info.tandinfo=tok(* shortcuts to annotate some information with token/position information *)and'awrap='a*tokand'aparen=tok*'a*tokand'abrace=tok*'a*tokand'abracket=tok*'a*tokand'aangle=tok*'a*tokand'asingle_angle=tok*'a*tokand'acomma_list=('a,tok(* the comma *))Common.eitherlistand'acomma_list_dots=('a,tok(* ... in parameters *),tok(* the comma *))Common.either3list(* with tarzan *)(* ------------------------------------------------------------------------- *)(* Ident/Name/LongName *)(* ------------------------------------------------------------------------- *)(* See also analyze_php/namespace_php.ml *)(* Why not factorize Name and XhpName together? Because I was not
* sure originally some analysis should also be applied on Xhp
* classes. Moreover there is two syntax for xhp: :x:base for 'defs'
* and <x:base for 'uses', so having this xhp_tag allow us to easily do
* comparison between xhp identifiers.
*)typeident=(* was called T_STRING in Zend, which are really just LABEL, see lexer.mll*)|Nameofstringwrap(* xhp: for :x:foo the list is ["x";"foo"] *)|XhpNameofxhp_tagwrap(* for :x:foo the list is ["x";"foo"] *)andxhp_tag=stringlist(* The string does not contain the '$'. The info itself will usually
* contain it, but not always! Indeed if the variable we build comes
* from an encapsulated strings as in echo "${x[foo]}" then the 'x'
* will be parsed as a T_STRING_VARNAME, and eventually lead to a DName,
* even if in the text it appears as a name.
* So this token is kind of a FakeTok sometimes.
*
* If at some point you want to do some program transformation,
* you may have to normalize this 'string wrap' before moving it
* to another context !!!
*)typedname=(* D for dollar. Was called T_VARIABLE in the original PHP parser/lexer *)|DNameofstringwrap(* The antislash is a separator but it can also be in the leading position.
* The keyword 'namespace' can also be in a leading position.
*)typequalified_ident=qualified_ident_elementlistandqualified_ident_element=|QIofident(* the ident can be 'namespace' *)|QITokoftok(* '\' *)(* with tarzan *)typename=|XNameofqualified_ident(* Could also transform at parsing time all occurences of self:: and
* parent:: by their respective names. But I prefer to have all the
* PHP features somehow explicitely represented in the AST.
*)|Selfoftok|Parentoftok(* php 5.3 late static binding (no idea why it's useful ...) *)|LateStaticoftok(* with tarzan *)(* ------------------------------------------------------------------------- *)(* Types *)(* ------------------------------------------------------------------------- *)typehint_type=|Hintofname(* only self/parent, no static *)*type_argsoption|HintArrayoftok|HintQuestionof(tok*hint_type)|HintTupleofhint_typecomma_listparen|HintCallbackof(tok(* "function" *)*(hint_typecomma_list_dotsparen)(* params *)*(tok*tokoption*hint_type)option(* return type *))paren|HintShapeoftok(* "shape" *)*(string_const_expr*tok(* '=>' *)*hint_type)comma_listparen|HintTypeConstofhint_type(* lhs *)*tok(* '::' *)*hint_type(* rhs *)|HintVariadicof(tok*hint_typeoption)andtype_args=hint_typecomma_listsingle_angleandtype_params=type_paramcomma_listsingle_angleandtype_param=|TParamofident|TParamConstraintofident*tok(* as *)*class_nameandclass_name=hint_type(* This is used in Cast. For type analysis see type_php.ml *)andptype=|BoolTy|IntTy|DoubleTy(* float *)|StringTy|ArrayTy|ObjectTy(* ------------------------------------------------------------------------- *)(* Expression *)(* ------------------------------------------------------------------------- *)(* I used to have a 'type expr = exprbis * exp_type_info' but it complicates
* many patterns when working on expressions, and it turns out I never
* implemented the type annotater. It's easier to do such an annotater on
* a real AST like the PIL. So just have this file be a simple concrete
* syntax tree and no more.
*)andexpr=(* constant/function/class/method/field/class_cst name.
*
* Now that we've unified lvalue and expr, the use of Id is more
* ambiguous; it can refer to a classname, a function name,
* a constant, etc. You need to match the context of use of Id
* to know in which situation you are (and take care if you use a visitor
* to not always call recursively the visitor/continuation):
*
* - function: Call (Id, _)
* - method: Call (ObjGet (_, Id), _), Call (ClassGet (_, Id), _)
* - class: ClassGet (Id, _), New (Id, _), AssignNew(Id,_, InstanceOf(_, Id)
* and also extends, implements, catch, type
* - class_constant: ClassGet (_, Id) (including the special C::class)
* - field: ObjGet(_, Id)
* - constant: Id
*
* todo: just like we annotate IdVar with scope info, we could annotate
* Id with a kind info.
*)|Idofname(* less: maybe could unify.
* note that IdVar is used not only for local variables
* but also for globals, class variables, parameters, etc.
*)|IdVarofdname*Scope_php.phpscoperef|Thisoftok|Callofexpr*argumentcomma_listparen|ObjGetofexpr*tok(* -> *)*expr|ClassGetofclass_name_reference*tok(* :: *)*expr|ArrayGetofexpr*exproptionbracket|HashGetofexpr*exprbrace|BraceIdentofexprbrace|Derefoftok(* $ *)*expr(* start of expr_without_variable in original PHP lexer/parser terminology *)|Scofscalar|Binaryofexpr*binaryOpwrap*expr|UnaryofunaryOpwrap*expr(* should be a statement ... *)|Assignoflvalue*tok(* = *)*expr|AssignOpoflvalue*assignOpwrap*expr|Postfixofrw_variable*fixOpwrap(* todo: should actually be called Prefix :) *)|InfixoffixOpwrap*rw_variable(* PHP 5.3 allows 'expr ?: expr' hence the 'option' type below
* from www.php.net/manual/en/language.operators.comparison.php#language.operators.comparison.ternary:
* "Since PHP 5.3, it is possible to leave out the middle part of the
* ternary operator. Expression
* expr1 ?: expr3 returns expr1 if expr1 evaluates to TRUE, and expr3
* otherwise."
*)|CondExprofexpr*tok(* ? *)*exproption*tok(* : *)*expr|AssignListoftok(* list *)*list_assigncomma_listparen*tok(* = *)*expr|ArrayLongoftok(* array | shape *)*array_paircomma_listparen(* php 5.4: https://wiki.php.net/rfc/shortsyntaxforarrays *)|ArrayShortofarray_paircomma_listbracket(* facebook-ext: *)|Collectionofname*array_paircomma_listbrace|Newoftok*class_name_reference*argumentcomma_listparenoption|Cloneoftok*expr|AssignRefoflvalue*tok(* = *)*tok(* & *)*lvalue|AssignNewoflvalue*tok(* = *)*tok(* & *)*tok(* new *)*class_name_reference*argumentcomma_listparenoption|CastofcastOpwrap*expr|CastUnsetoftok*expr(* ??? *)|InstanceOfofexpr*tok*class_name_reference(* !The evil eval! *)|Evaloftok*exprparen(* Woohoo, viva PHP 5.3 *)|Lambdaoflambda_def|ShortLambdaofshort_lambda_def(* should be a statement ... *)|Exitoftok*(exproptionparen)option|Atoftok(* @ *)*expr|Printoftok*expr|BackQuoteoftok*encapslist*tok(* should be at toplevel *)|Includeoftok*expr|IncludeOnceoftok*expr|Requireoftok*expr|RequireOnceoftok*expr|Emptyoftok*lvalueparen|Issetoftok*lvaluecomma_listparen(* xhp: *)|XhpHtmlofxhp_html(* php-facebook-ext:
*
* todo: this should be at the statement level as there are only a few
* forms of yield that HPHP supports (e.g. yield <expr>; and
* <lval> = yield <expr>). One could then have a YieldReturn and YieldAssign
* but this may change and none of the analysis in pfff need to
* understand yield so for now just make it simple and add yield
* at the expression level.
*)|Yieldoftok*array_pair(* should have no ref inside *)|YieldBreakoftok*tok(* php-facebook-ext: Just like yield, this should be at the statement level *)|Awaitoftok*expr(* only appear when process sgrep patterns *)|SgrepExprDotsoftok(* unparser: *)|ParenExprofexprparenandscalar=|Cofconstant|Guiloftok(* '"' or b'"' *)*encapslist*tok(* '"' *)|HereDocoftok(* < < < EOF, or b < < < EOF *)*encapslist*tok(* EOF; *)andconstant=|Intofstringwrap(* decimal, hex, or binary int format *)|Doubleofstringwrap(* see also Guil for interpolated strings
* The string does not contain the enclosing '"' or "'".
* It does not contain either the possible 'b' prefix
*)|Stringofstringwrap|PreProcessofcpp_directivewrap(* only appear when process xdebug coverage file *)|XdebugClassofname*class_stmtlist|XdebugResource(* http://php.net/manual/en/language.constants.predefined.php *)andcpp_directive=|Line|File|Dir|ClassC|TraitC|MethodC|FunctionC|NamespaceCandencaps=|EncapsStringofstringwrap|EncapsVaroflvalue(* for "xx {$beer}s" *)|EncapsCurlyoftok*lvalue*tok(* for "xx ${beer}s" *)|EncapsDollarCurlyoftok(* '${' *)*lvalue*tok|EncapsExproftok*expr*tokandfixOp=Ast_generic.incr_decrandbinaryOp=ArithofarithOp|LogicaloflogicalOp|BinaryConcat(* . *)|Pipe|CombinedComparisonandarithOp=|Plus|Minus|Mul|Div|Mod|DecLeft|DecRight|And|Or|XorandlogicalOp=|Inf|Sup|InfEq|SupEq|Eq|NotEq|Identical(* === *)|NotIdentical(* !== *)|AndLog|OrLog|XorLog|AndBool|OrBool(* diff with AndLog ? short-circuit operators ? *)andassignOp=AssignOpArithofarithOp|AssignConcat(* .= *)andunaryOp=|UnPlus|UnMinus|UnBang|UnTildeandcastOp=ptype(* less: merge with foreach_pattern, list($k => $v) = ... is allowed no? *)andlist_assign=|ListVaroflvalue|ListListoftok*list_assigncomma_listparen|ListEmptyandarray_pair=|ArrayExprofexpr|ArrayRefoftok(* & *)*lvalue|ArrayArrowExprofexpr*tok(* => *)*expr|ArrayArrowRefofexpr*tok(* => *)*tok(* & *)*lvalueandxhp_html=|Xhpofxhp_tagwrap*xhp_attributelist*tok(* > *)*xhp_bodylist*xhp_tagoptionwrap|XhpSingletonofxhp_tagwrap*xhp_attributelist*tok(* /> *)andxhp_attribute=xhp_attr_name*tok(* = *)*xhp_attr_valueandxhp_attr_name=stringwrap(* e.g. task-bar *)andxhp_attr_value=|XhpAttrStringoftok(* '"' *)*encapslist*tok(* '"' *)|XhpAttrExprofexprbrace(* sgrep: *)|SgrepXhpAttrValueMvarofstringwrapandxhp_body=|XhpTextofstringwrap|XhpExprofexprbrace|XhpNestedofxhp_htmlandargument=|Argofexpr|ArgRefoftok*w_variable|ArgUnpackoftok*expr(* now unified with expr *)andlvalue=exprandclass_name_reference=expr(* semantic: those grammar rule names were used in the original PHP
* lexer/parser but not enforced. It's just comments. *)andrw_variable=lvalueandr_variable=lvalueandw_variable=lvalue(* static_scalar used to be a special type allowing constants and
* a restricted form of expressions. But it was yet
* another type and it turned out it was making things like spatch
* and visitors more complicated because stuff like "+ 1" could
* be an expr or a static_scalar. We don't need this "isomorphism".
* I never leveraged the specificities of static_scalar (maybe a compiler
* would, but my checker/refactorers/... didn't).
*
* Note that it's not 'type static_scalar = scalar' because static_scalar
* actually allows arrays (why the heck they called it a scalar then ...)
* and plus/minus which are only in expr.
*)andstatic_scalar=expr(* string_const_expr is for shape field names which are permitted to be either
* literal strings or class constants. *)andstring_const_expr=expr(* ------------------------------------------------------------------------- *)(* Statement *)(* ------------------------------------------------------------------------- *)(* By introducing Lambda, expr and stmt are now mutually recursive *)andstmt=|ExprStmtofexpr*tok(* ; *)|EmptyStmtoftok(* ; *)|Blockofstmt_and_deflistbrace|Ifoftok*exprparen*stmt*(* elseif *)if_elseiflist*(* else *)if_elseoption|IfColonoftok*exprparen*tok*stmt_and_deflist*new_elseiflist*new_elseoption*tok*tok(* if(cond):
* stmts; defs;
* elseif(cond):
* stmts;..
* else(cond):
* defs; stmst;
* endif; *)|Whileoftok*exprparen*colon_stmt|Dooftok*stmt*tok*exprparen*tok|Foroftok*tok*for_expr*tok*for_expr*tok*for_expr*tok*colon_stmt|Switchoftok*exprparen*switch_case_list(* if it's a expr_without_variable, the second arg must be a Right variable,
* otherwise if it's a variable then it must be a foreach_variable
*)|Foreachoftok*tok(*'('*)*expr*tokoption(* await *)*tok(* as *)*foreach_pattern*tok(*')'*)*colon_stmt(* example: foreach(expr as $lvalue) { ... }
* foreach(expr as $foreach_varialbe => $lvalue) { ... }
* foreach(expr as list($x, $y)) { ... }
*)|Breakoftok*exproption*tok|Continueoftok*exproption*tok|Returnoftok*exproption*tok|Throwoftok*expr*tok|Tryoftok*stmt_and_deflistbrace*catchlist*finallylist|Echooftok*exprcomma_list*tok|Globalsoftok*global_varcomma_list*tok|StaticVarsoftok*static_varcomma_list*tok|InlineHtmlofstringwrap|Useoftok*use_filename*tok|Unsetoftok*lvaluecomma_listparen*tok|Declareoftok*declarecomma_listparen*colon_stmt(* nested funcs and classes are mostly used inside if() where the
* if() actually behaves like an ifdef in C.
*)(* was in stmt_and_def before *)|FuncDefNestedoffunc_def(* traits are actually not allowed here *)|ClassDefNestedofclass_defandswitch_case_list=|CaseListoftok(* { *)*tokoption(* ; *)*caselist*tok(* } *)|CaseColonListoftok(* : *)*tokoption(* ; *)*caselist*tok(* endswitch *)*tok(* ; *)andcase=|Caseoftok*expr*tok*stmt_and_deflist|Defaultoftok*tok*stmt_and_deflistandif_elseif=tok*exprparen*stmtandif_else=(tok*stmt)andfor_expr=exprcomma_list(* can be empty *)andforeach_pattern=|ForeachVarofforeach_variable|ForeachArrowofforeach_pattern*tok*foreach_pattern|ForeachListoftok(* list *)*list_assigncomma_listparenandforeach_variable=is_ref*lvalueandcatch=tok*(class_name*dname)paren*stmt_and_deflistbraceandfinally=tok*stmt_and_deflistbraceanduse_filename=|UseDirectofstringwrap|UseParenofstringwrapparenanddeclare=ident*static_scalar_affectandcolon_stmt=|SingleStmtofstmt|ColonStmtoftok(* : *)*stmt_and_deflist*tok(* endxxx *)*tok(* ; *)andnew_elseif=tok*exprparen*tok*stmt_and_deflistandnew_else=tok*tok*stmt_and_deflist(* stmt_and_def used to be a special type allowing Stmt or nested functions
* or classes but it was introducing yet another, not so useful, intermediate
* type.
*)andstmt_and_def=stmt(* ------------------------------------------------------------------------- *)(* Function (and method) definition *)(* ------------------------------------------------------------------------- *)andfunc_def={f_attrs:attributesoption;f_tok:tok;(* function *)f_type:function_type;(* "async" always valid ; others only valid for methods *)f_modifiers:modifierwraplist;f_ref:is_ref;(* can be a Name("__lambda", f_tok) when used for lambdas *)f_name:ident;f_tparams:type_paramsoption;(* the dots should be only at the end (unless in sgrep mode) *)f_params:parametercomma_list_dotsparen;(* static-php-ext: *)f_return_type:(tok(* : *)*tokoption(* @ *)*hint_type)option;(* the opening/closing brace can be (fakeInfo(), ';') for abstract methods *)f_body:stmt_and_deflistbrace;}andfunction_type=|FunctionRegular|FunctionLambda|MethodRegular|MethodAbstractandparameter={p_attrs:attributesoption;(* php-facebook-ext: implicit field via constructor parameter,
* this is always None except for constructors and the modifier
* can be only Public or Protected or Private (but never Static, etc).
*)p_modifier:modifierwrapoption;(* php-facebook-ext: to not generate runtime errors if wrong type hint *)p_soft_type:tok(* @ *)option;p_type:hint_typeoption;p_ref:is_ref;p_name:dname;p_default:static_scalar_affectoption;p_variadic:tok(* ... *)option;}andis_ref=tok(* bool wrap ? *)option(* the f_name in func_def should be a fake name *)andlambda_def=(lexical_varsoption*func_def)andlexical_vars=tok(* use *)*lexical_varcomma_listparenandlexical_var=LexicalVarofis_ref*dname(* todo? could factorize with func_def, but this will require many
* elements to be fake token, e.g. the parenthesis for parameters
* when have only one parameter, the brace and semicolon when the body
* is a simple expression
*)andshort_lambda_def={(* "async" is the only valid modifier *)sl_modifiers:modifierwraplist;sl_params:short_lambda_params;sl_tok:tok(* ==> *)option;(* async { } doesn't use a ==> *)sl_body:short_lambda_body;}andshort_lambda_params=|SLSingleParamofparameter|SLParamsofparametercomma_list_dotsparen|SLParamsOmitted(* for async { } lambdas *)andshort_lambda_body=|SLExprofexpr|SLBodyofstmt_and_deflistbrace(* ------------------------------------------------------------------------- *)(* Constant definition *)(* ------------------------------------------------------------------------- *)andconstant_def={cst_toks:tok(* const *)*tok(* = *)*tok(* ; *);cst_name:ident;cst_type:hint_typeoption;cst_val:static_scalar;}(* ------------------------------------------------------------------------- *)(* Class (and interface/trait) definition *)(* ------------------------------------------------------------------------- *)(* I used to have a class_def and interface_def because interface_def
* didn't allow certain forms of statements (methods with a body), but
* with the introduction of traits, it does not make that much sense
* to be so specific, so I factorized things. Classes/interfaces/traits
* are not that different; interfaces are really just abstract traits.
* We also now include enums, which share a bunch of machinery with classes.
*)andclass_def={c_attrs:attributesoption;c_type:class_type;c_name:ident;c_tparams:type_paramsoption;(* PHP uses single inheritance. Interfaces can also use 'extends'
* but we use the c_implements field for that (because it can be a list).
*)c_extends:extendoption;(* For classes it's a list of interfaces, for interfaces a list of other
* interfaces it extends. Traits can also now implement interfaces.
*)c_implements:interfaceoption;(* If this class is an enum, what is the underlying type (and
* constraint) of the enum? *)c_enum_type:enum_typeoption;(* The class_stmt for interfaces are restricted to only abstract methods.
* The class_stmt seems to be unrestricted for traits (it can even
* contain some 'use') *)c_body:class_stmtlistbrace;}andclass_type=|ClassRegularoftok(* class *)|ClassAbstractFinaloftok*tok*tok(* abstract final class *)|ClassFinaloftok*tok(* final class *)|ClassAbstractoftok*tok(* abstract class *)|Interfaceoftok(* interface *)(* PHP 5.4 traits: http://php.net/manual/en/language.oop5.traits.php
* Allow to mixin behaviors and data so it's really just
* multiple inheritance with a cooler name.
* note: traits are allowed only at toplevel.
*)|Traitoftok(* trait *)|Enumoftok(* enum *)andextend=tok*class_nameandinterface=tok*class_namecomma_listandclass_stmt=(* This is abused to represent class constants in enums, so sometimes
* tok is actually fakeInfo. *)|ClassConstantsoftokoption(* abstract *)*tok(* const *)*hint_typeoption*class_constantcomma_list*tok(*;*)|ClassVariablesofclass_var_modifier*(* static-php-ext: *)hint_typeoption*class_variablecomma_list*tok(* ; *)|Methodofmethod_def|XhpDeclofxhp_decl(* php 5.4, 'use' can appear in classes/traits (but not interface) *)|UseTraitoftok(*use*)*class_namecomma_list*(tok(* ; *),trait_rulelistbrace)Common.either(* facebook-ext: 'require' can appear only in traits *)|TraitConstraintoftok(* require *)*trait_constraint_kindwrap*hint_type*tok(* ; *)|ClassTypeoftype_defandclass_constant=ident*static_scalar_affectoptionandclass_variable=dname*static_scalar_affectoptionandclass_var_modifier=|NoModifiersoftok(* 'var' *)|VModifiersofmodifierwraplist(* a few special names: __construct, __call, __callStatic
* ugly: f_body is an empty stmt_and_def for abstract method
* and the ';' is put for the info of the closing brace
* (and the opening brace is a fakeInfo).
*)andmethod_def=func_defandmodifier=|Public|Private|Protected|Static|Abstract|Final|Asyncandxhp_decl=|XhpAttributesDecloftok(* attribute *)*xhp_attribute_declcomma_list*tok(*;*)(* there is normally only one 'children' declaration in a class *)|XhpChildrenDecloftok(* children *)*xhp_children_decl*tok(*;*)|XhpCategoriesDecloftok(* category *)*xhp_category_declcomma_list*tok(*;*)andxhp_attribute_decl=|XhpAttrInheritofxhp_tagwrap|XhpAttrDeclofxhp_attribute_type*xhp_attr_name*xhp_value_affectoption*tokoption(* is required *)andxhp_attribute_type=|XhpAttrTypeofhint_type|XhpAttrVaroftok|XhpAttrEnumoftok(* enum *)*constantcomma_listbraceandxhp_value_affect=tok(* = *)*static_scalar(* Regexp-like syntax. The grammar actually restricts what kinds of
* regexps can be written. For instance pcdata must be nested. But
* here I simplified the type.
*)andxhp_children_decl=|XhpChildofxhp_tagwrap(* :x:frag *)|XhpChildCategoryofxhp_tagwrap(* %x:frag *)|XhpChildAnyoftok|XhpChildEmptyoftok|XhpChildPcdataoftok|XhpChildSequenceofxhp_children_decl*tok(*,*)*xhp_children_decl|XhpChildAlternativeofxhp_children_decl*tok(*|*)*xhp_children_decl|XhpChildMulofxhp_children_decl*tok(* * *)|XhpChildOptionofxhp_children_decl*tok(* ? *)|XhpChildPlusofxhp_children_decl*tok(* + *)|XhpChildParenofxhp_children_declparenandxhp_category_decl=xhp_tagwrap(* %x:frag *)(* those are bad features ... noone should use them. *)andtrait_rule=|InsteadOfofname*tok*ident*tok(* insteadof *)*class_namecomma_list*tok(* ; *)|Asof(ident,name*tok*ident)Common.either*tok(* as *)*modifierwraplist*identoption*tok(* ; *)andtrait_constraint_kind=|MustExtend|MustImplementandenum_type={e_tok:tok;(* : *)e_base:hint_type;e_constraint:(tok(* as *)*hint_type)option;}(* ------------------------------------------------------------------------- *)(* Type definition *)(* ------------------------------------------------------------------------- *)(* facebook-ext: *)andtype_def={t_tok:tok;(* type/newtype *)t_name:ident;t_tparams:type_paramsoption;t_tconstraint:(tok(* as *)*hint_type)option;t_tokeq:tok;(* = *)t_kind:type_def_kind;t_sc:tok;(* ; *)}andtype_def_kind=|Aliasofhint_type|Newtypeofhint_type|ClassConstTypeofhint_typeoption(* ------------------------------------------------------------------------- *)(* Other declarations *)(* ------------------------------------------------------------------------- *)andglobal_var=|GlobalVarofdname|GlobalDollaroftok*r_variable|GlobalDollarExproftok*exprbraceandstatic_var=dname*static_scalar_affectoptionandstatic_scalar_affect=tok(* = *)*static_scalar(* the qualified_ident can have a leading '\' *)andnamespace_use_rule=|ImportNamespaceofqualified_ident|AliasNamespaceofqualified_ident*tok(* as *)*ident(* ------------------------------------------------------------------------- *)(* User attributes, a.k.a annotations *)(* ------------------------------------------------------------------------- *)(* HPHP extension similar to http://en.wikipedia.org/wiki/Java_annotation *)andattribute=|Attributeofstringwrap|AttributeWithArgsofstringwrap*static_scalarcomma_listparenandattributes=attributecomma_listangle(* ------------------------------------------------------------------------- *)(* The toplevels elements *)(* ------------------------------------------------------------------------- *)(* For parsing reasons and estet I think it's better to differentiate
* nested functions and toplevel functions.
* update: sure? ast_php_simple simplify things.
* Also it's better to group the toplevel statements together (StmtList below),
* so that in the database later they share the same id.
*
* Note that nested functions are usually under a if(defined(...)) at
* the toplevel. There is no ifdef in PHP so they reuse 'if'.
*)andtoplevel=|StmtListofstmtlist|FuncDefoffunc_def|ClassDefofclass_def(* PHP 5.3, see http://us.php.net/const *)|ConstantDefofconstant_def(* facebook extension *)|TypeDefoftype_def(* PHP 5.3, see http://www.php.net/manual/en/language.namespaces.rules.php*)(* the qualified_ident below can not have a leading '\' *)|NamespaceDefoftok*qualified_ident*tok(* ; *)(* when there is no qualified_ident, this means global scope *)|NamespaceBracketDefoftok*qualified_identoption*toplevellistbrace|NamespaceUseoftok*namespace_use_rulecomma_list*tok(* ; *)(* old: | Halt of tok * unit paren * tok (* __halt__ ; *) *)|NotParsedCorrectlyoftoklist(* when Flag.error_recovery = true *)|FinalDefoftok(* EOF *)andprogram=toplevellist(* with tarzan *)(* ------------------------------------------------------------------------- *)(* Entity and any *)(* ------------------------------------------------------------------------- *)(* The goal of the entity type is to lift up important entities which
* are originally nested in the AST such as methods.
*
* history: was in ast_entity_php.ml before but better to put everything
* in one file.
*)typeentity=|FunctionEoffunc_def|ClassEofclass_def|ConstantEofconstant_def|TypedefEoftype_def|StmtListEofstmtlist|MethodEofmethod_def|ClassConstantEofclass_constant|ClassVariableEofclass_variable*modifierlist|XhpAttrEofxhp_attribute_decl|MiscEoftoklisttypeany=|Exprofexpr|Stmt2ofstmt|StmtAndDefsofstmt_and_deflist|Topleveloftoplevel|Programofprogram|Entityofentity|Argumentofargument|Argumentsofargumentcomma_list|Parameterofparameter|Parametersofparametercomma_list_dotsparen|Bodyofstmt_and_deflistbrace|ClassStmtofclass_stmt|ClassConstant2ofclass_constant|ClassVariableofclass_variable|ListAssignoflist_assign|ColonStmt2ofcolon_stmt|Case2ofcase|XhpAttributeofxhp_attribute|XhpAttrValueofxhp_attr_value|XhpHtml2ofxhp_html|XhpChildrenDecl2ofxhp_children_decl|Infooftok|InfoListoftoklist|Ident2ofident|Hint2ofhint_type(* with tarzan *)(*****************************************************************************)(* Some constructors *)(*****************************************************************************)letnoScope()=ref(Scope_code.NoScope)letfakeInfo?(next_to=None)str={Parse_info.token=Parse_info.FakeTokStr(str,next_to);transfo=Parse_info.NoTransfo;}(*****************************************************************************)(* Wrappers *)(*****************************************************************************)letunwrap=fstletunparen(_a,b,_c)=bletunbrace=unparenletunbracket=unparenletuncommaxs=Common.map_filter(function|Lefte->Somee|Right_info->None)xsletuncomma_dotsxs=Common.map_filter(function|Left3e->Somee|Right3_info|Middle3_info->None)xsletunargarg=matchargwith|Arge->e|ArgRef_->failwith"Found a ArgRef"|ArgUnpack_->failwith"Found a ArgUnpack"letunargsxs=uncommaxs|>Common.partition_either(function|Arge->Lefte|ArgRef(_tok,e)->Right(e)|ArgUnpack(_tok,e)->Right(e))letunmodifiersclass_vars=matchclass_varswith|NoModifiers_->[]|VModifiersxs->List.mapunwrapxsletmap_parenf(lp,x,rp)=(lp,fx,rp)letmap_comma_listfxs=List.map(funx->matchxwith|Lefte->Left(fe)|Righttok->Righttok)xs(*****************************************************************************)(* Abstract line *)(*****************************************************************************)(* When we have extended the AST to add some info about the tokens,
* such as its line number in the file, we can not use anymore the
* ocaml '=' to compare AST elements. To overcome this problem, to be
* able to use again '=', we just have to get rid of all those extra
* information, to "abstract those line" (al) information.
*)letal_infox={xwithParse_info.token=Parse_info.Ab}(*****************************************************************************)(* Views *)(*****************************************************************************)(* examples:
* inline more static funcall in expr type or variable type
*)(*****************************************************************************)(* Helpers (could also be put in lib_parsing.ml) *)(*****************************************************************************)letstr_of_idente=matchewith|Namex->unwrapx|XhpName(xs,_tok)->":"^(Common.join":"xs)letinfo_of_idente=matchewith|(Name(_x,y))->y|(XhpName(_x,y))->yletstr_of_dname(DNamex)=unwrapxletinfo_of_dname(DName(_x,y))=yletinfo_of_qualified_ident=function|[]->raiseImpossible|(QIx)::_xs->info_of_identx|(QIToktok)::_xs->tokletinfo_of_namex=matchxwith|Selftok|Parenttok|LateStatictok->tok|XNamexs->(matchxswith|[]->raiseImpossible|x::_->(matchxwith|QIToktok->tok|QIid->info_of_identid))exceptionTodoNamespaceoftok(* todo? copy the one in cmf/uses_module_helpers.ml now? *)letstr_of_namex=matchxwith|XName[QIx]->str_of_identx|Selftok|Parenttok|LateStatictok->Parse_info.str_of_infotok|XNamequ->raise(TodoNamespace(info_of_qualified_identqu))letstr_of_name_namespacex=matchxwith|Selftok|Parenttok|LateStatictok->Parse_info.str_of_infotok|XNamexs->xs|>List.map(function|QITok_->"\\"|QIid->str_of_identid)|>Common.join""letname_of_class_namex=matchxwith|Hint(name,_targs)->name|_->raiseImpossibleletstr_of_class_namex=letname=name_of_class_namexinstr_of_namenameletident_of_class_namex=letname=name_of_class_namexinmatchnamewith|XName[QIx]->x|XNamequ->raise(TodoNamespace(info_of_qualified_identqu))|Self_tok|Parent_tok|LateStatic_tok->raiseImpossible