package pfff

  1. Overview
  2. Docs
Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source

Source file lib_parsing_php.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
(*s: lib_parsing_php.ml *)
(*s: Facebook copyright *)
(* Yoann Padioleau
 * 
 * Copyright (C) 2009-2011 Facebook
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * version 2.1 as published by the Free Software Foundation, with the
 * special exception on linking described in file license.txt.
 * 
 * This library is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the file
 * license.txt for more details.
 *)
(*e: Facebook copyright *)
open Common

(*s: basic pfff module open and aliases *)
open Cst_php 

module Ast = Cst_php
module Flag = Flag_parsing
(*e: basic pfff module open and aliases *)
module V = Visitor_php 
module V2 = Map_php 

module PI = Parse_info

(*****************************************************************************)
(* Wrappers *)
(*****************************************************************************)
(* Printing helpers obtained from Common2.mk_pr2_wrappers, parameterized
 * by the Flag.verbose_parsing flag. Presumably they only print when
 * that flag is set (to confirm against Common2). The second helper is
 * bound with a leading underscore because the code visible in this file
 * does not use it.
 *)
let pr2, _pr2_once = Common2.mk_pr2_wrappers Flag.verbose_parsing

(*****************************************************************************)
(* Filenames *)
(*****************************************************************************)

(* [is_php_script file] opens [file], reads its first line and tells
 * whether that line matches one of the known PHP shebang patterns.
 * A file with no line to read (End_of_file) is not a PHP script.
 *)
let is_php_script file = 
  let shebang_patterns = [
    "#!/usr/.*/php";
    "#!/bin/env php";
    "#!/usr/bin/env php";
  ] in
  Common.with_open_infile file (fun chan ->
    try 
      let first_line = input_line chan in
      (* patterns are tried in order, stopping at the first match *)
      List.exists (fun pattern -> first_line =~ pattern) shebang_patterns
    with End_of_file -> false
  )

(* [is_php_filename filename] tells whether [filename] ends with one of
 * the extensions treated as PHP source here: .php, .phpt or .inc.
 *
 * Every pattern is anchored with a trailing $ so that only the end of
 * the name is considered. The .inc pattern used to lack this anchor;
 * assuming (=~) is a prefix match (Str.string_match style, to confirm
 * against Common), it then accepted any name merely containing ".inc",
 * such as "foo.inc.bak" or "foo.include".
 *)
let is_php_filename filename =
  (filename =~ ".*\\.php$") || (filename =~ ".*\\.phpt$") ||
  (* hotcrp uses this extension *)
  (filename =~ ".*\\.inc$") ||
  (* hack uses the .hhi extension (see is_hhi_filename).
   * todo: can not include those files for now because
   * they conflict with pfff/data/php_stdlib and generate lots
   * of DUPE in codegraph
   * 
   * (filename =~ ".*\\.hhi") 
   *)
  false

(* [is_hhi_filename filename] tells whether [filename] ends with the
 * .hhi extension.
 *)
let is_hhi_filename filename =
  filename =~ ".*\\.hhi$"

(* [is_php_filename_phar filename] tells whether [filename] ends with
 * the .phar extension.
 *)
let is_php_filename_phar filename =
  filename =~ ".*\\.phar$"

(* [is_php_file filename] accepts a file when it is not a .phar archive
 * and either its name has a PHP extension or its first line is a PHP
 * shebang. The .phar check comes first, so is_php_script (which opens
 * the file) is never run on a .phar name.
 *)
let is_php_file filename =
  if is_php_filename_phar filename
  then false
  else is_php_filename filename || is_php_script filename

(* 
 * In command line tools like git or mercurial, many operations work 
 * when a file, a set of files, or even dirs are passed as parameters.
 * We want the same with pfff, hence this small helper function that
 * transforms such files_or_dirs into a flat set of filenames.
 *)
(* [find_source_files_of_dir_or_files ?verbose ?include_hack xs] expands
 * [xs] with Common.files_of_dir_or_files_no_vcs_nofilter, keeps the
 * entries that still exist and are PHP files (plus .hhi files when
 * [include_hack] is set), and passes the result through Common.sort.
 * When [verbose] is set, every rejected filename is reported with pr2.
 *)
let find_source_files_of_dir_or_files ?(verbose=false) ?(include_hack=false) xs = 
  let is_wanted file =
    is_php_file file || (include_hack && is_hhi_filename file)
  in
  let keep file =
    (* note: there was a possible race here because between the time we
     * do the find and the time we call is_php_file(), the file may have
     * disappeared (this happens for instance because of watchman).
     * Hence the Sys.file_exists guard. There is still a race between
     * the call to file_exists and is_php_file, but it is far shorter.
     *)
    let ok = Sys.file_exists file && is_wanted file in
    if verbose && not ok
    then pr2 ("not analyzing: " ^ file);
    ok
  in
  Common.files_of_dir_or_files_no_vcs_nofilter xs
  |> List.filter keep
  |> Common.sort

(*****************************************************************************)
(* Extract infos *)
(*****************************************************************************)
(*s: extract infos *)
(* [extract_info_visitor recursor] builds a visitor that records every
 * token info whose token is an OriginTok, hands that visitor to
 * [recursor], and returns the recorded infos after a List.rev.
 *)
let extract_info_visitor recursor = 
  let collected = ref [] in
  let hooks = { V.default_visitor with
    V.kinfo = (fun (_k, _) info -> 
      (* most of the time when you use ii_of_any, you want to use
       * functions like max_min_pos which work only on origin tokens,
       * hence the filtering done here.
       * 
       * ugly: For PHP we use a fakeInfo only for generating a fake left
       * brace for abstract methods.
       *)
      match info.PI.token with
      | PI.OriginTok _ -> Common.push info collected
      | _ -> ()
    )
  } in
  let visit = V.mk_visitor hooks in
  recursor visit;
  List.rev !collected
(*x: extract infos *)
(* [ii_of_any any] returns the origin token infos found while visiting
 * [any], as collected by extract_info_visitor.
 *)
let ii_of_any any = 
  let visit_any visit = visit any in
  extract_info_visitor visit_any
(*e: extract infos *)

(*****************************************************************************)
(* Abstract position *)
(*****************************************************************************)
(*s: abstract infos *)
(* [abstract_position_visitor recursor] builds a Map_php visitor whose
 * kinfo hook replaces the token field of every info with Parse_info.Ab,
 * then returns the result of applying [recursor] to that visitor.
 *)
let abstract_position_visitor recursor = 
  let strip_position (_k, _) info =
    { info with PI.token = PI.Ab }
  in
  recursor
    (V2.mk_visitor { V2.default_visitor with V2.kinfo = strip_position })
(*x: abstract infos *)
(* [abstract_position_info_any x] maps [x] through the vany entry of the
 * visitor built by abstract_position_visitor, i.e. with every token
 * info set to Parse_info.Ab.
 *)
let abstract_position_info_any x = 
  let map_any visitor = visitor.V2.vany x in
  abstract_position_visitor map_any
(*e: abstract infos *)

(*****************************************************************************)
(* Max min, range *)
(*****************************************************************************)
(*s: max min range *)
(*x: max min range *)

(* [range_of_origin_ii ii] keeps only the origin tokens of [ii] and
 * returns [Some (start, stop)], where [start] is the position of the
 * minimal token and [stop] is the position of the maximal token plus
 * the length of its string. Any exception raised while computing this
 * yields [None].
 *
 * NOTE(review): the handler is a catch-all; presumably the expected
 * failure is an input without origin tokens, but it also hides any
 * other exception, including a failed assert. To confirm against
 * Parse_info before narrowing it.
 *)
let range_of_origin_ii (ii : Cst_php.tok list) : (int * int) option =
  let origin_toks = List.filter PI.is_origintok ii in
  try 
    let (first, last) = PI.min_max_ii_by_pos origin_toks in
    assert (PI.is_origintok last);
    assert (PI.is_origintok first);
    let last_len = String.length (PI.str_of_info last) in
    let start = PI.pos_of_info first in
    let stop = PI.pos_of_info last + last_len in
    Some (start, stop)
  with _ -> 
    None
(*e: max min range *)

(*****************************************************************************)
(* Ast getters *)
(*****************************************************************************)
(*s: ast getters *)
(* [get_funcalls_any any] visits [any] and returns the names of the
 * callees of every call expression of the form [Call (Id name, _)].
 * Names are stored as keys of a hash table, so each name is recorded
 * once; the result is produced by Common.hashset_to_list.
 *)
let get_funcalls_any any = 
  let called = Hashtbl.create 101 in
  let hooks = { V.default_visitor with
    (* TODO if nested function ??? still wants to report ? *)
    V.kexpr = (fun (k, _vx) expr ->
      (match expr with
      | Call (Id callname, _args) ->
          Hashtbl.replace called (Cst_php.str_of_name callname) true
      | _ -> ()
      );
      (* always continue into sub-expressions *)
      k expr
    );
  } 
  in
  V.mk_visitor hooks any;
  Common.hashset_to_list called
(*x: ast getters *)
(*x: ast getters *)
(* [get_constant_strings_any any] visits [any] and returns the contents
 * of the String constants and EncapsString parts it meets. Strings are
 * stored as keys of a hash table, so each one is recorded once; the
 * result is produced by Common.hashset_to_list.
 *)
let get_constant_strings_any any = 
  let seen = Hashtbl.create 101 in
  let record str = Hashtbl.replace seen str true in
  let hooks = { V.default_visitor with
    V.kconstant = (fun (k, _vx) cst ->
      match cst with
      | String (str, _ii) -> record str
      | _ -> k cst
    );
    V.kencaps = (fun (k, _vx) part ->
      match part with
      | EncapsString (str, _ii) -> record str
      | _ -> k part
    );
  }
  in
  let visit = V.mk_visitor hooks in
  visit any;
  Common.hashset_to_list seen
(*e: ast getters *)

(* [get_static_vars_any any] returns, via V.do_visit_with_ref, the
 * dnames declared by the StaticVars statements found in [any].
 * The visit does not continue inside a StaticVars statement.
 *)
let get_static_vars_any any =
  let hooks acc = { V.default_visitor with
    V.kstmt = (fun (k, _vx) stmt ->
      match stmt with
      | StaticVars (_tok, decls, _tok2) ->
          List.iter
            (fun (dname, _affect_opt) -> Common.push dname acc)
            (Ast.uncomma decls)
      | _ -> 
          k stmt
    );
  } in
  V.do_visit_with_ref hooks any
  
(* todo? do last_stmt_is_a_return isomorphism ? *)
(* [get_returns_any any] returns, via V.do_visit_with_ref, the
 * expressions of the Return statements of [any] that carry a value.
 * The visit does not continue inside such a Return statement.
 *)
let get_returns_any any = 
  let hooks acc = { V.default_visitor with
    V.kstmt = (fun (k, _vx) stmt ->
      match stmt with
      | Return (_tok1, Some e, _tok2) -> Common.push e acc
      | _ -> k stmt
    );
  } in
  V.do_visit_with_ref hooks any

(* [get_vars_any any] returns, via V.do_visit_with_ref, the dnames of
 * the IdVar expressions found in [any], plus the dnames listed in the
 * use clause of each Lambda. Lambda bodies are also visited.
 *)
let get_vars_any any = 
  let hooks acc = { V.default_visitor with
    V.kexpr = (fun (k, _vx) expr ->
      match expr with
      | IdVar (dname, _scope) ->
          Common.push dname acc

      (* todo? sure ?? *)
      | Lambda (l_use, _def) ->
          (match l_use with
          | None -> ()
          | Some (_tok, uses) ->
              uses |> Ast.unparen |> Ast.uncomma |> List.iter (function
              | LexicalVar (_is_ref, dname) ->
                  Common.push dname acc
              )
          );
          k expr
      | _ -> k expr
    );
  } in
  V.do_visit_with_ref hooks any

(*****************************************************************************)
(* Ast adapters *)
(*****************************************************************************)

(* [top_statements_of_program ast] concatenates, in order, the
 * statements of every StmtList toplevel element of [ast]. All other
 * kinds of toplevel elements contribute nothing.
 *)
let top_statements_of_program ast = 
  let stmts_of_toplevel = function
    | StmtList stmts -> stmts
    | FinalDef _ | NotParsedCorrectly _
    | ClassDef _ | FuncDef _ | ConstantDef _ | TypeDef _
    | NamespaceDef _ | NamespaceBracketDef _ | NamespaceUse _
        -> []
  in
  List.flatten (List.map stmts_of_toplevel ast)

(* We often do some analysis on "unit" of code like a function,
 * a method, or toplevel statements. One can not use the
 * 'toplevel' type for that because it contains Class and Interface which
 * are too coarse grained; the method granularity is better.
 * 
 * For instance it makes sense to have a CFG for a function, a method,
 * or toplevel statements but a CFG for a class does not make sense.
 *)
(* [functions_methods_or_topstms_of_program prog] visits [prog] and
 * returns a triple (functions, methods, toplevel statement lists).
 * Function definitions are dispatched on their f_type: regular
 * functions and regular/abstract methods are recorded, lambdas are
 * ignored. The kfunc_def hook does not call its continuation, and a
 * StmtList toplevel is recorded without visiting its content.
 * The three lists are returned as accumulated, without reversal.
 *)
let functions_methods_or_topstms_of_program prog =
  let funcs = ref []
  and methods = ref []
  and toplevels = ref [] in

  let hooks = { V.default_visitor with
    V.ktop = (fun (k, _) top ->
      match top with
      | StmtList stmts -> Common.push stmts toplevels
      | _ -> k top
    );
    V.kfunc_def = (fun (_k, _) def -> 
      match def.f_type with
      | FunctionLambda -> ()
      | FunctionRegular -> Common.push def funcs
      | MethodRegular | MethodAbstract -> Common.push def methods
    );
  }
  in
  V.mk_visitor hooks (Program prog);
  (!funcs, !methods, !toplevels)


(* do some isomorphisms for declaration vs assignment *)
(* [get_vars_assignements_any recursor] visits [recursor] and collects
 * (variable name, assigned expression) pairs from two sources:
 *  - StaticVars declarations that have an initializer;
 *  - Assign and AssignOp expressions whose left-hand side is a plain
 *    IdVar.
 * The pairs are then grouped with Common.group_assoc_bykey_eff.
 *)
let get_vars_assignements_any recursor = 
  (* We want to group the assignments by variable later, with a function
   * like Common.group_by_xxx, which requires identical keys. Each dname
   * occurrence has a different location, so a dname can not be used as
   * the key; the name of the variable can, hence the use of
   * Ast.str_of_dname.
   *)
  let hooks acc =
    let record name rhs = Common.push (name, rhs) acc in
    { V.default_visitor with
      V.kstmt = (fun (k, _) stmt ->
        match stmt with
        | StaticVars (_tok, decls, _tok2) ->
            decls |> Ast.uncomma |> List.iter (fun (dname, init_opt) -> 
              let name = Ast.str_of_dname dname in
              match init_opt with
              | None -> ()
              | Some (_tok, scalar) -> record name scalar
            )
        | _ -> 
            k stmt
      );

      V.kexpr = (fun (k, _vx) expr ->
        match expr with
        | Assign (lval, _, rhs) 
        | AssignOp (lval, _, rhs) ->
            (* the expression itself can contain assignments, so visit
             * it before recording this one *)
            k expr; 
            
            (* for now we handle only simple direct assignments to
             * simple variables *)
            (match lval with
            | IdVar (dname, _scope) ->
                record (Ast.str_of_dname dname) rhs
            | _ ->
                ()
            )
        (* todo? AssignRef AssignNew ? *)
        | _ -> 
            k expr
      );
    }
  in
  V.do_visit_with_ref hooks recursor |> Common.group_assoc_bykey_eff

(*e: lib_parsing_php.ml *)
OCaml

Innovation. Community. Security.