package pfff

  1. Overview
  2. Docs
Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source

Source file unit_matcher.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
open Common
open OUnit

(*****************************************************************************)
(* Sgrep Fuzzy Unit tests *)
(*****************************************************************************)

(* See https://github.com/facebook/pfff/wiki/Sgrep *)

(* run also by sgrep -test *)
(* Single OUnit case exercising the fuzzy sgrep matcher over a table of
 * (pattern, code, expected outcome) triples. Both strings of a triple are
 * parsed with [ast_fuzzy_of_string] and handed to
 * [Matching_fuzzy.match_trees_trees]; a non-empty result list counts as
 * a match.
 *)
let sgrep_fuzzy_unittest ~ast_fuzzy_of_string =
  let run_all () =

    (* each entry: (pattern string, code string, should_match) *)
    let triples = [

      (* ------------ *)
      (* spacing *)
      (* ------------ *)

      (* identical text matches, a differing argument does not *)
      ("foo(1,2);", "foo(1,2);", true);
      ("foo(1,3);", "foo(1,2);", false);
      (* extra blanks or line breaks in the code are irrelevant *)
      ("foo(1,2);", "foo(1,     2);", true);
      ("foo(1,2);", "foo(1,     
                        2);", true);
      (* a comment sitting between the arguments is irrelevant too *)
      ("foo(1,2);", "foo(1, /* foo */ 2);", true);

      (* ------------ *)
      (* metavariables *)
      (* ------------ *)

      (* in identifier position *)
      ("class $X { int x; }", "class Foo { int x; }", true);
      (* in expression position *)
      ("foo($X);",  "foo(1);", true);
      ("foo($X);",  "foo(1+1);", true);
      (* in lvalue position *)
      ("$X->method();",  "this->method();", true);
(*TODO      "$X->method();"  ,  "this->foo()->method();", true; *)
(* this will work though: "->method();"  ,  "$this->foo()->method();", true; *)

      (* a metavariable used twice must bind the same code both times,
       * a la Prolog *)
      ("$X & $X;", "(a | b) & (a | b);", true);
      ("foo($X, $X);", "foo(a, a);", true);
      ("foo($X, $X);", "foo(a, b);", false);

      (* many arguments metavariables *)
(*TODO      "foo($MANYARGS);", "foo(1,2,3);", true; *)

      (* standing for a function name *)
      ("$X(1,2);", "foo(1,2);", true);
      (* standing for a class name *)
      ("$X::foo();", "Ent::foo();", true);
      (* metavariable string for identifiers *)
(*TODO      "foo('X');", "foo('a_func');", true; *)
      (* standing for a reference argument *)
      ("foo($X,$Y);", "foo(&a, b);", true);
      (* standing for the class name after new *)
      ("new $X(...);", "new $dyn();", true);
      ("new $X(...);", "new self();", true);

      (* ------------ *)
      (* ... *)
      (* ------------ *)

      (* as a class body *)
      ("class Foo { ... }", "class Foo { int x; }", true);

      (* as the argument list of a call *)
      ("foo(...);", "foo();", true);
      ("foo(...);", "foo(1);", true);
      ("foo(...);", "foo(1,2);", true);
      ("foo($X,...);", "foo(1,2);", true);
      (* a trailing ... may also stand for no extra argument at all *)
      ("foo($X,...);", "foo(1);", true);
      (* TODO: foo(..., 3, ...), foo(1,2,3,4) *)

      (* inside array(...) *)
      ("foo($X, array(...));",  "foo(1, array(2, 3));", true);

      (* '...' in strings *)
(*TODO      "foo(\"...\");", "foo(\"a string\");", true; *)
(*TODO      "foo(\"...\");", "foo(\"a string\" . \"another string\");", true;*)

      (* as constructor arguments *)
      ("new Foo(...);","new Foo(1);", true);
      ("new Foo(...);","new Foo();", true);

      (* nested in a larger expression *)
      ("strstr(...) == false;", "strstr(x)==false;", true);

      (* ------------ *)
      (* Misc isomorphisms *)
      (* ------------ *)
(*TODO      "new Foo(...);","new Foo;", true; *)

    ]
    in

    (* Parse one triple, run the matcher and assert the expected outcome. *)
    let check_triple (spattern, scode, should_match) =
      let pattern = ast_fuzzy_of_string spattern in
      let code = ast_fuzzy_of_string scode in
      let found = Matching_fuzzy.match_trees_trees pattern code <> [] in
      match should_match with
      | true ->
        assert_bool
          (spf "pattern:|%s| should match |%s" spattern scode)
          found
      | false ->
        assert_bool
          (spf "pattern:|%s| should not match |%s" spattern scode)
          (not found)
    in
    List.iter check_triple triples
  in
  "sgrep features" >:: run_all


(*****************************************************************************)
(* Sgrep generic Unit tests *)
(*****************************************************************************)

(* Single OUnit case exercising the generic sgrep matcher over a table of
 * (pattern, code, expected outcome) triples. Both strings of a triple are
 * parsed with [any_gen_of_string] and handed to
 * [Sgrep_generic.match_any_any]; a non-empty result list counts as a
 * match. A [Parsing.Parse_error] raised while handling a triple is turned
 * into a [Failure] naming the two strings involved.
 *)
let sgrep_gen_unittest ~any_gen_of_string =
  let run_all () =

    (* each entry: (pattern string, code string, should_match) *)
    let triples = [
      (* right now any_gen_of_string use the Python sgrep_spatch_pattern
       * parser so the syntax below must be valid Python code
       *)

      (* ------------ *)
      (* spacing *)
      (* ------------ *)

      (* identical text matches, a differing argument does not *)
      ("foo(1,2)", "foo(1,2)", true);
      ("foo(1,3)", "foo(1,2)", false);

      (* extra blanks or line breaks in the code are irrelevant *)
      ("foo(1,2)", "foo(1,     2)", true);
      ("foo(1,2)", "foo(1,     
                        2)", true);
      (* a comment sitting between the arguments is irrelevant too *)
      ("foo(1,2)", "foo(1, #foo
                       2)", true);

      (* ------------ *)
      (* metavariables *)
      (* ------------ *)

      (* in identifier position *)
      ("import $X", "import Foo", true);
      ("x.$X", "x.foo", true);

      (* in expression position *)
      ("foo($X)",  "foo(1)", true);
      ("foo($X)",  "foo(1+1)", true);

      (* in lvalue position *)
      ("$X.method()",  "foo.method()", true);
      ("$X.method()"  ,  "foo.bar.method()", true);

      (* a metavariable used twice must bind the same code both times,
       * a la Prolog *)
      ("$X & $X", "(a | b) & (a | b)", true);
      ("foo($X, $X)", "foo(a, a)", true);
      ("foo($X, $X)", "foo(a, b)", false);

      (* standing for a function name *)
      ("$X(1,2)", "foo(1,2)", true);
      (* standing for the receiver of a method call *)
      ("$X.foo()", "Bar.foo()", true);
      (* should not match infix expressions though, even if those
       * are transformed internally in Calls *)
      ("$X(...)", "a+b", false);

      (* standing for a statement *)
      ("if($X):
 $S
",
       "if(True):
  return 1
", true);

      (* metavariable string for identifiers *)
(*     "foo('X');", "foo('a_func');", true; *)
      (* many arguments metavariables *)
(*      "foo($MANYARGS);", "foo(1,2,3);", true; *)

      (* ------------ *)
      (* '...' *)
      (* ------------ *)

      (* as the argument list of a call *)
      ("foo(...)", "foo()", true);
      ("foo(...)", "foo(1)", true);
      ("foo(...)", "foo(1,2)", true);
      ("foo($X,...)", "foo(1,2)", true);

      (* a trailing ... may also stand for no extra argument at all *)
      ("foo($X,...)", "foo(1)", true);
      ("foo(..., 3, ...)", "foo(1,2,3,4)", true);

      (* nested in a larger expression *)
      ("strstr(...) == False", "strstr(x)==False", true);

      (* as the whole content of a string literal *)
      ("foo(\"...\")", "foo(\"this is a long string\")", true);

      (* for stmts *)
(*      "class Foo { ... }", "class Foo { int x; }", true; *)
      (* '...' in strings *)
(*      "foo(\"...\");", "foo(\"a string\" . \"another string\");", true;*)
      (* '...' in new *)
(*      "new Foo(...);","new Foo(1);", true;*)
(*      "new Foo(...);","new Foo();", true; *)
      (* '...' in arrays *)
(*      "foo($X, array(...));",  "foo(1, array(2, 3));", true; *)

      (* ------------ *)
      (* Misc isomorphisms *)
      (* ------------ *)

      (* regexp matching in strings *)
      ("foo(\"=~/a+/\")", "foo(\"aaaa\")", true);
      ("foo(\"=~/a+/\")", "foo(\"bbbb\")", false);
(*      "new Foo(...);","new Foo;", true; *)

    ]
    in

    (* Parse one triple, run the matcher and assert the expected outcome.
     * The handler covers parsing, matching and the assertion alike. *)
    let check_triple (spattern, scode, should_match) =
      try
        let pattern = any_gen_of_string spattern in
        let code = any_gen_of_string scode in
        let found = Sgrep_generic.match_any_any pattern code <> [] in
        if should_match
        then
          assert_bool
            (spf "pattern:|%s| should match |%s" spattern scode)
            found
        else
          assert_bool
            (spf "pattern:|%s| should not match |%s" spattern scode)
            (not found)
      with Parsing.Parse_error ->
        failwith (spf "problem parsing %s or %s" spattern scode)
    in
    List.iter check_triple triples
  in
  "sgrep features" >:: run_all

(*****************************************************************************)
(* Spatch Unit tests *)
(*****************************************************************************)

(* See https://github.com/facebook/pfff/wiki/Spatch *)

(* run by spatch -test *)
(* Regression test for the fuzzy spatch engine.
 *
 * Every file ending in .exp found in tests/fuzzy/spatch/ under
 * [Config_pfff.path] is an expected result. For NAME<digits>.exp the
 * patch is read from NAME.spatch and applied to NAME<digits>.fuzzy, so
 * several numbered source/expected pairs can share one patch file.
 *
 * ~ast_fuzzy_of_string: parser passed to [Spatch_fuzzy.parse] for the
 *   pattern text.
 * ~parse_file: maps a source file name to a (trees, tokens) pair.
 *
 * Fails through [assert_failure] when the result differs from the .exp
 * file, and raises [Failure] when an .exp file name does not have the
 * NAME<digits>.exp shape.
 *)
let spatch_fuzzy_unittest ~ast_fuzzy_of_string ~parse_file = 
  "spatch regressions files" >:: (fun () ->

    let testdir = Filename.concat Config_pfff.path "tests/fuzzy/spatch/" in
    let expfiles = Common2.glob (testdir ^ "*.exp") in

    expfiles +> List.iter (fun expfile ->
      (* Only the last path component is checked against the regexp. The
       * match is anchored at the start of the string and there is no
       * non-greedy operator, so running it on the full path would reject
       * every file as soon as a directory name contains a digit, a dot or
       * a dash (a checkout in pfff-0.29/ for instance).
       *)
      let dir = Filename.dirname expfile in
      let base = Filename.basename expfile in
      (* letters/underscores (possibly none), then optional digits *)
      if base =~ "\\([a-zA-Z_]*\\)\\([0-9]*\\)\\.exp$" then begin
        let (name, variant) = Common.matched2 base in
        let prefix = Filename.concat dir name in
        let spatchfile = prefix ^ ".spatch" in
        let srcfile = prefix ^ variant ^ ".fuzzy" in

        let pattern =
          Spatch_fuzzy.parse
            ~pattern_of_string:ast_fuzzy_of_string
            ~ii_of_pattern:Lib_ast_fuzzy.toks_of_trees
            spatchfile
        in
        let trees, toks = 
          parse_file srcfile
        in
        let was_modified = Spatch_fuzzy.spatch pattern trees in

        (* File to compare with the expectation: the unparsed tokens,
         * written to a temporary file, when the patch changed something,
         * the untouched source file otherwise.
         *)
        let file_res = 
          if was_modified
          then begin
            let s = Lib_unparser.string_of_toks_using_transfo toks in
            let tmpfile = Common.new_temp_file "spatch_test" ".fuzzy" in
            Common.write_file ~file:tmpfile s;
            tmpfile
          end
          else srcfile
        in
        let diff = Common2.unix_diff file_res expfile in
        diff +> List.iter pr;
        (* Only two or more lines of diff output count as a failure.
         * NOTE(review): presumably a single line is not a real
         * difference; confirm against Common2.unix_diff.
         *)
        match diff with
        | _ :: _ :: _ ->
          assert_failure
            (spf "spatch %s on %s should have resulted in %s" 
                (Filename.basename spatchfile)
                (Filename.basename srcfile)
                (Filename.basename expfile))
        | _ -> ()
      end 
      else failwith ("wrong format for expfile: " ^ expfile)
    )
  )


(*****************************************************************************)
(* Misc unit tests *)
(*****************************************************************************)
(*
let misc_unittest =
  "misc" >::: [
    "join_with_space" >:: (fun () ->
      assert_equal
        (Matching_report.join_with_space_if_needed ["$x";"=";"print";"FOO"])
        "$x=print FOO"
    )
  ]
*)

(*****************************************************************************)
(* Final suite *)
(*****************************************************************************)

(*
let unittest =
  "matcher" >::: (
    sgrep_unittest ++ spatch_unittest ++ [misc_unittest]
  )
*)
OCaml

Innovation. Community. Security.