package pfff

  1. Overview
  2. Docs
Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source

Source file token_views_context.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
(* Yoann Padioleau
 *
 * Copyright (C) 2014 Facebook
 * Copyright (C) 2002-2008 Yoann Padioleau
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License (GPL)
 * version 2 as published by the Free Software Foundation.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * file license.txt for more details.
 *)
open Common

open Parser_cpp
open Token_views_cpp

module TH = Token_helpers_cpp
module TV = Token_views_cpp

(*****************************************************************************)
(* Prelude *)
(*****************************************************************************)

(*****************************************************************************)
(* Argument vs Parameter *)
(*****************************************************************************)

let look_like_argument _tok_before xs =

  (* normalize for C++ *)
  let xs = xs +> List.map (function
    | Tok ({t=TAnd ii} as record) -> Tok ({record with t=TMul ii})
    | x -> x
  )
  in
  (* split by comma so can easily check if have stuff like '*xx'
   * that takes the full argument
   *)
  let xxs = split_comma xs in

  let aux1 xs =
    match xs with
    | [] -> false
    (* *xx    (note: actually can also be a function pointer decl) *)
    | [Tok{t=TMul _}; Tok{t=TIdent _}] -> true
    (* *(xx) *)
    | [Tok{t=TMul _}; Parens _] -> true
    (* TODO: xx * yy  and space = 1 between the 2 :) *)
    | _ -> false
  in
  
  let rec aux xs =
    match xs with
    | [] -> false
    (* a function call probably *)
    | Tok{t=TIdent _}::Parens _::_xs -> 
        (* todo? look_like_argument recursively in Parens || aux xs ? *)
        true
    (* if have = ... then must stop, could be default parameter of a method *)
    | Tok{t=TEq _}::_xs ->   false

    (* could be part of a type declaration *)
    | Tok {t=TOCro _}::Tok {t=TCCro _}::_xs ->   false
    | Tok {t=TOCro _}::Tok {t=(TInt _)}::Tok {t=TCCro _}::_xs -> false
    | Tok {t=TOCro _}::Tok {t=(TIdent _)}::Tok {t=TCCro _}::_xs -> false

    | x::xs ->
        (match x with
        | Tok {t=(TInt _ | TFloat _ | TChar _ | TString _) } -> true
        | Tok {t=(Ttrue _ | Tfalse _) } -> true
        | Tok {t=(Tthis _)} -> true
        | Tok {t=(Tnew _ )} -> true
        | Tok {t= tok} when TH.is_binary_operator_except_star tok -> true
        | Tok {t=(TInc _ | TDec _)} -> true
        | Tok {t = (TDot _ | TPtrOp _ | TPtrOpStar _ | TDotStar _)} -> true
        | Tok {t = (TOCro _)} -> true
        | Tok {t = (TWhy _ | TBang _)} -> true
        | _ -> aux xs
        )
  in
  (* todo? what if they contradict each other? if one say arg and
   * the other a parameter?
   *)
  xxs +> List.exists aux1 || aux xs

let look_like_typedef s =
  s =~ ".*_t$" ||
  s = "ulong" || s = "uchar" || s = "uvlong" || s = "vlong" || s = "uintptr"
  (* plan9, but actually some fp such as Paddr which is actually a macro *)
  (*  || s =~ "[A-Z][a-z].*$" *)
  (* with DECLARE_BOOST_TYPE, but have some false positives
   * when people do xx* indexPtr = const_cast<>(indexPtr);
   *)
   (* s =~ ".*Ptr$" *)
  (* || s = "StringPiece" *)



(* todo: pass1, look for const, etc
 * todo: pass2, look xx_t, xx&, xx*, xx**, see heuristics in typedef
 * 
 * Many patterns should mimic some heuristics in parsing_hack_typedef.ml
 *)
let look_like_parameter tok_before xs =

  (* normalize for C++ *)
  let xs = xs +> List.map (function
    | Tok ({t=TAnd ii} as record) -> Tok ({record with t=TMul ii})
    | x -> x
  )
  in
  let xxs = split_comma xs in

  let aux1 xs =
    match xs with
    | [] -> false
    (* xx_t *)
    | [Tok {t=TIdent (s, _)}] when look_like_typedef s -> true
    (* xx* *)
    | [Tok {t=TIdent _}; Tok {t=TMul _}] -> true
    (* xx** *)
    | [Tok {t=TIdent _}; Tok {t=TMul _}; Tok {t=TMul _}] -> true
    (* xx * y      could be multiplication (or xx & yy) ..
     * todo: could look if space around :) but because of the
     *  filtering of template and qualifier the no_space_between
     *  may not be completely accurate here. May need lower level access
     *  to the list of TCommentSpace and their position.
     *  hmm but can look at col?
     * 
     * C-s for parameter_decl in grammar to see that catch() is
     * a InParameter.
     *)
    | [Tok {t=TIdent _}; Tok {t=TMul _};Tok {t=TIdent _};] ->
      (match tok_before with 
      | Tok{t=(
            Tcatch _ 
          (* ugly: TIdent_Constructor interaction between past heuristics *)
          | TIdent_Constructor _
          | Toperator _
          (* no! | TIdent _ *)
        )} -> true 
      | _ -> false
      )

    | _ -> false
  in

  let rec aux xs =
    match xs with
    | [] -> false
    (* xx yy *)
    | Tok {t=TIdent _}::Tok{t=TIdent _}::_xs -> true
    | x::xs ->
        (match x with
        | Tok {t= tok} when TH.is_basic_type tok -> true
        | Tok {t = (Tconst _ | Tvolatile _)} -> true
        | Tok {t = (Tstruct _ | Tunion _ | Tenum _ | Tclass _)} -> true
        | _ -> aux xs
        )
  in
  xxs +> List.exists aux1 || aux xs


(*****************************************************************************)
(* Main heuristics *)
(*****************************************************************************)
(* 
 * Most of the important contexts are introduced via some '{' '}'. To
 * disambiguate is it often enough to just look at a few tokens before the
 * '{'.
 * 
 * Below we assume a view without: 
 * - comments 
 * - cpp directives 
 * 
 * todo 
 *  - handle more C++ (right now I did it mostly to be able to parse plan9)
 *  - harder now that have c++, can have function inside struct so need
 *    handle all together. 
 *  - change token but do not recurse in
 *    nested Braceised. maybe do via accumulator, don't use iter_token_brace?
 *  - need remove the qualifier as they make the sequence pattern matching
 *    more difficult?
 *)
let set_context_tag_multi groups = 
  let rec aux xs =
    match xs with
    | [] -> ()

  (* struct Foo {, also valid for class and union *)
  | Tok{t=(Tstruct _ | Tunion _ | Tclass _)}::Tok{t=TIdent(s,_)}
    ::(Braces(_t1, _body, _t2) as braces)::xs
    ->
      [braces] +> TV.iter_token_multi (fun tok ->
        tok.TV.where <- (TV.InClassStruct s)::tok.TV.where;
      );
      aux (braces::xs)

  | Tok{t=(Tstruct _ | Tunion _)}::(Braces(_t1, _body, _t2) as braces)::xs
    ->
      [braces] +> TV.iter_token_multi (fun tok ->
        tok.TV.where <- (TV.InClassStruct "__anon__")::tok.TV.where;
      );
      aux (braces::xs)

  (* = { } *)
  | Tok ({t=TEq _; _})::(Braces(_t1, _body, _t2) as braces)::xs -> 
      [braces] +> TV.iter_token_multi (fun tok -> 
        tok.TV.where <- InInitializer::tok.TV.where;
      );
      aux (braces::xs)

  (* enum xxx { InEnum *)
  | Tok{t=Tenum _}::Tok{t=TIdent(_,_)}::(Braces(_t1, _body, _t2) as braces)::xs
  | Tok{t=Tenum _}::(Braces(_t1, _body, _t2) as braces)::xs
    ->
      [braces] +> TV.iter_token_multi (fun tok ->
        tok.TV.where <- TV.InEnum::tok.TV.where;
      );
      aux (braces::xs)


  (* C++: class Foo : ... { *)
  | Tok{t=Tclass _ | Tstruct _}::Tok{t=TIdent(s,_)}
    ::Tok{t= TCol ii}::xs
    ->
      let (before, braces, after) =
        try 
          xs +> Common2.split_when (function
          | Braces _ -> true
          | _ -> false
          )
        with Not_found ->
          raise (UnclosedSymbol (spf "PB with split_when at %s"
                                    (Parse_info.string_of_info ii)))
      in
      aux before;
      [braces] +> TV.iter_token_multi (fun tok ->
        tok.TV.where <- (TV.InClassStruct s)::tok.TV.where;
      );
      aux [braces];
      aux after



  (* need to look what was before to help the look_like_xxx heuristics 
   *
   * The order of the 3 rules below is important. We must first try
   * look_like_argument which has less FP than look_like_parameter
  *)
  | x::(Parens(_t1, body, _t2) as parens)::xs 
    when look_like_argument x body ->
      (*msg_context t1.t (TV.InArgument); *)
      [parens] +> TV.iter_token_multi (fun tok ->
        tok.TV.where <- (TV.InArgument)::tok.TV.where;
      );
      (* todo? recurse on body? *)
      aux [x];
      aux (parens::xs)

  (* C++: special cases *)
  | (Tok{t=Toperator _} as tok1)::tok2::(Parens(_t1, body, _t2) as parens)::xs 
    when look_like_parameter tok1 body ->
      (* msg_context t1.t (TV.InParameter); *)
      [parens] +> TV.iter_token_multi (fun tok ->
        tok.TV.where <- (TV.InParameter)::tok.TV.where;
      );
      (* recurse on body? hmm if InParameter should not have nested 
       * stuff except when pass function pointer 
       *)
      aux [tok1;tok2];
      aux (parens::xs)


  | x::(Parens(_t1, body, _t2) as parens)::xs 
    when look_like_parameter x body ->
      (* msg_context t1.t (TV.InParameter); *)
      [parens] +> TV.iter_token_multi (fun tok ->
        tok.TV.where <- (TV.InParameter)::tok.TV.where;
      );
      (* recurse on body? hmm if InParameter should not have nested 
       * stuff except when pass function pointer 
       *)
      aux [x];
      aux (parens::xs)

  (* void xx() *)
  | Tok{t=typ}::Tok{t=TIdent _}::(Parens(_t1, _body, _t2) as parens)::xs 
    when TH.is_basic_type typ ->
      (* msg_context t1.t (TV.InParameter); *)
      [parens] +> TV.iter_token_multi (fun tok ->
        tok.TV.where <- (TV.InParameter)::tok.TV.where;
      );
      aux (parens::xs)


  | x::xs ->
      (match x with
      | Tok _t -> ()
      | Parens (_t1, xs, _t2)
      | Braces (_t1, xs, _t2)
      | Angle  (_t1, xs, _t2)
         ->
          aux xs
      );
      aux xs
  in
  (* sane initialization *)
  groups +> TV.iter_token_multi (fun tok ->
    tok.TV.where <- [TV.InTopLevel];
  );
  aux groups

(*****************************************************************************)
(* Main heuristics C++ *)
(*****************************************************************************)
(* 
 * assumes a view without: 
 * - template arguments, qualifiers, 
 * - comments and cpp directives 
 * - TODO public/protected/... ?
 *)
let set_context_tag_cplus groups =
  set_context_tag_multi groups
OCaml

Innovation. Community. Security.