package pfff

  1. Overview
  2. Docs
Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source

Source file parsing_hacks_js.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
(* Yoann Padioleau
 *
 * Copyright (C) 2010, 2013 Facebook
 * Copyright (C) 2019 Yoann Padioleau
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * version 2.1 as published by the Free Software Foundation, with the
 * special exception on linking described in file license.txt.
 * 
 * This library is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the file
 * license.txt for more details.
 *)
open Common 

module Flag = Flag_parsing
module PI = Parse_info

module Ast = Cst_js
module T = Parser_js
module TH   = Token_helpers_js
module F = Ast_fuzzy

(*****************************************************************************)
(* Prelude *)
(*****************************************************************************)
(* The goal of this module is to retag tokens
 * (e.g., a T_LPAREN into a T_LPAREN_ARROW)
 * or insert tokens (e.g., T_VIRTUAL_SEMICOLON) to
 * help the grammar remain simple and unambiguous. See
 * lang_cpp/parsing/parsing_hacks.ml for more information about
 * this technique.
 *
 * This module inserts fake virtual semicolons, which is known as
 * Automatic Semicolon Insertion, or ASI for short.
 * Those semicolons can be omitted by the user (but really should not be).
 * ASI works in two steps:
 *  - certain tokens cannot be followed by a newline (e.g., continue)
 *    and we detect those tokens in this file.
 *  - we also insert semicolons during error recovery in parser_js.ml. After
 *    all, that is what the spec says.
 * Note that we need both techniques. See the comment in parse_js.ml for
 * the limitations of using just the second technique.
 *  
 * reference:
 *  -http://www.bradoncode.com/blog/2015/08/26/javascript-semi-colon-insertion
 *  -http://www.ecma-international.org/ecma-262/6.0/index.html#sec-automatic-semicolon-insertion
 *)

(*****************************************************************************)
(* Helpers *)
(*****************************************************************************)

(* obsolete *)
(* Returns true exactly for the import, export, var, let, const and
 * function keyword tokens; every other token yields false.
 *)
let is_toplevel_keyword tok =
  match tok with
  | T.T_FUNCTION _
  | T.T_CONST _ | T.T_LET _ | T.T_VAR _
  | T.T_EXPORT _ | T.T_IMPORT _ ->
      true
  | _ ->
      false

(* obsolete *)
(* Collects the infos of the closing parentheses whose matching opening
 * parenthesis directly follows an if token (comments are filtered out
 * before looking at neighbours).
 *
 * The result is whatever successive Common.push calls accumulated, one
 * entry per matching closing parenthesis.
 *)
let rparens_of_if toks =
  let code_toks = Common.exclude TH.is_comment toks in

  (* one entry per currently open parenthesis: the token seen just before it *)
  let before_lparen = ref [] in
  (* infos of the closing parentheses that end an if condition *)
  let closers = ref [] in

  let visit prev tok =
    match tok with
    | T.T_LPAREN _ ->
        Common.push prev before_lparen
    (* an unbalanced closing parenthesis (nothing left to pop) is ignored *)
    | T.T_RPAREN info when !before_lparen <> [] ->
        (match Common2.pop2 before_lparen with
         | Some (T.T_IF _) -> Common.push info closers
         | _ -> ())
    | _ ->
        ()
  in
  Common2.iter_with_previous_opt visit code_toks;
  !closers

(*****************************************************************************)
(* Entry point *)
(*****************************************************************************)

(* retagging:
 *  - '(' when part of an arrow expression
 *  - less: '<' when part of a polymorphic type (aka generic)
 *  - less: { when part of a pattern before an assignment
 *)
(* Retags some tokens of [toks] using a fuzzy tree view of the token stream:
 *  - a T_LPAREN whose parenthesized group is directly followed by =>
 *    becomes a T_LPAREN_ARROW
 *  - a T_IMPORT directly followed by a parenthesized group becomes
 *    a T_IDENTIFIER carrying the original string of the token
 * All other tokens are returned unchanged, in the same order.
 *
 * If Parse_fuzzy.Unclosed is raised while doing so, the original tokens
 * are returned as-is when Flag.error_recovery is set; otherwise a
 * Lexer_js.Lexical_error carrying the same message and info is raised.
 *)
let fix_tokens toks =
  try
    let hooks = { Parse_fuzzy.
      tokf = TH.info_of_tok;
      kind = TH.token_kind_of_tok;
    }
    in
    let trees = Parse_fuzzy.mk_trees hooks toks in

    (* both tables are used as sets of token infos *)
    let arrow_lparens = Hashtbl.create 101 in
    let import_calls = Hashtbl.create 101 in

    (* pass 1: walk the trees and remember which infos must be retagged *)
    let on_trees (continue, _) xs =
      (match xs with
       | F.Parens (lparen, _, _) :: F.Tok ("=>", _) :: _ ->
           Hashtbl.add arrow_lparens lparen true
       (* TODO: also handle typed arrows! *)
       | F.Tok ("import", import) :: F.Parens _ :: _ ->
           Hashtbl.add import_calls import true
       | _ ->
           ()
      );
      continue xs
    in
    let visit = F.mk_visitor { F.default_visitor with F.ktrees = on_trees } in
    visit trees;

    (* pass 2: rebuild the token list, swapping the remembered tokens *)
    let retag tok =
      match tok with
      | T.T_LPAREN info when Hashtbl.mem arrow_lparens info ->
          T.T_LPAREN_ARROW info
      | T.T_IMPORT info when Hashtbl.mem import_calls info ->
          T.T_IDENTIFIER (PI.str_of_info info, info)
      | other ->
          other
    in
    List.map retag toks

  with Parse_fuzzy.Unclosed (msg, info) ->
    if !Flag.error_recovery
    then toks
    else raise (Lexer_js.Lexical_error (msg, info))


(*****************************************************************************)
(* ASI (Automatic Semicolon Insertion) part 1 *)
(*****************************************************************************)

(* First part of ASI: returns the tokens of [xs] in their original order,
 * with a T_VIRTUAL_SEMICOLON inserted in front of some tokens that start
 * a new line (see the cases in f below). Returns [] for an empty list.
 *
 * Comment tokens are copied to the output unchanged and are never used
 * as the previous token when deciding whether to insert a semicolon.
 *)
let fix_tokens_ASI xs =

  (* output tokens, accumulated with Common.push and reversed at the end *)
  let res = ref [] in
  (* Walks xs, calling f with the previous non-comment token and the
   * current token. Comments are pushed directly on res and leave prev
   * untouched.
   *)
  let rec aux prev f xs = 
    match xs with
    | [] -> ()
    | e::l ->
        if TH.is_comment e
        then begin 
          Common.push e res;
          aux prev f l
        end else begin
          f prev e;
          aux e f l
        end
  in

  (* Pushes a virtual semicolon on res; its info is a fake one attached
   * to the info of x. The caller pushes x itself afterwards.
   *)
  let push_sc_before_x x = 
     let fake = Ast.fakeInfoAttach (TH.info_of_tok x) in
     Common.push (T.T_VIRTUAL_SEMICOLON fake) res; 
  in

  (* The rule actually in use: possibly push a virtual semicolon,
   * then always push x.
   *)
  let f = (fun prev x ->
     (match prev, x with
     (* continue or break followed by a token on another line *)
     | (T.T_CONTINUE _ | T.T_BREAK _), _
        when TH.line_of_tok x <> TH.line_of_tok prev ->
        push_sc_before_x x;
     (* very conservative; should be any last(left_hand_side_expression) 
      * but for that better to rely on ASI via parse-error recovery;
      * no ambiguity like for continue because 
      *    if(true) x
      *    ++y;
      * is not valid.
      *)
     | (T.T_IDENTIFIER _ | T.T_FALSE _ | T.T_TRUE _), (T.T_INCR _ | T.T_DECR _)
        when TH.line_of_tok x <> TH.line_of_tok prev ->
        push_sc_before_x x;
     | _ -> ()
     );
     Common.push x res;
  ) in

  (* obsolete: only read by _fobsolete below, but still computed *)
  let rparens_if = rparens_of_if xs in
  let hrparens_if = Common.hashset_of_list rparens_if in

  (* history: this had too many false positives, which forced
   * to rewrite the grammar to add extra virtual semicolons which
   * then make the whole thing worse
   *
   * This closure is not passed to aux below; it is kept for reference.
   * The order of its cases matters, since later cases overlap earlier ones.
   *)
  let _fobsolete = (fun prev x ->
    match prev, x with
    (* { } or ; } TODO: source of many issues *)
    | (T.T_LCURLY _ | T.T_SEMICOLON _), 
      T.T_RCURLY _ ->
        Common.push x res;
    (* <not } or ;> } *)
    | _, 
      T.T_RCURLY _ ->
        push_sc_before_x x;
        Common.push x res;
        
    (* ; EOF *)
    | (T.T_SEMICOLON _),
       T.EOF _ ->
        Common.push x res;
    (* <not ;> EOF *)
    | _, T.EOF _ ->
        push_sc_before_x x;
        Common.push x res;

    (* } 
     * <keyword>
     *)
    | T.T_RCURLY _, 
      (T.T_IDENTIFIER _
       | T.T_IF _ | T.T_SWITCH _ | T.T_FOR _
       | T.T_VAR _  | T.T_FUNCTION _ | T.T_LET _ | T.T_CONST _
       | T.T_RETURN _
       | T.T_BREAK _ | T.T_CONTINUE _
       (* todo: sure? *)
       | T.T_THIS _ | T.T_NEW _
      ) when TH.line_of_tok x <> TH.line_of_tok prev ->
        push_sc_before_x x;
        Common.push x res

    (* )
     * <keyword>
     *)
    (* this is valid only if the RPAREN is not the closing paren of an if*)
    | T.T_RPAREN info, 
      (T.T_VAR _ | T.T_IF _ | T.T_THIS _ | T.T_FOR _ | T.T_RETURN _ |
       T.T_IDENTIFIER _ | T.T_CONTINUE _ 
      ) when TH.line_of_tok x <> TH.line_of_tok prev 
             && not (Hashtbl.mem hrparens_if info) ->
        push_sc_before_x x;
        Common.push x res;


    (* ]
     * <keyword> 
     *)
    | T.T_RBRACKET _, 
      (T.T_FOR _ | T.T_IF _ | T.T_VAR _ | T.T_IDENTIFIER _)
      when TH.line_of_tok x <> TH.line_of_tok prev ->
        push_sc_before_x x;
        Common.push x res;

    (* <literal> 
     * <keyword> 
     *)
    | (T.T_IDENTIFIER _ 
        | T.T_NULL _ | T.T_STRING _ | T.T_REGEX _
        | T.T_FALSE _ | T.T_TRUE _
      ), 
       (T.T_VAR _ | T.T_IDENTIFIER _ | T.T_IF _ | T.T_THIS _ |
        T.T_RETURN _ | T.T_BREAK _ | T.T_ELSE _
      ) when TH.line_of_tok x <> TH.line_of_tok prev ->
        push_sc_before_x x;
        Common.push x res;

    (* } or ; or , or =
     * <keyword> col 0
     *)
    | (T.T_RCURLY _ | T.T_SEMICOLON _ | T.T_COMMA _ | T.T_ASSIGN _),
      _ 
      when is_toplevel_keyword x &&
       TH.line_of_tok x <> TH.line_of_tok prev && TH.col_of_tok x = 0
      ->
       Common.push x res;

    (* <no ; or }>
     * <keyword> col 0
     *)
    | _, _
      when is_toplevel_keyword x &&
       TH.line_of_tok x <> TH.line_of_tok prev && TH.col_of_tok x = 0
      ->
       push_sc_before_x x;
       Common.push x res;


    (* else *)
    | _, _ ->        
        Common.push x res;
  )
  in
  match xs with
  | [] -> []
  | x::_ ->
      (* A fake semicolon, attached to the first token, serves as the
       * previous token of the first non-comment token.
       *)
      let sentinel = 
        let fake = Ast.fakeInfoAttach (TH.info_of_tok x) in
        (T.T_SEMICOLON fake)
      in
      aux sentinel f xs;
      (* res was built by pushing, so put it back in source order *)
      List.rev !res
OCaml

Innovation. Community. Security.