Source file graph_code_js.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
open Common
module E = Entity_code
module G = Graph_code
module PI = Parse_info
open Ast_js
module Ast = Ast_js
(* Traversal state threaded through the whole extraction. Scalar fields are
 * updated functionally with the record-with syntax; the hash tables and
 * the asts reference are mutated in place.
 *)
type env = {
(* the code graph being populated *)
g: Graph_code.graph;
(* which pass is running, see the phase type below *)
phase: phase;
(* node that receives Has edges for new entities and acts as the source
 * of Use edges *)
current: Graph_code.node;
(* name of the file being processed, as used for graph nodes and
 * qualified names *)
file_readable: Common.filename;
(* project root, used to resolve import paths *)
root: Common.dirname;
(* maps an identifier as written in the current file to its qualified
 * name; filled from toplevel variables and Import directives *)
imports: (string, qualified_name ) Hashtbl.t;
(* names currently considered local: Let/Const declarations, function
 * parameters, named function/class expressions, catch variables *)
locals: string list;
(* names declared with the Var kind, plus module aliases; the table is
 * copied when entering a function body *)
vars: (string, bool) Hashtbl.t;
(* per file, the names seen in Export directives (filled during Defs) *)
exports: (Common.filename, string list) Hashtbl.t;
(* nodes flagged as duplicate entities; edges touching them are skipped *)
dupes: (Graph_code.node, bool) Hashtbl.t;
(* qualified name -> variable definition, filled during Uses *)
db: (Ast_js.qualified_name, Ast_js.var) Hashtbl.t;
(* (file, ast) pairs accumulated during the Uses pass *)
asts: (Common.filename * Ast_js.program ) list ref;
(* write a line to the log only *)
log: string -> unit;
(* write a line to the log, and possibly to the console as well *)
pr2_and_log: string -> unit;
(* callback invoked when the target of a Use edge is not in the graph *)
lookup_fail: env -> Graph_code.node -> Parse_info.info -> unit;
}
(* Defs: entity nodes and Has edges are created.
 * Uses: Use edges are created and names get resolved. *)
and phase = Defs | Uses
(* When set, parse turns a parsing failure into an empty program instead of
 * propagating the exception. *)
let error_recovery = ref true
(* Cache used by parse, keyed by filename. *)
let _hmemo = Hashtbl.create 101
(* Parse [file] and convert it to the Ast_js representation. Results are
 * memoized in _hmemo under the filename.
 *
 * Timeout is always propagated. Any other failure is reported with pr2;
 * then, if error_recovery is set, an empty program is returned, otherwise
 * the failure is raised (as Failure for the construct exceptions, as the
 * original exception for everything else).
 *)
let parse file =
  let load () =
    match Ast_js_build.program (Parse_js.parse_program file) with
    | ast -> ast
    | exception Timeout -> raise Timeout
    | exception (Ast_js_build.TodoConstruct (msg, tok)
                | Ast_js_build.UnhandledConstruct (msg, tok)) ->
      pr2 msg;
      pr2 (Parse_info.error_message_info tok);
      if not !error_recovery then failwith msg;
      []
    | exception exn ->
      pr2 (spf "PARSE ERROR with %s, exn = %s" file (Common.exn_to_s exn));
      if not !error_recovery then raise exn;
      []
  in
  Common.memoized _hmemo file load
(* Abort with a Failure whose message is [s] prefixed by the location
 * of [tok]. *)
let error s tok =
  failwith (spf "%s: %s" (Parse_info.string_of_info tok) s)
(* String part of an Ast_js name. *)
let s_of_n n = n |> Ast.str_of_name
(* Location of [tok], with its file field overridden by [file]. *)
let pos_of_tok tok file =
  let loc = Parse_info.token_location_of_info tok in
  { loc with PI.file = file }
(* True when a failed lookup from [src] to [dst] is expected: the source
 * lives under node_modules/ or the target lives under src/images/. *)
let is_undefined_ok (src, _kindsrc) (dst, _kinddst) =
  if src =~ "^node_modules/.*"
  then true
  else dst =~ "^src/images/"
(* Qualified name of entity [s] defined in file [readable]: the filename
 * without its extension, a dot, then [s].
 *
 * [readable] must not start with a dot-slash prefix (asserted) and must
 * have an extension (Failure otherwise).
 *)
let mk_qualified_name readable s =
  assert (not (readable =~ "^\\./"));
  match Filename.chop_extension readable with
  | base -> base ^ "." ^ s
  | exception Invalid_argument _ ->
    failwith (spf "readable filename without any extension: %s" readable)
(* Resolve [name] through env.imports; a name without an entry there is
 * returned unchanged. The result must not start with a dot-slash prefix
 * (asserted). *)
let qualified_name env name =
  let s = s_of_n name in
  let resolved =
    try Hashtbl.find env.imports s
    with Not_found -> s
  in
  assert (not (resolved =~ "^\\./"));
  resolved
(* True when [n] is listed in env.locals or recorded in env.vars. *)
let is_local env n =
  let s = s_of_n n in
  if List.mem s env.locals
  then true
  else Hashtbl.mem env.vars s
(* Bring the variables [vs] into scope.
 * Let/Const names are prepended to env.locals in the returned env;
 * Var names are recorded by mutating env.vars in place.
 *)
let add_locals env vs =
  let block_scoped =
    List.fold_left (fun acc v ->
      let s = s_of_n v.v_name in
      match v.v_kind with
      | Let | Const -> s :: acc
      | Var ->
        Hashtbl.replace env.vars s true;
        acc
    ) [] vs
    |> List.rev
  in
  { env with locals = block_scoped @ env.locals }
(* Entity kind for a toplevel variable, from its initializer [e] and its
 * declaration kind: functions are Function, classes and object literals
 * are Class, anything else is Constant when declared Const and Global
 * otherwise. *)
let kind_of_expr v_kind e =
  match e, v_kind with
  | Fun _, _ -> E.Function
  | (Class _ | Obj _), _ -> E.Class
  | _, Const -> E.Constant
  | _, _ -> E.Global
(* Compute the node for entity (name, kind) nested under env.current and,
 * in the Defs phase, register it:
 *  - if the parent is itself a dupe, the node is flagged as a dupe too;
 *  - if the node already exists, the clash is logged and the node is
 *    flagged as a dupe;
 *  - otherwise the node, its Has edge from env.current and its position
 *    info are added to the graph.
 * Returns env with current moved to the node, or env unchanged when the
 * node is flagged as a dupe.
 *)
let add_node_and_edge_if_defs_mode env (name, kind) =
  let str = s_of_n name in
  let qualified =
    match env.current with
    | (readable, E.File) -> mk_qualified_name readable str
    | (parent, _) -> parent ^ "." ^ str
  in
  let node = (qualified, kind) in
  let mark_dupe () = Hashtbl.replace env.dupes node true in
  let register () =
    try
      let pos = pos_of_tok (snd name) env.file_readable in
      let nodeinfo = { Graph_code. pos; typ = None; props = []; } in
      G.add_node node env.g;
      G.add_edge (env.current, node) G.Has env.g;
      G.add_nodeinfo node nodeinfo env.g
    with Not_found ->
      error ("Not_found:" ^ str) (snd name)
  in
  if env.phase = Defs then begin
    if Hashtbl.mem env.dupes env.current then
      mark_dupe ()
    else if G.has_node node env.g then begin
      env.pr2_and_log (spf "DUPE entity: %s" (G.string_of_node node));
      let orig_file = G.file_of_node node env.g in
      env.log (spf " orig = %s" orig_file);
      env.log (spf " dupe = %s" env.file_readable);
      mark_dupe ()
    end
    else
      register ()
  end;
  if Hashtbl.mem env.dupes node
  then env
  else { env with current = node }
(* Add a Use edge from env.current to the entity (name, kind).
 * The edge is skipped (and logged) when either end is a dupe; a missing
 * source node is a hard error; a missing target is reported through
 * env.lookup_fail.
 *)
let add_use_edge env (name, kind) =
  let dst = (qualified_name env name, kind) in
  let src = env.current in
  let loc = snd name in
  if Hashtbl.mem env.dupes src || Hashtbl.mem env.dupes dst then
    env.pr2_and_log (spf "skipping edge (%s -> %s), one of it is a dupe"
                       (G.string_of_node src) (G.string_of_node dst))
  else if not (G.has_node src env.g) then
    error (spf "SRC FAIL: %s (-> %s)"
             (G.string_of_node src) (G.string_of_node dst)) loc
  else if G.has_node dst env.g then
    G.add_edge (src, dst) G.Use env.g
  else
    env.lookup_fail env dst loc
(* Like add_use_edge, but [kind] is only a guess: when no node of that kind
 * exists, Function, Class, Constant and Global are tried and the kind is
 * replaced if exactly one of them matches. [scope] is then set to the
 * Global resolution of the qualified name.
 *)
let add_use_edge_candidates env (name, kind) scope =
  let s = qualified_name env name in
  let kind =
    if G.has_node (s, kind) env.g
    then kind
    else
      match
        [E.Function; E.Class; E.Constant; E.Global]
        |> List.filter (fun k -> G.has_node (s, k) env.g)
      with
      | [unique] -> unique
      | _ -> kind
  in
  add_use_edge env (name, kind);
  scope := Global s
(* Entry point for one file. In the Defs phase the File node is created
 * first and attached to its directory. In both phases the toplevel names
 * are registered in env.imports and the toplevel elements are visited
 * with the File node as the current node.
 *)
let rec extract_defs_uses env ast =
  let file_node = (env.file_readable, E.File) in
  if env.phase = Defs then begin
    let dir = Common2.dirname env.file_readable in
    G.create_intermediate_directories_if_not_present env.g dir;
    G.add_node file_node env.g;
    G.add_edge ((dir, E.Dir), file_node) G.Has env.g
  end;
  let env = { env with current = file_node } in
  toplevels_entities_adjust_imports env ast;
  toplevels env ast
(* Map every toplevel variable of the file to its qualified name in
 * env.imports. Module directives and toplevel statements are ignored. *)
and toplevels_entities_adjust_imports env xs =
  List.iter (fun top ->
    match top with
    | V { v_name; _ } ->
      let str = s_of_n v_name in
      Hashtbl.replace env.imports str
        (mk_qualified_name env.file_readable str)
    | M _ | S _ -> ()
  ) xs
(* Visit one toplevel element. A toplevel statement gets a synthetic
 * TopStmts entity named after its line and column; its body is only
 * visited in the Uses phase. *)
and toplevel env x =
  match x with
  | M directive -> module_directive env directive
  | V {v_name; v_kind; v_init; v_resolved} ->
    name_expr env v_name v_kind v_init v_resolved
  | S (tok, st) ->
    let line = Parse_info.line_of_info tok in
    let col = Parse_info.col_of_info tok in
    let name = (spf "__top__%d:%d" line col, tok) in
    let env = add_node_and_edge_if_defs_mode env (name, E.TopStmts) in
    (match env.phase with
     | Uses -> stmt env st
     | Defs -> ())
(* Handle a module directive.
 *  - Import (Uses phase only): bind the local name to the qualified name
 *    of the imported entity; an unresolved path is logged and replaced
 *    by a NOTFOUND placeholder file name.
 *  - Export (Defs phase only): record the name in env.exports.
 *  - ModuleAlias (both phases): record the alias in env.vars.
 *  - ImportCss / ImportEffect: nothing to do.
 *)
and module_directive env x =
  match x, env.phase with
  | Import (name1, name2, (file, tok)), Uses ->
    let str1 = s_of_n name1 in
    let str2 = s_of_n name2 in
    let readable =
      match
        Module_path_js.resolve_path
          ~root:env.root
          ~pwd:(Filename.dirname env.file_readable)
          file
      with
      | Some fullpath -> Common.readable env.root fullpath
      | None ->
        env.pr2_and_log (spf "could not resolve path %s at %s" file
                           (Parse_info.string_of_info tok));
        spf "NOTFOUND-|%s|.js" file
    in
    Hashtbl.replace env.imports str2 (mk_qualified_name readable str1)
  | Import _, Defs -> ()
  | Export name, Defs ->
    let previous =
      try Hashtbl.find env.exports env.file_readable
      with Not_found -> []
    in
    Hashtbl.replace env.exports env.file_readable (s_of_n name :: previous)
  | Export _, Uses -> ()
  | ModuleAlias (name, _fileTODO), _ ->
    Hashtbl.replace env.vars (s_of_n name) true
  | (ImportCss _ | ImportEffect _), _ -> ()
(* Visit every toplevel element, in order. *)
and toplevels env xs =
  xs |> List.iter (fun x -> toplevel env x)
(* Toplevel variable definition. The entity is registered (Defs phase);
 * in the Uses phase the initializer is visited, the variable is resolved
 * to the name of the current node, and the definition is stored in
 * env.db under that name. *)
and name_expr env name v_kind e v_resolved =
  let env =
    add_node_and_edge_if_defs_mode env (name, kind_of_expr v_kind e) in
  match env.phase with
  | Defs -> ()
  | Uses ->
    expr env e;
    let qualified = fst env.current in
    v_resolved := Global qualified;
    Hashtbl.add env.db qualified
      { v_name = name; v_kind; v_init = e; v_resolved }
(* Visit a statement, looking for uses in every sub-expression and
 * sub-statement. Declarations introduced here (variable declarations,
 * for-loop headers, catch variables) are put in scope for the parts
 * they cover. *)
and stmt env st =
  match st with
  | ExprStmt e | Return e | Throw e ->
    expr env e
  | VarDecl v ->
    (* the declared name is visible inside its own initializer *)
    expr (add_locals env [v]) v.v_init
  | Block xs -> stmts env xs
  | If (cond, then_, else_) ->
    expr env cond;
    stmt env then_;
    stmt env else_
  | Do (body, cond) ->
    stmt env body;
    expr env cond
  | While (cond, body) ->
    expr env cond;
    stmt env body
  | For (header, body) ->
    stmt (for_header env header) body
  | Switch (e, xs) ->
    expr env e;
    cases env xs
  | Continue lopt | Break lopt ->
    Common.opt (label env) lopt
  | Label (l, body) ->
    label env l;
    stmt env body
  | Try (body, catchopt, finalopt) ->
    stmt env body;
    catchopt |> Common.opt (fun (n, handler) ->
      (* the caught value is a Let-like local of the handler *)
      let caught =
        { v_name = n; v_kind = Let; v_init = Nop; v_resolved = ref Local } in
      stmt (add_locals env [caught]) handler
    );
    finalopt |> Common.opt (stmt env)
(* Visit a for-loop header and return the env to use for the loop body,
 * extended with the variables declared in the header (if any). *)
and for_header env header =
  (* visit each declaration from the incoming env, then scope them all *)
  let declare vars =
    List.iter (fun v -> stmt env (VarDecl v)) vars;
    add_locals env vars
  in
  match header with
  | ForClassic (init, cond, step) ->
    let env =
      match init with
      | Left vars -> declare vars
      | Right e ->
        expr env e;
        env
    in
    expr env cond;
    expr env step;
    env
  | ForIn (lhs, iterated) ->
    let env =
      match lhs with
      | Left var -> declare [var]
      | Right e ->
        expr env e;
        env
    in
    expr env iterated;
    env
(* Statement labels carry no uses: nothing to do. *)
and label _env _lbl = ()
(* Visit every switch case, in order. *)
and cases env xs =
  xs |> List.iter (case env)
(* Visit one switch case: its guard expression, if any, then its body. *)
and case env c =
  match c with
  | Default body -> stmt env body
  | Case (guard, body) ->
    expr env guard;
    stmt env body
(* Visit a sequence of statements. A variable declaration extends the
 * env seen by the statements that follow it. *)
and stmts env xs =
  let visit env x =
    stmt env x;
    match x with
    | VarDecl v -> add_locals env [v]
    | _ -> env
  in
  ignore (List.fold_left visit env xs)
(* Visit an expression and add a Use edge for every identifier that is not
 * local. The kind guessed for an identifier depends on where it appears:
 * Function when it is called, Class when it is the receiver of a field or
 * array access, Global otherwise. *)
and expr env e =
  (* [target] sits in head position: a non-local identifier is looked up
   * with [guess] as its kind, anything else is visited normally *)
  let head guess target =
    match target with
    | Id (n, scope) when not (is_local env n) ->
      add_use_edge_candidates env (n, guess) scope
    | _ -> expr env target
  in
  (* env in which a named function/class expression sees its own name *)
  let with_own_name nopt =
    match nopt with
    | None -> env
    | Some n ->
      add_locals env
        [{ v_name = n; v_kind = Let; v_init = Nop; v_resolved = ref Local }]
  in
  match e with
  | Bool _ | Num _ | String _ | Regexp _ | IdSpecial _ | Nop -> ()
  | Id (n, scope) ->
    if not (is_local env n)
    then add_use_edge_candidates env (n, E.Global) scope
  | Assign (lhs, rhs) ->
    expr env lhs;
    expr env rhs
  | Obj o -> obj_ env o
  | Arr xs -> List.iter (expr env) xs
  | Class (c, nopt) -> class_ (with_own_name nopt) c
  | Fun (f, nopt) -> fun_ (with_own_name nopt) f
  | ObjAccess (receiver, prop) ->
    head E.Class receiver;
    property_name env prop
  | ArrAccess (receiver, index) ->
    head E.Class receiver;
    expr env index
  | Apply (callee, args) ->
    (match callee with
     | IdSpecial _ -> ()
     | _ -> head E.Function callee
    );
    List.iter (expr env) args
  | Conditional (e1, e2, e3) ->
    expr env e1;
    expr env e2;
    expr env e3
(* Visit every property of an object literal. *)
and obj_ env xs =
  xs |> List.iter (property env)
(* Visit a class: its extends expression, if any, then its body. *)
and class_ env c =
  c.c_extends |> Common.opt (expr env);
  c.c_body |> List.iter (property env)
(* Visit one property: a field (name then value) or a spread expression. *)
and property env p =
  match p with
  | FieldSpread e -> expr env e
  | Field (pname, _props, e) ->
    property_name env pname;
    expr env e
(* Only computed property names contain an expression to visit. *)
and property_name env pn =
  match pn with
  | PN_Computed e -> expr env e
  | PN _ -> ()
(* Visit a function. Parameter defaults are visited in the enclosing env;
 * the body is visited with the parameter names added to the locals and
 * with a private copy of env.vars, so Var declarations made in the body
 * do not alter the enclosing table. *)
and fun_ env f =
  parameters env f.f_params;
  let param_names = List.map (fun p -> s_of_n p.p_name) f.f_params in
  let body_env =
    { env with
      locals = param_names @ env.locals;
      vars = Hashtbl.copy env.vars;
    } in
  stmt body_env f.f_body
(* Visit every parameter, in order. *)
and parameters env xs =
  xs |> List.iter (parameter env)
(* Visit the default value of a parameter, if it has one. *)
and parameter env p =
  p.p_default |> Common.opt (expr env)
(* Build the code graph for the Javascript [files] located under [root].
 *
 * The work is done in three steps:
 *  1. Defs pass over the stdlib file and then [files]: entity nodes and
 *     Has edges are created.
 *  2. Uses pass over [files]: Use edges are created, starting each file
 *     with the stdlib toplevel names already imported.
 *  3. Cleanup of the empty placeholder nodes, then a summary of the most
 *     frequent lookup failures printed with pr2.
 *
 * Messages are written to the file pfff.log under [root]; with [verbose]
 * set, some of them are also printed on the console. The log channel is
 * closed before returning, whether the build succeeds or raises.
 *
 * Returns the graph, the table of resolved toplevel definitions and the
 * (file, ast) pairs collected during the Uses pass.
 *)
let build_gen ?(verbose=false) root files =
  let g = G.create () in
  G.create_initial_hierarchy g;
  let hstat_lookup_failures = Hashtbl.create 101 in
  let chan = open_out (Filename.concat root "pfff.log") in
  let env = {
    g;
    phase = Defs;
    current = G.pb;
    file_readable = "__filled_later__";
    root;
    imports = Hashtbl.create 0;
    locals = [];
    vars = Hashtbl.create 0;
    exports = Hashtbl.create 101;
    dupes = Hashtbl.create 101;
    db = Hashtbl.create 101;
    asts = ref [];
    log = (fun s -> output_string chan (s ^ "\n"); flush chan;);
    pr2_and_log = (fun s ->
      if verbose then pr2 s;
      output_string chan (s ^ "\n"); flush chan;
    );
    lookup_fail = (fun env dst loc ->
      let src = env.current in
      (* expected failures only go to the log file *)
      let fprinter =
        if not verbose || is_undefined_ok src dst
        then env.log
        else env.pr2_and_log
      in
      fprinter (spf "PB: lookup_fail on %s (in %s, at %s)"
                  (G.string_of_node dst) (G.string_of_node src)
                  (Parse_info.string_of_info loc));
      (* one binding per failure: counted with find_all in step3 *)
      Hashtbl.add hstat_lookup_failures dst true;
    );
  } in
  let run () =
    env.pr2_and_log "\nstep1: extract defs";
    (Stdlib_js.path_stdlib::files) |> Console.progress ~show:verbose (fun k ->
      List.iter (fun file ->
        k();
        let ast = parse file in
        let file_readable =
          if file = Stdlib_js.path_stdlib
          then "Stdlib.js"
          else Common.readable ~root file
        in
        extract_defs_uses { env with
          phase = Defs; file_readable; imports = Hashtbl.create 13;
        } ast
      ));
    (* toplevel names of the stdlib, visible from every file *)
    let default_import =
      let ast = parse Stdlib_js.path_stdlib in
      let env = { env with phase = Uses; file_readable = "Stdlib.js";
                  locals = []; imports = Hashtbl.create 13; } in
      toplevels_entities_adjust_imports env ast;
      env.imports
    in
    env.pr2_and_log "\nstep2: extract uses";
    files |> Console.progress ~show:verbose (fun k ->
      List.iter (fun file ->
        k();
        let ast = parse file in
        let file_readable = Common.readable ~root file in
        extract_defs_uses { env with
          phase = Uses; file_readable;
          locals = []; imports = Hashtbl.copy default_import;
        } ast;
        Common.push (file_readable, ast) env.asts;
      ));
    env.pr2_and_log "\nstep3: adjusting";
    G.remove_empty_nodes g [G.not_found; G.dupe; G.pb];
    let xs = Common2.hkeys hstat_lookup_failures in
    let counts =
      xs |> List.map (fun (x)->
        G.string_of_node x,
        List.length (Hashtbl.find_all hstat_lookup_failures x))
      |> Common.sort_by_val_highfirst
      |> Common.take_safe 20
    in
    pr2 "Top lookup failures per modules";
    counts |> List.iter (fun (s, n) -> pr2 (spf "%-30s = %d" s n));
    (g, env.db, !(env.asts))
  in
  (* the log channel must not outlive the build, even on failure *)
  try
    let res = run () in
    close_out chan;
    res
  with exn ->
    close_out_noerr chan;
    raise exn
(* Build the code graph for [files] under [root], keeping only the graph
 * out of what build_gen returns. *)
let build ?verbose root files =
  match build_gen ?verbose root files with
  | (g, _db, _asts) -> g
(* Run a non-verbose build and return the definition table together with
 * the collected asts, dropping the graph. *)
let build_for_ai root files =
  match build_gen ~verbose:false root files with
  | (_g, db, asts) -> (db, asts)