package atd

  1. Overview
  2. Docs

Source file expand.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
(*
  Monomorphization of type expressions.

  The goal is to inline each parametrized type definition as much as possible,
  allowing code generators to create more efficient code directly:

  type ('a, 'b) t = [ Foo of 'a | Bar of 'b ]
  type int_t = (int, int) t

  becomes:

  type int_t = _1
  type _1 = [ Foo of int | Bar of int ]

  A secondary goal is to factor out type subexpressions in order for
  the code generators to produce less code:

  type x = { x : int list }
  type y = { y : int list option }

  becomes:

  type x = { x : _1 }
  type y = { y : _2 }
  type _1 = int list   (* `int list' now occurs only once *)
  type _2 = _1 option


  By default, only parameterless type definitions are returned.
  The [keep_poly] option allows to return parametrized type definitions as
  well.

  Input:

  type 'a abs = abstract
  type int_abs = int abs
  type 'a tree = [ Leaf of 'a | Node of ('a tree * 'a tree) ]
  type t = int tree
  type x = [ Foo | Bar ] tree

  Output (pseudo-syntax where quoted strings indicate unique type identifiers):

  type "int abs" = int abs
  type int_abs = "int abs"

  type 'a tree = [ Leaf of 'a | Node of ('a tree * 'a tree) ]
    (* only if keep_poly = true *)

  type "int tree" = [ Leaf of int | Node of ("int tree" * "int tree") ]
  type t = "int tree"
  type "[ Foo | Bar ] tree" =
    [ Leaf of [ Foo | Bar ]
    | Node of ("[ Foo | Bar ] tree" * "[ Foo | Bar ] tree") ]
  type x = "[ Foo | Bar ] tree"

*)

open Import

open Ast

module S = Stdlib.Set.Make (String)
module M = Stdlib.Map.Make (String)


(*
  To support -o-name-overlap, we need to generate a few type annotations.
  But types generated by expansion like _1, _2, etc. are not actually
  written out in the interface or implementation, so they must be mapped
  back to the original polymorphic types for annotation purposes.

  This table contains the mappings. Its format is:
  key = generated type name
  value = (original type name,
           original number of parameters)

  For example, if we have the generated output:
    type 'a t = ...
    type _1 = int t
  Then the idea is, in the reader and writer functions, instead of using
  _1 in the annotation, we use _ t. The entry in original_types would be:
    ("_1", ("t", 1))

  (The alternate strategy of actually producing a definition for type _1
  aliasing int t in the implementation doesn't work, because the annotations
  will disagree with the interface in the case of recursive types.)
*)
type original_types = (string, string * int) Hashtbl.t


(*
  Format of the table:
  key = type name (without arguments)
  value = (order in the file,
           number of parameters,
           original annotations of the right-hand type expression,
           original type definition,
           rewritten type definition)

  Every entry has an original type definition except the predefined
  atoms (int, string, etc.) and newly-created type definitions
  (type _1 = ...).
*)
let init_table () =
  let seqnum = ref 0 in
  let tbl = Hashtbl.create 20 in
  List.iter (
    fun (k, n, opt_td) ->
      incr seqnum;
      Hashtbl.add tbl k (!seqnum, n, opt_td, None)
  ) Predef.list;
  seqnum, tbl


let rec mapvar_expr
    (f : string -> string) (x : Ast.type_expr) : Ast.type_expr =
  match x with
    Sum (loc, vl, a) ->
      Sum (loc, List.map (mapvar_variant f) vl, a)
  | Record (loc, fl, a) ->
      Record (loc, List.map (mapvar_field f) fl, a)
  | Tuple (loc, tl, a) ->
      Tuple (loc,
             List.map (fun (loc, x, a) -> (loc, mapvar_expr f x, a)) tl,
             a)
  | List (loc, t, a) ->
      List (loc, mapvar_expr f t, a)
  | Name (loc, (loc2, "list", [t]), a) ->
      Name (loc, (loc2, "list", [mapvar_expr f t]), a)

  | Option (loc, t, a) ->
      Option (loc, mapvar_expr f t, a)
  | Name (loc, (loc2, "option", [t]), a) ->
      Name (loc, (loc2, "option", [mapvar_expr f t]), a)

  | Nullable (loc, t, a) ->
      Nullable (loc, mapvar_expr f t, a)
  | Name (loc, (loc2, "nullable", [t]), a) ->
      Name (loc, (loc2, "nullable", [mapvar_expr f t]), a)

  | Shared (loc, t, a) ->
      Shared (loc, mapvar_expr f t, a)
  | Name (loc, (loc2, "shared", [t]), a) ->
      Name (loc, (loc2, "shared", [mapvar_expr f t]), a)

  | Wrap (loc, t, a) ->
      Wrap (loc, mapvar_expr f t, a)
  | Name (loc, (loc2, "wrap", [t]), a) ->
      Name (loc, (loc2, "wrap", [mapvar_expr f t]), a)

  | Tvar (loc, s) -> Tvar (loc, f s)

  | Name (loc, (loc2, k, args), a) ->
      Name (loc, (loc2, k, List.map (mapvar_expr f) args), a)

and mapvar_field f = function
    `Field (loc, k, t) -> `Field (loc, k, mapvar_expr f t)
  | `Inherit (loc, t) -> `Inherit (loc, mapvar_expr f t)

and mapvar_variant f = function
  | Variant (loc, k, opt_t) ->
      Variant (loc, k, (Option.map (mapvar_expr f) opt_t))
  | Inherit (loc, t) -> Inherit (loc, mapvar_expr f t)


let var_of_int i =
  let letter = i mod 26 in
  let number = i / 26 in
  let prefix = String.make 1 (Char.chr (letter + Char.code 'a')) in
  if number = 0 then prefix
  else prefix ^ string_of_int number

let vars_of_int n = List.init n var_of_int

let is_special s = String.length s > 0 && s.[0] = '@'


(*
  Standardize a type expression by numbering the type variables
  using the order in which they are encountered.

  input:

  (int, 'b, 'z) foo

  output:

  - new_name: "@(int, 'a, 'b) foo"
  - new_args: [ 'b; 'z ]
  - new_env: [ ('b, 'a); ('z, 'b) ]

  new_name and new_args constitute the type expression that replaces the
  original one:

  (int, 'b, 'z) foo   -->   ('b, 'z) "@(int, 'a, 'b) foo"


  new_env allows the substitution of the type variables of the original
  type expression into the type variables defined by the new type definition.
*)
let make_type_name loc orig_name args an =
  let tbl = Hashtbl.create 10 in
  let n = ref 0 in
  let mapping = ref [] in
  let assign_name s =
    try Hashtbl.find tbl s
    with Not_found ->
      let name = var_of_int !n in
      mapping := (s, name) :: !mapping;
      incr n;
      name
  in
  let normalized_args = List.map (mapvar_expr assign_name) args in
  let new_name =
    sprintf "@(%s)"
      (Print.string_of_type_name orig_name normalized_args an)
  in
  let mapping = List.rev !mapping in
  let new_args =
    List.map (fun (old_s, _) -> Tvar (loc, old_s)) mapping in
  let new_env =
    List.map (fun (old_s, new_s) -> old_s, Tvar (loc, new_s)) mapping
  in
  new_name, new_args, new_env

let is_abstract (x : type_expr) =
  match x with
    Name (_, (_, "abstract", _), _) -> true
  | _ -> false

let expr_of_lvalue loc name param annot =
  Name (loc, (loc, name, List.map (fun s -> Tvar (loc, s)) param), annot)


let is_cyclic lname t =
  match t with
    Name (_, (_, rname, _), _) -> lname = rname
  | _ -> false

let is_tvar = function
    Tvar _ -> true
  | _ -> false



let add_annot (x : type_expr) a : type_expr =
  Ast.map_annot (fun a0 -> Annot.merge (a @ a0)) x


let expand
    ?(keep_builtins = false) ?(keep_poly = false) (l : type_def list)
  : type_def list * original_types =

  let seqnum, tbl = init_table () in

  let original_types = Hashtbl.create 16 in

  let rec subst env (t : type_expr) : type_expr =
    match t with
      Sum (loc, vl, a) ->
        Sum (loc, List.map (subst_variant env) vl, a)
    | Record (loc, fl, a) ->
        Record (loc, List.map (subst_field env) fl, a)
    | Tuple (loc, tl, a) ->
        Tuple (loc,
               List.map (fun (loc, x, a) -> (loc, subst env x, a)) tl, a)

    | List (loc as loc2, t, a)
    | Name (loc, (loc2, "list", [t]), a) ->
        let t' = subst env t in
        if keep_builtins then
          Name (loc, (loc2, "list", [t']), a)
        else
          subst_type_name loc loc2 "list" [t'] a

    | Option (loc as loc2, t, a)
    | Name (loc, (loc2, "option", [t]), a) ->
        let t' = subst env t in
        if keep_builtins then
          Name (loc, (loc2, "option", [t']), a)
        else
          subst_type_name loc loc2 "option" [t'] a

    | Nullable (loc as loc2, t, a)
    | Name (loc, (loc2, "nullable", [t]), a) ->
        let t' = subst env t in
        if keep_builtins then
          Name (loc, (loc2, "nullable", [t']), a)
        else
          subst_type_name loc loc2 "nullable" [t'] a

    | Shared (loc as loc2, t, a)
    | Name (loc, (loc2, "shared", [t]), a) ->
        let t' = subst env t in
        if keep_builtins then
          Name (loc, (loc2, "shared", [t']), a)
        else
          subst_type_name loc loc2 "shared" [t'] a

    | Wrap (loc as loc2, t, a)
    | Name (loc, (loc2, "wrap", [t]), a) ->
        let t' = subst env t in
        if keep_builtins then
          Name (loc, (loc2, "wrap", [t']), a)
        else
          subst_type_name loc loc2 "wrap" [t'] a

    | Tvar (_, s) as x -> Option.value (List.assoc s env) ~default:x

    | Name (loc, (loc2, name, args), a) ->
        let args' = List.map (subst env) args in
        if List.for_all is_tvar args' then
          Name (loc, (loc2, name, args'), a)
        else
          subst_type_name loc loc2 name args' a

  and subst_type_name loc loc2 name args an =
    (*
      Reduce the number of arguments of the type by creating
      an intermediate type, e.g.:
      ('x, int) t   becomes   'x "('a, int) t"
      and the following type is created:
      type 'a "('a, int) t" = ...


      input:
      - type name with arguments expressed in the environment where the
        type expression was extracted
      - annotations for that type expression

      output:
      - equivalent type expression valid in the same environment

      side-effects:
      - creation of a type definition for the output type expression.
    *)
    let new_name, new_args, new_env = make_type_name loc2 name args an in
    let n_param = List.length new_env in
    if not (Hashtbl.mem tbl new_name) then
      create_type_def loc name args new_env new_name n_param an;
    (*
      Return new type name with new arguments.
      The annotation has been transferred to the right-hand
      expression of the new type definition.
    *)
    Name (loc, (loc2, new_name, new_args), [])


  and create_type_def loc orig_name orig_args env name n_param an0 =
    (*
      Create the type definition needed to support the new type name
      [name] expecting [n_param] parameters.

      The right-hand side of the definition is obtained by looking up the
      definition for type [orig_name]:

      type ('a, 'b) t = [ Foo of 'a | Bar of 'b ]
      type 'c it = (int, 'c) t

      output:

      type ('a, 'b) t = [ Foo of 'a | Bar of 'b ]
      type 'a _1 = [ Foo of int | Bar of 'a ]  (* new name = _1, n_param = 1 *)
      type 'c it = 'c _1
    *)
    incr seqnum;
    let i = !seqnum in

    (* Create entry in the table, indicating that we are working on it *)
    Hashtbl.add tbl name (i, n_param, None, None);

    Hashtbl.add original_types name (orig_name, List.length orig_args);

    (* Get the original type definition *)
    let (_, _, orig_opt_td, _) =
      try Hashtbl.find tbl orig_name
      with Not_found ->
        assert false (* All original type definitions must
                        have been put in the table initially *)
    in
    let ((_, _, _) as td') =
      match orig_opt_td with
        None ->
          assert false (* Original type definitions must all exist,
                          even for predefined types and abstract types. *)
      | Some (_, (k, pl, def_an), t) ->
          assert (k = orig_name);
          let new_params = vars_of_int n_param in
          let t = add_annot t an0 in
          let t = set_type_expr_loc loc t in

            (*
               First replace the type expression being specialized
               (orig_name, orig_args) by the equivalent expression
               in the new environment (variables 'a, 'b, ...)

               (int, 'b) foo  -->  (int, 'a) foo
            *)
          let args = List.map (subst env) orig_args in

            (*
              Then expand the expression into its definition,
              replacing each variable by the actual argument:

              original definition:

              type ('x, 'y) foo = [ Foo of 'x | Bar of 'y ]


              new definition:

              type 'a _1 = ...

              right-hand expression becomes:

              [ Foo of int | Bar of 'a ]

              using the following environment:

              'x -> int
              'y -> 'a

            *)
          let env = List.map2 (fun var value -> (var, value)) pl args in

          let t' =
            if is_abstract t then
                (*
                  e.g.: type 'a t = abstract
                  use 'a t and preserve "t"
                *)
              let t =
                expr_of_lvalue loc orig_name pl
                  (Ast.annot_of_type_expr t)
              in
              subst_only_args env t
            else
              let t' = subst env t in
              if is_cyclic name t' then
                subst_only_args env t
              else
                t'
          in
          (loc, (name, new_params, def_an), t')
    in
    Hashtbl.replace tbl name (i, n_param, None, Some td')

  and subst_field env = function
    | `Field (loc, k, t) -> `Field (loc, k, subst env t)
    | `Inherit (loc, t) -> `Inherit (loc, subst env t)

  and subst_variant env = function
      Variant (loc, k, opt_t) as x ->
        (match opt_t with
           None -> x
         | Some t -> Variant (loc, k, Some (subst env t))
        )
    | Inherit (loc, t) -> Inherit (loc, subst env t)

  and subst_only_args env = function
      List (loc, t, a)
    | Name (loc, (_, "list", [t]), a) ->
        List (loc, subst env t, a)

    | Option (loc, t, a)
    | Name (loc, (_, "option", [t]), a) ->
        Option (loc, subst env t, a)

    | Nullable (loc, t, a)
    | Name (loc, (_, "nullable", [t]), a) ->
        Nullable (loc, subst env t, a)

    | Shared (loc, t, a)
    | Name (loc, (_, "shared", [t]), a) ->
        Shared (loc, subst env t, a)

    | Wrap (loc, t, a)
    | Name (loc, (_, "wrap", [t]), a) ->
        Wrap (loc, subst env t, a)

    | Name (loc, (loc2, name, args), an) ->
        Name (loc, (loc2, name, List.map (subst env) args), an)

    | _ -> assert false
  in

  (* first pass: add all original definitions to the table *)
  List.iter (
    fun ((_, (k, pl, _), _) as td) ->
      incr seqnum;
      let i = !seqnum in
      let n = List.length pl in
      Hashtbl.add tbl k (i, n, Some td, None)
  ) l;

  (* second pass: perform substitutions and insert new definitions *)
  List.iter (
    fun ((loc, (k, pl, a), t) as td) ->
      if pl = [] || keep_poly then (
        let (i, n, _, _) =
          try Hashtbl.find tbl k
          with Not_found -> assert false
        in
        let t' = subst [] t in
        let td' = (loc, (k, pl, a), t') in
        Hashtbl.replace tbl k (i, n, Some td, Some td')
      )
  ) l;

  (* third pass: collect all parameterless definitions *)
  let l =
    Hashtbl.fold (
      fun _ (i, n, _, opt_td') l ->
        match opt_td' with
          None -> l
        | Some td' ->
            if n = 0 || keep_poly then (i, td') :: l
            else l
    ) tbl []
  in
  let l = List.sort (fun (i, _) (j, _) -> compare i j) l in
  (List.map snd l, original_types)



let replace_type_names (subst : string -> string) (t : type_expr) : type_expr =
  let rec replace (t : type_expr) : type_expr =
    match t with
      Sum (loc, vl, a) -> Sum (loc, List.map replace_variant vl, a)
    | Record (loc, fl, a) -> Record (loc, List.map replace_field fl, a)
    | Tuple (loc, tl, a) ->
        Tuple (loc, List.map (fun (loc, x, a) -> loc, replace x, a) tl, a)
    | List (loc, t, a) -> List (loc, replace t, a)
    | Option (loc, t, a) -> Option (loc, replace t, a)
    | Nullable (loc, t, a) -> Nullable (loc, replace t, a)
    | Shared (loc, t, a) -> Shared (loc, replace t, a)
    | Wrap (loc, t, a) -> Wrap (loc, replace t, a)
    | Tvar (_, _) as t -> t
    | Name (loc, (loc2, k, l), a) ->
        Name (loc, (loc2, subst k, List.map replace l), a)

  and replace_field = function
      `Field (loc, k, t) -> `Field (loc, k, replace t)
    | `Inherit (loc, t) -> `Inherit (loc, replace t)

  and replace_variant = function
      Variant (loc, k, opt_t) as x ->
        (match opt_t with
           None -> x
         | Some t -> Variant (loc, k, Some (replace t))
        )
    | Inherit (loc, t) -> Inherit (loc, replace t)
  in
  replace t

(* Prefer MD5 over Hashtbl.hash because it won't change. *)
let hex_hash_string s =
  Digest.string s
  |> Digest.to_hex
  |> fun s -> String.sub s 0 7

(*
   Remove punctuation and non-ascii symbols from a name and replace them
   with underscores. The original case is preserved.
   The result is of the form [A-Za-z][A-Za-z0-9_]+.

   Example:

     "@((@(bool wrap_) * type_) option)" -> "bool_wrap_type_option"

   The original name can contain ATD annotations. It would be nice to
   ignore them but it's not clear how. Ideally we want this:

        "@(string list <ocaml valid='fun l -> true'>)"
     -> "string_list"

   But we get this:

     "true_6a9832c"

   Since it's misleading, when we see a suspected annotation, we
   use just "x" followed by a hash of the original contents.
   The hash has the property of making the name stable i.e. it is unlikely
   to change when unrelated type definitions change.

     "x_6a9832c"
*)
let suggest_good_name =
  let rex = Re.Pcre.regexp "([^a-zA-Z0-9])+" in
  fun name_with_punct ->
    let components =
      Re.Pcre.split ~rex name_with_punct
      |> List.filter ((<>) "")
    in
    let full_name = String.concat "_" components in
    let hash = hex_hash_string full_name in
    let name =
      if String.contains name_with_punct '<' then
        (* Avoid misleading names to due ATD annotations embedded in the
           type name. See earlier comments. *)
        "x_" ^ hash
      else if List.length components > 5 then
        (* Avoid insanely long type names *)
        match List.rev components with
        | [] -> assert false
        | [_] -> assert false
        | main :: rev_details ->
            (* Place the hash after the main name rather than before because
               it often starts with a digit which would have to be prefixed
               by an extra letter so it can be a valid name. *)
            main ^ "_" ^ hash
      else
        (* A full name that's not too long and makes sense such as
           'int_bracket' for the type 'int bracket'. *)
        String.concat "_" components
    in
    (* Ensure the name starts with a letter. *)
    if name = "" then "x"
    else
      match name.[0] with
      | 'a'..'z' | 'A'..'Z' -> name
      | _ (* digit *) -> "x" ^ name

let standardize_type_names
    ~prefix ~original_types (defs : type_def list) : type_def list =
  let reserved_identifiers =
    List.map (fun (k, _, _) -> k) Predef.list
    @ List.filter_map (fun (_, (k, _, _), _) ->
      if is_special k then None
      else Some k
    ) defs
  in
  let name_registry =
    Unique_name.init
      ~reserved_identifiers
      ~reserved_prefixes:[]
      ~safe_prefix:""
  in
  (* The value v of the type is for extracting a good, short fallback name *)
  let new_id id =
    (* The leading underscore is used to identify generated type names
       in other places. *)
    Unique_name.translate
      name_registry
      ~preferred_translation:(prefix ^ suggest_good_name id)
      id
  in
  let replace_name k =
    if is_special k then
      let k' = new_id k in
      begin try
          let orig_info = Hashtbl.find original_types k in
          Hashtbl.remove original_types k;
          Hashtbl.add original_types k' orig_info
        with Not_found ->
          assert false (* Must have been added during expand *)
      end;
      k'
    else
      k
  in
  let defs =
    List.map (
      fun (loc, (k, pl, a), t) ->
        let k' = replace_name k in
        (loc, (k', pl, a), t)
    ) defs
  in
  let subst id =
    match Unique_name.translate_only name_registry id with
    | Some x -> x
    | None ->
        (* must have been defined as abstract *)
        id
  in
  List.map (fun (loc, x, t) -> (loc, x, replace_type_names subst t)) defs


let expand_module_body
    ?(prefix = "_") ?keep_builtins ?keep_poly ?(debug = false) l =
  let td_list = List.map (function (Type td) -> td) l in
  let (td_list, original_types) = expand ?keep_builtins ?keep_poly td_list in
  let td_list =
    if debug then td_list
    else standardize_type_names ~prefix ~original_types td_list
  in
  (List.map (fun td -> (Type td)) td_list, original_types)
OCaml

Innovation. Community. Security.