package ppx_custom_printf

  1. Overview
  2. Docs
Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source

Source file ppx_custom_printf.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
open Base
open Ppxlib
open Ast_builder.Default

(* returns the index of the conversion spec (unless the end of string is reached) *)
let rec skip_over_format_flags fmt i =
  if i >= String.length fmt
  then `Eoi
  else match fmt.[i] with
  | '*' | '#' | '-' | ' ' | '+' | '_' | '0'..'9' | '.' ->
    skip_over_format_flags fmt (i + 1)
  | _ -> `Ok i

(* doesn't check to make sure the format string is well-formed *)
(* Formats with subformats are skipped for the following reasons:

   One is that they are hard to understand and not often used.

   Another is that subformats like "%(%{Module}%)" won't work, since it
   is impossible to produce a format of type [(Module.t -> 'a,...) format].
*)
let has_subformats (fmt:string) =
  let lim = String.length fmt - 1 in
  let rec loop i =
    if i > lim
    then false
    else
      if Char.equal fmt.[i] '%' then
        match skip_over_format_flags fmt (i + 1) with
        | `Eoi -> false
        | `Ok i ->
          match fmt.[i] with
          | '(' | ')' | '}' -> true
          | _ -> loop (i + 1)
      else loop (i + 1)
  in
  loop 0

(* returns a list of strings where even indexed elements are parts of the format string
   that the preprocessor won't touch and odd indexed elements are the contents of %{...}
   specifications. *)
let explode ~loc (s:string) =
  let len = String.length s in
  (* for cases where we can't parse the string with custom format specifiers, consider
     the string as a regular format string *)
  let as_normal_format_string = [s] in
  if has_subformats s
  then as_normal_format_string
  else
    let sub from to_ = String.sub s ~pos:from ~len:(to_ - from) in
    let rec loop acc from to_ =
      assert (List.length acc % 2 = 0);
      if to_ >= len
      then List.rev (
        if from >= len
        then acc
        else sub from len :: acc
      )
      else
      if Char.(<>) s.[to_] '%'
      then loop acc from (to_ + 1)
      else
        match skip_over_format_flags s (to_ + 1) with
        | `Eoi -> as_normal_format_string
        | `Ok i ->
          match s.[i] with
          | '[' ->
            (* Scan char sets are not allowed by printf-like functions. So we might as
               well disallow them at compile-time so that we can reuse them as magic
               format strings in this implementation. *)
            Location.raise_errorf ~loc
              "ppx_custom_printf: scan char sets are not allowed in \
               custom format strings"
          | '{' ->
            if to_ + 1 <> i then
              Location.raise_errorf ~loc
                "ppx_custom_printf: unexpected format flags before \
                 %%{} specification in %S" s;
            begin match String.index_from s (to_ + 2) '}' with
            | None -> as_normal_format_string
            | Some i ->
              let l =
                sub (to_ + 2) i
                :: sub from to_
                :: acc
              in
              loop l (i + 1) (i + 1)
            end
          | _ ->
            loop acc from (i + 1) (* skip the conversion spec *)
    in
    loop [] 0 0

let processed_format_string ~exploded_format_string =
  let l =
    let rec loop i l =
      match l with
      | s1 :: _s2 :: l -> s1 :: Printf.sprintf "%%%d[.]" i :: loop (i + 1) l
      | [s1] -> [s1]
      | [] -> []
    in
    loop 0 exploded_format_string
  in
  String.concat l ~sep:""

let rec evens = function
  | [] | [_] as l -> l
  | x :: _ :: l -> x :: evens l

let odds = function
  | [] -> []
  | _ :: l -> evens l

(* Returns a pair of:

   - a format string, which is [s] where all custom format specifications have been
     replaced by ["%" ^ string_of_int index ^ "[.]"] where [index] is the number of
     the custom format specification, starting from 0. This string can be passed directly
     to [CamlinternalFormat.fmt_ebb_of_string]
   - an array of custom format specifications, in the order they appear in the original
     string
*)
let extract_custom_format_specifications ~loc s =
  let exploded_format_string = explode ~loc s in
  let processed = processed_format_string ~exploded_format_string in
  let custom_specs = Array.of_list (odds exploded_format_string) in
  (processed, custom_specs)
;;

let gen_symbol = gen_symbol ~prefix:"_custom_printf"

let is_space = function
  | ' ' | '\t' | '\n' | '\r' -> true
  | _ -> false
;;

let strip s =
  let a = ref 0 in
  let b = ref (String.length s - 1) in
  while !a <= !b && is_space s.[!a] do Int.incr a done;
  while !a <= !b && is_space s.[!b] do Int.decr b done;
  if !a > !b then "" else String.sub s ~pos:!a ~len:(!b - !a + 1)
;;

let string_to_expr ~loc s =
  let sexp_converter_opt =
    match String.lsplit2 s ~on:':' with
    | None -> None
    | Some ("sexp", colon_suffix) ->
      Some ([%expr  Ppx_sexp_conv_lib.Sexp.to_string_hum ], colon_suffix)
    | Some (colon_prefix, colon_suffix) ->
      match String.chop_prefix colon_prefix ~prefix:"sexp#" with
      | None -> None
      | Some hash_suffix ->
        Some (pexp_ident ~loc
                (Located.mk ~loc
                   (Longident.parse ("Ppx_sexp_conv_lib.Sexp.to_string_" ^ hash_suffix))),
              colon_suffix)
  in
  match sexp_converter_opt with
  | Some (sexp_converter, unparsed_type) ->
    let lexbuf = Lexing.from_string unparsed_type in
    (* ~loc is the position of the string, not the position of the %{bla} group we're
       looking at. The format strings don't contain location information, so we can't
       actually find the proper positions. *)
    lexbuf.lex_abs_pos <- loc.loc_start.pos_cnum;
    lexbuf.lex_curr_p <- loc.loc_start;
    let ty = Parse.core_type lexbuf in
    let e = Ppx_sexp_conv_expander.Sexp_of.core_type ty in
    let arg = gen_symbol () in
    pexp_fun ~loc Nolabel None (pvar ~loc arg)
      (eapply ~loc sexp_converter [eapply ~loc e [evar ~loc arg]])
  | None ->
    let fail loc =
      Location.raise_errorf ~loc
        "ppx_custom_printf: string %S should be of the form <Module>, \
         <Module>.<identifier>, <Module>#identifier, sexp:<type>, or sexp#mach:<type>"
        s
    in
    let s, has_hash_suffix, to_string =
      match String.lsplit2 s ~on:'#' with
      | None -> s, false, "to_string"
      | Some (s, hash_suffix) -> s, true, "to_string_" ^ hash_suffix
    in
    let to_string_id : Longident.t =
      let s = strip s in
      match s with
      | "" -> Lident to_string
      | _ ->
        match Longident.parse s with
        | Lident n | Ldot (_, n) as id ->
          if String.(<>) n "" && Char.equal (Char.uppercase n.[0]) n.[0] then
            Longident.Ldot (id, to_string)
          else if not has_hash_suffix then
            id
          else
            fail loc
        | _ -> fail loc
    in
    let func = pexp_ident ~loc (Located.mk ~loc to_string_id) in
    (* Eta-expand as the to_string function might take optional arguments *)
    let arg = gen_symbol () in
    pexp_fun ~loc Nolabel None (pvar ~loc arg) (eapply ~loc func [evar ~loc arg])

class lifter ~loc ~custom_specs = object(self)
  inherit [expression] Format_lifter.lift as super
  inherit Ppxlib_metaquot_lifters.expression_lifters loc

  method! fmt
    : type f0 f1 f2 f3 f4 f5. (f0 -> expression)
      -> (f1 -> expression)
      -> (f2 -> expression)
      -> (f3 -> expression)
      -> (f4 -> expression)
      -> (f5 -> expression)
      -> (f0, f1, f2, f3, f4, f5) CamlinternalFormatBasics.fmt
      -> expression
    = fun f0 f1 f2 f3 f4 f5 fmt ->
      let open CamlinternalFormatBasics in
      match fmt with
      (* Recognize the special form "%index[...whatever...]" *)
      | Scan_char_set (Some idx, _, fmt)
        (* [custom_specs] is empty if [explode] couldn't parse the string. In this case we
           can have some scar char sets left. *)
        when idx >= 0 && idx < Array.length custom_specs ->
        let rest =
          self#fmt (fun _ -> assert false) f1 f2 f3 f4 f5
            fmt
        in
        let func = string_to_expr ~loc custom_specs.(idx) in
        [%expr
          Custom(Custom_succ
                   Custom_zero,
                 (fun () -> [%e func]),
                 [%e rest])
        ]
      | _ ->
        super#fmt f0 f1 f2 f3 f4 f5 fmt
end

let expand_format_string ~loc fmt_string =
  let processed_fmt_string, custom_specs =
    extract_custom_format_specifications ~loc fmt_string
  in
  let (CamlinternalFormat.Fmt_EBB fmt) =
    try
      CamlinternalFormat.fmt_ebb_of_string processed_fmt_string
    with e ->
      Location.raise_errorf ~loc "%s"
        (match e with
         (* [fmt_ebb_of_string] normally raises [Failure] on invalid input *)
         | Failure msg -> msg
         | e -> Exn.to_string e)
  in
  let lifter = new lifter ~loc ~custom_specs in
  let format6 = CamlinternalFormatBasics.Format (fmt, fmt_string) in
  let phantom _ = assert false in
  let e =
    lifter#format6
      phantom phantom phantom phantom phantom phantom format6
  in
  [%expr ([%e e] : (_, _, _, _, _, _) CamlinternalFormatBasics.format6)]

let expand e =
  match e.pexp_desc with
  | Pexp_apply ({ pexp_attributes = ident_attrs; _ },
                [ (Nolabel, { pexp_desc = Pexp_constant (Pconst_string (str, _))
                            ; pexp_loc = loc; pexp_loc_stack = _
                            ; pexp_attributes = str_attrs }) ]) ->
    assert_no_attributes ident_attrs;
    assert_no_attributes str_attrs;
    let e' = expand_format_string ~loc str in
    Some { e' with pexp_attributes = Merlin_helpers.hide_attribute :: e.pexp_attributes }
  | _ -> None
;;

let () =
  Driver.register_transformation "custom_printf"
    ~rules:[ Context_free.Rule.special_function "!" expand ]
;;
OCaml

Innovation. Community. Security.