package ppx_custom_printf

  1. Overview
  2. Docs
Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source

Source file ppx_custom_printf.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
open Base
open Ppxlib
open Ast_builder.Default

(* returns the index of the conversion spec (unless the end of string is reached) *)
let rec skip_over_format_flags fmt i =
  if i >= String.length fmt
  then `Eoi
  else (
    match fmt.[i] with
    | '*' | '#' | '-' | ' ' | '+' | '_' | '0' .. '9' | '.' ->
      skip_over_format_flags fmt (i + 1)
    | _ -> `Ok i)
;;

(* doesn't check to make sure the format string is well-formed *)
(* Formats with subformats are skipped for the following reasons:

   One is that they are hard to understand and not often used.

   Another is that subformats like "%(%{Module}%)" won't work, since it
   is impossible to produce a format of type [(Module.t -> 'a,...) format].
*)
let has_subformats (fmt : string) =
  let lim = String.length fmt - 1 in
  let rec loop i =
    if i > lim
    then false
    else if Char.equal fmt.[i] '%'
    then (
      match skip_over_format_flags fmt (i + 1) with
      | `Eoi -> false
      | `Ok i ->
        (match fmt.[i] with
         | '(' | ')' | '}' -> true
         | _ -> loop (i + 1)))
    else loop (i + 1)
  in
  loop 0
;;

(* returns a list of strings where even indexed elements are parts of the format string
   that the preprocessor won't touch and odd indexed elements are the contents of %{...}
   specifications. *)
let explode ~loc (s : string) =
  let len = String.length s in
  (* for cases where we can't parse the string with custom format specifiers, consider
     the string as a regular format string *)
  let as_normal_format_string = [ s ] in
  if has_subformats s
  then as_normal_format_string
  else (
    let sub from to_ = String.sub s ~pos:from ~len:(to_ - from) in
    let rec loop acc from to_ =
      assert (List.length acc % 2 = 0);
      if to_ >= len
      then List.rev (if from >= len then acc else sub from len :: acc)
      else if Char.( <> ) s.[to_] '%'
      then loop acc from (to_ + 1)
      else (
        match skip_over_format_flags s (to_ + 1) with
        | `Eoi -> as_normal_format_string
        | `Ok i ->
          (match s.[i] with
           | '[' ->
             (* Scan char sets are not allowed by printf-like functions. So we might as
                well disallow them at compile-time so that we can reuse them as magic
                format strings in this implementation. *)
             Location.raise_errorf
               ~loc
               "ppx_custom_printf: scan char sets are not allowed in custom format \
                strings"
           | '{' ->
             if to_ + 1 <> i
             then
               Location.raise_errorf
                 ~loc
                 "ppx_custom_printf: unexpected format flags before %%{} specification \
                  in %S"
                 s;
             (match String.index_from s (to_ + 2) '}' with
              | None -> as_normal_format_string
              | Some i ->
                let l = sub (to_ + 2) i :: sub from to_ :: acc in
                loop l (i + 1) (i + 1))
           | _ -> loop acc from (i + 1)))
      (* skip the conversion spec *)
    in
    loop [] 0 0)
;;

let processed_format_string ~exploded_format_string =
  let l =
    let rec loop i l =
      match l with
      | s1 :: _s2 :: l -> s1 :: Printf.sprintf "%%%d[.]" i :: loop (i + 1) l
      | [ s1 ] -> [ s1 ]
      | [] -> []
    in
    loop 0 exploded_format_string
  in
  String.concat l ~sep:""
;;

let rec evens = function
  | ([] | [ _ ]) as l -> l
  | x :: _ :: l -> x :: evens l
;;

let odds = function
  | [] -> []
  | _ :: l -> evens l
;;

(* Returns a pair of:

   - a format string, which is [s] where all custom format specifications have been
     replaced by ["%" ^ string_of_int index ^ "[.]"] where [index] is the number of
     the custom format specification, starting from 0. This string can be passed directly
     to [CamlinternalFormat.fmt_ebb_of_string]
   - an array of custom format specifications, in the order they appear in the original
     string
*)
let extract_custom_format_specifications ~loc s =
  let exploded_format_string = explode ~loc s in
  let processed = processed_format_string ~exploded_format_string in
  let custom_specs = Array.of_list (odds exploded_format_string) in
  processed, custom_specs
;;

let gen_symbol = gen_symbol ~prefix:"_custom_printf"

let is_space = function
  | ' ' | '\t' | '\n' | '\r' -> true
  | _ -> false
;;

let strip s =
  let a = ref 0 in
  let b = ref (String.length s - 1) in
  while !a <= !b && is_space s.[!a] do
    Int.incr a
  done;
  while !a <= !b && is_space s.[!b] do
    Int.decr b
  done;
  if !a > !b then "" else String.sub s ~pos:!a ~len:(!b - !a + 1)
;;

let string_to_expr ~loc s =
  let sexp_converter_opt =
    match String.lsplit2 s ~on:':' with
    | None -> None
    | Some ("sexp", colon_suffix) ->
      Some ([%expr Ppx_sexp_conv_lib.Sexp.to_string_hum], colon_suffix)
    | Some (colon_prefix, colon_suffix) ->
      (match String.chop_prefix colon_prefix ~prefix:"sexp#" with
       | None -> None
       | Some hash_suffix ->
         Some
           ( pexp_ident
               ~loc
               (Located.mk
                  ~loc
                  (Longident.parse ("Ppx_sexp_conv_lib.Sexp.to_string_" ^ hash_suffix)))
           , colon_suffix ))
  in
  match sexp_converter_opt with
  | Some (sexp_converter, unparsed_type) ->
    let lexbuf = Lexing.from_string unparsed_type in
    (* ~loc is the position of the string, not the position of the %{bla} group we're
       looking at. The format strings don't contain location information, so we can't
       actually find the proper positions. *)
    lexbuf.lex_abs_pos <- loc.loc_start.pos_cnum;
    lexbuf.lex_curr_p <- loc.loc_start;
    let ty = Parse.core_type lexbuf in
    let e = Ppx_sexp_conv_expander.Sexp_of.core_type ty in
    let arg = gen_symbol () in
    pexp_fun
      ~loc
      Nolabel
      None
      (pvar ~loc arg)
      (eapply ~loc sexp_converter [ eapply ~loc e [ evar ~loc arg ] ])
  | None ->
    let fail loc =
      Location.raise_errorf
        ~loc
        "ppx_custom_printf: string %S should be of the form <Module>, \
         <Module>.<identifier>, <Module>#identifier, sexp:<type>, or sexp#mach:<type>"
        s
    in
    let s, has_hash_suffix, to_string =
      match String.lsplit2 s ~on:'#' with
      | None -> s, false, "to_string"
      | Some (s, hash_suffix) -> s, true, "to_string_" ^ hash_suffix
    in
    let to_string_id : Longident.t =
      let s = strip s in
      match s with
      | "" -> Lident to_string
      | _ ->
        (match Longident.parse s with
         | (Lident n | Ldot (_, n)) as id ->
           if String.( <> ) n "" && Char.equal (Char.uppercase n.[0]) n.[0]
           then Longident.Ldot (id, to_string)
           else if not has_hash_suffix
           then id
           else fail loc
         | _ -> fail loc)
    in
    let func = pexp_ident ~loc (Located.mk ~loc to_string_id) in
    (* Eta-expand as the to_string function might take optional arguments *)
    let arg = gen_symbol () in
    pexp_fun ~loc Nolabel None (pvar ~loc arg) (eapply ~loc func [ evar ~loc arg ])
;;

class lifter ~loc ~custom_specs =
  object (self)
    inherit [expression] Format_lifter.lift as super
    inherit Ppxlib_metaquot_lifters.expression_lifters loc

    method! fmt
      : type f0
      f1
      f2
      f3
      f4
      f5.  (f0 -> expression)
          -> (f1 -> expression)
          -> (f2 -> expression)
          -> (f3 -> expression)
          -> (f4 -> expression)
          -> (f5 -> expression)
          -> (f0, f1, f2, f3, f4, f5) CamlinternalFormatBasics.fmt
          -> expression =
      fun f0 f1 f2 f3 f4 f5 fmt ->
        let open CamlinternalFormatBasics in
        match fmt with
        (* Recognize the special form "%index[...whatever...]" *)
        | Scan_char_set (Some idx, _, fmt)
        (* [custom_specs] is empty if [explode] couldn't parse the string. In this case we
           can have some scar char sets left. *)
          when idx >= 0 && idx < Array.length custom_specs ->
          let rest = self#fmt (fun _ -> assert false) f1 f2 f3 f4 f5 fmt in
          let func = string_to_expr ~loc custom_specs.(idx) in
          [%expr Custom (Custom_succ Custom_zero, (fun () -> [%e func]), [%e rest])]
        | _ -> super#fmt f0 f1 f2 f3 f4 f5 fmt
  end

let expand_format_string ~loc fmt_string =
  let processed_fmt_string, custom_specs =
    extract_custom_format_specifications ~loc fmt_string
  in
  let (CamlinternalFormat.Fmt_EBB fmt) =
    try CamlinternalFormat.fmt_ebb_of_string processed_fmt_string with
    | e ->
      Location.raise_errorf
        ~loc
        "%s"
        (match e with
         (* [fmt_ebb_of_string] normally raises [Failure] on invalid input *)
         | Failure msg -> msg
         | e -> Exn.to_string e)
  in
  let lifter = new lifter ~loc ~custom_specs in
  let format6 = CamlinternalFormatBasics.Format (fmt, fmt_string) in
  let phantom _ = assert false in
  let e = lifter#format6 phantom phantom phantom phantom phantom phantom format6 in
  [%expr ([%e e] : (_, _, _, _, _, _) CamlinternalFormatBasics.format6)]
;;

let expand e =
  match e.pexp_desc with
  | Pexp_apply
      ( { pexp_attributes = ident_attrs; _ }
      , [ ( Nolabel
          , { pexp_desc = Pexp_constant (Pconst_string (str, _, _))
            ; pexp_loc = loc
            ; pexp_loc_stack = _
            ; pexp_attributes = str_attrs
            } )
        ] ) ->
    assert_no_attributes ident_attrs;
    assert_no_attributes str_attrs;
    let e' = expand_format_string ~loc str in
    Some { e' with pexp_attributes = Merlin_helpers.hide_attribute :: e.pexp_attributes }
  | _ -> None
;;

let () =
  Driver.register_transformation
    "custom_printf"
    ~rules:[ Context_free.Rule.special_function "!" expand ]
;;
OCaml

Innovation. Community. Security.