package mdx

  1. Overview
  2. Docs

Source file mli_parser.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
module Code_block = struct
  type metadata = { language_tag : string; labels : string option }

  type t = {
    metadata : metadata option;
    content : Location.t; (* Location of the content *)
    code_block : Location.t; (* Location of the enclosing code block *)
  }
end

(* Parse and extract code block metadata from an odoc formatted docstring.

   Code blocks are the only thing we're interested in. This function parses
   the given text and extracts the metadata and enough location information
   from the code blocks be able to String.sub them out of the original text.

   [location] is the location of this docstring within the original file
   (ie, the location of the contents of the documentation comment). This is
   required so we can splice out the code blocks from the original file.

   The results are prepended in reverse order onto [acc]. *)
let extract_code_block_info acc ~(location : Lexing.position) ~docstring =
  let module O = Odoc_parser in
  let parsed = O.parse_comment ~location ~text:docstring in

  (* If odoc-parser produced any warnings, we raise them as errors here *)
  List.iter
    (fun error -> failwith (O.Warning.to_string error))
    (O.warnings parsed);

  (* Extract the useful info from what odoc has given us.

     Note, we don't use the contents of the code block that odoc has handed us
     as that has been stripped and we need all the relevant whitespace.
     Fortunately the location info give us enough info to be able to extract
     the code from the original text, whitespace and all.
  *)
  let handle_code_block : O.Loc.span -> _ -> Code_block.t =
    let convert_loc (sp : O.Loc.span) =
      Location.
        {
          loc_start = O.position_of_point parsed sp.start;
          loc_end = O.position_of_point parsed sp.end_;
          loc_ghost = false;
        }
    in
    fun location (metadata, { O.Loc.location = span; _ }) ->
      let metadata =
        Option.map
          (fun (lang, labels) ->
            let language_tag = O.Loc.value lang in
            let labels = Option.map O.Loc.value labels in
            Code_block.{ language_tag; labels })
          metadata
      in
      let content = convert_loc span in
      let code_block = convert_loc location in
      { metadata; content; code_block }
  in

  (* Fold over the results from odoc-parser, recurse where necessary
     and extract the code block metadata *)
  let rec fold_fn acc (elt : O.Ast.block_element O.Loc.with_location) =
    match elt with
    | { O.Loc.value = `Code_block c; location } ->
        handle_code_block location c :: acc
    | { O.Loc.value = `List (_, _, lists); _ } ->
        List.fold_left (List.fold_left fold_fn) acc (lists :> O.Ast.t list)
    | { O.Loc.value = `Tag tag; _ } -> (
        match tag with
        | `Deprecated blocks
        | `Param (_, blocks)
        | `Raise (_, blocks)
        | `Return blocks
        | `See (_, _, blocks)
        | `Before (_, blocks) ->
            List.fold_left fold_fn acc (blocks :> O.Ast.t)
        | _ -> acc)
    | _ -> acc
  in

  List.fold_left fold_fn acc (O.ast parsed)

(* This function handles string containing ocaml code. It parses it as ocaml
   via compiler-libs, then for each odoc-formatted comment it then parses
   that via odoc-parser. The end result is a list of metadata about the code
   blocks within the comments. The result is given as an in-order list of
   [Code_block.t] values. *)
let docstring_code_blocks str =
  let initial_handle_docstrings = !Lexer.handle_docstrings in
  Fun.protect
    ~finally:(fun () -> Lexer.handle_docstrings := initial_handle_docstrings)
    (fun () ->
      Lexer.handle_docstrings := true;
      Lexer.init ();
      let lexbuf = Lexing.from_string str in
      let rec loop list =
        match Lexer.token_with_comments lexbuf with
        | Parser.EOF -> list
        | Parser.DOCSTRING docstring ->
            let body = Docstrings.docstring_body docstring in
            let loc = Docstrings.docstring_loc docstring in

            (* odoc-parser adjusts for the initial [** *)
            let adjustment = 3 (* String.length "(**" *) in

            let location =
              {
                loc.loc_start with
                pos_cnum = loc.loc_start.pos_cnum + adjustment;
              }
            in
            loop (extract_code_block_info list ~location ~docstring:body)
        | _ -> loop list
      in
      loop [] |> List.rev)

(* Given code block metadata and the original file, this function splices the
   contents of the code block from the original text and creates an Mdx
   Block.t, or reports the error (e.g., from invalid tags) *)
let make_block code_block file_contents =
  let handle_header = function
    | Some Code_block.{ language_tag; labels } ->
        let open Util.Result.Infix in
        let header = Block.Header.of_string language_tag in
        let* labels =
          match labels with
          | None -> Ok []
          | Some labels -> (
              match Label.of_string (String.trim labels) with
              | Ok labels -> Ok labels
              | Error msgs ->
                  Error (List.hd msgs) (* TODO: Report precise location *))
        in
        let language_label = Label.Language_tag language_tag in
        Ok (header, language_label :: labels)
    | None ->
        (* If not specified, blocks are run as ocaml blocks *)
        Ok (Some OCaml, [])
  in
  match handle_header code_block.Code_block.metadata with
  | Error _ as e -> e
  | Ok (header, labels) ->
      let slice (loc : Location.t) =
        let start = loc.loc_start.pos_cnum in
        let len = loc.loc_end.pos_cnum - start in
        String.sub file_contents start len
      in
      let contents = slice code_block.content |> String.split_on_char '\n' in
      Block.mk ~loc:code_block.code_block ~section:None ~labels ~header
        ~contents ~legacy_labels:false ~errors:[]

(* Given the locations of the code blocks within [file_contents], then slice it up into
   [Text] and [Block] parts by using the starts and ends of those blocks as
   boundaries. *)
let extract_blocks code_blocks file_contents =
  let cursor, tokens =
    List.fold_left
      (fun (cursor, code_blocks) (code_block : Code_block.t) ->
        let pre_text =
          Document.Text
            (String.sub file_contents cursor
               (code_block.code_block.loc_start.pos_cnum - cursor))
        in
        let block =
          match make_block code_block file_contents with
          | Ok block -> Document.Block block
          | Error (`Msg msg) -> Fmt.failwith "Error creating block: %s" msg
        in
        (* append them in reverse order, since this is a fold_left *)
        let code_blocks = block :: pre_text :: code_blocks in
        (code_block.code_block.loc_end.pos_cnum, code_blocks))
      (0, []) code_blocks
  in
  let tokens = List.rev tokens in
  if cursor < String.length file_contents then
    let remainder =
      String.sub file_contents cursor (String.length file_contents - cursor)
    in
    if not (String.equal remainder "") then tokens @ [ Text remainder ]
    else tokens
  else tokens

let parse_mli file_contents =
  try
    let code_blocks = docstring_code_blocks file_contents in
    Ok (extract_blocks code_blocks file_contents)
  with exn -> Error [ `Msg (Printexc.to_string exn) ]

let parse_mld ?(filename = "_none_") file_contents =
  let location =
    Lexing.{ pos_bol = 0; pos_lnum = 1; pos_cnum = 0; pos_fname = filename }
  in
  let code_blocks =
    extract_code_block_info [] ~location ~docstring:file_contents |> List.rev
  in
  Ok (extract_blocks code_blocks file_contents)
OCaml

Innovation. Community. Security.