package mdx

  1. Overview
  2. Docs

Source file mli_parser.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
module Code_block = struct
  type metadata = {
    language_tag : string Odoc_parser.Loc.with_location;
    labels : string Odoc_parser.Loc.with_location option;
  }

  type t = {
    location : Odoc_parser.Loc.span;
    metadata : metadata option;
    contents : string;
  }
end

(* odoc-parser adjusts for the initial [** *)
let docstring_start_adjustment = String.length "(**"

let drop_last lst =
  match List.rev lst with
  | [] -> None
  | last :: rev_tl -> Some (List.rev rev_tl, last)

(* drop_first_and_last [1; 2; 3; 4] = Some (1, Some ([2; 3], 4)). *)
let drop_first_and_last = function
  | [] -> None
  | first :: tl -> Some (first, drop_last tl)

let slice lines ~(start : Odoc_parser.Loc.point) ~(end_ : Odoc_parser.Loc.point)
    =
  let lines_to_include =
    Util.Array.slice lines ~from:(start.line - 1) ~to_:(end_.line - 1)
    |> Array.to_list
  in
  match drop_first_and_last lines_to_include with
  | None -> ""
  | Some (line, None) ->
      String.sub line start.column (end_.column - start.column)
  (* Imagine we were slicing the file from (Line 2, Column 3) to (Line 6, Column 7):

       0123456789
       1 ----------
       2 ---[---
       3 ---------
       4 --
       5 ----------
       6 -------]--
       7 ----------
       8 ----------

       The case below handles this multiline case, concatenating the included substrings
       from lines 2-6 ([lines_to_include]). *)
  | Some (first_line, Some (stripped, last_line)) ->
      let first_line =
        String.sub first_line start.column
          (String.length first_line - start.column)
      in
      let last_line = String.sub last_line 0 end_.column in
      String.concat "\n" ([ first_line ] @ stripped @ [ last_line ])

let is_newline c = c = '\n'

let find_nth_line s =
  let max_index = String.length s - 1 in
  let indexes_of_newlines =
    s |> String.to_seqi
    |> Seq.filter_map (fun (i, c) ->
           match is_newline c with true -> Some i | false -> None)
  in
  let indexes_of_line_starts =
    indexes_of_newlines
    |> Seq.filter_map (fun i ->
           match i < max_index with true -> Some (i + 1) | false -> None)
  in
  (* first line always starts at index zero, even if there is no preceeding newline *)
  let indexes = 0 :: List.of_seq indexes_of_line_starts in
  fun nth ->
    (* index starts at zero but lines go from 1 *)
    List.nth_opt indexes (nth - 1)

let point_to_index offset_of_line_start (point : Odoc_parser.Loc.point) =
  let offset = offset_of_line_start point.line + point.column in
  (* on line 1 odoc-parser adjusts by the start of the docstring, undo *)
  match point.line with 1 -> offset - docstring_start_adjustment | _ -> offset

let initial_line_number = 1

let dislocate_point ~(location : Lexing.position)
    (point : Odoc_parser.Loc.point) =
  { point with line = point.line - location.pos_lnum + initial_line_number }

let slice_location ~(location : Lexing.position) offset_of_line_start
    (span : Odoc_parser.Loc.span) s =
  let start = dislocate_point ~location span.start in
  let end_ = dislocate_point ~location span.end_ in
  let start_index = point_to_index offset_of_line_start start in
  let end_index = point_to_index offset_of_line_start end_ in
  let len = end_index - start_index in
  String.sub s start_index len

let extract_code_blocks ~(location : Lexing.position) ~docstring =
  let offset_in_string = find_nth_line docstring in
  let offset_of_line_start nth =
    match offset_in_string nth with
    | None -> Fmt.failwith "Attempting to reach invalid line"
    | Some offset -> offset
  in
  let rec acc blocks =
    List.map
      (fun block ->
        match Odoc_parser.Loc.value block with
        | `Code_block (metadata, { Odoc_parser.Loc.value = _; location = span })
          ->
            let metadata =
              Option.map
                (fun (language_tag, labels) ->
                  Code_block.{ language_tag; labels })
                metadata
            in
            let contents =
              slice_location ~location offset_of_line_start span docstring
            in
            [ { Code_block.location = block.location; metadata; contents } ]
        | `List (_, _, lists) -> List.map acc lists |> List.concat
        | _ -> [])
      blocks
    |> List.concat
  in
  let parsed = Odoc_parser.parse_comment ~location ~text:docstring in
  List.iter
    (fun error -> failwith (Odoc_parser.Warning.to_string error))
    (Odoc_parser.warnings parsed);
  List.map
    (fun element ->
      match element with
      | { Odoc_parser.Loc.value = #Odoc_parser.Ast.nestable_block_element; _ }
        as e ->
          acc [ e ]
      | { value = `Tag tag; _ } -> (
          match tag with
          | `Deprecated blocks -> acc blocks
          | `Param (_, blocks) -> acc blocks
          | `Raise (_, blocks) -> acc blocks
          | `Return blocks -> acc blocks
          | `See (_, _, blocks) -> acc blocks
          | `Before (_, blocks) -> acc blocks
          | _ -> [])
      | { value = `Heading _; _ } -> [])
    (Odoc_parser.ast parsed)
  |> List.concat

let docstrings lexbuf =
  let rec loop list =
    match Lexer.token_with_comments lexbuf with
    | Parser.EOF -> list
    | Parser.DOCSTRING docstring ->
        let docstring =
          ( Docstrings.docstring_body docstring,
            Docstrings.docstring_loc docstring )
        in
        loop (docstring :: list)
    | _ -> loop list
  in
  loop [] |> List.rev

let convert_pos (p : Lexing.position) (pt : Odoc_parser.Loc.point) =
  { p with pos_lnum = pt.line; pos_cnum = pt.column }

let convert_loc (loc : Location.t) (sp : Odoc_parser.Loc.span) =
  let loc_start = convert_pos loc.loc_start sp.start in
  let loc_end = convert_pos loc.loc_end sp.end_ in
  { loc with loc_start; loc_end }

let docstring_code_blocks str =
  Lexer.handle_docstrings := true;
  Lexer.init ();
  List.map
    (fun (docstring, (cmt_loc : Location.t)) ->
      let location =
        {
          cmt_loc.loc_start with
          pos_cnum = cmt_loc.loc_start.pos_cnum + docstring_start_adjustment;
        }
      in
      let blocks = extract_code_blocks ~location ~docstring in
      List.map
        (fun (b : Code_block.t) -> (b, convert_loc cmt_loc b.location))
        blocks)
    (docstrings (Lexing.from_string str))
  |> List.concat

let make_block ~loc code_block =
  let handle_header = function
    | Some Code_block.{ language_tag; labels } ->
        let open Util.Result.Infix in
        let language_tag = Odoc_parser.Loc.value language_tag in
        let header = Block.Header.of_string language_tag in
        let* labels =
          match labels with
          | None -> Ok []
          | Some labels -> (
              let labels = Odoc_parser.Loc.value labels |> String.trim in
              match Label.of_string labels with
              | Ok labels -> Ok labels
              | Error msgs -> Error (List.hd msgs)
              (* TODO: Report precise location *))
        in
        let language_label = Label.Language_tag language_tag in
        Ok (header, language_label :: labels)
    | None ->
        (* If not specified, blocks are run as ocaml blocks *)
        Ok (Some OCaml, [])
  in
  match handle_header code_block.Code_block.metadata with
  | Error _ as e -> e
  | Ok (header, labels) ->
      let contents = String.split_on_char '\n' code_block.contents in
      Block.mk ~loc ~section:None ~labels ~header ~contents ~legacy_labels:false
        ~errors:[]

let parse_mli file_contents =
  (* Find the locations of the code blocks within [file_contents], then slice it up into
     [Text] and [Block] parts by using the starts and ends of those blocks as
     boundaries. *)
  let code_blocks = docstring_code_blocks file_contents in
  let cursor = { Odoc_parser.Loc.line = 1; column = 0 } in
  let lines = String.split_on_char '\n' file_contents |> Array.of_list in
  let cursor, tokens =
    List.fold_left
      (fun (cursor, code_blocks) ((code_block : Code_block.t), loc) ->
        let pre_text =
          Document.Text
            (slice lines ~start:cursor ~end_:code_block.location.start)
        in
        let block =
          match make_block ~loc code_block with
          | Ok block -> Document.Block block
          | Error (`Msg msg) -> Fmt.failwith "Error creating block: %s" msg
        in
        let cursor = code_block.location.end_ in
        (* append them in reverse order, since this is a fold_left *)
        let code_blocks = block :: pre_text :: code_blocks in
        (cursor, code_blocks))
      (cursor, []) code_blocks
  in
  let tokens = List.rev tokens in
  let eof =
    {
      Odoc_parser.Loc.line = Array.length lines;
      column = String.length lines.(Array.length lines - 1);
    }
  in
  let eof_is_beyond_location (loc : Odoc_parser.Loc.point) =
    eof.line > loc.line || (eof.line = loc.line && eof.column > loc.column)
  in
  if eof_is_beyond_location cursor then
    let remainder = slice lines ~start:cursor ~end_:eof in
    if not (String.equal remainder "") then tokens @ [ Text remainder ]
    else tokens
  else tokens

let parse_mli file_contents =
  try Ok (parse_mli file_contents)
  with exn -> Error [ `Msg (Printexc.to_string exn) ]
OCaml

Innovation. Community. Security.