package mdx

  1. Overview
  2. Docs

Source file mli_parser.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
open! Compat

module Code_block = struct
  type t = { location : Odoc_model.Location_.span; contents : string }
end

let drop_last lst =
  match List.rev lst with
  | [] -> None
  | last :: rev_tl -> Some (List.rev rev_tl, last)

(* drop_first_and_last [1; 2; 3; 4] = Some (1, Some ([2; 3], 4)). *)
let drop_first_and_last = function
  | [] -> None
  | first :: tl -> Some (first, drop_last tl)

let slice lines ~(start : Odoc_model.Location_.point)
    ~(end_ : Odoc_model.Location_.point) =
  let lines_to_include =
    Util.Array.slice lines ~from:(start.line - 1) ~to_:(end_.line - 1)
    |> Array.to_list
  in
  match drop_first_and_last lines_to_include with
  | None -> ""
  | Some (line, None) ->
      String.sub line start.column (end_.column - start.column)
  (* Imagine we were slicing the file from (Line 2, Column 3) to (Line 6, Column 7):

       0123456789
       1 ----------
       2 ---[---
       3 ---------
       4 --
       5 ----------
       6 -------]--
       7 ----------
       8 ----------

       The case below handles this multiline case, concatenating the included substrings
       from lines 2-6 ([lines_to_include]). *)
  | Some (first_line, Some (stripped, last_line)) ->
      let first_line =
        String.sub first_line start.column
          (String.length first_line - start.column)
      in
      let last_line = String.sub last_line 0 end_.column in
      String.concat "\n" ([ first_line ] @ stripped @ [ last_line ])

(* Imagine a docstring that is within a file with four characters # of indentation. (I'll
   use square brackets rather than parens to avoid escaping):

   ####[** foo
   ####
   ####bar
   ####
   ####baz *]
   ####val x : int
   ####val y : int

   According to odoc, the "b" in "bar" is at column 0 inside the docstring and at column 4
   within the broader file. That is correct. But it says the "f" in "foo" is at column 1
   inside the docstring and column 5 within the file. This isn't right.

   The problem is that it starts counting the inside-the-docstring column number from the
   end of "[**", but doesn't add those three characters to the within-the-file column
   number. Here, we make the adjustment.
*)
let account_for_docstring_open_token (location : Odoc_model.Location_.span) =
  let start_shift = 3 in
  let end_shift = if location.start.line = location.end_.line then 3 else 0 in
  {
    location with
    start = { location.start with column = location.start.column + start_shift };
    end_ = { location.end_ with column = location.end_.column + end_shift };
  }

let extract_code_blocks ~(location : Lexing.position) ~docstring =
  let rec acc blocks =
    List.map
      (fun block ->
        match Odoc_model.Location_.value block with
        | `Code_block contents ->
            let location =
              if location.pos_lnum = block.location.start.line then
                account_for_docstring_open_token block.location
              else block.location
            in
            [ { Code_block.location; contents } ]
        | `List (_, _, lists) -> List.map acc lists |> List.concat
        | _ -> [])
      blocks
    |> List.concat
  in
  let parsed = Odoc_parser.parse_comment_raw ~location ~text:docstring in
  List.iter
    (fun error -> failwith (Odoc_model.Error.to_string error))
    parsed.warnings;
  List.map
    (fun element ->
      match Odoc_model.Location_.value element with
      | #Odoc_parser.Ast.nestable_block_element as e ->
          acc
            [ { Odoc_model.Location_.location = element.location; value = e } ]
      | `Tag tag -> (
          match tag with
          | `Deprecated blocks -> acc blocks
          | `Param (_, blocks) -> acc blocks
          | `Raise (_, blocks) -> acc blocks
          | `Return blocks -> acc blocks
          | `See (_, _, blocks) -> acc blocks
          | `Before (_, blocks) -> acc blocks
          | _ -> [])
      | `Heading _ -> [])
    parsed.value
  |> List.concat

let docstrings lexbuf =
  let rec loop list =
    match Lexer.token_with_comments lexbuf with
    | Parser.EOF -> list
    | Parser.DOCSTRING docstring ->
        let docstring =
          ( Docstrings.docstring_body docstring,
            Docstrings.docstring_loc docstring )
        in
        loop (docstring :: list)
    | _ -> loop list
  in
  loop [] |> List.rev

let convert_pos (p : Lexing.position) (pt : Odoc_model.Location_.point) =
  { p with pos_lnum = pt.line; pos_cnum = pt.column }

let convert_loc (loc : Location.t) (sp : Odoc_model.Location_.span) =
  let loc_start = convert_pos loc.loc_start sp.start in
  let loc_end = convert_pos loc.loc_end sp.end_ in
  { loc with loc_start; loc_end }

let docstring_code_blocks str =
  Lexer.handle_docstrings := true;
  Lexer.init ();
  List.map
    (fun (docstring, (location : Location.t)) ->
      let blocks =
        extract_code_blocks ~location:location.loc_start ~docstring
      in
      List.map
        (fun (b : Code_block.t) -> (b, convert_loc location b.location))
        blocks)
    (docstrings (Lexing.from_string str))
  |> List.concat

let parse_mli file_contents =
  (* Find the locations of the code blocks within [file_contents], then slice it up into
     [Text] and [Block] parts by using the starts and ends of those blocks as
     boundaries. *)
  let code_blocks = docstring_code_blocks file_contents in
  let cursor = ref { Odoc_model.Location_.line = 1; column = 0 } in
  let lines = String.split_on_char '\n' file_contents |> Array.of_list in
  let tokens =
    List.map
      (fun ((code_block : Code_block.t), loc) ->
        let pre_text =
          Document.Text
            (slice lines ~start:!cursor ~end_:code_block.location.start)
        in
        let column = code_block.location.start.column in
        let contents = Compat.String.split_on_char '\n' code_block.contents in
        let block =
          match
            Block.mk ~loc ~section:None ~labels:[] ~header:(Some OCaml)
              ~contents ~legacy_labels:false ~errors:[]
          with
          | Ok block -> Document.Block block
          | Error _ -> failwith "Error creating block"
        in
        let hpad =
          if List.length contents = 1 then ""
          else Astring.String.v ~len:column (fun _ -> ' ')
        in
        cursor := code_block.location.end_;
        [ pre_text; Text "{["; block; Text (hpad ^ "]}") ])
      code_blocks
    |> List.concat
  in
  let eof =
    {
      Odoc_model.Location_.line = Array.length lines;
      column = String.length lines.(Array.length lines - 1);
    }
  in
  let eof_is_beyond_location (loc : Odoc_model.Location_.point) =
    eof.line > loc.line || (eof.line = loc.line && eof.column > loc.column)
  in
  if eof_is_beyond_location !cursor then
    let remainder = slice lines ~start:!cursor ~end_:eof in
    if not (Compat.String.equal remainder "") then tokens @ [ Text remainder ]
    else tokens
  else tokens

let parse_mli file_contents =
  try Result.Ok (parse_mli file_contents)
  with exn -> Util.Result.errorf "%s" (Printexc.to_string exn)
OCaml

Innovation. Community. Security.