package acgtk

  1. Overview
  2. Docs
Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source

Source file lexer.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
(* We need to compile the parser twice, once with menhir's table mode and once
   with menhir's code mode. The table mode is needed for the interactive mode of
   acg (because we need to manipulate the parsing flow for the auto-completion,
   using menhir's incremental API, only supported in table mode) and the code
   mode is needed for the script mode of acg (which uses menhir's monolithic
   API) because we need to know in which state an error occured, with menhir
   option --exn-carries-state, only supported in code mode. *)

type lex_token =
  | PartialToken of string * Errors.Lexing_l.t
  | Token of Parser.token * string

let rec eat_comment buf n acc =
  if n = 0 then lex_rec buf acc
  else
    match%sedlex buf with
    | "(*" -> eat_comment buf (n + 1) (acc ^ (Sedlexing.Utf8.lexeme buf))
    | "*)" -> eat_comment buf (n - 1) (acc ^ (Sedlexing.Utf8.lexeme buf))
    | eof -> PartialToken (acc, Errors.Lexing_l.UnterminatedComment)
    | Star (Sub (any, '*'))
    | '*' -> eat_comment buf n (acc ^ (Sedlexing.Utf8.lexeme buf))
    | _ -> assert false

and lex_rec buf acc =
  try
    match%sedlex buf with
    | ' ' | '\t' | '\n' | '\013' -> lex_rec buf (acc ^ (Sedlexing.Utf8.lexeme buf))
    | "(*" -> eat_comment buf 1 (acc ^ (Sedlexing.Utf8.lexeme buf))
    | "*)" -> let loc = Sedlexing.lexing_positions buf in
      Errors.(LexingErrors.emit Lexing_l.UnstartedComment ~loc)
    | '|' -> Token (Parser.PIPE, acc ^ (Sedlexing.Utf8.lexeme buf))
    | ';' -> Token (Parser.SEMI, acc ^ (Sedlexing.Utf8.lexeme buf))
    | ":=" -> Token (Parser.COLON_EQUAL, acc ^ (Sedlexing.Utf8.lexeme buf))
    | '=' -> Token (Parser.EQUAL, acc ^ (Sedlexing.Utf8.lexeme buf))
    | '<' -> Token (Parser.INF, acc ^ (Sedlexing.Utf8.lexeme buf))
    | '+' -> Token (Parser.PLUS, acc ^ (Sedlexing.Utf8.lexeme buf))
    | '-' -> Token (Parser.MINUS, acc ^ (Sedlexing.Utf8.lexeme buf))
    | ',' -> Token (Parser.COMMA, acc ^ (Sedlexing.Utf8.lexeme buf))
    | "let" -> Token (Parser.LET, acc ^ (Sedlexing.Utf8.lexeme buf))
    | '"', Star (Sub (any, '"')), '"' ->
      let t = Sedlexing.Utf8.lexeme buf in
      Token (Parser.DATA (String.sub t 1 (String.length t - 2)), acc ^ (Sedlexing.Utf8.lexeme buf))
    | Star ('0' .. '9') -> Token (Parser.INT (int_of_string (Sedlexing.Utf8.lexeme buf)), acc ^ (Sedlexing.Utf8.lexeme buf))
    | (id_continue | '.' | '/'), Star (id_continue | '-' | '.' | '/') ->
      Token (Parser.ID (Sedlexing.Utf8.lexeme buf), acc ^ (Sedlexing.Utf8.lexeme buf))
    | '"', Star (Sub (any, '"')), eof ->
      PartialToken (Sedlexing.Utf8.lexeme buf, (Errors.Lexing_l.Unclosed "\""))
    | eof -> Token (Parser.EOI, (acc ^ (Sedlexing.Utf8.lexeme buf)))
    | any -> let loc = Sedlexing.lexing_positions buf in
      Errors.(LexingErrors.emit (Lexing_l.BadChar (Sedlexing.Utf8.lexeme buf)) ~loc)
    | _ -> assert false
  with
  | Sedlexing.MalFormed ->
    let loc = Sedlexing.lexing_positions buf in
    Errors.(LexingErrors.emit Lexing_l.Malformed ~loc)

let lex buf =
  lex_rec buf ""
OCaml

Innovation. Community. Security.