package higlo

  1. Overview
  2. Docs

Source file ocaml.ml

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
(*********************************************************************************)
(*                Higlo                                                          *)
(*                                                                               *)
(*    Copyright (C) 2014-2021 Institut National de Recherche en Informatique     *)
(*    et en Automatique. All rights reserved.                                    *)
(*                                                                               *)
(*    This program is free software; you can redistribute it and/or modify       *)
(*    it under the terms of the GNU Lesser General Public License version        *)
(*    3 as published by the Free Software Foundation.                            *)
(*                                                                               *)
(*    This program is distributed in the hope that it will be useful,            *)
(*    but WITHOUT ANY WARRANTY; without even the implied warranty of             *)
(*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *)
(*    GNU Library General Public License for more details.                       *)
(*                                                                               *)
(*    You should have received a copy of the GNU Lesser General Public           *)
(*    License along with this program; if not, write to the Free Software        *)
(*    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA                   *)
(*    02111-1307  USA                                                            *)
(*                                                                               *)
(*    Contact: Maxence.Guesdon@inria.fr                                          *)
(*                                                                               *)
(*                                                                               *)
(*********************************************************************************)

open Lang

(** [lexeme lexbuf] is the text of the token currently matched on [lexbuf],
    as a UTF-8 encoded string. *)
let lexeme lexbuf = Sedlexing.Utf8.lexeme lexbuf;;

(* Character classes for numeric literals.  [digit] contains the underscore,
   so a separator (as in 1_000) is accepted wherever a digit is. *)
let digit = [%sedlex.regexp? '0'..'9'|'_']
let hex = [%sedlex.regexp? digit | 'A'..'F' | 'a'..'f']

(* An integer must start with a real decimal digit; underscores are only
   accepted after it.  Without this, a lone underscore or an identifier such
   as _1 also matches the numeric rule, which is listed before the
   identifier rule in [main] and therefore wins when both match the same
   length. *)
let integer = [%sedlex.regexp? '0'..'9', Star(digit)]
(* Floating-point literals.
   - [decimal]: an optional integer part, a dot, then at least one digit.
   - [exponent]: the letter e in either case, an optional sign, then at
     least one digit.
   - [double]: a number that carries an exponent, with or without a dot. *)
let decimal = [%sedlex.regexp? Star('0'..'9'), '.', Plus('0'..'9')]
let exponent = [%sedlex.regexp? ('E'|'e'), Opt('-'|'+'), Plus('0'..'9')]
let double = [%sedlex.regexp?
    (Plus('0'..'9'), exponent)
  | ('.', Plus('0'..'9'), exponent)
  | (Plus('0'..'9'), '.', Star('0'..'9'), exponent)]

(* Explicitly signed variants of the three unsigned forms. *)
let integer_negative = [%sedlex.regexp? '-', integer]
let integer_positive = [%sedlex.regexp? '+', integer]
let decimal_negative = [%sedlex.regexp? '-', decimal]
let decimal_positive = [%sedlex.regexp? '+', decimal]
let double_negative = [%sedlex.regexp? '-', double]
let double_positive = [%sedlex.regexp? '+', double]

(* Prefixed integer literals in base 2, 8 and 16.  Only the lowercase
   prefixes are recognised. *)
let binary = [%sedlex.regexp? "0b", Plus('0'|'1')]
let octal = [%sedlex.regexp? "0o", Plus('0'..'7')]
let hexa = [%sedlex.regexp? "0x", Plus(hex)]

(* Any numeric literal: unsigned, prefixed, or explicitly signed. *)
let numeric = [%sedlex.regexp?
    integer | decimal | double
  | binary | octal | hexa
  | integer_positive | decimal_positive | double_positive
  | integer_negative | decimal_negative | double_negative]

(* The two boolean constants. *)
let boolean = [%sedlex.regexp? "true" | "false"]

(* Characters accepted after a backslash inside a character literal. *)
let echar = [%sedlex.regexp? 't' | 'b' | 'n' | 'r' | 'f' | '\\' | '"' | '\'']

let escaped_char = [%sedlex.regexp? '\\', echar]

(* String literal: a double quote, a body, a double quote.  In the body a
   backslash is never an ordinary character: it is always consumed together
   with the character that follows it.  If the backslash were also accepted
   as an ordinary character, the longest-match rule could read the first
   backslash of an escaped backslash as plain text and the second one as
   escaping the closing quote, so a literal ending with an escaped backslash
   would run on to the next double quote of the input. *)
let string = [%sedlex.regexp? '"', Star(Compl(Chars "\"\\") | ('\\', any)), '"']

(* Character literal: a single character other than the apostrophe, or one
   of the escapes of [escaped_char], between apostrophes.  Numeric escapes
   are not matched by this rule. *)
let char = [%sedlex.regexp? "'", (Compl(0x27)| escaped_char), "'"]

(* A non-empty run of blanks: space, tab, line feed or carriage return. *)
let space = [%sedlex.regexp? Plus('\t' | '\n' | '\r' | ' ')]

(* ASCII letters, by case. *)
let lowchar = [%sedlex.regexp? 'a'..'z']
let capchar = [%sedlex.regexp? 'A'..'Z']

(* Characters allowed after the first character of an identifier. *)
let idchar = [%sedlex.regexp? capchar | lowchar | digit | '_']

(* Capitalised name: an uppercase letter followed by identifier
   characters.  [main] reports it as a level 2 keyword. *)
let modname = [%sedlex.regexp? capchar, Star(idchar)]

(* Block comment, without support for nesting: the opening delimiter, a
   body, then the first closing delimiter.  The body is made of non-star
   characters and of runs of stars followed by a character that is neither a
   star nor a closing parenthesis; the comment ends with a run of stars
   followed by a closing parenthesis.  Treating stars as whole runs makes
   the match stop at the first closing delimiter whatever the number of
   stars in front of it.  Pairing each star with the following character
   instead lets a run of stars swallow the star of the closing delimiter, in
   which case the match only ends at a later closing delimiter. *)
let comment = [%sedlex.regexp?
  "(*", Star(Compl('*') | (Plus('*'), Compl(Chars "*)"))), Plus('*'), ')']

(* Identifier starting with a lowercase letter or an underscore. *)
let id = [%sedlex.regexp? (lowchar | '_'), Star(idchar)]

(* An uppercase letter followed by something matching [id]. *)
let cap_id = [%sedlex.regexp? capchar, id]

(* One identifier, then any number of dot-introduced groups of one or more
   identifiers. *)
let attr_id = [%sedlex.regexp?
  (cap_id | id), Star('.', Plus(cap_id | id))]

(* A percent sign immediately followed by an [attr_id]. *)
let percent_id = [%sedlex.regexp? '%', attr_id]

(* Keywords that [main] reports as [Keyword (0, _)]. *)
let decl_kw = [%sedlex.regexp? "and" |"class" |"constraint" |"exception" |"external" |"let" |"fun" |"function" |"functor" |"in" |"include" |"inherit" |"initializer" |"method" |"module" |"mutable" | "nonrec" | "of" |"open" |"private" |"rec" |"type" |"val" |"virtual"]

(* Keywords that [main] reports as [Keyword (1, _)].
   The list is split into three regexps because of a sedlex bug:
   https://github.com/ocaml-community/sedlex/issues/97
   Some entries are listed more than once: [do] appears twice in [expr_kw];
   [else], [for] and [if] are in both [expr_kw] and [expr_kw2]; [while] is in
   both [expr_kw] and [expr_kw3].  All three regexps map to the same
   keyword level in [main]. *)
let expr_kw = [%sedlex.regexp? "asr" |"do" |"else" |"for" |"if" |"while" |"as" |"assert" |"begin" |"do" |"done" |"downto"]
let expr_kw2 = [%sedlex.regexp? "else" |"end" |"for" |"if" |"land" |"lazy" |"lor" |"lsl" |"lsr" |"lxor" |"match" |"mod"]
let expr_kw3 = [%sedlex.regexp? "new" |"object" |"or" | "ref" |"sig" |"struct" |"then" |"to"|"try" |"when" |"while" |"with" |"#" ]

(* Names of predefined types, reported by [main] as [Keyword (3, _)]. *)
let type_kw = [%sedlex.regexp?
    "bool" | "int" | "string" | "list"
  | "array" | "float" | "char" | "unit"]

(* Lwt related keywords and operators, reported by [main] as
   [Keyword (10, _)]. *)
let lwt_kw = [%sedlex.regexp?
    "lwt" | "raise_lwt"
  | ">>=" | ">>" | "=<<"
  | "for_lwt" | "assert_lwt" | "match_lwt" | "while_lwt"]

(* A tilde immediately followed by a lowercase identifier. *)
let label = [%sedlex.regexp? '~', id]

(* A hash sign followed by a name starting with a lowercase letter.  The
   match may begin with a line feed, itself optionally followed by a
   carriage return (in that order). *)
let directive = [%sedlex.regexp?
  Opt('\n', Opt('\r')), '#', lowchar, Star(idchar)]

(** [main lexbuf] reads one lexeme from [lexbuf] and returns the
    highlighting tokens it stands for.

    The result is the empty list at end of input.  A directive lexeme that
    neither begins with a line feed nor starts at offset 0 is returned as
    two tokens, the hash keyword and an identifier.  Every other lexeme
    yields exactly one token.

    @raise Failure from the final catch-all arm. *)
let main lexbuf =
  (* Text of the lexeme that has just been matched. *)
  let tok () = lexeme lexbuf in
  (* Single keyword token of the given level for the current lexeme. *)
  let keyword level = [Keyword (level, tok ())] in
  match%sedlex lexbuf with
  | eof -> []
  | space -> [Text (tok ())]
  | numeric -> [Numeric (tok ())]
  | boolean -> [Constant (tok ())]
  | directive ->
      let s = tok () in
      (* A real directive either carries its leading line feed or sits at
         the very beginning of the input; anywhere else the hash sign is
         reported as a keyword followed by an identifier. *)
      (match s.[0], Sedlexing.lexeme_start lexbuf with
       | '\n', _ | _, 0 -> [Directive s]
       | _, _ ->
           let name = String.sub s 1 (String.length s - 1) in
           [Keyword (1, "#"); Id name])
  | decl_kw -> keyword 0
  | expr_kw -> keyword 1
  | expr_kw2 -> keyword 1
  | expr_kw3 -> keyword 1
  | modname -> keyword 2
  | type_kw -> keyword 3
  | percent_id -> keyword 5
  | lwt_kw -> keyword 10
  | label -> keyword 4
  | id -> [Id (tok ())]
  | string -> [String (tok ())]
  | char -> [String (tok ())]
  | comment -> [Bcomment (tok ())]
  | any -> [Text (tok ())]
  | _ -> failwith "Invalid state"
;;

(* Register [main] as the lexer for the language named "ocaml" when this
   module is initialised. *)
let () = main |> Lang.register_lang "ocaml";;
OCaml

Innovation. Community. Security.