Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source
Page
Library
Module
Module type
Parameter
Class
Class type
Source
common.ml
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297
(* This file is part of Markup.ml, released under the BSD 2-clause license. See doc/LICENSE for details, or visit https://github.com/aantron/markup.ml. *) (* Aliases for reducing the number of deprecation warings. *) module String = struct include String let lowercase = lowercase end module Char = struct include Char let lowercase = lowercase end type 'a cont = 'a -> unit type 'a cps = exn cont -> 'a cont -> unit type location = int * int let compare_locations (line, column) (line', column') = match line - line' with | 0 -> column - column' | order -> order type name = string * string let (|>) x f = f x let (@@) f x = f x let xml_ns = "http://www.w3.org/XML/1998/namespace" let xmlns_ns = "http://www.w3.org/2000/xmlns/" let xlink_ns = "http://www.w3.org/1999/xlink" let html_ns = "http://www.w3.org/1999/xhtml" let svg_ns = "http://www.w3.org/2000/svg" let mathml_ns = "http://www.w3.org/1998/Math/MathML" module Token_tag = struct type t = {name : string; attributes : (string * string) list; self_closing : bool} end type xml_declaration = {version : string; encoding : string option; standalone : bool option} type doctype = {doctype_name : string option; public_identifier : string option; system_identifier : string option; raw_text : string option; force_quirks : bool} type signal = [ `Start_element of name * (name * string) list | `End_element | `Text of string list | `Xml of xml_declaration | `Doctype of doctype | `PI of string * string | `Comment of string ] type content_signal = [ `Start_element of name * (name * string) list | `End_element | `Text of string list ] type general_token = [ `Xml of xml_declaration | `Doctype of doctype | `Start of Token_tag.t | `End of Token_tag.t | `Chars of string list | `Char of int | `PI of string * string | `Comment of string | `EOF ] let u_rep = Uchar.to_int Uutf.u_rep let add_utf_8 buffer c = Uutf.Buffer.add_utf_8 buffer (Uchar.unsafe_of_int c) let format_char = Printf.sprintf "U+%04X" (* Type constraints are necessary to avoid polymorphic comparison, which would greatly reduce performance: https://github.com/aantron/markup.ml/pull/15. *) let is_in_range (lower : int) (upper : int) c = c >= lower && c <= upper (* HTML 8.2.2.5. *) let is_control_character = function | 0x000B -> true | c when is_in_range 0x0001 0x0008 c -> true | c when is_in_range 0x000E 0x001F c -> true | c when is_in_range 0x007F 0x009F c -> true | _ -> false (* HTML 8.2.2.5. *) let is_non_character = function | c when is_in_range 0xFDD0 0xFDEF c -> true | c when (c land 0xFFFF = 0xFFFF) || (c land 0xFFFF = 0xFFFE) -> true | _ -> false let is_digit = is_in_range 0x0030 0x0039 let is_hex_digit = function | c when is_digit c -> true | c when is_in_range 0x0041 0x0046 c -> true | c when is_in_range 0x0061 0x0066 c -> true | _ -> false let is_scalar = function | c when (c >= 0x10FFFF) || ((c >= 0xD800) && (c <= 0xDFFF)) -> false | _ -> true let is_uppercase = is_in_range 0x0041 0x005A let is_lowercase = is_in_range 0x0061 0x007A let is_alphabetic = function | c when is_uppercase c -> true | c when is_lowercase c -> true | _ -> false let is_alphanumeric = function | c when is_alphabetic c -> true | c when is_digit c -> true | _ -> false let is_whitespace c = c = 0x0020 || c = 0x000A || c = 0x0009 || c = 0x000D let is_whitespace_only s = try s |> String.iter (fun c -> if is_whitespace (int_of_char c) then () else raise Exit); true with Exit -> false let to_lowercase = function | c when is_uppercase c -> c + 0x20 | c -> c let is_printable = is_in_range 0x0020 0x007E let char c = if is_printable c then begin let buffer = Buffer.create 4 in add_utf_8 buffer c; Buffer.contents buffer end else format_char c let is_valid_html_char c = not (is_control_character c || is_non_character c) let is_valid_xml_char c = is_in_range 0x0020 0xD7FF c || c = 0x0009 || c = 0x000A || c = 0x000D || is_in_range 0xE000 0xFFFD c || is_in_range 0x10000 0x10FFFF c let signal_to_string = function | `Comment s -> Printf.sprintf "<!--%s-->" s | `Doctype d -> let text = match d.doctype_name with | None -> begin match d.raw_text with | None -> "" | Some s -> " " ^ s end | Some name -> match d.public_identifier, d.system_identifier with | None, None -> name | Some p, None -> Printf.sprintf " %s PUBLIC \"%s\"" name p | None, Some s -> Printf.sprintf " %s SYSTEM \"%s\"" name s | Some p, Some s -> Printf.sprintf " %s PUBLIC \"%s\" \"%s\"" name p s in Printf.sprintf "<!DOCTYPE %s>" text | `Start_element (name, attributes) -> let name_to_string = function | "", local_name -> local_name | ns, local_name -> ns ^ ":" ^ local_name in let attributes = attributes |> List.map (fun (name, value) -> Printf.sprintf " %s=\"%s\"" (name_to_string name) value) |> String.concat "" in Printf.sprintf "<%s%s>" (name_to_string name) attributes | `End_element -> "</...>" | `Text ss -> String.concat "" ss | `Xml x -> let s = Printf.sprintf "<?xml version=\"%s\">" x.version in let s = match x.encoding with | None -> s | Some encoding -> Printf.sprintf "%s encoding=\"%s\"" s encoding in let s = match x.standalone with | None -> s | Some standalone -> Printf.sprintf "%s standalone=\"%s\"" s (if standalone then "yes" else "no") in s ^ "?>" | `PI (target, s) -> Printf.sprintf "<?%s %s?>" target s let token_to_string = function | `Xml x -> signal_to_string (`Xml x) | `Doctype d -> signal_to_string (`Doctype d) | `Start t -> let name = "", t.Token_tag.name in let attributes = t.Token_tag.attributes |> List.map (fun (n, v) -> ("", n), v) in let s = signal_to_string (`Start_element (name, attributes)) in if not t.Token_tag.self_closing then s else (String.sub s 0 (String.length s - 1)) ^ "/>" | `End t -> Printf.sprintf "</%s>" t.Token_tag.name | `Chars ss -> String.concat "" ss | `Char i -> char i | `PI v -> signal_to_string (`PI v) | `Comment s -> signal_to_string (`Comment s) | `EOF -> "EOF" let whitespace_chars = " \t\n\r" let whitespace_prefix_length s = let rec loop index = if index = String.length s then index else if String.contains whitespace_chars s.[index] then loop (index + 1) else index in loop 0 let whitespace_suffix_length s = let rec loop rindex = if rindex = String.length s then rindex else if String.contains whitespace_chars s.[String.length s - rindex - 1] then loop (rindex + 1) else rindex in loop 0 let trim_string_left s = let prefix_length = whitespace_prefix_length s in String.sub s prefix_length (String.length s - prefix_length) let trim_string_right s = let suffix_length = whitespace_suffix_length s in String.sub s 0 (String.length s - suffix_length) (* String.trim not available for OCaml < 4.00. *) let trim_string s = s |> trim_string_left |> trim_string_right (* Specialization of List.mem at string list, to avoid polymorphic comparison. *) let list_mem_string (s : string) l = List.exists (fun s' -> s' = s) l