Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source
Page
Library
Module
Module type
Parameter
Class
Class type
Source
unicode.ml
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
(* For more informations about unicode to utf-8 converting method used see: * http://www.ietf.org/rfc/rfc3629.txt (Page3, section "3. UTF-8 definition") *) (* decimal conversions of binary used: * 10000000 -> 128; 11000000 -> 192; 11100000 -> 224 *) (* This function convert Unicode escaped XXXX to utf-8 encoded string *) let to_utf8 u = let dec = int_of_string @@ "0x" ^ u in let update_byte s i mask shift = Char.chr @@ (Char.code (Bytes.get s i) + ((dec lsr shift) land int_of_string mask)) |> Bytes.set s i in if dec > 0xFFFF then failwith ("Invalid escaped unicode \\u" ^ u) else if dec > 0x7FF then ( let s = Bytes.of_string "\224\128\128" in update_byte s 2 "0b00111111" 0; update_byte s 1 "0b00111111" 6; update_byte s 0 "0b00001111" 12; Bytes.to_string s ) else if dec > 0x7F then ( let s = Bytes.of_string "\192\128" in update_byte s 1 "0b00111111" 0; update_byte s 0 "0b00011111" 6; Bytes.to_string s ) else let s = Bytes.of_string "\000" in update_byte s 0 "0b01111111" 0; Bytes.to_string s