package htmlfromtexbooks
From TeX To Human-Readable HTML
Install
Dune Dependency
Authors
Maintainers
Sources
v3.3.2.tar.gz
md5=8eafd939e9dbcb31919c5be0ed144840
sha512=4e7d13452e0a6b358e013711085500d82faf034eb03845be7de86a631fbc7b66a9aede7eba63cfd35460992c16e74de6c7db9aa152234f7dbc3acf9ab31c12ee
doc/src/htmlfromtexbooks.lib/parser.ml.html
Source file parser.ml
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212
(**Main module for parsing string to an AST representing LaTeX Books*) open Utils (**Commande type : an internal used in the first phase of the parsing*) type cmd = | NullCommand (**A nullcommand is nothing*) | AtomCmd of string * string list (**An atomic command just have a name and eventual parameters like \newline*) | SimpleCmd of string * string list * string (**A command with extactly one argument e.g. \chapter{name}*) | MultipleCmd of string * string list * string list (**A command with multiple arguments e.g. \frac{x}{3}*) (**Structure used to represent in an AST-ish (just one node with a list of structure children) LaTeX*) type structure = | Nul | Line of string (**A line of just plain text like "It's the Enterprise!"*) | Cmd of cmd (**A command of our internal type used in the parsing*) | AtomicCmd of string * string list (**The final type to represent an atomic command like \newline*) | OneArgCmd of string * string list * structure list (**Structure version of SimpleCdm*) | MultipleArgCmd of string * string list * structure list list (**Structure version of MultipleCmd*) | Env of string * structure list (**An environment like document,center,equation...*) | Math of string (**A re-latexified math literal to be sent to an online image processing*) | Subsubsection of string * structure list (**Represent the LaTeX subsubsection, a subsubsection have a name and a list of structure children*) | Subsection of string * structure list (**See above*) | Section of string * structure list (**See above*) | Chapter of string * structure list (**See above*) let preamble = Hashtbl.create 1;; let commands = Hashtbl.create 1;; (**Parses an accolade for the first time (command reading)*) let parse_interior_of_an_accolade list_of_chars acc = let stack = Stack.create () in let rec parse list_of_chars acc = match list_of_chars with | [] -> acc,[] | t::q when t='{' -> Stack.push t stack; parse q (acc^(String.make 1 t)) | t::q when t='}' -> if Stack.is_empty stack then acc,q else (Stack.pop stack |> ignore; parse q (acc^(String.make 1 t))) | t::q -> parse q (acc^(String.make 1 t)) in parse list_of_chars acc;; (**Parses arguments of a function [colorlinks,12pt] to a list of string ["colorlinks","12pt"]*) let rec parse_arguments list_of_chars current_acc acc = match list_of_chars with | [] -> current_acc::acc,[] | t::q when t=']' -> (current_acc::acc),q | t::q when t=',' -> parse_arguments q "" (current_acc::acc) | t::q when t=' ' -> parse_arguments q current_acc acc | t::q -> parse_arguments q (current_acc^(String.make 1 t)) acc;; (**Parses a command recursively, called when a \ is detected in the parsing of a string*) let parse_command list_of_chars = let rec parse_command_rec list_of_chars acc = match list_of_chars with | [] -> acc,[] | t::q when t='{' -> let a,q = parse_interior_of_an_accolade q "" in let acc = (match acc with | NullCommand -> AtomCmd (a,[]) | AtomCmd (b,c) -> SimpleCmd (b,c,a) | SimpleCmd (b,e,c) -> MultipleCmd (b,e,a::c::[]) | MultipleCmd (b,e,c) -> MultipleCmd(b,e,a::c)) in parse_command_rec q acc | t::q when t='[' -> let a,q = parse_arguments q "" [] in let acc = (match acc with | NullCommand -> NullCommand | AtomCmd (c,b) -> AtomCmd (c,b@a) | SimpleCmd(b,e,c) -> SimpleCmd (b,e@a,c) | MultipleCmd(b,e,c) -> MultipleCmd(b,e@a,c)) in parse_command_rec q acc | t::q when (t='\n' || t=' ') -> acc,q | t::q when (t='$') -> acc,q | t::q -> match acc with | NullCommand -> parse_command_rec q (AtomCmd ((String.make 1 t),[])) | AtomCmd (b,_) -> parse_command_rec q (AtomCmd ((b^(String.make 1 t),[]))) | _ -> acc,(t::q) in let cmd,l = parse_command_rec list_of_chars (NullCommand) in match cmd with | MultipleCmd (s,e,l2) -> MultipleCmd (s,e,List.rev l2),l | e -> e,l;; (**Appends a line to a list if the line is not empty*) let append_line str q = match (String.trim (str)) with | "" -> q | e -> Line(e)::q (**Parses inline $$ math*) let parse_math l = let rec parse acc l = match l with | [] -> acc,[] | t::q when t='$' -> acc,q | t::q -> parse (acc^(String.make 1 t)) q in let a,b = parse "" l in Math(a),b;; (**Parses recursively a string into a list of structure, transforms commands to their final type*) let rec parse_string str = let rec parse current_acc acc lst = match lst with | [] -> append_line current_acc acc | t::q when t='$' -> let a,q2 = parse_math q in parse "" (a::(append_line current_acc acc)) q2 | t::q when t='\\' -> let cmd,l = parse_command q in parse "" (Cmd(cmd)::(append_line current_acc acc)) l | t::q -> parse (current_acc^(String.make 1 t)) acc q and parse_nested_commands ast_list = match ast_list with | [] -> [] | Line s::q -> (Line s)::(parse_nested_commands q) | Math e::q -> (Math e)::(parse_nested_commands q) | Cmd c::q -> let a = (match c with | NullCommand -> Nul | AtomCmd (s,e) -> AtomicCmd (s,e) | SimpleCmd (s,e,s2) -> OneArgCmd (s,e,(parse_string s2)) | MultipleCmd (s,e,s2) -> MultipleArgCmd (s,e,(List.map (parse_string) s2))) in a::(parse_nested_commands q) | _::q -> parse_nested_commands q in let a = parse "" [] (string_to_list str) in let a = parse_nested_commands a in List.rev a;; (**Take the list of structure and gives the preamble (everything before the start of the document) and the document itself*) let separate_preamble lst = let rec iter ast_list a0 a1 = match ast_list with | (OneArgCmd (s,e,(Line s1)::_))::q when (String.equal s "begin") && (String.equal s1 "document") -> iter q a0 (OneArgCmd (s,e,(Line s1)::[])::a1) | e::q when (List.length a1 = 0) -> iter q (e::a0) a1 | e::q -> iter q a0 (e::a1) | [] -> List.rev a0,List.rev a1 in iter lst [] [];; (**Recursively generates the environments (begin...end) statements*) let calculate_environments lst = let rec extract_env acc lst = match lst with | [] -> acc,[] | (OneArgCmd (s,_,(Line s1)::_))::q when (String.equal s "begin") -> let env,l = extract_env [] q in let env = Env (s1,List.rev env) in extract_env (env::acc) l | (OneArgCmd (s,_,_))::q when (String.equal s "end") -> acc,q | Chapter(s,l)::q -> let l2,q2 = extract_env [] l in let l2 = List.rev l2 in extract_env ((Chapter(s,l2@q2))::acc) q | Section(s,l)::q -> let l2,q2 = extract_env [] l in let l2 = List.rev l2 in extract_env ((Section(s,l2@q2))::acc) q | Subsection(s,l)::q -> let l2,q2 = extract_env [] l in let l2 = List.rev l2 in extract_env ((Section(s,l2@q2))::acc) q | Subsubsection(s,l)::q -> let l2,q2 = extract_env [] l in let l2 = List.rev l2 in extract_env ((Section(s,l2@q2))::acc) q | e::q -> extract_env (e::acc) q in let a,_ = extract_env [] lst in a;; (**Reads the preamble for type,title,author and eventual glossary input*) let rec read_preamble ast = match ast with | [] -> () | OneArgCmd (s,_,(Line s1)::_)::q when (s="documentclass") -> Hashtbl.add preamble "type" s1; read_preamble q | OneArgCmd (s,_,(Line s1)::_)::q when (s="input") -> Hashtbl.add preamble "glossary" s1; read_preamble q | OneArgCmd (s,_,(Line s1)::_)::q when (s="title") -> Hashtbl.add preamble "title" s1; read_preamble q | OneArgCmd (s,_,(Line s1)::_)::q when (s="author") -> Hashtbl.add preamble "author" s1; read_preamble q | MultipleArgCmd (s,_,l)::q when (s="newcommand") -> (match l with | [AtomicCmd (s,_)]::n::_ when not (s="sep") -> let env = calculate_environments n in Hashtbl.add commands s env; read_preamble q | _ -> read_preamble q) | _::q -> read_preamble q;; (**Take the structure list and order of the Chapters,section etc.*) let separate_sections lst = let tab = [|false;false;false;false|] in let rec extract_section acc lst = match lst with | [] -> acc,[] | (OneArgCmd (s,e,(Line s1)::_))::q when (s="chapter" || s="chapter*") -> if tab.(0) = true then (tab.(0) <- false; tab.(1) <- false; tab.(2) <- false; tab.(3) <- false; acc,(OneArgCmd (s,e,(Line s1)::[]))::q) else (tab.(0) <- true; let a,l = extract_section [] q in let chap = Chapter(s1,List.rev a) in extract_section (chap::acc) l) | (OneArgCmd (s,e,(Line s1)::_))::q when (s="section" || s="section*") -> if tab.(1) = true then (tab.(1) <- false; tab.(2) <- false; tab.(3) <- false; acc,(OneArgCmd (s,e,(Line s1)::[]))::q) else (tab.(1) <- true; let a,l = extract_section [] q in let chap = Section(s1,List.rev a) in extract_section (chap::acc) l) | (OneArgCmd (s,e,(Line s1)::_))::q when (s="subsection" || s="subsection*") -> if tab.(2) = true then (tab.(2) <- false; tab.(3) <- false; acc,(OneArgCmd (s,e,(Line s1)::[]))::q) else (tab.(2) <- true; let a,l = extract_section [] q in let chap = Subsection(s1,List.rev a) in extract_section (chap::acc) l); | (OneArgCmd (s,e,(Line s1)::_))::q when (s="subsubsection" || s="subsubsection*") -> if tab.(3) = true then (tab.(3) <- false; acc,(OneArgCmd (s,e,(Line s1)::[]))::q) else (tab.(3) <- true; let a,l = extract_section [] q in let chap = Subsubsection(s1,List.rev a) in extract_section (chap::acc) l) | e::q -> extract_section (e::acc) q in let a,_ = extract_section [] lst in List.rev a;;
sectionYPositions = computeSectionYPositions($el), 10)"
x-init="setTimeout(() => sectionYPositions = computeSectionYPositions($el), 10)"
>