Legend:
Page
Library
Module
Module type
Parameter
Class
Class type
Source
Source file parserReport.ml
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300(** Handles errors in {!Parse.parse} produced by the menhir in
incremental parser.
[report] is the main function. *)(* To understand what is a checkpoint and everything, you can
check the menhir incremental parser API in
IncrementalEngine.ml (google it) *)(* Project TouIST, 2015. Easily formalize and solve real-world sized problems
* using propositional logic and linear theory of reals with a nice language and GUI.
*
* https://github.com/FredMaris/touist
*
* Copyright Institut de Recherche en Informatique de Toulouse, France
* This program and the accompanying materials are made available
* under the terms of the GNU Lesser General Public License (LGPL)
* version 2.1 which accompanies this distribution, and is available at
* http://www.gnu.org/licenses/lgpl-2.1.html
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
*
* A huge part of this code has been inspired by cparser/ErrorReports.ml
* contained in the project AbsInt/CompCert. Here are the terms:
*
* The Compcert verified compiler
* Jacques-Henri Jourdan, INRIA Paris-Rocquencourt
* Copyright Institut National de Recherche en Informatique et en
* Automatique. All rights reserved. This file is distributed
* under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version. This file is also distributed
* under the terms of the INRIA Non-Commercial License Agreement. *)openLexingopenParser.MenhirInterpretermoduleS=MenhirLib.General(* Streams *)(* -------------------------------------------------------------------------- *)(* There are places where we may hit an internal error and we would like to fail
abruptly because "this cannot happen". Yet, it is safer when shipping to
silently cover up for our internal error. Thus, we typically use an idiom of
the form [if debug then assert false else <some default value>]. *)letdebug=reffalse(* This variable is set when calling the function [report]*)(* -------------------------------------------------------------------------- *)(* The parser keeps track of the last two tokens in a two-place buffer. *)type'abuffer=|Zero|Oneof'a|Twoof'a*(* most recent: *)'a(* [push buffer x] pushes [x] into [buffer], causing the buffer to slide. *)letupdatebufferx:_buffer=matchbuffer,xwith|Zero,_->Onex|Onex1,x2|Two(_,x1),x2->Two(x1,x2)(* [show f buffer] prints the contents of the buffer. The function [f] is
used to print an element. *)letshowfbuffer:string=matchbufferwith|Zero->(* The buffer cannot be empty. If we have read no tokens, we
cannot have detected a syntax error. *)if!debugthenassertfalseelse""|Oneinvalid->(* It is unlikely, but possible, that we have read just one token. *)Printf.sprintf"before '%s'"(finvalid)|Two(valid,invalid)->(* In the most likely case, we have read two tokens. *)Printf.sprintf"after '%s' and before '%s'"(fvalid)(finvalid)(* [exact_pos] returns the start position of the invalid token;
this is useful for giving the user a one-position indication of where
the error is. *)letexact_posbuffer:position=matchbufferwith|Zero->(* The buffer cannot be empty. If we have read no tokens, we
cannot have detected a syntax error. *)assertfalse|Oneinvalid|Two(_,invalid)->letstart_pos,_=invalidinstart_pos(* [area_pos] returns the area (= start and end positions) where the error
was found; this is useful for red-underlying the error in an IDE.
It will give the beginning of the token preceeding the invalid token
and the end of the invalid token.
If there is no preceeding token, returns the invalid token twice.
NOTE: position = Lexing.position *)letarea_pos(buffer:(position*position)buffer):position*position=matchbufferwith|Zero->(* The buffer cannot be empty. If we have read no tokens, we
cannot have detected a syntax error. *)assertfalse|Oneinvalid->let(startpos,endpos)=invalidin(startpos,endpos)|Two(previous,invalid)->let(startpos,_),(_,endpos)=previous,invalidinstartpos,endpos(* -------------------------------------------------------------------------- *)(* [extract text (pos1, pos2)] extracts the sub-string of [text] delimited
by the positions [pos1] and [pos2]. *)letextracttext(pos1,pos2):string=letofs1=pos1.pos_cnumandofs2=pos2.pos_cnuminletlen=ofs2-ofs1intryString.subtextofs1lenwithInvalid_argument_->(* In principle, this should not happen, but if it does, let's make this
a non-fatal error. *)if!debugthenassertfalseelse"???"(* -------------------------------------------------------------------------- *)(* [compress text] replaces every run of at least one whitespace character
with exactly one space character. *)letcompresstext=Re_str.global_replace(Re_str.regexp"[ \t\n\r]+")" "text(* -------------------------------------------------------------------------- *)(* [sanitize text] eliminates any special characters from the text [text].
They are (arbitrarily) replaced with a single dot character. *)letsanitizetext=String.map(func->ifChar.codec<32||Char.codec>=127then'.'elsec)text(* -------------------------------------------------------------------------- *)(* [shorten k text] limits the length of [text] to [2k+3] characters. If the
text is too long, a fragment in the middle is replaced with an ellipsis. *)letshortenktext=letn=String.lengthtextinifn<=2*k+3thentextelseString.subtext0k^"..."^String.subtext(n-k)k(* -------------------------------------------------------------------------- *)(* [stack checkpoint] extracts the parser's stack out of a checkpoint. *)letstackcheckpoint=matchcheckpointwith|HandlingErrorenv->stackenv|_->assertfalse(* this cannot happen, I promise *)(* -------------------------------------------------------------------------- *)(* [state checkpoint] extracts the number of the current state out of a
parser checkpoint. *)letstatecheckpoint:int=matchLazy.force(stackcheckpoint)with|S.Nil->(* Hmm... The parser is in its initial state. Its number is
usually 0. This is a BIG HACK. TEMPORARY *)0|S.Cons(Element(s,_,_,_),_)->numbers(* -------------------------------------------------------------------------- *)(* TEMPORARY move to MenhirLib.General *)letrecdropn(xs:'aS.stream):'aS.stream=matchn,xswith|0,_|_,lazy(S.Nil)->xs|_,lazy(S.Cons(_,xs))->drop(n-1)xs(* -------------------------------------------------------------------------- *)(* [element checkpoint i] returns the [i]-th cell of the parser stack. The index
[i] is 0-based. [i] should (ideally) be within bounds; we raise [Not_found]
if it isn't. *)letelementcheckpointi:element=leti'=ifi>0then(i-1)elseiinmatchLazy.force(dropi'(stackcheckpoint))with|S.Nil->(* [i] is out of range. This could happen if the handwritten error
messages are out of sync with the grammar, or if a mistake was
made. We fail in a non-fatal way. *)raiseNot_found|S.Cons(Element(a,b,p1,p2),_)->matchiwith|0->letp1',p2'=(positions(matchcheckpointwithHandlingErrorenv->env|_->failwith"Shouldnt happen"))inElement(a,b,p1',p2')|_->Element(a,b,p1,p2)(* -------------------------------------------------------------------------- *)(* [range text e] converts the stack element [e] to the fragment of the source
text that corresponds to this stack element. The fragment is placed within
single quotes and shortened if it is too long. We also ensure that it does
not contain any special characters. *)letwidth=30letrangetext(e:element):string=(* Extract the start and positions of this stack element. *)letElement(_,_,pos1,pos2)=ein(* Get the underlying source text fragment. *)letfragment=extracttext(pos1,pos2)in(* Sanitize it and limit its length. Enclose it in single quotes. *)"'"^shortenwidth(sanitize(compressfragment))^"'"(* -------------------------------------------------------------------------- *)(* We allow an error message to contain the special form $i, where is a 0-based
index into the stack. We replace it with the fragment of the source text that
corresponds to this stack entry. *)letfragmenttextcheckpointmessage=tryleti=int_of_string(Re_str.matched_group1message)inrangetext(elementcheckpointi)with|Failure_->(* In principle, this should not happen, but if it does, let's cover up
for our internal error. *)if!debugthenassertfalseelse"???"|Not_found->(* In principle, this should not happen, but if it does, let's cover up
for our internal error. *)if!debugthenassertfalseelse"???"letfragmentstextcheckpoint(message:string):string=Re_str.global_substitute(Re_str.regexp"\\$\\([0-9]+\\)")(fragmenttextcheckpoint)message(* -------------------------------------------------------------------------- *)(* [report text buffer checkpoint] constructs an error message. The C source
code must be stored in the string [text]. The start and end positions of the
last two tokens that were read must be stored in [buffer]. The parser state
(i.e., the automaton's state and stack) must be recorded in the checkpoint
[checkpoint]. *)(* The start and end positions of the invalid token are [lexbuf.lex_start_p]
and [lexbuf.lex_curr_p], since this is the last token that was read. But
we need not care about that here. *)letreporttextbuffercheckpointdebug':string=debug:=debug';(* Sets the debug flag for the whole ErrorReporting.ml file *)letwhere=show(extracttext)bufferin(* Find out in which state the parser failed. *)lets:int=statecheckpointin(* Choose an error message, based on the state number [s].
Then, customize it, based on dynamic information. *)letmessage=tryParserMsgs.messages|>fragmentstextcheckpointwithNot_found->(* If the state number cannot be found -- which, in principle,
should not happen, since our list of erroneous states is
supposed to be complete! -- produce a generic message. *)Printf.sprintf"This is an unknown syntax error (number %d). This error \
is missing in parser.messages (see HOWTODEBUG.md).\n"sin(* Construct the full error message. *)letmessage=Printf.sprintf"syntax error %s. %s"wheremessage^if!debugthenPrintf.sprintf"Debug: Automaton state: %d (see src/parser.messages)\n"selse""inmessage