Line data Source code
1 : /* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
2 : * Use of this file is governed by the BSD 3-clause license that
3 : * can be found in the LICENSE.txt file in the project root.
4 : */
5 :
6 : #pragma once
7 :
8 : #include "atn/ATNSimulator.h"
9 : #include "atn/LexerATNConfig.h"
10 : #include "atn/ATNConfigSet.h"
11 :
12 : namespace antlr4 {
13 : namespace atn {
14 :
15 : /// ATN simulator used by lexers; derives from ATNSimulator. Original note: "dup" of ParserInterpreter.
16 : class ANTLR4CPP_PUBLIC LexerATNSimulator : public ATNSimulator {
17 : protected:
18 : class SimState { // Snapshot of simulator position; the enclosing class keeps one as _prevAccept.
19 : public:
20 : virtual ~SimState();
21 :
22 : protected:
23 : size_t index; // index into the input (see the sim-state description further down in this class)
24 : size_t line; // current line
25 : size_t charPos; // current character position in that line
26 : dfa::DFAState *dfaState; // DFA state tied to this snapshot; presumably the accept state, confirm in the .cpp
27 : virtual void reset(); // defined out of line
28 : friend class LexerATNSimulator; // lets the enclosing simulator touch the protected members above
29 :
30 : private:
31 : void InitializeInstanceFields(); // defined out of line; invoked by the constructor below
32 :
33 : public:
34 : SimState() {
35 : InitializeInstanceFields();
36 : }
37 : };
38 :
39 :
40 : public:
41 : static const size_t MIN_DFA_EDGE = 0; // presumably the lowest symbol value given a DFA edge; confirm in the .cpp
42 : static const size_t MAX_DFA_EDGE = 127; // forces unicode to stay in ATN
43 :
44 : protected:
45 : /// <summary>
46 : /// When we hit an accept state in either the DFA or the ATN, we
47 : /// have to notify the character stream to start buffering characters
48 : /// via <seealso cref="IntStream#mark"/> and record the current state. The current sim state
49 : /// includes the current index into the input, the current line,
50 : /// and current character position in that line. Note that the Lexer is
51 : /// tracking the starting line and character position of the token. These
52 : /// variables track the "state" of the simulator when it hits an accept state.
53 : /// <p/>
54 : /// We track these variables separately for the DFA and ATN simulation
55 : /// because the DFA simulation often has to fail over to the ATN
56 : /// simulation. If the ATN simulation fails, we need the DFA to fall
57 : /// back to its previously accepted state, if any. If the ATN succeeds,
58 : /// then the ATN does the accept and the DFA simulator that invoked it
59 : /// can simply return the predicted token type.
60 : /// </summary>
61 : Lexer *const _recog; // NOTE(review): the summary above describes the saved accept state (SimState / _prevAccept), not this pointer; the pointer itself is const, the Lexer is not.
62 :
63 : /// The current token's starting index into the character stream.
64 : /// Shared across DFA to ATN simulation in case the ATN fails and the
65 : /// DFA did not have a previous accept state. In this case, we use the
66 : /// ATN-generated exception object.
67 : size_t _startIndex;
68 :
69 : /// Line number 1..n within the input.
70 : size_t _line;
71 :
72 : /// The index of the character relative to the beginning of the line 0..n-1.
73 : size_t _charPositionInLine;
74 :
75 : public:
76 : std::vector<dfa::DFA> &_decisionToDFA; // reference member (not owned); presumably bound to the constructor's decisionToDFA argument
77 :
78 : protected:
79 : size_t _mode; // presumably the current lexer mode (compare match() and getDFA()); confirm in the .cpp
80 :
81 : /// Used during DFA/ATN exec to record the most recent accept configuration info.
82 : SimState _prevAccept;
83 :
84 : public:
85 : static int match_calls; // presumably counts match() invocations; confirm in the .cpp
86 :
87 : LexerATNSimulator(const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, PredictionContextCache &sharedContextCache);
88 : LexerATNSimulator(Lexer *recog, const ATN &atn, std::vector<dfa::DFA> &decisionToDFA, PredictionContextCache &sharedContextCache);
89 0 : virtual ~LexerATNSimulator () {}
90 :
91 : virtual void copyState(LexerATNSimulator *simulator);
92 : virtual size_t match(CharStream *input, size_t mode); // return value is presumably the predicted token type; confirm in the .cpp
93 : virtual void reset() override;
94 :
95 : virtual void clearDFA() override;
96 :
97 : protected:
98 : virtual size_t matchATN(CharStream *input);
99 : virtual size_t execATN(CharStream *input, dfa::DFAState *ds0);
100 :
101 : /// <summary>
102 : /// Get an existing target state for an edge in the DFA. If the target state
103 : /// for the edge has not yet been computed or is otherwise not available,
104 : /// this method returns {@code nullptr}.
105 : /// </summary>
106 : /// <param name="s"> The current DFA state </param>
107 : /// <param name="t"> The next input symbol </param>
108 : /// <returns> The existing target DFA state for the given input symbol
109 : /// {@code t}, or {@code nullptr} if the target state for this edge is not
110 : /// already cached </returns>
111 : virtual dfa::DFAState *getExistingTargetState(dfa::DFAState *s, size_t t);
112 :
113 : /// <summary>
114 : /// Compute a target state for an edge in the DFA, and attempt to add the
115 : /// computed state and corresponding edge to the DFA.
116 : /// </summary>
117 : /// <param name="input"> The input stream </param>
118 : /// <param name="s"> The current DFA state </param>
119 : /// <param name="t"> The next input symbol
120 : /// </param>
121 : /// <returns> The computed target DFA state for the given input symbol
122 : /// {@code t}. If {@code t} does not lead to a valid DFA state, this method
123 : /// returns <seealso cref="#ERROR"/>. </returns>
124 : virtual dfa::DFAState *computeTargetState(CharStream *input, dfa::DFAState *s, size_t t);
125 :
126 : virtual size_t failOrAccept(CharStream *input, ATNConfigSet *reach, size_t t);
127 :
128 : /// <summary>
129 : /// Given a starting configuration set, figure out all ATN configurations
130 : /// we can reach upon input {@code t}. Parameter {@code reach} is a return
131 : /// parameter.
132 : /// </summary>
133 : void getReachableConfigSet(CharStream *input, ATNConfigSet *closure_, // closure_ as we have a closure() already
134 : ATNConfigSet *reach, size_t t);
135 :
136 : virtual void accept(CharStream *input, const Ref<LexerActionExecutor> &lexerActionExecutor, size_t startIndex, size_t index,
137 : size_t line, size_t charPos);
138 :
139 : virtual ATNState *getReachableTarget(Transition *trans, size_t t);
140 :
141 : virtual std::unique_ptr<ATNConfigSet> computeStartState(CharStream *input, ATNState *p); // caller owns the returned set
142 :
143 : /// <summary>
144 : /// Since the alternatives within any lexer decision are ordered by
145 : /// preference, this method stops pursuing the closure as soon as an accept
146 : /// state is reached. After the first accept state is reached by depth-first
147 : /// search from {@code config}, all other (potentially reachable) states for
148 : /// this rule would have a lower priority.
149 : /// </summary>
150 : /// <returns> {@code true} if an accept state is reached, otherwise
151 : /// {@code false}. </returns>
152 : virtual bool closure(CharStream *input, const Ref<LexerATNConfig> &config, ATNConfigSet *configs,
153 : bool currentAltReachedAcceptState, bool speculative, bool treatEofAsEpsilon);
154 :
155 : // side-effect: can alter configs.hasSemanticContext
156 : virtual Ref<LexerATNConfig> getEpsilonTarget(CharStream *input, const Ref<LexerATNConfig> &config, Transition *t,
157 : ATNConfigSet *configs, bool speculative, bool treatEofAsEpsilon);
158 :
159 : /// <summary>
160 : /// Evaluate a predicate specified in the lexer.
161 : /// <p/>
162 : /// If {@code speculative} is {@code true}, this method was called before
163 : /// <seealso cref="#consume"/> for the matched character. This method should call
164 : /// <seealso cref="#consume"/> before evaluating the predicate to ensure position
165 : /// sensitive values, including <seealso cref="Lexer#getText"/>, <seealso cref="Lexer#getLine"/>,
166 : /// and <seealso cref="Lexer#getCharPositionInLine"/>, properly reflect the current
167 : /// lexer state. This method should restore {@code input} and the simulator
168 : /// to the original state before returning (i.e. undo the actions made by the
169 : /// call to <seealso cref="#consume"/>).
170 : /// </summary>
171 : /// <param name="input"> The input stream. </param>
172 : /// <param name="ruleIndex"> The rule containing the predicate. </param>
173 : /// <param name="predIndex"> The index of the predicate within the rule. </param>
174 : /// <param name="speculative"> {@code true} if the current index in {@code input} is
175 : /// one character before the predicate's location.
176 : /// </param>
177 : /// <returns> {@code true} if the specified predicate evaluates to
178 : /// {@code true}. </returns>
179 : virtual bool evaluatePredicate(CharStream *input, size_t ruleIndex, size_t predIndex, bool speculative);
180 :
181 : virtual void captureSimState(CharStream *input, dfa::DFAState *dfaState);
182 : virtual dfa::DFAState* addDFAEdge(dfa::DFAState *from, size_t t, ATNConfigSet *q); // overload taking a config set; returns a DFA state
183 : virtual void addDFAEdge(dfa::DFAState *p, size_t t, dfa::DFAState *q); // overload taking an existing target state
184 :
185 : /// <summary>
186 : /// Add a new DFA state if there isn't one with this set of
187 : /// configurations already. This method also detects the first
188 : /// configuration containing an ATN rule stop state. Later, when
189 : /// traversing the DFA, we will know which rule to accept.
190 : /// </summary>
191 : virtual dfa::DFAState *addDFAState(ATNConfigSet *configs);
192 :
193 : public:
194 : dfa::DFA& getDFA(size_t mode);
195 :
196 : /// Get the text matched so far for the current token.
197 : virtual std::string getText(CharStream *input);
198 : virtual size_t getLine() const;
199 : virtual void setLine(size_t line);
200 : virtual size_t getCharPositionInLine(); // NOTE(review): not const, unlike getLine() above
201 : virtual void setCharPositionInLine(size_t charPositionInLine);
202 : virtual void consume(CharStream *input);
203 : virtual std::string getTokenName(size_t t);
204 :
205 : private:
206 : void InitializeInstanceFields(); // defined out of line; presumably called from the constructors, confirm in the .cpp
207 : };
208 :
209 : } // namespace atn
210 : } // namespace antlr4
|