LCOV - code coverage report
Current view: top level - usr/include/antlr4-runtime - Lexer.h (source / functions) Hit Total Coverage
Test: ROSE Lines: 0 1 0.0 %
Date: 2022-12-08 13:48:47 Functions: 0 2 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
       2             :  * Use of this file is governed by the BSD 3-clause license that
       3             :  * can be found in the LICENSE.txt file in the project root.
       4             :  */
       5             : 
       6             : #pragma once
       7             : 
       8             : #include "Recognizer.h"
       9             : #include "TokenSource.h"
      10             : #include "CharStream.h"
      11             : #include "Token.h"
      12             : 
      13             : namespace antlr4 {
      14             : 
      15             :   /// A lexer is a recognizer that draws input symbols from a character stream.
      16             :   /// Lexer grammars result in a subclass of this object. A Lexer object
      17             :   /// uses simplified match() and error recovery mechanisms in the interest
      18             :   /// of speed.
      19             :   class ANTLR4CPP_PUBLIC Lexer : public Recognizer, public TokenSource {
      20             :   public:
      21             :     static const size_t DEFAULT_MODE = 0;
      22             :     static const size_t MORE = static_cast<size_t>(-2); // Wraps to the second-largest size_t value.
      23             :     static const size_t SKIP = static_cast<size_t>(-3); // Wraps to the third-largest size_t value.
      24             : 
      25             :     static const size_t DEFAULT_TOKEN_CHANNEL = Token::DEFAULT_CHANNEL;
      26             :     static const size_t HIDDEN = Token::HIDDEN_CHANNEL;
      27             :     static const size_t MIN_CHAR_VALUE = 0;
      28             :     static const size_t MAX_CHAR_VALUE = 0x10FFFF; // 0x10FFFF is the highest Unicode code point.
      29             : 
      30             :     CharStream *_input; // Non-owning pointer, usually to a statically allocated instance.
      31             : 
      32             :   protected:
      33             :     /// How to create token objects.
      34             :     Ref<TokenFactory<CommonToken>> _factory;
      35             : 
      36             :   public:
      37             :     /// The goal of all lexer rules/methods is to create a token object.
      38             :     ///  This is an instance variable as multiple rules may collaborate to
      39             :     ///  create a single token.  nextToken will return this object after
      40             :     ///  matching lexer rule(s).  If you subclass to allow multiple token
      41             :     ///  emissions, then set this to the last token to be matched or
      42             :     ///  something nonnull so that the auto token emit mechanism will not
      43             :     ///  emit another token.
      44             : 
      45             :     // The life cycle of a token is this:
      46             :     // It is created by emit() (via the token factory) or by action code, which holds ownership of it.
      47             :     // Ownership is handed over to the token stream when calling nextToken().
      48             :     std::unique_ptr<Token> token;
      49             : 
      50             :     /// <summary>
      51             :     /// What character index in the stream did the current token start at?
      52             :     ///  Needed, for example, to get the text for the current token.  Set at
      53             :     ///  the start of nextToken.
      54             :     /// </summary>
      55             :     size_t tokenStartCharIndex;
      56             : 
      57             :     /// <summary>
      58             :     /// The line on which the first character of the token resides. </summary>
      59             :     size_t tokenStartLine;
      60             : 
      61             :     /// The character position of the first character within the line.
      62             :     size_t tokenStartCharPositionInLine;
      63             : 
      64             :     /// Once we see EOF on the char stream, the next token will be EOF.
      65             :     /// If you have DONE : EOF ; then you see DONE EOF.
      66             :     bool hitEOF;
      67             : 
      68             :     /// The channel number for the current token.
      69             :     size_t channel;
      70             : 
      71             :     /// The token type for the current token.
      72             :     size_t type;
      73             : 
      74             :     // Use the vector as a stack.
      75             :     std::vector<size_t> modeStack;
      76             :     size_t mode;
      77             : 
      78             :     Lexer();
      79             :     Lexer(CharStream *input);
      80           0 :     virtual ~Lexer() {}
      81             : 
      82             :     virtual void reset();
      83             : 
      84             :     /// Return a token from this source; i.e., match a token on the char stream.
      85             :     virtual std::unique_ptr<Token> nextToken() override;
      86             : 
      87             :     /// Instruct the lexer to skip creating a token for the current lexer rule
      88             :     /// and look for another token.  nextToken() knows to keep looking when
      89             :     /// a lexer rule finishes with token set to SKIP_TOKEN.  Recall that
      90             :     /// if token == null at the end of any token rule, it creates one for you
      91             :     /// and emits it.
      92             :     virtual void skip();
      93             :     virtual void more();
      94             :     virtual void setMode(size_t m);
      95             :     virtual void pushMode(size_t m);
      96             :     virtual size_t popMode();
      97             : 
      98             :     template<typename T1>
      99             :     void setTokenFactory(TokenFactory<T1> *factory)  {
     100             :       this->_factory = factory;
     101             :     }
     102             : 
     103             :     virtual Ref<TokenFactory<CommonToken>> getTokenFactory() override;
     104             : 
     105             :     /// Set the char stream and reset the lexer.
     106             :     virtual void setInputStream(IntStream *input) override;
     107             : 
     108             :     virtual std::string getSourceName() override;
     109             : 
     110             :     virtual CharStream* getInputStream() override;
     111             : 
     112             :     /// By default does not support multiple emits per nextToken invocation
     113             :     /// for efficiency reasons. Subclasses can override this method, nextToken,
     114             :     /// and getToken (to push tokens into a list and pull from that list
     115             :     /// rather than a single variable as this implementation does).
     116             :     virtual void emit(std::unique_ptr<Token> newToken);
     117             : 
     118             :     /// The standard method called to automatically emit a token at the
     119             :     /// outermost lexical rule.  The token object should point into the
     120             :     /// char buffer start..stop.  If there is a text override in 'text',
     121             :     /// use that to set the token's text.  Override this method to emit
     122             :     /// custom Token objects or provide a new factory.
     123             :     virtual Token* emit();
     124             : 
     125             :     virtual Token* emitEOF();
     126             : 
     127             :     virtual size_t getLine() const override;
     128             : 
     129             :     virtual size_t getCharPositionInLine() override;
     130             : 
     131             :     virtual void setLine(size_t line);
     132             : 
     133             :     virtual void setCharPositionInLine(size_t charPositionInLine);
     134             : 
     135             :     /// What is the index of the current character of lookahead?
     136             :     virtual size_t getCharIndex();
     137             : 
     138             :     /// Return the text matched so far for the current token or any
     139             :     /// text override.
     140             :     virtual std::string getText();
     141             : 
     142             :     /// Set the complete text of this token; it wipes any previous
     143             :     /// changes to the text.
     144             :     virtual void setText(const std::string &text);
     145             : 
     146             :     /// Override if emitting multiple tokens.
     147             :     virtual std::unique_ptr<Token> getToken();
     148             : 
     149             :     virtual void setToken(std::unique_ptr<Token> newToken);
     150             : 
     151             :     virtual void setType(size_t ttype);
     152             : 
     153             :     virtual size_t getType();
     154             : 
     155             :     virtual void setChannel(size_t newChannel);
     156             : 
     157             :     virtual size_t getChannel();
     158             : 
     159             :     virtual const std::vector<std::string>& getChannelNames() const = 0;
     160             : 
     161             :     virtual const std::vector<std::string>& getModeNames() const = 0;
     162             : 
     163             :     /// Return a list of all Token objects in the input char stream.
     164             :     /// Forces load of all tokens. Does not include the EOF token.
     165             :     virtual std::vector<std::unique_ptr<Token>> getAllTokens();
     166             : 
     167             :     virtual void recover(const LexerNoViableAltException &e);
     168             : 
     169             :     virtual void notifyListeners(const LexerNoViableAltException &e);
     170             : 
     171             :     virtual std::string getErrorDisplay(const std::string &s);
     172             : 
     173             :     /// Lexers can normally match any char in its vocabulary after matching
     174             :     /// a token, so do the easy thing and just kill a character and hope
     175             :     /// it all works out.  You can instead use the rule invocation stack
     176             :     /// to do sophisticated error recovery if you are in a fragment rule.
     177             :     virtual void recover(RecognitionException *re);
     178             : 
     179             :     /// <summary>
     180             :     /// Gets the number of syntax errors reported during parsing. This value is
     181             :     /// incremented each time <seealso cref="#notifyErrorListeners"/> is called.
     182             :     /// </summary>
     183             :     /// <seealso cref="#notifyListeners"/>
     184             :     virtual size_t getNumberOfSyntaxErrors();
     185             : 
     186             :   protected:
     187             :     /// You can set the text for the current token to override what is in
     188             :     /// the input char buffer (via setText()).
     189             :     std::string _text;
     190             : 
     191             :   private:
     192             :     size_t _syntaxErrors;
     193             :     void InitializeInstanceFields();
     194             :   };
     195             : 
     196             : } // namespace antlr4

Generated by: LCOV version 1.14