reference, declarationdefinition
definition → references, declarations, derived classes, virtual overrides
reference to multiple definitions → definitions
unreferenced
    1
    2
    3
    4
    5
    6
    7
    8
    9
   10
   11
   12
   13
   14
   15
   16
   17
   18
   19
   20
   21
   22
   23
   24
   25
   26
   27
   28
   29
   30
   31
   32
   33
   34
   35
   36
   37
   38
   39
   40
   41
   42
   43
   44
   45
   46
   47
   48
   49
   50
   51
   52
   53
   54
   55
   56
   57
   58
   59
   60
   61
   62
   63
   64
   65
   66
   67
   68
   69
   70
   71
   72
   73
   74
   75
   76
   77
   78
   79
   80
   81
   82
   83
   84
   85
   86
   87
   88
   89
   90
   91
   92
   93
   94
   95
   96
   97
   98
   99
  100
  101
  102
  103
  104
  105
  106
  107
  108
  109
  110
  111
  112
  113
  114
  115
  116
  117
  118
  119
  120
  121
  122
  123
  124
  125
  126
  127
  128
  129
  130
  131
  132
  133
  134
  135
  136
  137
  138
  139
  140
  141
  142
  143
  144
  145
  146
  147
  148
  149
  150
  151
  152
  153
  154
  155
  156
  157
  158
  159
  160
  161
  162
  163
  164
  165
  166
  167
  168
  169
  170
  171
  172
  173
  174
  175
  176
  177
  178
  179
  180
  181
  182
  183
  184
  185
  186
  187
  188
  189
  190
  191
  192
  193
  194
  195
  196
  197
  198
  199
  200
  201
  202
  203
  204
  205
  206
  207
  208
  209
  210
  211
  212
  213
  214
  215
  216
  217
  218
  219
  220
  221
  222
  223
  224
  225
  226
  227
  228
  229
  230
  231
  232
  233
  234
  235
  236
  237
  238
  239
  240
  241
  242
  243
  244
  245
  246
  247
  248
  249
  250
  251
  252
  253
  254
  255
  256
  257
  258
  259
  260
  261
  262
  263
  264
  265
  266
  267
  268
  269
  270
  271
  272
// class template regex -*- C++ -*-

// Copyright (C) 2013-2017 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

/**
 *  @file bits/regex_scanner.h
 *  This is an internal header file, included by other library headers.
 *  Do not attempt to use it directly. @headername{regex}
 */

namespace std _GLIBCXX_VISIBILITY(default)
{
namespace __detail
{
_GLIBCXX_BEGIN_NAMESPACE_VERSION

  /**
   * @addtogroup regex-detail
   * @{
   */

  struct _ScannerBase
  {
  public:
    /// Token types returned from the scanner.
    enum _TokenT : unsigned
    {
      _S_token_anychar,
      _S_token_ord_char,
      _S_token_oct_num,
      _S_token_hex_num,
      _S_token_backref,
      _S_token_subexpr_begin,
      _S_token_subexpr_no_group_begin,
      _S_token_subexpr_lookahead_begin, // neg if _M_value[0] == 'n'
      _S_token_subexpr_end,
      _S_token_bracket_begin,
      _S_token_bracket_neg_begin,
      _S_token_bracket_end,
      _S_token_interval_begin,
      _S_token_interval_end,
      _S_token_quoted_class,
      _S_token_char_class_name,
      _S_token_collsymbol,
      _S_token_equiv_class_name,
      _S_token_opt,
      _S_token_or,
      _S_token_closure0,
      _S_token_closure1,
      _S_token_line_begin,
      _S_token_line_end,
      _S_token_word_bound, // neg if _M_value[0] == 'n'
      _S_token_comma,
      _S_token_dup_count,
      _S_token_eof,
      _S_token_bracket_dash,
      _S_token_unknown = -1u
    };

  protected:
    typedef regex_constants::syntax_option_type _FlagT;

    enum _StateT
    {
      _S_state_normal,
      _S_state_in_brace,
      _S_state_in_bracket,
    };

  protected:
    _ScannerBase(_FlagT __flags)
    : _M_state(_S_state_normal),
    _M_flags(__flags),
    _M_escape_tbl(_M_is_ecma()
		  ? _M_ecma_escape_tbl
		  : _M_awk_escape_tbl),
    _M_spec_char(_M_is_ecma()
		 ? _M_ecma_spec_char
		 : _M_flags & regex_constants::basic
		 ? _M_basic_spec_char
		 : _M_flags & regex_constants::extended
		 ? _M_extended_spec_char
		 : _M_flags & regex_constants::grep
		 ?  ".[\\*^$\n"
		 : _M_flags & regex_constants::egrep
		 ? ".[\\()*+?{|^$\n"
		 : _M_flags & regex_constants::awk
		 ? _M_extended_spec_char
		 : nullptr),
    _M_at_bracket_start(false)
    { __glibcxx_assert(_M_spec_char); }

  protected:
    const char*
    _M_find_escape(char __c)
    {
      auto __it = _M_escape_tbl;
      for (; __it->first != '\0'; ++__it)
	if (__it->first == __c)
	  return &__it->second;
      return nullptr;
    }

    bool
    _M_is_ecma() const
    { return _M_flags & regex_constants::ECMAScript; }

    bool
    _M_is_basic() const
    { return _M_flags & (regex_constants::basic | regex_constants::grep); }

    bool
    _M_is_extended() const
    {
      return _M_flags & (regex_constants::extended
			 | regex_constants::egrep
			 | regex_constants::awk);
    }

    bool
    _M_is_grep() const
    { return _M_flags & (regex_constants::grep | regex_constants::egrep); }

    bool
    _M_is_awk() const
    { return _M_flags & regex_constants::awk; }

  protected:
    // TODO: Make them static in the next abi change.
    const std::pair<char, _TokenT> _M_token_tbl[9] =
      {
	{'^', _S_token_line_begin},
	{'$', _S_token_line_end},
	{'.', _S_token_anychar},
	{'*', _S_token_closure0},
	{'+', _S_token_closure1},
	{'?', _S_token_opt},
	{'|', _S_token_or},
	{'\n', _S_token_or}, // grep and egrep
	{'\0', _S_token_or},
      };
    const std::pair<char, char> _M_ecma_escape_tbl[8] =
      {
	{'0', '\0'},
	{'b', '\b'},
	{'f', '\f'},
	{'n', '\n'},
	{'r', '\r'},
	{'t', '\t'},
	{'v', '\v'},
	{'\0', '\0'},
      };
    const std::pair<char, char> _M_awk_escape_tbl[11] =
      {
	{'"', '"'},
	{'/', '/'},
	{'\\', '\\'},
	{'a', '\a'},
	{'b', '\b'},
	{'f', '\f'},
	{'n', '\n'},
	{'r', '\r'},
	{'t', '\t'},
	{'v', '\v'},
	{'\0', '\0'},
      };
    const char* _M_ecma_spec_char = "^$\\.*+?()[]{}|";
    const char* _M_basic_spec_char = ".[\\*^$";
    const char* _M_extended_spec_char = ".[\\()*+?{|^$";

    _StateT                       _M_state;
    _FlagT                        _M_flags;
    _TokenT                       _M_token;
    const std::pair<char, char>*  _M_escape_tbl;
    const char*                   _M_spec_char;
    bool                          _M_at_bracket_start;
  };

  /**
   * @brief Scans an input range for regex tokens.
   *
   * The %_Scanner class interprets the regular expression pattern in
   * the input range passed to its constructor as a sequence of parse
   * tokens passed to the regular expression compiler.  The sequence
   * of tokens provided depends on the flag settings passed to the
   * constructor: different regular expression grammars will interpret
   * the same input pattern in syntactically different ways.
   */
  template<typename _CharT>
    class _Scanner
    : public _ScannerBase
    {
    public:
      typedef const _CharT*                                       _IterT;
      typedef std::basic_string<_CharT>                           _StringT;
      typedef regex_constants::syntax_option_type                 _FlagT;
      typedef const std::ctype<_CharT>                            _CtypeT;

      _Scanner(_IterT __begin, _IterT __end,
	       _FlagT __flags, std::locale __loc);

      void
      _M_advance();

      _TokenT
      _M_get_token() const
      { return _M_token; }

      const _StringT&
      _M_get_value() const
      { return _M_value; }

#ifdef _GLIBCXX_DEBUG
      std::ostream&
      _M_print(std::ostream&);
#endif

    private:
      void
      _M_scan_normal();

      void
      _M_scan_in_bracket();

      void
      _M_scan_in_brace();

      void
      _M_eat_escape_ecma();

      void
      _M_eat_escape_posix();

      void
      _M_eat_escape_awk();

      void
      _M_eat_class(char);

      _IterT                        _M_current;
      _IterT                        _M_end;
      _CtypeT&                      _M_ctype;
      _StringT                      _M_value;
      void (_Scanner::* _M_eat_escape)();
    };

 //@} regex-detail
_GLIBCXX_END_NAMESPACE_VERSION
} // namespace __detail
} // namespace std

#include <bits/regex_scanner.tcc>