Line data Source code
1 : // class template regex -*- C++ -*-
2 :
3 : // Copyright (C) 2013-2019 Free Software Foundation, Inc.
4 : //
5 : // This file is part of the GNU ISO C++ Library. This library is free
6 : // software; you can redistribute it and/or modify it under the
7 : // terms of the GNU General Public License as published by the
8 : // Free Software Foundation; either version 3, or (at your option)
9 : // any later version.
10 :
11 : // This library is distributed in the hope that it will be useful,
12 : // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 : // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 : // GNU General Public License for more details.
15 :
16 : // Under Section 7 of GPL version 3, you are granted additional
17 : // permissions described in the GCC Runtime Library Exception, version
18 : // 3.1, as published by the Free Software Foundation.
19 :
20 : // You should have received a copy of the GNU General Public License and
21 : // a copy of the GCC Runtime Library Exception along with this program;
22 : // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 : // <http://www.gnu.org/licenses/>.
24 :
25 : /**
26 : * @file bits/regex.tcc
27 : * This is an internal header file, included by other library headers.
28 : * Do not attempt to use it directly. @headername{regex}
29 : */
30 :
31 : namespace std _GLIBCXX_VISIBILITY(default)
32 : {
33 : _GLIBCXX_BEGIN_NAMESPACE_VERSION
34 :
35 : namespace __detail
36 : {
37 : // Result of merging regex_match and regex_search.
38 : //
39 : // __policy now can be _S_auto (auto dispatch) and _S_alternate (use
40 : // the other one if possible, for test purpose).
41 : //
42 : // That __match_mode is true means regex_match, else regex_search.
43 : template<typename _BiIter, typename _Alloc,
44 : typename _CharT, typename _TraitsT,
45 : _RegexExecutorPolicy __policy,
46 : bool __match_mode>
47 : bool
48 955 : __regex_algo_impl(_BiIter __s,
49 : _BiIter __e,
50 : match_results<_BiIter, _Alloc>& __m,
51 : const basic_regex<_CharT, _TraitsT>& __re,
52 : regex_constants::match_flag_type __flags)
53 : {
54 955 : if (__re._M_automaton == nullptr)
55 : return false;
56 :
57 955 : typename match_results<_BiIter, _Alloc>::_Base_type& __res = __m;
58 955 : __m._M_begin = __s;
59 955 : __m._M_resize(__re._M_automaton->_M_sub_count());
60 4775 : for (auto& __it : __res)
61 3820 : __it.matched = false;
62 :
63 : bool __ret;
64 955 : if ((__re.flags() & regex_constants::__polynomial)
65 : || (__policy == _RegexExecutorPolicy::_S_alternate
66 : && !__re._M_automaton->_M_has_backref))
67 : {
68 : _Executor<_BiIter, _Alloc, _TraitsT, false>
69 0 : __executor(__s, __e, __m, __re, __flags);
70 : if (__match_mode)
71 0 : __ret = __executor._M_match();
72 : else
73 : __ret = __executor._M_search();
74 : }
75 : else
76 : {
77 : _Executor<_BiIter, _Alloc, _TraitsT, true>
78 955 : __executor(__s, __e, __m, __re, __flags);
79 : if (__match_mode)
80 955 : __ret = __executor._M_match();
81 : else
82 : __ret = __executor._M_search();
83 : }
84 955 : if (__ret)
85 : {
86 570 : for (auto& __it : __res)
87 456 : if (!__it.matched)
88 342 : __it.first = __it.second = __e;
89 114 : auto& __pre = __m._M_prefix();
90 114 : auto& __suf = __m._M_suffix();
91 : if (__match_mode)
92 : {
93 114 : __pre.matched = false;
94 114 : __pre.first = __s;
95 114 : __pre.second = __s;
96 114 : __suf.matched = false;
97 114 : __suf.first = __e;
98 114 : __suf.second = __e;
99 : }
100 : else
101 : {
102 : __pre.first = __s;
103 : __pre.second = __res[0].first;
104 : __pre.matched = (__pre.first != __pre.second);
105 : __suf.first = __res[0].second;
106 : __suf.second = __e;
107 : __suf.matched = (__suf.first != __suf.second);
108 : }
109 : }
110 : else
111 : {
112 841 : __m._M_resize(0);
113 3364 : for (auto& __it : __res)
114 : {
115 2523 : __it.matched = false;
116 2523 : __it.first = __it.second = __e;
117 : }
118 : }
119 : return __ret;
120 : }
121 : }
122 :
123 : template<typename _Ch_type>
124 : template<typename _Fwd_iter>
125 : typename regex_traits<_Ch_type>::string_type
126 0 : regex_traits<_Ch_type>::
127 : lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
128 : {
129 : typedef std::ctype<char_type> __ctype_type;
130 0 : const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
131 :
132 : static const char* __collatenames[] =
133 : {
134 : "NUL",
135 : "SOH",
136 : "STX",
137 : "ETX",
138 : "EOT",
139 : "ENQ",
140 : "ACK",
141 : "alert",
142 : "backspace",
143 : "tab",
144 : "newline",
145 : "vertical-tab",
146 : "form-feed",
147 : "carriage-return",
148 : "SO",
149 : "SI",
150 : "DLE",
151 : "DC1",
152 : "DC2",
153 : "DC3",
154 : "DC4",
155 : "NAK",
156 : "SYN",
157 : "ETB",
158 : "CAN",
159 : "EM",
160 : "SUB",
161 : "ESC",
162 : "IS4",
163 : "IS3",
164 : "IS2",
165 : "IS1",
166 : "space",
167 : "exclamation-mark",
168 : "quotation-mark",
169 : "number-sign",
170 : "dollar-sign",
171 : "percent-sign",
172 : "ampersand",
173 : "apostrophe",
174 : "left-parenthesis",
175 : "right-parenthesis",
176 : "asterisk",
177 : "plus-sign",
178 : "comma",
179 : "hyphen",
180 : "period",
181 : "slash",
182 : "zero",
183 : "one",
184 : "two",
185 : "three",
186 : "four",
187 : "five",
188 : "six",
189 : "seven",
190 : "eight",
191 : "nine",
192 : "colon",
193 : "semicolon",
194 : "less-than-sign",
195 : "equals-sign",
196 : "greater-than-sign",
197 : "question-mark",
198 : "commercial-at",
199 : "A",
200 : "B",
201 : "C",
202 : "D",
203 : "E",
204 : "F",
205 : "G",
206 : "H",
207 : "I",
208 : "J",
209 : "K",
210 : "L",
211 : "M",
212 : "N",
213 : "O",
214 : "P",
215 : "Q",
216 : "R",
217 : "S",
218 : "T",
219 : "U",
220 : "V",
221 : "W",
222 : "X",
223 : "Y",
224 : "Z",
225 : "left-square-bracket",
226 : "backslash",
227 : "right-square-bracket",
228 : "circumflex",
229 : "underscore",
230 : "grave-accent",
231 : "a",
232 : "b",
233 : "c",
234 : "d",
235 : "e",
236 : "f",
237 : "g",
238 : "h",
239 : "i",
240 : "j",
241 : "k",
242 : "l",
243 : "m",
244 : "n",
245 : "o",
246 : "p",
247 : "q",
248 : "r",
249 : "s",
250 : "t",
251 : "u",
252 : "v",
253 : "w",
254 : "x",
255 : "y",
256 : "z",
257 : "left-curly-bracket",
258 : "vertical-line",
259 : "right-curly-bracket",
260 : "tilde",
261 : "DEL",
262 : };
263 :
264 0 : string __s;
265 0 : for (; __first != __last; ++__first)
266 0 : __s += __fctyp.narrow(*__first, 0);
267 :
268 0 : for (const auto& __it : __collatenames)
269 0 : if (__s == __it)
270 0 : return string_type(1, __fctyp.widen(
271 0 : static_cast<char>(&__it - __collatenames)));
272 :
273 : // TODO Add digraph support:
274 : // http://boost.sourceforge.net/libs/regex/doc/collating_names.html
275 :
276 0 : return string_type();
277 : }
278 :
279 : template<typename _Ch_type>
280 : template<typename _Fwd_iter>
281 : typename regex_traits<_Ch_type>::char_class_type
282 0 : regex_traits<_Ch_type>::
283 : lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const
284 : {
285 : typedef std::ctype<char_type> __ctype_type;
286 0 : const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
287 :
288 : // Mappings from class name to class mask.
289 : static const pair<const char*, char_class_type> __classnames[] =
290 : {
291 : {"d", ctype_base::digit},
292 : {"w", {ctype_base::alnum, _RegexMask::_S_under}},
293 : {"s", ctype_base::space},
294 : {"alnum", ctype_base::alnum},
295 : {"alpha", ctype_base::alpha},
296 : {"blank", ctype_base::blank},
297 : {"cntrl", ctype_base::cntrl},
298 : {"digit", ctype_base::digit},
299 : {"graph", ctype_base::graph},
300 : {"lower", ctype_base::lower},
301 : {"print", ctype_base::print},
302 : {"punct", ctype_base::punct},
303 : {"space", ctype_base::space},
304 : {"upper", ctype_base::upper},
305 : {"xdigit", ctype_base::xdigit},
306 : };
307 :
308 0 : string __s;
309 0 : for (; __first != __last; ++__first)
310 0 : __s += __fctyp.narrow(__fctyp.tolower(*__first), 0);
311 :
312 0 : for (const auto& __it : __classnames)
313 0 : if (__s == __it.first)
314 : {
315 : if (__icase
316 0 : && ((__it.second
317 0 : & (ctype_base::lower | ctype_base::upper)) != 0))
318 0 : return ctype_base::alpha;
319 0 : return __it.second;
320 : }
321 0 : return 0;
322 : }
323 :
324 : template<typename _Ch_type>
325 : bool
326 1212850 : regex_traits<_Ch_type>::
327 : isctype(_Ch_type __c, char_class_type __f) const
328 : {
329 : typedef std::ctype<char_type> __ctype_type;
330 1212850 : const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
331 :
332 1212850 : return __fctyp.is(__f._M_base, __c)
333 : // [[:w:]]
334 1212850 : || ((__f._M_extended & _RegexMask::_S_under)
335 0 : && __c == __fctyp.widen('_'));
336 : }
337 :
338 : template<typename _Ch_type>
339 : int
340 0 : regex_traits<_Ch_type>::
341 : value(_Ch_type __ch, int __radix) const
342 : {
343 0 : std::basic_istringstream<char_type> __is(string_type(1, __ch));
344 : long __v;
345 0 : if (__radix == 8)
346 0 : __is >> std::oct;
347 0 : else if (__radix == 16)
348 0 : __is >> std::hex;
349 0 : __is >> __v;
350 0 : return __is.fail() ? -1 : __v;
351 : }
352 :
353 : template<typename _Bi_iter, typename _Alloc>
354 : template<typename _Out_iter>
355 : _Out_iter match_results<_Bi_iter, _Alloc>::
356 : format(_Out_iter __out,
357 : const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first,
358 : const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last,
359 : match_flag_type __flags) const
360 : {
361 : __glibcxx_assert( ready() );
362 : regex_traits<char_type> __traits;
363 : typedef std::ctype<char_type> __ctype_type;
364 : const __ctype_type&
365 : __fctyp(use_facet<__ctype_type>(__traits.getloc()));
366 :
367 : auto __output = [&](size_t __idx)
368 : {
369 : auto& __sub = (*this)[__idx];
370 : if (__sub.matched)
371 : __out = std::copy(__sub.first, __sub.second, __out);
372 : };
373 :
374 : if (__flags & regex_constants::format_sed)
375 : {
376 : bool __escaping = false;
377 : for (; __fmt_first != __fmt_last; __fmt_first++)
378 : {
379 : if (__escaping)
380 : {
381 : __escaping = false;
382 : if (__fctyp.is(__ctype_type::digit, *__fmt_first))
383 : __output(__traits.value(*__fmt_first, 10));
384 : else
385 : *__out++ = *__fmt_first;
386 : continue;
387 : }
388 : if (*__fmt_first == '\\')
389 : {
390 : __escaping = true;
391 : continue;
392 : }
393 : if (*__fmt_first == '&')
394 : {
395 : __output(0);
396 : continue;
397 : }
398 : *__out++ = *__fmt_first;
399 : }
400 : if (__escaping)
401 : *__out++ = '\\';
402 : }
403 : else
404 : {
405 : while (1)
406 : {
407 : auto __next = std::find(__fmt_first, __fmt_last, '$');
408 : if (__next == __fmt_last)
409 : break;
410 :
411 : __out = std::copy(__fmt_first, __next, __out);
412 :
413 : auto __eat = [&](char __ch) -> bool
414 : {
415 : if (*__next == __ch)
416 : {
417 : ++__next;
418 : return true;
419 : }
420 : return false;
421 : };
422 :
423 : if (++__next == __fmt_last)
424 : *__out++ = '$';
425 : else if (__eat('$'))
426 : *__out++ = '$';
427 : else if (__eat('&'))
428 : __output(0);
429 : else if (__eat('`'))
430 : {
431 : auto& __sub = _M_prefix();
432 : if (__sub.matched)
433 : __out = std::copy(__sub.first, __sub.second, __out);
434 : }
435 : else if (__eat('\''))
436 : {
437 : auto& __sub = _M_suffix();
438 : if (__sub.matched)
439 : __out = std::copy(__sub.first, __sub.second, __out);
440 : }
441 : else if (__fctyp.is(__ctype_type::digit, *__next))
442 : {
443 : long __num = __traits.value(*__next, 10);
444 : if (++__next != __fmt_last
445 : && __fctyp.is(__ctype_type::digit, *__next))
446 : {
447 : __num *= 10;
448 : __num += __traits.value(*__next++, 10);
449 : }
450 : if (0 <= __num && __num < this->size())
451 : __output(__num);
452 : }
453 : else
454 : *__out++ = '$';
455 : __fmt_first = __next;
456 : }
457 : __out = std::copy(__fmt_first, __fmt_last, __out);
458 : }
459 : return __out;
460 : }
461 :
462 : template<typename _Out_iter, typename _Bi_iter,
463 : typename _Rx_traits, typename _Ch_type>
464 : _Out_iter
465 : regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last,
466 : const basic_regex<_Ch_type, _Rx_traits>& __e,
467 : const _Ch_type* __fmt,
468 : regex_constants::match_flag_type __flags)
469 : {
470 : typedef regex_iterator<_Bi_iter, _Ch_type, _Rx_traits> _IterT;
471 : _IterT __i(__first, __last, __e, __flags);
472 : _IterT __end;
473 : if (__i == __end)
474 : {
475 : if (!(__flags & regex_constants::format_no_copy))
476 : __out = std::copy(__first, __last, __out);
477 : }
478 : else
479 : {
480 : sub_match<_Bi_iter> __last;
481 : auto __len = char_traits<_Ch_type>::length(__fmt);
482 : for (; __i != __end; ++__i)
483 : {
484 : if (!(__flags & regex_constants::format_no_copy))
485 : __out = std::copy(__i->prefix().first, __i->prefix().second,
486 : __out);
487 : __out = __i->format(__out, __fmt, __fmt + __len, __flags);
488 : __last = __i->suffix();
489 : if (__flags & regex_constants::format_first_only)
490 : break;
491 : }
492 : if (!(__flags & regex_constants::format_no_copy))
493 : __out = std::copy(__last.first, __last.second, __out);
494 : }
495 : return __out;
496 : }
497 :
498 : template<typename _Bi_iter,
499 : typename _Ch_type,
500 : typename _Rx_traits>
501 : bool
502 : regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
503 : operator==(const regex_iterator& __rhs) const noexcept
504 : {
505 : if (_M_pregex == nullptr && __rhs._M_pregex == nullptr)
506 : return true;
507 : return _M_pregex == __rhs._M_pregex
508 : && _M_begin == __rhs._M_begin
509 : && _M_end == __rhs._M_end
510 : && _M_flags == __rhs._M_flags
511 : && _M_match[0] == __rhs._M_match[0];
512 : }
513 :
514 : template<typename _Bi_iter,
515 : typename _Ch_type,
516 : typename _Rx_traits>
517 : regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
518 : regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
519 : operator++()
520 : {
521 : // In all cases in which the call to regex_search returns true,
522 : // match.prefix().first shall be equal to the previous value of
523 : // match[0].second, and for each index i in the half-open range
524 : // [0, match.size()) for which match[i].matched is true,
525 : // match[i].position() shall return distance(begin, match[i].first).
526 : // [28.12.1.4.5]
527 : if (_M_match[0].matched)
528 : {
529 : auto __start = _M_match[0].second;
530 : auto __prefix_first = _M_match[0].second;
531 : if (_M_match[0].first == _M_match[0].second)
532 : {
533 : if (__start == _M_end)
534 : {
535 : _M_pregex = nullptr;
536 : return *this;
537 : }
538 : else
539 : {
540 : if (regex_search(__start, _M_end, _M_match, *_M_pregex,
541 : _M_flags
542 : | regex_constants::match_not_null
543 : | regex_constants::match_continuous))
544 : {
545 : __glibcxx_assert(_M_match[0].matched);
546 : auto& __prefix = _M_match._M_prefix();
547 : __prefix.first = __prefix_first;
548 : __prefix.matched = __prefix.first != __prefix.second;
549 : // [28.12.1.4.5]
550 : _M_match._M_begin = _M_begin;
551 : return *this;
552 : }
553 : else
554 : ++__start;
555 : }
556 : }
557 : _M_flags |= regex_constants::match_prev_avail;
558 : if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
559 : {
560 : __glibcxx_assert(_M_match[0].matched);
561 : auto& __prefix = _M_match._M_prefix();
562 : __prefix.first = __prefix_first;
563 : __prefix.matched = __prefix.first != __prefix.second;
564 : // [28.12.1.4.5]
565 : _M_match._M_begin = _M_begin;
566 : }
567 : else
568 : _M_pregex = nullptr;
569 : }
570 : return *this;
571 : }
572 :
573 : template<typename _Bi_iter,
574 : typename _Ch_type,
575 : typename _Rx_traits>
576 : regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
577 : regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
578 : operator=(const regex_token_iterator& __rhs)
579 : {
580 : _M_position = __rhs._M_position;
581 : _M_subs = __rhs._M_subs;
582 : _M_n = __rhs._M_n;
583 : _M_suffix = __rhs._M_suffix;
584 : _M_has_m1 = __rhs._M_has_m1;
585 : _M_normalize_result();
586 : return *this;
587 : }
588 :
589 : template<typename _Bi_iter,
590 : typename _Ch_type,
591 : typename _Rx_traits>
592 : bool
593 : regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
594 : operator==(const regex_token_iterator& __rhs) const
595 : {
596 : if (_M_end_of_seq() && __rhs._M_end_of_seq())
597 : return true;
598 : if (_M_suffix.matched && __rhs._M_suffix.matched
599 : && _M_suffix == __rhs._M_suffix)
600 : return true;
601 : if (_M_end_of_seq() || _M_suffix.matched
602 : || __rhs._M_end_of_seq() || __rhs._M_suffix.matched)
603 : return false;
604 : return _M_position == __rhs._M_position
605 : && _M_n == __rhs._M_n
606 : && _M_subs == __rhs._M_subs;
607 : }
608 :
609 : template<typename _Bi_iter,
610 : typename _Ch_type,
611 : typename _Rx_traits>
612 : regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
613 : regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
614 : operator++()
615 : {
616 : _Position __prev = _M_position;
617 : if (_M_suffix.matched)
618 : *this = regex_token_iterator();
619 : else if (_M_n + 1 < _M_subs.size())
620 : {
621 : _M_n++;
622 : _M_result = &_M_current_match();
623 : }
624 : else
625 : {
626 : _M_n = 0;
627 : ++_M_position;
628 : if (_M_position != _Position())
629 : _M_result = &_M_current_match();
630 : else if (_M_has_m1 && __prev->suffix().length() != 0)
631 : {
632 : _M_suffix.matched = true;
633 : _M_suffix.first = __prev->suffix().first;
634 : _M_suffix.second = __prev->suffix().second;
635 : _M_result = &_M_suffix;
636 : }
637 : else
638 : *this = regex_token_iterator();
639 : }
640 : return *this;
641 : }
642 :
643 : template<typename _Bi_iter,
644 : typename _Ch_type,
645 : typename _Rx_traits>
646 : void
647 : regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
648 : _M_init(_Bi_iter __a, _Bi_iter __b)
649 : {
650 : _M_has_m1 = false;
651 : for (auto __it : _M_subs)
652 : if (__it == -1)
653 : {
654 : _M_has_m1 = true;
655 : break;
656 : }
657 : if (_M_position != _Position())
658 : _M_result = &_M_current_match();
659 : else if (_M_has_m1)
660 : {
661 : _M_suffix.matched = true;
662 : _M_suffix.first = __a;
663 : _M_suffix.second = __b;
664 : _M_result = &_M_suffix;
665 : }
666 : else
667 : _M_result = nullptr;
668 : }
669 :
670 : _GLIBCXX_END_NAMESPACE_VERSION
671 : } // namespace
|