Line data Source code
1 : // Boost string_algo library regex.hpp header file ---------------------------//
2 :
3 : // Copyright Pavol Droba 2002-2003.
4 : //
5 : // Distributed under the Boost Software License, Version 1.0.
6 : // (See accompanying file LICENSE_1_0.txt or copy at
7 : // http://www.boost.org/LICENSE_1_0.txt)
8 :
9 : // See http://www.boost.org/ for updates, documentation, and revision history.
10 :
11 : #ifndef BOOST_STRING_REGEX_HPP
12 : #define BOOST_STRING_REGEX_HPP
13 :
14 : #include <boost/algorithm/string/config.hpp>
15 : #include <boost/regex.hpp>
16 :
17 : #include <boost/range/iterator_range_core.hpp>
18 : #include <boost/range/begin.hpp>
19 : #include <boost/range/end.hpp>
20 : #include <boost/range/iterator.hpp>
21 : #include <boost/range/as_literal.hpp>
22 :
23 : #include <boost/algorithm/string/find_format.hpp>
24 : #include <boost/algorithm/string/regex_find_format.hpp>
25 : #include <boost/algorithm/string/formatter.hpp>
26 : #include <boost/algorithm/string/iter_find.hpp>
27 :
28 : /*! \file
29 : Defines regex variants of the algorithms.
30 : */
31 :
32 : namespace boost {
33 : namespace algorithm {
34 :
35 : // find_regex -----------------------------------------------//
36 :
37 : //! Find regex algorithm
38 : /*!
39 : Search for a substring matching the given regex in the input.
40 :
41 : \param Input A container which will be searched.
42 : \param Rx A regular expression
43 : \param Flags Regex options
44 : \return
45 : An \c iterator_range delimiting the match.
46 : Returned iterator is either \c RangeT::iterator or
47 : \c RangeT::const_iterator, depending on the constness of
48 : the input parameter.
49 :
50 : \note This function provides the strong exception-safety guarantee
51 : */
52 : template<
53 : typename RangeT,
54 : typename CharT,
55 : typename RegexTraitsT>
56 : inline iterator_range<
57 : BOOST_STRING_TYPENAME range_iterator<RangeT>::type >
58 0 : find_regex(
59 : RangeT& Input,
60 : const basic_regex<CharT, RegexTraitsT>& Rx,
61 : match_flag_type Flags=match_default )
62 : {
63 0 : iterator_range<BOOST_STRING_TYPENAME range_iterator<RangeT>::type> lit_input(::boost::as_literal(Input));
64 :
65 0 : return ::boost::algorithm::regex_finder(Rx,Flags)(
66 0 : ::boost::begin(lit_input), ::boost::end(lit_input) );
67 : }
68 :
69 : // replace_regex --------------------------------------------------------------------//
70 :
71 : //! Replace regex algorithm
72 : /*!
73 : Search for a substring matching given regex and format it with
74 : the specified format.
75 : The result is a modified copy of the input. It is returned as a sequence
76 : or copied to the output iterator.
77 :
78 : \param Output An output iterator to which the result will be copied
79 : \param Input An input string
80 : \param Rx A regular expression
81 : \param Format Regex format definition
82 : \param Flags Regex options
83 : \return An output iterator pointing just after the last inserted character or
84 : a modified copy of the input
85 :
86 : \note The second variant of this function provides the strong exception-safety guarantee
87 : */
88 : template<
89 : typename OutputIteratorT,
90 : typename RangeT,
91 : typename CharT,
92 : typename RegexTraitsT,
93 : typename FormatStringTraitsT, typename FormatStringAllocatorT >
94 : inline OutputIteratorT replace_regex_copy(
95 : OutputIteratorT Output,
96 : const RangeT& Input,
97 : const basic_regex<CharT, RegexTraitsT>& Rx,
98 : const std::basic_string<CharT, FormatStringTraitsT, FormatStringAllocatorT>& Format,
99 : match_flag_type Flags=match_default | format_default )
100 : {
101 : return ::boost::algorithm::find_format_copy(
102 : Output,
103 : Input,
104 : ::boost::algorithm::regex_finder( Rx, Flags ),
105 : ::boost::algorithm::regex_formatter( Format, Flags ) );
106 : }
107 :
108 : //! Replace regex algorithm
109 : /*!
110 : \overload
111 : */
112 : template<
113 : typename SequenceT,
114 : typename CharT,
115 : typename RegexTraitsT,
116 : typename FormatStringTraitsT, typename FormatStringAllocatorT >
117 : inline SequenceT replace_regex_copy(
118 : const SequenceT& Input,
119 : const basic_regex<CharT, RegexTraitsT>& Rx,
120 : const std::basic_string<CharT, FormatStringTraitsT, FormatStringAllocatorT>& Format,
121 : match_flag_type Flags=match_default | format_default )
122 : {
123 : return ::boost::algorithm::find_format_copy(
124 : Input,
125 : ::boost::algorithm::regex_finder( Rx, Flags ),
126 : ::boost::algorithm::regex_formatter( Format, Flags ) );
127 : }
128 :
129 : //! Replace regex algorithm
130 : /*!
131 : Search for a substring matching given regex and format it with
132 : the specified format. The input string is modified in-place.
133 :
134 : \param Input An input string
135 : \param Rx A regular expression
136 : \param Format Regex format definition
137 : \param Flags Regex options
138 : */
139 : template<
140 : typename SequenceT,
141 : typename CharT,
142 : typename RegexTraitsT,
143 : typename FormatStringTraitsT, typename FormatStringAllocatorT >
144 : inline void replace_regex(
145 : SequenceT& Input,
146 : const basic_regex<CharT, RegexTraitsT>& Rx,
147 : const std::basic_string<CharT, FormatStringTraitsT, FormatStringAllocatorT>& Format,
148 : match_flag_type Flags=match_default | format_default )
149 : {
150 : ::boost::algorithm::find_format(
151 : Input,
152 : ::boost::algorithm::regex_finder( Rx, Flags ),
153 : ::boost::algorithm::regex_formatter( Format, Flags ) );
154 : }
155 :
156 : // replace_all_regex --------------------------------------------------------------------//
157 :
158 : //! Replace all regex algorithm
159 : /*!
160 : Format all substrings, matching given regex, with the specified format.
161 : The result is a modified copy of the input. It is returned as a sequence
162 : or copied to the output iterator.
163 :
164 : \param Output An output iterator to which the result will be copied
165 : \param Input An input string
166 : \param Rx A regular expression
167 : \param Format Regex format definition
168 : \param Flags Regex options
169 : \return An output iterator pointing just after the last inserted character or
170 : a modified copy of the input
171 :
172 : \note The second variant of this function provides the strong exception-safety guarantee
173 : */
174 : template<
175 : typename OutputIteratorT,
176 : typename RangeT,
177 : typename CharT,
178 : typename RegexTraitsT,
179 : typename FormatStringTraitsT, typename FormatStringAllocatorT >
180 : inline OutputIteratorT replace_all_regex_copy(
181 : OutputIteratorT Output,
182 : const RangeT& Input,
183 : const basic_regex<CharT, RegexTraitsT>& Rx,
184 : const std::basic_string<CharT, FormatStringTraitsT, FormatStringAllocatorT>& Format,
185 : match_flag_type Flags=match_default | format_default )
186 : {
187 : return ::boost::algorithm::find_format_all_copy(
188 : Output,
189 : Input,
190 : ::boost::algorithm::regex_finder( Rx, Flags ),
191 : ::boost::algorithm::regex_formatter( Format, Flags ) );
192 : }
193 :
194 : //! Replace all regex algorithm
195 : /*!
196 : \overload
197 : */
198 : template<
199 : typename SequenceT,
200 : typename CharT,
201 : typename RegexTraitsT,
202 : typename FormatStringTraitsT, typename FormatStringAllocatorT >
203 : inline SequenceT replace_all_regex_copy(
204 : const SequenceT& Input,
205 : const basic_regex<CharT, RegexTraitsT>& Rx,
206 : const std::basic_string<CharT, FormatStringTraitsT, FormatStringAllocatorT>& Format,
207 : match_flag_type Flags=match_default | format_default )
208 : {
209 : return ::boost::algorithm::find_format_all_copy(
210 : Input,
211 : ::boost::algorithm::regex_finder( Rx, Flags ),
212 : ::boost::algorithm::regex_formatter( Format, Flags ) );
213 : }
214 :
215 : //! Replace all regex algorithm
216 : /*!
217 : Format all substrings, matching given regex, with the specified format.
218 : The input string is modified in-place.
219 :
220 : \param Input An input string
221 : \param Rx A regular expression
222 : \param Format Regex format definition
223 : \param Flags Regex options
224 : */
225 : template<
226 : typename SequenceT,
227 : typename CharT,
228 : typename RegexTraitsT,
229 : typename FormatStringTraitsT, typename FormatStringAllocatorT >
230 : inline void replace_all_regex(
231 : SequenceT& Input,
232 : const basic_regex<CharT, RegexTraitsT>& Rx,
233 : const std::basic_string<CharT, FormatStringTraitsT, FormatStringAllocatorT>& Format,
234 : match_flag_type Flags=match_default | format_default )
235 : {
236 : ::boost::algorithm::find_format_all(
237 : Input,
238 : ::boost::algorithm::regex_finder( Rx, Flags ),
239 : ::boost::algorithm::regex_formatter( Format, Flags ) );
240 : }
241 :
242 : // erase_regex --------------------------------------------------------------------//
243 :
244 : //! Erase regex algorithm
245 : /*!
246 : Remove a substring matching given regex from the input.
247 : The result is a modified copy of the input. It is returned as a sequence
248 : or copied to the output iterator.
249 :
250 : \param Output An output iterator to which the result will be copied
251 : \param Input An input string
252 : \param Rx A regular expression
253 : \param Flags Regex options
254 : \return An output iterator pointing just after the last inserted character or
255 : a modified copy of the input
256 :
257 : \note The second variant of this function provides the strong exception-safety guarantee
258 : */
259 : template<
260 : typename OutputIteratorT,
261 : typename RangeT,
262 : typename CharT,
263 : typename RegexTraitsT >
264 : inline OutputIteratorT erase_regex_copy(
265 : OutputIteratorT Output,
266 : const RangeT& Input,
267 : const basic_regex<CharT, RegexTraitsT>& Rx,
268 : match_flag_type Flags=match_default )
269 : {
270 : return ::boost::algorithm::find_format_copy(
271 : Output,
272 : Input,
273 : ::boost::algorithm::regex_finder( Rx, Flags ),
274 : ::boost::algorithm::empty_formatter( Input ) );
275 : }
276 :
277 : //! Erase regex algorithm
278 : /*!
279 : \overload
280 : */
281 : template<
282 : typename SequenceT,
283 : typename CharT,
284 : typename RegexTraitsT >
285 : inline SequenceT erase_regex_copy(
286 : const SequenceT& Input,
287 : const basic_regex<CharT, RegexTraitsT>& Rx,
288 : match_flag_type Flags=match_default )
289 : {
290 : return ::boost::algorithm::find_format_copy(
291 : Input,
292 : ::boost::algorithm::regex_finder( Rx, Flags ),
293 : ::boost::algorithm::empty_formatter( Input ) );
294 : }
295 :
296 : //! Erase regex algorithm
297 : /*!
298 : Remove a substring matching given regex from the input.
299 : The input string is modified in-place.
300 :
301 : \param Input An input string
302 : \param Rx A regular expression
303 : \param Flags Regex options
304 : */
305 : template<
306 : typename SequenceT,
307 : typename CharT,
308 : typename RegexTraitsT >
309 : inline void erase_regex(
310 : SequenceT& Input,
311 : const basic_regex<CharT, RegexTraitsT>& Rx,
312 : match_flag_type Flags=match_default )
313 : {
314 : ::boost::algorithm::find_format(
315 : Input,
316 : ::boost::algorithm::regex_finder( Rx, Flags ),
317 : ::boost::algorithm::empty_formatter( Input ) );
318 : }
319 :
320 : // erase_all_regex --------------------------------------------------------------------//
321 :
322 : //! Erase all regex algorithm
323 : /*!
324 : Erase all substrings, matching given regex, from the input.
325 : The result is a modified copy of the input. It is returned as a sequence
326 : or copied to the output iterator.
327 :
328 :
329 : \param Output An output iterator to which the result will be copied
330 : \param Input An input string
331 : \param Rx A regular expression
332 : \param Flags Regex options
333 : \return An output iterator pointing just after the last inserted character or
334 : a modified copy of the input
335 :
336 : \note The second variant of this function provides the strong exception-safety guarantee
337 : */
338 : template<
339 : typename OutputIteratorT,
340 : typename RangeT,
341 : typename CharT,
342 : typename RegexTraitsT >
343 : inline OutputIteratorT erase_all_regex_copy(
344 : OutputIteratorT Output,
345 : const RangeT& Input,
346 : const basic_regex<CharT, RegexTraitsT>& Rx,
347 : match_flag_type Flags=match_default )
348 : {
349 : return ::boost::algorithm::find_format_all_copy(
350 : Output,
351 : Input,
352 : ::boost::algorithm::regex_finder( Rx, Flags ),
353 : ::boost::algorithm::empty_formatter( Input ) );
354 : }
355 :
356 : //! Erase all regex algorithm
357 : /*!
358 : \overload
359 : */
360 : template<
361 : typename SequenceT,
362 : typename CharT,
363 : typename RegexTraitsT >
364 : inline SequenceT erase_all_regex_copy(
365 : const SequenceT& Input,
366 : const basic_regex<CharT, RegexTraitsT>& Rx,
367 : match_flag_type Flags=match_default )
368 : {
369 : return ::boost::algorithm::find_format_all_copy(
370 : Input,
371 : ::boost::algorithm::regex_finder( Rx, Flags ),
372 : ::boost::algorithm::empty_formatter( Input ) );
373 : }
374 :
375 : //! Erase all regex algorithm
376 : /*!
377 : Erase all substrings, matching given regex, from the input.
378 : The input string is modified in-place.
379 :
380 : \param Input An input string
381 : \param Rx A regular expression
382 : \param Flags Regex options
383 : */
384 : template<
385 : typename SequenceT,
386 : typename CharT,
387 : typename RegexTraitsT>
388 : inline void erase_all_regex(
389 : SequenceT& Input,
390 : const basic_regex<CharT, RegexTraitsT>& Rx,
391 : match_flag_type Flags=match_default )
392 : {
393 : ::boost::algorithm::find_format_all(
394 : Input,
395 : ::boost::algorithm::regex_finder( Rx, Flags ),
396 : ::boost::algorithm::empty_formatter( Input ) );
397 : }
398 :
399 : // find_all_regex ------------------------------------------------------------------//
400 :
401 : //! Find all regex algorithm
402 : /*!
403 : This algorithm finds all substrings matching the give regex
404 : in the input.
405 :
406 : Each part is copied and added as a new element to the output container.
407 : Thus the result container must be able to hold copies
408 : of the matches (in a compatible structure like std::string) or
409 : a reference to it (e.g. using the iterator range class).
410 : Examples of such a container are \c std::vector<std::string>
411 : or \c std::list<boost::iterator_range<std::string::iterator>>
412 :
413 : \param Result A container that can hold copies of references to the substrings.
414 : \param Input A container which will be searched.
415 : \param Rx A regular expression
416 : \param Flags Regex options
417 : \return A reference to the result
418 :
419 : \note Prior content of the result will be overwritten.
420 :
421 : \note This function provides the strong exception-safety guarantee
422 : */
423 : template<
424 : typename SequenceSequenceT,
425 : typename RangeT,
426 : typename CharT,
427 : typename RegexTraitsT >
428 : inline SequenceSequenceT& find_all_regex(
429 : SequenceSequenceT& Result,
430 : const RangeT& Input,
431 : const basic_regex<CharT, RegexTraitsT>& Rx,
432 : match_flag_type Flags=match_default )
433 : {
434 : return ::boost::algorithm::iter_find(
435 : Result,
436 : Input,
437 : ::boost::algorithm::regex_finder(Rx,Flags) );
438 : }
439 :
440 : // split_regex ------------------------------------------------------------------//
441 :
442 : //! Split regex algorithm
443 : /*!
444 : Tokenize expression. This function is equivalent to C strtok. Input
445 : sequence is split into tokens, separated by separators. Separator
446 : is an every match of the given regex.
447 : Each part is copied and added as a new element to the output container.
448 : Thus the result container must be able to hold copies
449 : of the matches (in a compatible structure like std::string) or
450 : a reference to it (e.g. using the iterator range class).
451 : Examples of such a container are \c std::vector<std::string>
452 : or \c std::list<boost::iterator_range<std::string::iterator>>
453 :
454 : \param Result A container that can hold copies of references to the substrings.
455 : \param Input A container which will be searched.
456 : \param Rx A regular expression
457 : \param Flags Regex options
458 : \return A reference to the result
459 :
460 : \note Prior content of the result will be overwritten.
461 :
462 : \note This function provides the strong exception-safety guarantee
463 : */
464 : template<
465 : typename SequenceSequenceT,
466 : typename RangeT,
467 : typename CharT,
468 : typename RegexTraitsT >
469 0 : inline SequenceSequenceT& split_regex(
470 : SequenceSequenceT& Result,
471 : const RangeT& Input,
472 : const basic_regex<CharT, RegexTraitsT>& Rx,
473 : match_flag_type Flags=match_default )
474 : {
475 0 : return ::boost::algorithm::iter_split(
476 : Result,
477 : Input,
478 : ::boost::algorithm::regex_finder(Rx,Flags) );
479 : }
480 :
481 : // join_if ------------------------------------------------------------------//
482 :
483 : #ifndef BOOST_NO_FUNCTION_TEMPLATE_ORDERING
484 :
485 : //! Conditional join algorithm
486 : /*!
487 : This algorithm joins all strings in a 'list' into one long string.
488 : Segments are concatenated by given separator. Only segments that
489 : match the given regular expression will be added to the result
490 :
491 : This is a specialization of join_if algorithm.
492 :
493 : \param Input A container that holds the input strings. It must be a container-of-containers.
494 : \param Separator A string that will separate the joined segments.
495 : \param Rx A regular expression
496 : \param Flags Regex options
497 : \return Concatenated string.
498 :
499 : \note This function provides the strong exception-safety guarantee
500 : */
501 : template<
502 : typename SequenceSequenceT,
503 : typename Range1T,
504 : typename CharT,
505 : typename RegexTraitsT >
506 : inline typename range_value<SequenceSequenceT>::type
507 : join_if(
508 : const SequenceSequenceT& Input,
509 : const Range1T& Separator,
510 : const basic_regex<CharT, RegexTraitsT>& Rx,
511 : match_flag_type Flags=match_default )
512 : {
513 : // Define working types
514 : typedef typename range_value<SequenceSequenceT>::type ResultT;
515 : typedef typename range_const_iterator<SequenceSequenceT>::type InputIteratorT;
516 :
517 : // Parse input
518 : InputIteratorT itBegin=::boost::begin(Input);
519 : InputIteratorT itEnd=::boost::end(Input);
520 :
521 : // Construct container to hold the result
522 : ResultT Result;
523 :
524 :
525 : // Roll to the first element that will be added
526 : while(
527 : itBegin!=itEnd &&
528 : !::boost::regex_match(::boost::begin(*itBegin), ::boost::end(*itBegin), Rx, Flags)) ++itBegin;
529 :
530 : // Add this element
531 : if(itBegin!=itEnd)
532 : {
533 : detail::insert(Result, ::boost::end(Result), *itBegin);
534 : ++itBegin;
535 : }
536 :
537 : for(;itBegin!=itEnd; ++itBegin)
538 : {
539 : if(::boost::regex_match(::boost::begin(*itBegin), ::boost::end(*itBegin), Rx, Flags))
540 : {
541 : // Add separator
542 : detail::insert(Result, ::boost::end(Result), ::boost::as_literal(Separator));
543 : // Add element
544 : detail::insert(Result, ::boost::end(Result), *itBegin);
545 : }
546 : }
547 :
548 : return Result;
549 : }
550 :
551 : #else // BOOST_NO_FUNCTION_TEMPLATE_ORDERING
552 :
553 : //! Conditional join algorithm
554 : /*!
555 : This algorithm joins all strings in a 'list' into one long string.
556 : Segments are concatenated by given separator. Only segments that
557 : match the given regular expression will be added to the result
558 :
559 : This is a specialization of join_if algorithm.
560 :
561 : \param Input A container that holds the input strings. It must be a container-of-containers.
562 : \param Separator A string that will separate the joined segments.
563 : \param Rx A regular expression
564 : \param Flags Regex options
565 : \return Concatenated string.
566 :
567 : \note This function provides the strong exception-safety guarantee
568 : */
569 : template<
570 : typename SequenceSequenceT,
571 : typename Range1T,
572 : typename CharT,
573 : typename RegexTraitsT >
574 : inline typename range_value<SequenceSequenceT>::type
575 : join_if_regex(
576 : const SequenceSequenceT& Input,
577 : const Range1T& Separator,
578 : const basic_regex<CharT, RegexTraitsT>& Rx,
579 : match_flag_type Flags=match_default )
580 : {
581 : // Define working types
582 : typedef typename range_value<SequenceSequenceT>::type ResultT;
583 : typedef typename range_const_iterator<SequenceSequenceT>::type InputIteratorT;
584 :
585 : // Parse input
586 : InputIteratorT itBegin=::boost::begin(Input);
587 : InputIteratorT itEnd=::boost::end(Input);
588 :
589 : // Construct container to hold the result
590 : ResultT Result;
591 :
592 :
593 : // Roll to the first element that will be added
594 : while(
595 : itBegin!=itEnd &&
596 : !::boost::regex_match(::boost::begin(*itBegin), ::boost::end(*itBegin), Rx, Flags)) ++itBegin;
597 :
598 : // Add this element
599 : if(itBegin!=itEnd)
600 : {
601 : detail::insert(Result, ::boost::end(Result), *itBegin);
602 : ++itBegin;
603 : }
604 :
605 : for(;itBegin!=itEnd; ++itBegin)
606 : {
607 : if(::boost::regex_match(::boost::begin(*itBegin), ::boost::end(*itBegin), Rx, Flags))
608 : {
609 : // Add separator
610 : detail::insert(Result, ::boost::end(Result), ::boost::as_literal(Separator));
611 : // Add element
612 : detail::insert(Result, ::boost::end(Result), *itBegin);
613 : }
614 : }
615 :
616 : return Result;
617 : }
618 :
619 :
620 : #endif // BOOST_NO_FUNCTION_TEMPLATE_ORDERING
621 :
622 : } // namespace algorithm
623 :
624 : // pull names into the boost namespace
625 : using algorithm::find_regex;
626 : using algorithm::replace_regex;
627 : using algorithm::replace_regex_copy;
628 : using algorithm::replace_all_regex;
629 : using algorithm::replace_all_regex_copy;
630 : using algorithm::erase_regex;
631 : using algorithm::erase_regex_copy;
632 : using algorithm::erase_all_regex;
633 : using algorithm::erase_all_regex_copy;
634 : using algorithm::find_all_regex;
635 : using algorithm::split_regex;
636 :
637 : #ifndef BOOST_NO_FUNCTION_TEMPLATE_ORDERING
638 : using algorithm::join_if;
639 : #else // BOOST_NO_FUNCTION_TEMPLATE_ORDERING
640 : using algorithm::join_if_regex;
641 : #endif // BOOST_NO_FUNCTION_TEMPLATE_ORDERING
642 :
643 : } // namespace boost
644 :
645 :
646 : #endif // BOOST_STRING_REGEX_HPP
|