libstdc++
regex.tcc
Go to the documentation of this file.
00001 // class template regex -*- C++ -*-
00002 
00003 // Copyright (C) 2013-2017 Free Software Foundation, Inc.
00004 //
00005 // This file is part of the GNU ISO C++ Library.  This library is free
00006 // software; you can redistribute it and/or modify it under the
00007 // terms of the GNU General Public License as published by the
00008 // Free Software Foundation; either version 3, or (at your option)
00009 // any later version.
00010 
00011 // This library is distributed in the hope that it will be useful,
00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014 // GNU General Public License for more details.
00015 
00016 // Under Section 7 of GPL version 3, you are granted additional
00017 // permissions described in the GCC Runtime Library Exception, version
00018 // 3.1, as published by the Free Software Foundation.
00019 
00020 // You should have received a copy of the GNU General Public License and
00021 // a copy of the GCC Runtime Library Exception along with this program;
00022 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
00023 // <http://www.gnu.org/licenses/>.
00024 
00025 /**
00026  *  @file bits/regex.tcc
00027  *  This is an internal header file, included by other library headers.
00028  *  Do not attempt to use it directly. @headername{regex}
00029  */
00030 
00031 namespace std _GLIBCXX_VISIBILITY(default)
00032 {
00033 namespace __detail
00034 {
00035 _GLIBCXX_BEGIN_NAMESPACE_VERSION
00036 
00037   // Result of merging regex_match and regex_search.
00038   //
00039   // __policy now can be _S_auto (auto dispatch) and _S_alternate (use
00040   // the other one if possible, for test purpose).
00041   //
00042   // That __match_mode is true means regex_match, else regex_search.
00043   template<typename _BiIter, typename _Alloc,
00044            typename _CharT, typename _TraitsT,
00045            _RegexExecutorPolicy __policy,
00046            bool __match_mode>
00047     bool
00048     __regex_algo_impl(_BiIter                              __s,
00049                       _BiIter                              __e,
00050                       match_results<_BiIter, _Alloc>&      __m,
00051                       const basic_regex<_CharT, _TraitsT>& __re,
00052                       regex_constants::match_flag_type     __flags)
00053     {
00054       if (__re._M_automaton == nullptr)
00055         return false;
00056 
00057       typename match_results<_BiIter, _Alloc>::_Base_type& __res = __m;
00058       __m._M_begin = __s;
00059       __m._M_resize(__re._M_automaton->_M_sub_count());
00060       for (auto& __it : __res)
00061         __it.matched = false;
00062 
00063       bool __ret;
00064       if ((__re.flags() & regex_constants::__polynomial)
00065           || (__policy == _RegexExecutorPolicy::_S_alternate
00066               && !__re._M_automaton->_M_has_backref))
00067         {
00068           _Executor<_BiIter, _Alloc, _TraitsT, false>
00069             __executor(__s, __e, __m, __re, __flags);
00070           if (__match_mode)
00071             __ret = __executor._M_match();
00072           else
00073             __ret = __executor._M_search();
00074         }
00075       else
00076         {
00077           _Executor<_BiIter, _Alloc, _TraitsT, true>
00078             __executor(__s, __e, __m, __re, __flags);
00079           if (__match_mode)
00080             __ret = __executor._M_match();
00081           else
00082             __ret = __executor._M_search();
00083         }
00084       if (__ret)
00085         {
00086           for (auto& __it : __res)
00087             if (!__it.matched)
00088               __it.first = __it.second = __e;
00089           auto& __pre = __m._M_prefix();
00090           auto& __suf = __m._M_suffix();
00091           if (__match_mode)
00092             {
00093               __pre.matched = false;
00094               __pre.first = __s;
00095               __pre.second = __s;
00096               __suf.matched = false;
00097               __suf.first = __e;
00098               __suf.second = __e;
00099             }
00100           else
00101             {
00102               __pre.first = __s;
00103               __pre.second = __res[0].first;
00104               __pre.matched = (__pre.first != __pre.second);
00105               __suf.first = __res[0].second;
00106               __suf.second = __e;
00107               __suf.matched = (__suf.first != __suf.second);
00108             }
00109         }
00110       else
00111         {
00112           __m._M_resize(0);
00113           for (auto& __it : __res)
00114             {
00115               __it.matched = false;
00116               __it.first = __it.second = __e;
00117             }
00118         }
00119       return __ret;
00120     }
00121 
00122 _GLIBCXX_END_NAMESPACE_VERSION
00123 }
00124 
00125 _GLIBCXX_BEGIN_NAMESPACE_VERSION
00126 
00127   template<typename _Ch_type>
00128   template<typename _Fwd_iter>
00129     typename regex_traits<_Ch_type>::string_type
00130     regex_traits<_Ch_type>::
00131     lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
00132     {
00133       typedef std::ctype<char_type> __ctype_type;
00134       const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
00135 
00136       static const char* __collatenames[] =
00137         {
00138           "NUL",
00139           "SOH",
00140           "STX",
00141           "ETX",
00142           "EOT",
00143           "ENQ",
00144           "ACK",
00145           "alert",
00146           "backspace",
00147           "tab",
00148           "newline",
00149           "vertical-tab",
00150           "form-feed",
00151           "carriage-return",
00152           "SO",
00153           "SI",
00154           "DLE",
00155           "DC1",
00156           "DC2",
00157           "DC3",
00158           "DC4",
00159           "NAK",
00160           "SYN",
00161           "ETB",
00162           "CAN",
00163           "EM",
00164           "SUB",
00165           "ESC",
00166           "IS4",
00167           "IS3",
00168           "IS2",
00169           "IS1",
00170           "space",
00171           "exclamation-mark",
00172           "quotation-mark",
00173           "number-sign",
00174           "dollar-sign",
00175           "percent-sign",
00176           "ampersand",
00177           "apostrophe",
00178           "left-parenthesis",
00179           "right-parenthesis",
00180           "asterisk",
00181           "plus-sign",
00182           "comma",
00183           "hyphen",
00184           "period",
00185           "slash",
00186           "zero",
00187           "one",
00188           "two",
00189           "three",
00190           "four",
00191           "five",
00192           "six",
00193           "seven",
00194           "eight",
00195           "nine",
00196           "colon",
00197           "semicolon",
00198           "less-than-sign",
00199           "equals-sign",
00200           "greater-than-sign",
00201           "question-mark",
00202           "commercial-at",
00203           "A",
00204           "B",
00205           "C",
00206           "D",
00207           "E",
00208           "F",
00209           "G",
00210           "H",
00211           "I",
00212           "J",
00213           "K",
00214           "L",
00215           "M",
00216           "N",
00217           "O",
00218           "P",
00219           "Q",
00220           "R",
00221           "S",
00222           "T",
00223           "U",
00224           "V",
00225           "W",
00226           "X",
00227           "Y",
00228           "Z",
00229           "left-square-bracket",
00230           "backslash",
00231           "right-square-bracket",
00232           "circumflex",
00233           "underscore",
00234           "grave-accent",
00235           "a",
00236           "b",
00237           "c",
00238           "d",
00239           "e",
00240           "f",
00241           "g",
00242           "h",
00243           "i",
00244           "j",
00245           "k",
00246           "l",
00247           "m",
00248           "n",
00249           "o",
00250           "p",
00251           "q",
00252           "r",
00253           "s",
00254           "t",
00255           "u",
00256           "v",
00257           "w",
00258           "x",
00259           "y",
00260           "z",
00261           "left-curly-bracket",
00262           "vertical-line",
00263           "right-curly-bracket",
00264           "tilde",
00265           "DEL",
00266         };
00267 
00268       string __s;
00269       for (; __first != __last; ++__first)
00270         __s += __fctyp.narrow(*__first, 0);
00271 
00272       for (const auto& __it : __collatenames)
00273         if (__s == __it)
00274           return string_type(1, __fctyp.widen(
00275             static_cast<char>(&__it - __collatenames)));
00276 
00277       // TODO Add digraph support:
00278       // http://boost.sourceforge.net/libs/regex/doc/collating_names.html
00279 
00280       return string_type();
00281     }
00282 
00283   template<typename _Ch_type>
00284   template<typename _Fwd_iter>
00285     typename regex_traits<_Ch_type>::char_class_type
00286     regex_traits<_Ch_type>::
00287     lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const
00288     {
00289       typedef std::ctype<char_type> __ctype_type;
00290       const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
00291 
00292       // Mappings from class name to class mask.
00293       static const pair<const char*, char_class_type> __classnames[] =
00294       {
00295         {"d", ctype_base::digit},
00296         {"w", {ctype_base::alnum, _RegexMask::_S_under}},
00297         {"s", ctype_base::space},
00298         {"alnum", ctype_base::alnum},
00299         {"alpha", ctype_base::alpha},
00300         {"blank", ctype_base::blank},
00301         {"cntrl", ctype_base::cntrl},
00302         {"digit", ctype_base::digit},
00303         {"graph", ctype_base::graph},
00304         {"lower", ctype_base::lower},
00305         {"print", ctype_base::print},
00306         {"punct", ctype_base::punct},
00307         {"space", ctype_base::space},
00308         {"upper", ctype_base::upper},
00309         {"xdigit", ctype_base::xdigit},
00310       };
00311 
00312       string __s;
00313       for (; __first != __last; ++__first)
00314         __s += __fctyp.narrow(__fctyp.tolower(*__first), 0);
00315 
00316       for (const auto& __it : __classnames)
00317         if (__s == __it.first)
00318           {
00319             if (__icase
00320                 && ((__it.second
00321                      & (ctype_base::lower | ctype_base::upper)) != 0))
00322               return ctype_base::alpha;
00323             return __it.second;
00324           }
00325       return 0;
00326     }
00327 
00328   template<typename _Ch_type>
00329     bool
00330     regex_traits<_Ch_type>::
00331     isctype(_Ch_type __c, char_class_type __f) const
00332     {
00333       typedef std::ctype<char_type> __ctype_type;
00334       const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
00335 
00336       return __fctyp.is(__f._M_base, __c)
00337         // [[:w:]]
00338         || ((__f._M_extended & _RegexMask::_S_under)
00339             && __c == __fctyp.widen('_'));
00340     }
00341 
00342   template<typename _Ch_type>
00343     int
00344     regex_traits<_Ch_type>::
00345     value(_Ch_type __ch, int __radix) const
00346     {
00347       std::basic_istringstream<char_type> __is(string_type(1, __ch));
00348       long __v;
00349       if (__radix == 8)
00350         __is >> std::oct;
00351       else if (__radix == 16)
00352         __is >> std::hex;
00353       __is >> __v;
00354       return __is.fail() ? -1 : __v;
00355     }
00356 
00357   template<typename _Bi_iter, typename _Alloc>
00358   template<typename _Out_iter>
00359     _Out_iter match_results<_Bi_iter, _Alloc>::
00360     format(_Out_iter __out,
00361            const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first,
00362            const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last,
00363            match_flag_type __flags) const
00364     {
00365       __glibcxx_assert( ready() );
00366       regex_traits<char_type> __traits;
00367       typedef std::ctype<char_type> __ctype_type;
00368       const __ctype_type&
00369         __fctyp(use_facet<__ctype_type>(__traits.getloc()));
00370 
00371       auto __output = [&](size_t __idx)
00372         {
00373           auto& __sub = (*this)[__idx];
00374           if (__sub.matched)
00375             __out = std::copy(__sub.first, __sub.second, __out);
00376         };
00377 
00378       if (__flags & regex_constants::format_sed)
00379         {
00380           for (; __fmt_first != __fmt_last;)
00381             if (*__fmt_first == '&')
00382               {
00383                 __output(0);
00384                 ++__fmt_first;
00385               }
00386             else if (*__fmt_first == '\\')
00387               {
00388                 if (++__fmt_first != __fmt_last
00389                     && __fctyp.is(__ctype_type::digit, *__fmt_first))
00390                   __output(__traits.value(*__fmt_first++, 10));
00391                 else
00392                   *__out++ = '\\';
00393               }
00394             else
00395               *__out++ = *__fmt_first++;
00396         }
00397       else
00398         {
00399           while (1)
00400             {
00401               auto __next = std::find(__fmt_first, __fmt_last, '$');
00402               if (__next == __fmt_last)
00403                 break;
00404 
00405               __out = std::copy(__fmt_first, __next, __out);
00406 
00407               auto __eat = [&](char __ch) -> bool
00408                 {
00409                   if (*__next == __ch)
00410                     {
00411                       ++__next;
00412                       return true;
00413                     }
00414                   return false;
00415                 };
00416 
00417               if (++__next == __fmt_last)
00418                 *__out++ = '$';
00419               else if (__eat('$'))
00420                 *__out++ = '$';
00421               else if (__eat('&'))
00422                 __output(0);
00423               else if (__eat('`'))
00424                 {
00425                   auto& __sub = _M_prefix();
00426                   if (__sub.matched)
00427                     __out = std::copy(__sub.first, __sub.second, __out);
00428                 }
00429               else if (__eat('\''))
00430                 {
00431                   auto& __sub = _M_suffix();
00432                   if (__sub.matched)
00433                     __out = std::copy(__sub.first, __sub.second, __out);
00434                 }
00435               else if (__fctyp.is(__ctype_type::digit, *__next))
00436                 {
00437                   long __num = __traits.value(*__next, 10);
00438                   if (++__next != __fmt_last
00439                       && __fctyp.is(__ctype_type::digit, *__next))
00440                     {
00441                       __num *= 10;
00442                       __num += __traits.value(*__next++, 10);
00443                     }
00444                   if (0 <= __num && __num < this->size())
00445                     __output(__num);
00446                 }
00447               else
00448                 *__out++ = '$';
00449               __fmt_first = __next;
00450             }
00451           __out = std::copy(__fmt_first, __fmt_last, __out);
00452         }
00453       return __out;
00454     }
00455 
00456   template<typename _Out_iter, typename _Bi_iter,
00457            typename _Rx_traits, typename _Ch_type>
00458     _Out_iter
00459     regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last,
00460                   const basic_regex<_Ch_type, _Rx_traits>& __e,
00461                   const _Ch_type* __fmt,
00462                   regex_constants::match_flag_type __flags)
00463     {
00464       typedef regex_iterator<_Bi_iter, _Ch_type, _Rx_traits> _IterT;
00465       _IterT __i(__first, __last, __e, __flags);
00466       _IterT __end;
00467       if (__i == __end)
00468         {
00469           if (!(__flags & regex_constants::format_no_copy))
00470             __out = std::copy(__first, __last, __out);
00471         }
00472       else
00473         {
00474           sub_match<_Bi_iter> __last;
00475           auto __len = char_traits<_Ch_type>::length(__fmt);
00476           for (; __i != __end; ++__i)
00477             {
00478               if (!(__flags & regex_constants::format_no_copy))
00479                 __out = std::copy(__i->prefix().first, __i->prefix().second,
00480                                   __out);
00481               __out = __i->format(__out, __fmt, __fmt + __len, __flags);
00482               __last = __i->suffix();
00483               if (__flags & regex_constants::format_first_only)
00484                 break;
00485             }
00486           if (!(__flags & regex_constants::format_no_copy))
00487             __out = std::copy(__last.first, __last.second, __out);
00488         }
00489       return __out;
00490     }
00491 
00492   template<typename _Bi_iter,
00493            typename _Ch_type,
00494            typename _Rx_traits>
00495     bool
00496     regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
00497     operator==(const regex_iterator& __rhs) const
00498     {
00499       if (_M_pregex == nullptr && __rhs._M_pregex == nullptr)
00500         return true;
00501       return _M_pregex == __rhs._M_pregex
00502           && _M_begin == __rhs._M_begin
00503           && _M_end == __rhs._M_end
00504           && _M_flags == __rhs._M_flags
00505           && _M_match[0] == __rhs._M_match[0];
00506     }
00507 
00508   template<typename _Bi_iter,
00509            typename _Ch_type,
00510            typename _Rx_traits>
00511     regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
00512     regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
00513     operator++()
00514     {
00515       // In all cases in which the call to regex_search returns true,
00516       // match.prefix().first shall be equal to the previous value of
00517       // match[0].second, and for each index i in the half-open range
00518       // [0, match.size()) for which match[i].matched is true,
00519       // match[i].position() shall return distance(begin, match[i].first).
00520       // [28.12.1.4.5]
00521       if (_M_match[0].matched)
00522         {
00523           auto __start = _M_match[0].second;
00524           auto __prefix_first = _M_match[0].second;
00525           if (_M_match[0].first == _M_match[0].second)
00526             {
00527               if (__start == _M_end)
00528                 {
00529                   _M_pregex = nullptr;
00530                   return *this;
00531                 }
00532               else
00533                 {
00534                   if (regex_search(__start, _M_end, _M_match, *_M_pregex,
00535                                    _M_flags
00536                                    | regex_constants::match_not_null
00537                                    | regex_constants::match_continuous))
00538                     {
00539                       __glibcxx_assert(_M_match[0].matched);
00540                       auto& __prefix = _M_match._M_prefix();
00541                       __prefix.first = __prefix_first;
00542                       __prefix.matched = __prefix.first != __prefix.second;
00543                       // [28.12.1.4.5]
00544                       _M_match._M_begin = _M_begin;
00545                       return *this;
00546                     }
00547                   else
00548                     ++__start;
00549                 }
00550             }
00551           _M_flags |= regex_constants::match_prev_avail;
00552           if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
00553             {
00554               __glibcxx_assert(_M_match[0].matched);
00555               auto& __prefix = _M_match._M_prefix();
00556               __prefix.first = __prefix_first;
00557               __prefix.matched = __prefix.first != __prefix.second;
00558               // [28.12.1.4.5]
00559               _M_match._M_begin = _M_begin;
00560             }
00561           else
00562             _M_pregex = nullptr;
00563         }
00564       return *this;
00565     }
00566 
00567   template<typename _Bi_iter,
00568            typename _Ch_type,
00569            typename _Rx_traits>
00570     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
00571     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
00572     operator=(const regex_token_iterator& __rhs)
00573     {
00574       _M_position = __rhs._M_position;
00575       _M_subs = __rhs._M_subs;
00576       _M_n = __rhs._M_n;
00577       _M_suffix = __rhs._M_suffix;
00578       _M_has_m1 = __rhs._M_has_m1;
00579       _M_normalize_result();
00580       return *this;
00581     }
00582 
00583   template<typename _Bi_iter,
00584            typename _Ch_type,
00585            typename _Rx_traits>
00586     bool
00587     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
00588     operator==(const regex_token_iterator& __rhs) const
00589     {
00590       if (_M_end_of_seq() && __rhs._M_end_of_seq())
00591         return true;
00592       if (_M_suffix.matched && __rhs._M_suffix.matched
00593           && _M_suffix == __rhs._M_suffix)
00594         return true;
00595       if (_M_end_of_seq() || _M_suffix.matched
00596           || __rhs._M_end_of_seq() || __rhs._M_suffix.matched)
00597         return false;
00598       return _M_position == __rhs._M_position
00599         && _M_n == __rhs._M_n
00600         && _M_subs == __rhs._M_subs;
00601     }
00602 
00603   template<typename _Bi_iter,
00604            typename _Ch_type,
00605            typename _Rx_traits>
00606     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
00607     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
00608     operator++()
00609     {
00610       _Position __prev = _M_position;
00611       if (_M_suffix.matched)
00612         *this = regex_token_iterator();
00613       else if (_M_n + 1 < _M_subs.size())
00614         {
00615           _M_n++;
00616           _M_result = &_M_current_match();
00617         }
00618       else
00619         {
00620           _M_n = 0;
00621           ++_M_position;
00622           if (_M_position != _Position())
00623             _M_result = &_M_current_match();
00624           else if (_M_has_m1 && __prev->suffix().length() != 0)
00625             {
00626               _M_suffix.matched = true;
00627               _M_suffix.first = __prev->suffix().first;
00628               _M_suffix.second = __prev->suffix().second;
00629               _M_result = &_M_suffix;
00630             }
00631           else
00632             *this = regex_token_iterator();
00633         }
00634       return *this;
00635     }
00636 
00637   template<typename _Bi_iter,
00638            typename _Ch_type,
00639            typename _Rx_traits>
00640     void
00641     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
00642     _M_init(_Bi_iter __a, _Bi_iter __b)
00643     {
00644       _M_has_m1 = false;
00645       for (auto __it : _M_subs)
00646         if (__it == -1)
00647           {
00648             _M_has_m1 = true;
00649             break;
00650           }
00651       if (_M_position != _Position())
00652         _M_result = &_M_current_match();
00653       else if (_M_has_m1)
00654         {
00655           _M_suffix.matched = true;
00656           _M_suffix.first = __a;
00657           _M_suffix.second = __b;
00658           _M_result = &_M_suffix;
00659         }
00660       else
00661         _M_result = nullptr;
00662     }
00663 
00664 _GLIBCXX_END_NAMESPACE_VERSION
00665 } // namespace
00666