libstdc++
regex_grep_matcher.tcc
Go to the documentation of this file.
1 // class template regex -*- C++ -*-
2 
3 // Copyright (C) 2010-2013 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /**
26  * @file bits/regex_grep_matcher.tcc
27  * This is an internal header file, included by other library headers.
28  * Do not attempt to use it directly. @headername{regex}
29  */
30 
#include <queue>
#include <regex>
32 
33 namespace std _GLIBCXX_VISIBILITY(default)
34 {
35 namespace __detail
36 {
37 _GLIBCXX_BEGIN_NAMESPACE_VERSION
38 
39  // TODO: This is too slow. Try to compile the NFA to a DFA.
40  template<bool __match_mode>
41  bool _DFSMatcher::
42  _M_dfs(_StateIdT __i)
43  {
44  if (__i == _S_invalid_state_id)
45  // This is not that certain. Need deeper investigate.
46  return false;
47  const auto& __state = (*_M_nfa)[__i];
48  bool __ret = false;
49  switch (__state._M_opcode)
50  {
51  case _S_opcode_alternative:
52  // Greedy mode by default. For non-greedy mode,
53  // swap _M_alt and _M_next.
54  // TODO: Add greedy mode option.
55  __ret = _M_dfs<__match_mode>(__state._M_alt)
56  || _M_dfs<__match_mode>(__state._M_next);
57  break;
58  case _S_opcode_subexpr_begin:
59  __state._M_tagger(_M_str_cur, _M_results);
60  __ret = _M_dfs<__match_mode>(__state._M_next);
61  break;
62  case _S_opcode_subexpr_end:
63  __state._M_tagger(_M_str_cur, _M_results);
64  __ret = _M_dfs<__match_mode>(__state._M_next);
65  _M_results._M_set_matched(__state._M_subexpr, __ret);
66  break;
67  case _S_opcode_match:
68  if (!_M_str_cur._M_at_end() && __state._M_matches(_M_str_cur))
69  {
70  _M_str_cur._M_next();
71  __ret = _M_dfs<__match_mode>(__state._M_next);
72  _M_str_cur._M_prev();
73  }
74  break;
75  case _S_opcode_accept:
76  if (__match_mode)
77  __ret = _M_str_cur._M_at_end();
78  else
79  __ret = true;
80  break;
81  default:
82  _GLIBCXX_DEBUG_ASSERT(false);
83  }
84  return __ret;
85  }
86 
87  template<bool __match_mode>
88  bool _BFSMatcher::
89  _M_main_loop()
90  {
91  while (!_M_str_cur._M_at_end())
92  {
93  if (!__match_mode)
94  if (_M_includes_some())
95  return true;
96  _M_move();
97  _M_str_cur._M_next();
98  _M_e_closure();
99  }
100  return _M_includes_some();
101  }
102 
103  // The SPFA approach.
104  // FIXME: move it to src/c++11 when it's stable, and make it not inlined.
105  inline
106  void _BFSMatcher::
107  _M_e_closure()
108  {
110  std::vector<bool> __in_q(_M_nfa->size(), false);
111  for (auto& __it : _M_current)
112  {
113  __in_q[__it.first] = true;
114  __q.push(__it.first);
115  }
116  while (!__q.empty())
117  {
118  auto __u = __q.front();
119  __q.pop();
120  __in_q[__u] = false;
121  const auto& __state = (*_M_nfa)[__u];
122 
123  // Can be implemented using method, but there're too much arguments.
124  auto __add_visited_state = [&](_StateIdT __v)
125  {
126  if (__v == _S_invalid_state_id)
127  return;
128  if (_M_match_less_than(__u, __v))
129  {
130  _M_current[__v] = _M_current[__u]->_M_clone();
131  // if a state is updated, it's outgoing neighbors should be
132  // reconsidered too. Push them to the queue.
133  if (!__in_q[__v])
134  {
135  __in_q[__v] = true;
136  __q.push(__v);
137  }
138  }
139  };
140 
141  switch (__state._M_opcode)
142  {
143  case _S_opcode_alternative:
144  __add_visited_state(__state._M_next);
145  __add_visited_state(__state._M_alt);
146  break;
147  case _S_opcode_subexpr_begin:
148  __state._M_tagger(_M_str_cur, *_M_current[__u]);
149  __add_visited_state(__state._M_next);
150  break;
151  case _S_opcode_subexpr_end:
152  __state._M_tagger(_M_str_cur, *_M_current[__u]);
153  _M_current[__u]->_M_set_matched(__state._M_subexpr, true);
154  __add_visited_state(__state._M_next);
155  break;
156  case _S_opcode_match:
157  break;
158  case _S_opcode_accept:
159  __add_visited_state(__state._M_next);
160  break;
161  default:
162  _GLIBCXX_DEBUG_ASSERT(false);
163  }
164  }
165  }
166 
167  // FIXME: move it to src/c++11 when it's stable, and make it not inlined.
168  inline
169  void _BFSMatcher::
170  _M_move()
171  {
172  decltype(_M_current) __next;
173  for (auto& __it : _M_current)
174  {
175  const auto& __state = (*_M_nfa)[__it.first];
176  if (__state._M_opcode == _S_opcode_match
177  && __state._M_matches(_M_str_cur))
178  if (_M_match_less_than(__it.first, __state._M_next)
179  && __state._M_next != _S_invalid_state_id)
180  __next[__state._M_next] = __it.second->_M_clone();
181  }
182  _M_current = move(__next);
183  }
184 
185  // FIXME: move it to src/c++11 when it's stable, and make it not inlined.
186  inline
187  bool _BFSMatcher::
188  _M_match_less_than(_StateIdT __u, _StateIdT __v) const
189  {
190  if (_M_current.count(__u) == 0)
191  return false;
192  if (_M_current.count(__v) > 0)
193  return true;
194  // TODO: Greedy and Non-greedy support
195  return true;
196  }
197 
198  // FIXME: move it to src/c++11 when it's stable, and make it not inlined.
199  inline
200  bool _BFSMatcher::
201  _M_includes_some() const
202  {
203  auto& __s = _M_nfa->_M_final_states();
204  auto& __t = _M_current;
205  if (__s.size() > 0 && __t.size() > 0)
206  {
207  auto __first = __s.begin();
208  auto __second = __t.begin();
209  while (__first != __s.end() && __second != __t.end())
210  {
211  if (*__first < __second->first)
212  ++__first;
213  else if (__second->first < *__first)
214  ++__second;
215  else
216  {
217  _M_results._M_assign(*__second->second);
218  return true;
219  }
220  }
221  }
222  return false;
223  }
224 
225  // FIXME: move it to src/c++11 when it's stable, and make it not inlined.
226  inline
228  _M_get_matcher(_PatternCursor& __p,
229  _Results& __r,
230  const _AutomatonPtr& __a,
232  {
233  if (_M_has_back_ref)
234  return unique_ptr<_Grep_matcher>(
235  new _DFSMatcher(__p, __r, __a, __flags));
236  else
237  return unique_ptr<_Grep_matcher>(
238  new _BFSMatcher(__p, __r, __a, __flags));
239  }
240 
241 _GLIBCXX_END_NAMESPACE_VERSION
242 } // namespace __detail
243 } // namespace