libstdc++
regex_grep_matcher.h
Go to the documentation of this file.
1 // class template regex -*- C++ -*-
2 
3 // Copyright (C) 2010-2013 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /**
26  * @file bits/regex_grep_matcher.h
27  * This is an internal header file, included by other library headers.
28  * Do not attempt to use it directly. @headername{regex}
29  */
30 
31 namespace std _GLIBCXX_VISIBILITY(default)
32 {
33 _GLIBCXX_BEGIN_NAMESPACE_VERSION
34 
35  template<typename _BiIter>
36  class sub_match; // Declaration only; no definition appears in this listing.
37 
38  template<typename _Bi_iter, typename _Allocator>
39  class match_results; // Declaration only; instantiated below by _SpecializedResults.
40 
41 _GLIBCXX_END_NAMESPACE_VERSION
42 
43 namespace __detail
44 {
45 _GLIBCXX_BEGIN_NAMESPACE_VERSION
46 
47  /**
48  * @defgroup regex-detail Base and Implementation Classes
49  * @ingroup regex
50  * @{
51  */
52 
53  /// A _Results facade specialized for wrapping a templated match_results.
54  template<typename _FwdIterT, typename _Alloc>
56  : public _Results // NOTE(review): listing line 55 is absent from this extract; presumably the class-head.
57  {
58  public:
59  _SpecializedResults(const _Automaton::_SizeT __size,
60  const _SpecializedCursor<_FwdIterT>& __cursor, // NOTE(review): listing line 61 (the rest of the parameter list) is absent.
62 
64  { // NOTE(review): listing line 63 is absent; presumably the destructor's name.
65  if (_M_managed) // Only release the results object when this instance allocated it.
66  delete &_M_results; // Pairs with the `new` in the initializer at listing line 71.
67  }
68 
69  private:
71  : _M_results(*new match_results<_FwdIterT, _Alloc>(__rhs._M_results)), // Heap-allocates a copy of __rhs's results. NOTE(review): the signature (listing line 70) is absent.
72  _M_managed(true) // Marks the copy as owned so the cleanup code above deletes it.
73  { }
74 
75  public:
76  void
77  _M_set_pos(int __i, int __j, const _PatternCursor& __pc); // Defined out of class further down in this listing.
78 
79  void
80  _M_set_range(int __i, const _PatternCursor& __pc) // Sets both ends of entry __i from the cursor's _M_begin() and _M_end().
81  {
82  typedef const _SpecializedCursor<_FwdIterT>& _CursorT;
83  _CursorT __c = static_cast<_CursorT>(__pc); // Unchecked downcast: __pc is assumed to be a _SpecializedCursor<_FwdIterT>.
84  _M_results.at(__i).first = __c._M_begin();
85  _M_results.at(__i).second = __c._M_end();
86  }
87 
88  void
89  _M_set_matched(int __i, bool __is_matched) // Writes the `matched` flag of entry __i.
90  { _M_results.at(__i).matched = __is_matched; }
91 
93  _M_clone() const // NOTE(review): listing line 92 (the return type) is absent; the body returns a unique_ptr<_Results>.
94  { return unique_ptr<_Results>(new _SpecializedResults(*this)); } // Copy-constructs from *this and hands ownership to the caller.
95 
96  void
97  _M_assign(const _Results& __rhs) // Copy-assigns __rhs's wrapped results into this object's results.
98  {
99  auto __r = static_cast<const _SpecializedResults*>(&__rhs); // Unchecked downcast: __rhs is assumed to be this same specialization.
100  _M_results = __r->_M_results;
101  }
102 
103  private: // NOTE(review): listing line 104 is absent; presumably the declaration of _M_results.
105  bool _M_managed; // True when _M_results was allocated by this object and must be deleted by it.
106  };
107 
108  template<typename _FwdIterT, typename _Alloc>
110  _SpecializedResults(const _Automaton::_SizeT __size, // NOTE(review): listing line 109 is absent; presumably the class qualifier for this out-of-class definition.
111  const _SpecializedCursor<_FwdIterT>& __cursor, // NOTE(review): listing line 112 is absent; presumably declares the __m parameter used below.
113  : _M_results(__m), _M_managed(false) // Wraps the caller's object; not owned, so it is never deleted here.
114  {
115  _M_results.clear(); // Discard whatever the caller's object held before.
116  _M_results.reserve(__size + 2); // Room for __size entries plus the two appended below.
117  _M_results.resize(__size);
119  __sm.first = __sm.second = __cursor._M_begin(); // Empty range at the cursor's beginning. NOTE(review): __sm is declared on absent listing line 118.
120  _M_results.push_back(__sm); // Becomes entry __size.
121  __sm.first = __sm.second = __cursor._M_end(); // Empty range at the cursor's end.
122  _M_results.push_back(__sm); // Becomes entry __size + 1.
123  }
124 
125  template<typename _FwdIterT, typename _Alloc>
126  void
127  _SpecializedResults<_FwdIterT, _Alloc>::
128  _M_set_pos(int __i, int __j, const _PatternCursor& __pc) // Stores the cursor's current position into one end of entry __i.
129  {
130  typedef const _SpecializedCursor<_FwdIterT>& _CursorT;
131  _CursorT __c = static_cast<_CursorT>(__pc); // Unchecked downcast: __pc is assumed to be a _SpecializedCursor<_FwdIterT>.
132  if (__j == 0) // __j == 0 selects `first`; any other value selects `second`.
133  _M_results.at(__i).first = __c._M_pos();
134  else
135  _M_results.at(__i).second = __c._M_pos();
136  }
137 
138  /// Executes a regular expression NFA/DFA over a range using a
139  /// variant of the parallel execution algorithm featured in the grep
140  /// utility, modified to use Laurikari tags.
142  { // NOTE(review): listing lines 141, 144 and 147 are absent from this extract; presumably the class-head and parts of the constructor signature.
143  public:
145  _Results& __r,
146  const _AutomatonPtr& __automaton,
148  : _M_nfa(static_pointer_cast<_Nfa>(__automaton)), // Keeps the automaton as a shared_ptr<_Nfa>; the cast performs no runtime check.
149  _M_str_cur(__p), _M_results(__r) // Both are stored as references, so the referents must outlive this matcher.
150  { }
151 
152  virtual
153  ~_Grep_matcher()
154  { }
155 
156  // Set matched when the string exactly matches the pattern.
157  virtual bool
158  _M_match() = 0;
159 
160  // Set matched when some prefix of the string matches the pattern.
161  virtual bool
162  _M_search_from_first() = 0;
163 
164  protected:
165  const std::shared_ptr<_Nfa> _M_nfa; // Automaton being executed.
166  _PatternCursor& _M_str_cur; // Cursor over the input; not owned.
167  _Results& _M_results; // Destination for match results; not owned.
168  };
169 
170  // Time complexity: exponential
171  // Space complexity: O(_M_str_cur.size())
172  // _M_dfs() takes a state, along with the current string cursor (_M_str_cur),
173  // and tries to match the current state against the current character.
174  // Only _S_opcode_match will consume a character.
175  class _DFSMatcher // Matcher whose two entry points both delegate to _M_dfs, starting at the NFA's start state.
176  : public _Grep_matcher
177  {
178  public:
179  _DFSMatcher(_PatternCursor& __p,
180  _Results& __r,
181  const _AutomatonPtr& __automaton, // NOTE(review): listing line 182 is absent from this extract; presumably the __flags parameter.
183  : _Grep_matcher(__p, __r, __automaton, __flags) // Forwards every argument to the base class; this class declares no data members.
184  { }
185 
186  bool
187  _M_match()
188  { return _M_dfs<true>(_M_nfa->_M_start()); } // Runs _M_dfs with __match_mode = true.
189 
190  bool
191  _M_search_from_first()
192  { return _M_dfs<false>(_M_nfa->_M_start()); } // Runs _M_dfs with __match_mode = false.
193 
194  private:
195  template<bool __match_mode>
196  bool
197  _M_dfs(_StateIdT __i); // Declared only; the definition is not part of this listing.
198  };
199 
200  // It's essentially a variant of the Single-Source-Shortest-Path problem, where
201  // the matching result is the final distance and should be minimized.
202  // Instead of Dijkstra's algorithm, this uses the queue-optimized
203  // (BFS-like) Bellman-Ford algorithm,
204  // SPFA (http://en.wikipedia.org/wiki/Shortest_Path_Faster_Algorithm).
205  //
206  // Every entry of _M_current saves the solution (grouping status) for every
207  // matching head. When states transfer, solutions are compared and
208  // deduplicated (based on which greedy mode we have).
209  //
210  // Time complexity: O(_M_str_cur.size() * _M_nfa.size())
211  // Space complexity: O(_M_nfa.size() * _M_nfa.mark_count())
212  class _BFSMatcher // Matcher whose two entry points both delegate to _M_main_loop.
213  : public _Grep_matcher
214  {
215  public:
216  _BFSMatcher(_PatternCursor& __p,
217  _Results& __r,
218  const _AutomatonPtr& __automaton, // NOTE(review): listing line 219 is absent from this extract; presumably the __flags parameter.
220  : _Grep_matcher(__p, __r, __automaton, __flags)
221  {
222  if (_M_nfa->_M_start() != _S_invalid_state_id) // Seed only when the NFA reports a valid start state.
223  _M_current[_M_nfa->_M_start()] = _M_results._M_clone(); // The start state begins with a clone of the caller's results.
224  _M_e_closure(); // Not governed by the `if` above: runs even when nothing was seeded.
225  }
226 
227  bool
228  _M_match()
229  { return _M_main_loop<true>(); } // Runs the main loop with __match_mode = true.
230 
231  bool
232  _M_search_from_first()
233  { return _M_main_loop<false>(); } // Runs the main loop with __match_mode = false.
234 
235  private:
236  template<bool __match_mode>
237  bool
238  _M_main_loop(); // Declared only; the definition is not part of this listing.
239 
240  void
241  _M_e_closure(); // Declared only; called once from the constructor above.
242 
243  void
244  _M_move(); // Declared only; the definition is not part of this listing.
245 
246  bool
247  _M_match_less_than(_StateIdT __u, _StateIdT __v) const; // Declared only; the definition is not part of this listing.
248 
249  bool
250  _M_includes_some() const; // Declared only; the definition is not part of this listing.
251 
253  }; // NOTE(review): listing line 252 is absent from this extract; presumably the declaration of _M_current.
254 
255  //@} regex-detail
256 _GLIBCXX_END_NAMESPACE_VERSION
257 } // namespace __detail
258 } // namespace std
259