wibble 1.1
regexp.h
Go to the documentation of this file.
1#ifndef WIBBLE_REGEXP_H
2#define WIBBLE_REGEXP_H
3
4/*
5 * OO wrapper for regular expression functions
6 *
7 * Copyright (C) 2003--2006 Enrico Zini <enrico@debian.org>
8 *
9 * This library is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public
11 * License as published by the Free Software Foundation; either
12 * version 2.1 of the License, or (at your option) any later version.
13 *
14 * This library is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
18 *
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with this library; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 */
23
24#include <wibble/exception.h>
25#include <sys/types.h>
26#include <regex.h>
27
28namespace wibble {
29namespace exception {
30
32
// Exception describing a regular-expression error (wibble::exception::Regexp).
// NOTE(review): the line carrying the `class` header is not reproduced in this
// listing; the cross-reference index points at the exception base class in
// exception.h, so this presumably derives from it -- confirm in the real header.
34{
35protected:
// Regexp error code associated with the exception; returned by code().
36 int m_code;
// Description text; returned by desc().
37 std::string m_message;
38
39public:
// Construct from the regex object, the error code and a context string.
// Defined out of line. NOTE(review): presumably builds m_message from
// `re` and `code` (e.g. via regerror) -- confirm in regexp.cpp.
40 Regexp(const regex_t& re, int code, const std::string& context)
41 throw ();
42 ~Regexp() throw () {}
43
// Get the regexp error code associated to the exception.
45 virtual int code() const throw () { return m_code; }
46
// Get a string tag identifying the exception type (always "Regexp").
47 virtual const char* type() const throw () { return "Regexp"; }
// Get a string describing what happened that threw the exception.
48 virtual std::string desc() const throw () { return m_message; }
49};
50
51}
52
// OO wrapper for the regular expression functions declared in <regex.h>.
// NOTE(review): listing lines 56-57 are absent from this rendering; the
// cross-reference index places `regex_t re` and `regmatch_t* pmatch` there.
// NOTE(review): a destructor is declared but no copy constructor or copy
// assignment is visible here; if the class owns `re`/`pmatch`, copying an
// instance may be unsafe -- confirm against regexp.cpp.
53class Regexp
54{
55protected:
// NOTE(review): presumably the number of match slots (from match_count) -- confirm.
58 int nmatch;
// NOTE(review): presumably the string last passed to match(), kept so that
// operator[] can return substrings of it -- confirm in regexp.cpp.
59 std::string lastMatch;
60
61public:
62 /* Note that match_count is required to be >1 to enable
63 sub-regexp capture. The maximum *INCLUDES* the whole-regexp
64 match (indexed 0). [TODO we may want to fix this to be more
65 friendly?] */
// expr: the regular expression text; match_count and flags both default to 0.
// NOTE(review): flags are presumably regcomp() cflags -- confirm.
66 Regexp(const std::string& expr, int match_count = 0, int flags = 0);
67 ~Regexp() throw ();
68
// Try the expression against str and report the outcome as a bool.
// NOTE(review): flags (default 0) are presumably regexec() eflags -- confirm.
69 bool match(const std::string& str, int flags = 0);
70
71 /* Indexing is from 1 for capture matches, like perl's $0,
72 $1... 0 is whole-regexp match, not a capture. TODO
73 the range is miscalculated (an off-by-one, wrt. the
74 counterintuitive match counting). */
75 std::string operator[](int idx);
76
// Position/size accessors for match number idx (same indexing as operator[]).
// NOTE(review): presumably offsets into the string given to the last
// match() call -- confirm in regexp.cpp.
77 size_t matchStart(int idx);
78 size_t matchEnd(int idx);
79 size_t matchLength(int idx);
80};
81
// ERegexp: class with the same constructor signature as Regexp.
// NOTE(review): the class header line and listing line 86 (the constructor's
// initializer/body) are not reproduced in this rendering. The name suggests a
// Regexp using POSIX extended syntax (REG_EXTENDED) -- confirm in the real header.
83{
84public:
85 ERegexp(const std::string& expr, int match_count = 0, int flags = 0)
87};
88
// Tokenizer: iterates over pieces of a string selected by a regular expression.
// NOTE(review): the class header, listing line 92 (the `re` member initialised
// in the constructor below), line 95 (the nested const_iterator header),
// lines 101-103 (difference_type/pointer/reference typedefs) and line 128
// (begin()) are not reproduced in this rendering.
90{
// Held by reference, not copied: the string passed to the constructor must
// outlive the Tokenizer and every iterator obtained from it.
91 const std::string& str;
93

94public:
// Nested const_iterator: forward iterator over tokens of tok.str.
96 {
97 Tokenizer& tok;
// Bounds of the current token inside tok.str (see operator*).
98 size_t beg, end;
99 public:
100 typedef std::string value_type;
104 typedef std::forward_iterator_tag iterator_category;
105
// Start iterator: begins at offset 0 and immediately advances once via
// operator++() (defined out of line).
106 const_iterator(Tokenizer& tok) : tok(tok), beg(0), end(0) { operator++(); }
// End iterator: both offsets sit at tok.str.size(); the bool argument is
// ignored and only selects this overload.
107 const_iterator(Tokenizer& tok, bool) : tok(tok), beg(tok.str.size()), end(tok.str.size()) {}
108
109 const_iterator& operator++();
110
// Returns the current token as a new string: tok.str[beg, end).
111 std::string operator*() const
112 {
113 return tok.str.substr(beg, end-beg);
114 }
// Equality looks only at the offsets; the Tokenizer referenced is not compared.
115 bool operator==(const const_iterator& ti) const
116 {
117 return beg == ti.beg && end == ti.end;
118 }
119 bool operator!=(const const_iterator& ti) const
120 {
121 return beg != ti.beg || end != ti.end;
122 }
123 };
124
// Binds str by reference (see the lifetime note on the member) and builds the
// regexp member from `re` with a match count of 1 and the given flags.
125 Tokenizer(const std::string& str, const std::string& re, int flags)
126 : str(str), re(re, 1, flags) {}
127
// Past-the-end iterator, built with the (Tokenizer&, bool) overload.
129 const_iterator end() { return const_iterator(*this, false); }
130};
131
// Splitter: split a string using a regular expression to match the token
// separators.
// NOTE(review): the class header, listing line 147 (the `re` member initialised
// in the constructor below), line 155 (the nested const_iterator header),
// lines 163-165 (difference_type/pointer/reference typedefs), line 169
// (const_iterator(wibble::Regexp&)) and line 201 (end()) are not reproduced
// in this rendering.
146{
148

149public:
154 // TODO: add iterator_traits
// Nested const_iterator. It holds a reference to the Splitter's Regexp rather
// than a copy, so iterators from the same Splitter share one Regexp object;
// the original documentation warns against running more than one iteration
// of a Splitter at once.
156 {
157 wibble::Regexp& re;
// Token exposed by operator* and operator->.
158 std::string cur;
// Initialised with the whole input string. NOTE(review): presumably the
// not-yet-split remainder, consumed by operator++ -- confirm in regexp.cpp.
159 std::string next;
160
161 public:
162 typedef std::string value_type;
166 typedef std::forward_iterator_tag iterator_category;
167
// Start iterator: stores str in `next`, then advances once (operator++ is
// defined out of line).
168 const_iterator(wibble::Regexp& re, const std::string& str) : re(re), next(str) { ++*this; }
170
171 const_iterator& operator++();
172
// Reference to the current token; it refers to this iterator's own `cur`.
173 const std::string& operator*() const
174 {
175 return cur;
176 }
177 const std::string* operator->() const
178 {
179 return &cur;
180 }
// Equality compares the contents of cur and next; the Regexp referenced is
// not part of the comparison.
181 bool operator==(const const_iterator& ti) const
182 {
183 return cur == ti.cur && next == ti.next;
184 }
185 bool operator!=(const const_iterator& ti) const
186 {
187 return cur != ti.cur || next != ti.next;
188 }
189 };
190
// Create a splitter that uses the given regular expression to find tokens.
// The regexp member is built with a match count of 1 and the given flags.
194 Splitter(const std::string& re, int flags)
195 : re(re, 1, flags) {}
196
// Split the string and iterate the resulting tokens. The returned iterator
// refers to this Splitter's Regexp and copies str into its own state.
200 const_iterator begin(const std::string& str) { return const_iterator(re, str); }
202};
203
204}
205
206// vim:set ts=4 sw=4:
207#endif
Definition regexp.h:83
ERegexp(const std::string &expr, int match_count=0, int flags=0)
Definition regexp.h:85
Definition regexp.h:54
size_t matchLength(int idx)
Definition regexp.cpp:118
std::string lastMatch
Definition regexp.h:59
regmatch_t * pmatch
Definition regexp.h:57
size_t matchStart(int idx)
Definition regexp.cpp:104
regex_t re
Definition regexp.h:56
int nmatch
Definition regexp.h:58
size_t matchEnd(int idx)
Definition regexp.cpp:111
bool match(const std::string &str, int flags=0)
Definition regexp.cpp:73
~Regexp()
Definition regexp.cpp:65
Warning: the various iterators reuse the Regexps and therefore only one iteration of a Splitter can be done at a given time.
Definition regexp.h:156
value_type * pointer
Definition regexp.h:164
std::string value_type
Definition regexp.h:162
bool operator!=(const const_iterator &ti) const
Definition regexp.h:185
const std::string & operator*() const
Definition regexp.h:173
const_iterator(wibble::Regexp &re, const std::string &str)
Definition regexp.h:168
const_iterator(wibble::Regexp &re)
Definition regexp.h:169
std::forward_iterator_tag iterator_category
Definition regexp.h:166
bool operator==(const const_iterator &ti) const
Definition regexp.h:181
value_type & reference
Definition regexp.h:165
ptrdiff_t difference_type
Definition regexp.h:163
const std::string * operator->() const
Definition regexp.h:177
Split a string using a regular expression to match the token separators.
Definition regexp.h:146
Splitter(const std::string &re, int flags)
Create a splitter that uses the given regular expression to find tokens.
Definition regexp.h:194
const_iterator end()
Definition regexp.h:201
const_iterator begin(const std::string &str)
Split the string and iterate the resulting tokens.
Definition regexp.h:200
Definition regexp.h:96
std::forward_iterator_tag iterator_category
Definition regexp.h:104
std::string value_type
Definition regexp.h:100
const_iterator(Tokenizer &tok)
Definition regexp.h:106
value_type & reference
Definition regexp.h:103
const_iterator(Tokenizer &tok, bool)
Definition regexp.h:107
value_type * pointer
Definition regexp.h:102
bool operator!=(const const_iterator &ti) const
Definition regexp.h:119
bool operator==(const const_iterator &ti) const
Definition regexp.h:115
std::string operator*() const
Definition regexp.h:111
ptrdiff_t difference_type
Definition regexp.h:101
Definition regexp.h:90
Tokenizer(const std::string &str, const std::string &re, int flags)
Definition regexp.h:125
const_iterator end()
Definition regexp.h:129
const_iterator begin()
Definition regexp.h:128
const std::vector< std::string > & context() const
Definition exception.h:166
Base class for all exceptions.
Definition exception.h:180
Definition regexp.h:34
int m_code
Definition regexp.h:36
std::string m_message
Definition regexp.h:37
virtual std::string desc() const
Get a string describing what happened that threw the exception.
Definition regexp.h:48
virtual int code() const
Get the regexp error code associated to the exception.
Definition regexp.h:45
virtual const char * type() const
Get a string tag identifying the exception type.
Definition regexp.h:47
~Regexp()
Definition regexp.h:42
Definition amorph.h:17
Definition amorph.h:30