Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #ifndef _SIMPLE_TOKENIZER_H_
00025 #define _SIMPLE_TOKENIZER_H_
00026
00027 #include <stdlib.h>
00028 #include <string.h>
00029 #include <ctype.h>
00030
00031
00032
00033
00034
00035
00036
00037
00038
00039
00040
00041
00042
00043
00044
00045
00046
00047
00048
00049
00050
00051
00052
00053
00054
00055
00056
00057
00058
00059
00060
00061
00062
00063
00064
00065
00066
00067
00068
00069
00070
00071
00072
00073
00074
00075
00076
00077
00078
00079
00080
00081
00082
00083
00084
00085
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111
00112 class SimpleTokenizer
00113 {
00114 public:
00115
00116
00117
00118
00119 enum
00120 {
00121 CONSIDER_NULL_FIELDS = 1,
00122 KEEP_WHITESPACE_LEFT = 2,
00123 KEEP_WHITESPACE_RIGHT = 4,
00124 KEEP_WHITESPACE = KEEP_WHITESPACE_LEFT + KEEP_WHITESPACE_RIGHT,
00125 OVERWRITE_INPUT_STRING = 8
00126 };
00127
00128 SimpleTokenizer(char delimiter = ' ', unsigned mode = 0, char escape = '\\')
00129 : _data(0), _delimiter(delimiter), _mode(mode), _escape(escape), _start(0), _length(0)
00130 { }
00131
00132
00133 SimpleTokenizer(const char *s, char delimiter = ' ', unsigned mode = 0, char escape = '\\')
00134 : _data(0), _delimiter(delimiter), _mode(mode), _escape(escape)
00135 {
00136 setString(s);
00137 }
00138
00139 ~SimpleTokenizer() {
00140 _clearData();
00141 }
00142
00143 void setString(const char *s)
00144 {
00145 _clearData();
00146
00147 _start = 0;
00148 _length = strlen(s);
00149 _data = (_mode & OVERWRITE_INPUT_STRING ? const_cast<char *>(s) : ats_strdup(s));
00150
00151
00152
00153
00154
00155
00156 _data[_length++] = _delimiter;
00157 };
00158 char *getNext(int count = 1) {
00159 return _getNext(_delimiter, false, count);
00160 };
00161 char *getNext(char delimiter, int count = 1) {
00162 return _getNext(delimiter, false, count);
00163 }
00164 char *getRest()
00165 {
00166
00167
00168
00169 return _getNext(_delimiter, false, _length);
00170 }
00171 size_t getNumTokensRemaining()
00172 {
00173 return _getNumTokensRemaining(_delimiter);
00174 };
00175 size_t getNumTokensRemaining(char delimiter)
00176 {
00177 return _getNumTokensRemaining(delimiter);
00178 };
00179 char *peekAtRestOfString()
00180 {
00181 _data[_length - 1] = 0;
00182 return (_start < _length ? &_data[_start] : &_data[_length - 1]);
00183 }
00184
00185 private:
00186
00187 char *_data;
00188
00189 char _delimiter;
00190 unsigned _mode;
00191
00192 char _escape;
00193 size_t _start;
00194
00195 size_t _length;
00196
00197 void _clearData()
00198 {
00199 if (_data && !(_mode & OVERWRITE_INPUT_STRING)) {
00200 ats_free(_data);
00201 }
00202 }
00203
00204 char *_getNext(char delimiter, bool countOnly = false, int numTokens = 1) {
00205 char *next = NULL;
00206
00207 if (_start < _length) {
00208
00209
00210 bool hasEsc = false;
00211 while (_start < _length &&
00212 ((!(_mode & CONSIDER_NULL_FIELDS) &&
00213 (_data[_start] == delimiter &&
00214 !(_start &&
00215 (_data[_start - 1] == _escape ? (hasEsc = true) : 0)))) ||
00216 (!(_mode & KEEP_WHITESPACE_LEFT) && isspace(_data[_start])))) {
00217 ++_start;
00218 }
00219
00220 if (_start < _length)
00221 {
00222
00223
00224
00225 _data[_length - 1] = delimiter;
00226
00227 next = &_data[_start];
00228
00229
00230
00231 size_t end = _start;
00232 int delimCount = 0;
00233 while (end < _length &&
00234 (_data[end] != delimiter ||
00235 (end && (_data[end - 1] == _escape ? (hasEsc = true) : 0)) ||
00236 ((++delimCount < numTokens) && (end < _length - 1)))) {
00237 ++end;
00238 }
00239
00240 _start = end + 1;
00241
00242
00243
00244
00245
00246 if (!(_mode & CONSIDER_NULL_FIELDS)) {
00247 while (_data[--end] == delimiter);
00248 ++end;
00249 }
00250
00251 if (!(_mode & KEEP_WHITESPACE_RIGHT)) {
00252 while (isspace(_data[--end]));
00253 ++end;
00254 }
00255
00256 if (!countOnly) {
00257 _data[end] = 0;
00258
00259
00260
00261
00262 if (hasEsc && delimCount == 1) {
00263 int numEscape = 0, i = 0;
00264 while (next[i]) {
00265 if (next[i] == _escape) {
00266 ++numEscape;
00267 } else {
00268 next[i - numEscape] = next[i];
00269 }
00270 ++i;
00271 }
00272 _data[end - numEscape] = 0;
00273 }
00274 }
00275 }
00276 }
00277 return next;
00278 };
00279
00280 size_t _getNumTokensRemaining(char delimiter)
00281 {
00282 size_t startSave = _start;
00283 size_t count = 0;
00284 while (_getNext(delimiter, true)) {
00285 ++count;
00286 };
00287 _start = startSave;
00288 return count;
00289 };
00290 };
00291
00292 #endif