Go to the documentation of this file.00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025 #include "ink_platform.h"
00026 #include "Tokenizer.h"
00027 #include "ink_assert.h"
00028 #include "ink_memory.h"
00029
00030
00031
00032
00033
00034
00035
00036
00037
00038 Tokenizer::Tokenizer(const char *StrOfDelimiters)
00039 {
00040 int length;
00041
00042 if (StrOfDelimiters == NULL) {
00043 strOfDelimit = NULL;
00044 } else {
00045 length = (int) (strlen(StrOfDelimiters) + 1);
00046 strOfDelimit = new char[length];
00047 memcpy(strOfDelimit, StrOfDelimiters, length);
00048 }
00049
00050 memset(&start_node, 0, sizeof(tok_node));
00051
00052 numValidTokens = 0;
00053 maxTokens = -1;
00054 options = 0;
00055
00056 add_node = &start_node;
00057 add_index = 0;
00058 }
00059
00060 Tokenizer::~Tokenizer()
00061 {
00062 bool root = true;
00063 tok_node *cur = &start_node;;
00064 tok_node *next = NULL;
00065
00066 if (strOfDelimit != NULL) {
00067 delete[]strOfDelimit;
00068 }
00069
00070 while (cur != NULL) {
00071
00072 if (options & COPY_TOKS) {
00073 for (int i = 0; i < TOK_NODE_ELEMENTS; i++)
00074 ats_free(cur->el[i]);
00075 }
00076
00077 next = cur->next;
00078 if (root == false) {
00079 ats_free(cur);
00080 } else {
00081 root = false;
00082 }
00083 cur = next;
00084 }
00085 }
00086
00087 int
00088 Tokenizer::Initialize(const char *str)
00089 {
00090 return Initialize((char *) str, COPY_TOKS);
00091 }
00092
00093 inline int
00094 Tokenizer::isDelimiter(char c)
00095 {
00096 int i = 0;
00097
00098 while (strOfDelimit[i] != '\0') {
00099 if (c == strOfDelimit[i]) {
00100 return 1;
00101 }
00102 i++;
00103 }
00104
00105 return 0;
00106 }
00107
// Tokenize `str` using the delimiter set, replacing any tokens from a
// previous Initialize() call.
//
// @param str NUL-terminated string to tokenize.  Mutated in place when
//            SHARE_TOKS is set (delimiters are overwritten with NULs).
// @param opt bitmask of COPY_TOKS / SHARE_TOKS / ALLOW_EMPTY_TOKS.
// @return the number of tokens found (capped by maxTokens when set).
int
Tokenizer::Initialize(char *str, int opt)
{
  char *strStart;
  int priorCharWasDelimit = 1;
  char *tokStart = NULL;
  int tok_count = 0;
  bool max_limit_hit = false;

  // Drop tokens from an earlier run before parsing again.
  if (numValidTokens > 0) {
    ReUse();
  }

  strStart = str;

  // Default to COPY_TOKS when the caller picked neither ownership mode.
  if (!(opt & (COPY_TOKS | SHARE_TOKS))) {
    opt = opt | COPY_TOKS;
  }
  options = opt;

  // COPY_TOKS and SHARE_TOKS are mutually exclusive ownership modes.
  ink_assert(!((opt & COPY_TOKS) && (opt & SHARE_TOKS)));

  // Reset the scan state.
  str = strStart;
  priorCharWasDelimit = 1;

  tok_count = 0;
  tokStart = str;

  while (*str != '\0') {

    // Stop one token early when a limit is set (maxTokens == -1 means
    // unlimited) so the remainder of the string can become the final
    // token below.
    if (tok_count + 1 == maxTokens) {
      max_limit_hit = true;
      break;
    }

    if (options & ALLOW_EMPTY_TOKS) {
      // Every delimiter ends a token, even a zero-length one.
      if (isDelimiter(*str)) {
        addToken(tokStart, (int) (str - tokStart));
        tok_count++;
        tokStart = str + 1;
        priorCharWasDelimit = 1;
      } else {
        priorCharWasDelimit = 0;
      }
      str++;
    } else {
      // Runs of delimiters collapse; only a delimiter that follows a
      // non-delimiter character closes a token.
      if (isDelimiter(*str)) {
        if (priorCharWasDelimit == 0) {
          // End of the current token.
          addToken(tokStart, (int) (str - tokStart));
          tok_count++;
        }
        priorCharWasDelimit = 1;
      } else {
        if (priorCharWasDelimit == 1) {
          // First character of a new token.
          tokStart = str;
        }
        priorCharWasDelimit = 0;
      }
      str++;
    }
  }

  // Token limit reached: fold the remaining input into one last token.
  if (max_limit_hit == true) {

    if (options & ALLOW_EMPTY_TOKS) {

      // Advance to the next delimiter (or end of string); the final
      // token is [tokStart, str).  Force its emission below.
      for (; *str != '\0' && !isDelimiter(*str); str++);
      priorCharWasDelimit = 0;

    } else {

      // Skip leading delimiters before the final token.
      for (; *str != '\0' && isDelimiter(*str); str++);

      // Only delimiters remained - there is no final token.
      if (*str == '\0') {
        priorCharWasDelimit = 1;
      } else {

        // Final token starts here and runs to the end of the string...
        tokStart = str;
        priorCharWasDelimit = 0;

        for (; *str != '\0'; str++);

        // ...minus any trailing delimiters, which are backed over.
        for (; isDelimiter(*(str - 1)); str--);
      }
    }
  }

  // Emit the trailing token left open when the string ended (or when
  // the limit-handling above set one up).
  if (priorCharWasDelimit == 0) {

    addToken(tokStart, (int) (str - tokStart));
    tok_count++;
  }

  numValidTokens = tok_count;
  return tok_count;
}
00229
00230
00231 void
00232 Tokenizer::addToken(char *startAddr, int length)
00233 {
00234 char *add_ptr;
00235 if (options & SHARE_TOKS) {
00236 startAddr[length] = '\0';
00237 add_ptr = startAddr;
00238 } else {
00239 add_ptr = (char *)ats_malloc(length + 1);
00240 memcpy(add_ptr, startAddr, length);
00241 add_ptr[length] = '\0';
00242 }
00243
00244 add_node->el[add_index] = add_ptr;
00245
00246 add_index++;
00247
00248
00249
00250
00251
00252 if (add_index >= TOK_NODE_ELEMENTS) {
00253 if (add_node->next == NULL) {
00254 add_node->next = (tok_node *)ats_malloc(sizeof(tok_node));
00255 memset(add_node->next, 0, sizeof(tok_node));
00256 }
00257 add_node = add_node->next;
00258 add_index = 0;
00259 }
00260 }
00261
00262
00263 const char *
00264 Tokenizer::operator[] (int index)
00265 {
00266 tok_node *
00267 cur_node = &start_node;
00268 int
00269 cur_start = 0;
00270 if (index >= numValidTokens) {
00271 return NULL;
00272 } else {
00273 while (cur_start + TOK_NODE_ELEMENTS <= index) {
00274 cur_node = cur_node->next;
00275 ink_assert(cur_node != NULL);
00276 cur_start += TOK_NODE_ELEMENTS;
00277 }
00278 return cur_node->el[index % TOK_NODE_ELEMENTS];
00279 }
00280 }
00281
00282 int
00283 Tokenizer::getNumber()
00284 {
00285 return numValidTokens;
00286 }
00287
00288 const char *
00289 Tokenizer::iterFirst(tok_iter_state * state)
00290 {
00291 state->node = &start_node;
00292 state->index = -1;
00293 return iterNext(state);
00294 }
00295
00296 const char *
00297 Tokenizer::iterNext(tok_iter_state * state)
00298 {
00299 tok_node *node = state->node;;
00300 int index = state->index;
00301
00302 index++;
00303 if (index >= TOK_NODE_ELEMENTS) {
00304 node = node->next;
00305 if (node == NULL) {
00306 return NULL;
00307 } else {
00308 index = 0;
00309 }
00310 }
00311
00312 if (node->el[index] != NULL) {
00313 state->node = node;
00314 state->index = index;
00315 return node->el[index];
00316 } else {
00317 return NULL;
00318 }
00319 }
00320
00321
00322
00323 void
00324 Tokenizer::Print()
00325 {
00326 tok_node *cur_node = &start_node;
00327 int node_index = 0;
00328 int count = 0;
00329
00330 while (cur_node != NULL) {
00331
00332 if (cur_node->el[node_index] != NULL) {
00333 printf("Token %d : |%s|\n", count, cur_node->el[node_index]);
00334 count++;
00335 } else {
00336 return;
00337 }
00338
00339 node_index++;
00340 if (node_index >= TOK_NODE_ELEMENTS) {
00341 cur_node = cur_node->next;
00342 node_index = 0;
00343 }
00344 }
00345 }
00346
00347 void
00348 Tokenizer::ReUse()
00349 {
00350 tok_node *cur_node = &start_node;
00351
00352 while (cur_node != NULL) {
00353 if (options & COPY_TOKS) {
00354 for (int i = 0; i < TOK_NODE_ELEMENTS; i++)
00355 ats_free(cur_node->el[i]);
00356 }
00357 memset(cur_node->el, 0, sizeof(char *) * TOK_NODE_ELEMENTS);
00358 cur_node = cur_node->next;
00359 }
00360
00361 numValidTokens = 0;
00362 add_node = &start_node;
00363 add_index = 0;
00364 }