// Tokenizer.cc — delimiter-based string tokenizer implementation.
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 
00023 
00024 
00025 #include "ink_platform.h"
00026 #include "Tokenizer.h"
00027 #include "ink_assert.h"
00028 #include "ink_memory.h"
00029 
00030 
00031 
00032 
00033 
00034 
00035 
00036         
00037 
00038 Tokenizer::Tokenizer(const char *StrOfDelimiters)
00039 {
00040   int length;
00041 
00042   if (StrOfDelimiters == NULL) {
00043     strOfDelimit = NULL;
00044   } else {
00045     length = (int) (strlen(StrOfDelimiters) + 1);
00046     strOfDelimit = new char[length];
00047     memcpy(strOfDelimit, StrOfDelimiters, length);
00048   }
00049 
00050   memset(&start_node, 0, sizeof(tok_node));
00051 
00052   numValidTokens = 0;
00053   maxTokens = -1;
00054   options = 0;
00055 
00056   add_node = &start_node;
00057   add_index = 0;
00058 }
00059 
00060 Tokenizer::~Tokenizer()
00061 {
00062   bool root = true;
00063   tok_node *cur = &start_node;;
00064   tok_node *next = NULL;
00065 
00066   if (strOfDelimit != NULL) {
00067     delete[]strOfDelimit;
00068   }
00069 
00070   while (cur != NULL) {
00071 
00072     if (options & COPY_TOKS) {
00073       for (int i = 0; i < TOK_NODE_ELEMENTS; i++)
00074         ats_free(cur->el[i]);
00075     }
00076 
00077     next = cur->next;
00078     if (root == false) {
00079       ats_free(cur);
00080     } else {
00081       root = false;
00082     }
00083     cur = next;
00084   }
00085 }
00086 
00087 int
00088 Tokenizer::Initialize(const char *str)
00089 {
00090   return Initialize((char *) str, COPY_TOKS);
00091 }
00092 
00093 inline int
00094 Tokenizer::isDelimiter(char c)
00095 {
00096   int i = 0;
00097 
00098   while (strOfDelimit[i] != '\0') {
00099     if (c == strOfDelimit[i]) {
00100       return 1;
00101     }
00102     i++;
00103   }
00104 
00105   return 0;
00106 }
00107 
// Parse str into tokens using the delimiter set given at construction.
//
// opt is a bitmask: COPY_TOKS (tokens are malloc'd copies) or SHARE_TOKS
// (tokens point into str, which gets NULs written into it), optionally
// OR'd with ALLOW_EMPTY_TOKS (every delimiter ends a token, so runs of
// delimiters yield zero-length tokens).  If neither ownership flag is
// set, COPY_TOKS is assumed.  maxTokens (set elsewhere; -1 = unlimited)
// caps the count: once one slot remains, the rest of the string becomes
// the final token.  Returns the number of tokens stored.
int
Tokenizer::Initialize(char *str, int opt)
{
  char *strStart;
  int priorCharWasDelimit = 1;
  char *tokStart = NULL;
  int tok_count = 0;
  bool max_limit_hit = false;

  // Clear any tokens from a previous parse so the node chain is reused.
  if (numValidTokens > 0) {
    ReUse();
  }

  strStart = str;

  // Default to copying tokens when the caller picked no ownership mode.
  if (!(opt & (COPY_TOKS | SHARE_TOKS))) {
    opt = opt | COPY_TOKS;
  }
  options = opt;

  // The two ownership modes are mutually exclusive.
  ink_assert(!((opt & COPY_TOKS) && (opt & SHARE_TOKS)));

  str = strStart;
  priorCharWasDelimit = 1;

  tok_count = 0;
  tokStart = str;

  while (*str != '\0') {

    // Stop one token short of the cap; the remainder of the string is
    // folded into the final token after the loop.
    if (tok_count + 1 == maxTokens) {
      max_limit_hit = true;
      break;
    }

    if (options & ALLOW_EMPTY_TOKS) {
      // Empty-token mode: every delimiter terminates a token, even a
      // zero-length one.
      if (isDelimiter(*str)) {
        addToken(tokStart, (int) (str - tokStart));
        tok_count++;
        tokStart = str + 1;
        priorCharWasDelimit = 1;
      } else {
        priorCharWasDelimit = 0;
      }
      str++;
    } else {
      // Collapsing mode: runs of delimiters separate non-empty tokens.
      if (isDelimiter(*str)) {
        if (priorCharWasDelimit == 0) {
          // Delimiter ends the token currently in progress.
          addToken(tokStart, (int) (str - tokStart));
          tok_count++;
        }
        priorCharWasDelimit = 1;
      } else {
        if (priorCharWasDelimit == 1) {
          // First non-delimiter after a delimiter starts a new token.
          tokStart = str;
        }
        priorCharWasDelimit = 0;
      }
      str++;
    }
  }

  // Token cap reached: turn what is left of the string into the last token.
  if (max_limit_hit == true) {

    if (options & ALLOW_EMPTY_TOKS) {

      // The final token extends up to (not through) the next delimiter;
      // leaving priorCharWasDelimit == 0 makes the flush below emit it.
      for (; *str != '\0' && !isDelimiter(*str); str++);
      priorCharWasDelimit = 0;

    } else {

      // Skip delimiters preceding the final token.
      for (; *str != '\0' && isDelimiter(*str); str++);

      // If only delimiters remained there is no final token; setting
      // priorCharWasDelimit == 1 suppresses the flush below.
      if (*str == '\0') {
        priorCharWasDelimit = 1;
      } else {
        // Final token starts here ...
        tokStart = str;
        priorCharWasDelimit = 0;

        // ... runs to the end of the string ...
        for (; *str != '\0'; str++);

        // ... and is trimmed of trailing delimiters (tokStart is a
        // non-delimiter, so this back-up cannot underrun the token).
        for (; isDelimiter(*(str - 1)); str--);
      }
    }
  }

  // Flush the token (if any) still in progress when scanning ended.
  if (priorCharWasDelimit == 0) {
    addToken(tokStart, (int) (str - tokStart));
    tok_count++;
  }

  numValidTokens = tok_count;
  return tok_count;
}
00229 
00230 
00231 void
00232 Tokenizer::addToken(char *startAddr, int length)
00233 {
00234   char *add_ptr;
00235   if (options & SHARE_TOKS) {
00236     startAddr[length] = '\0';
00237     add_ptr = startAddr;
00238   } else {
00239     add_ptr = (char *)ats_malloc(length + 1);
00240     memcpy(add_ptr, startAddr, length);
00241     add_ptr[length] = '\0';
00242   }
00243 
00244   add_node->el[add_index] = add_ptr;
00245 
00246   add_index++;
00247 
00248   
00249   
00250   
00251   
00252   if (add_index >= TOK_NODE_ELEMENTS) {
00253     if (add_node->next == NULL) {
00254       add_node->next = (tok_node *)ats_malloc(sizeof(tok_node));
00255       memset(add_node->next, 0, sizeof(tok_node));
00256     }
00257     add_node = add_node->next;
00258     add_index = 0;
00259   }
00260 }
00261 
00262 
00263 const char *
00264 Tokenizer::operator[] (int index)
00265 {
00266   tok_node *
00267     cur_node = &start_node;
00268   int
00269     cur_start = 0;
00270   if (index >= numValidTokens) {
00271     return NULL;
00272   } else {
00273     while (cur_start + TOK_NODE_ELEMENTS <= index) {
00274       cur_node = cur_node->next;
00275       ink_assert(cur_node != NULL);
00276       cur_start += TOK_NODE_ELEMENTS;
00277     }
00278     return cur_node->el[index % TOK_NODE_ELEMENTS];
00279   }
00280 }
00281 
// Return the number of tokens produced by the most recent Initialize()
// call (zero after construction or ReUse()).
int
Tokenizer::getNumber()
{
  return numValidTokens;
}
00287 
00288 const char *
00289 Tokenizer::iterFirst(tok_iter_state * state)
00290 {
00291   state->node = &start_node;
00292   state->index = -1;
00293   return iterNext(state);
00294 }
00295 
00296 const char *
00297 Tokenizer::iterNext(tok_iter_state * state)
00298 {
00299   tok_node *node = state->node;;
00300   int index = state->index;
00301 
00302   index++;
00303   if (index >= TOK_NODE_ELEMENTS) {
00304     node = node->next;
00305     if (node == NULL) {
00306       return NULL;
00307     } else {
00308       index = 0;
00309     }
00310   }
00311 
00312   if (node->el[index] != NULL) {
00313     state->node = node;
00314     state->index = index;
00315     return node->el[index];
00316   } else {
00317     return NULL;
00318   }
00319 }
00320 
00321 
00322 
00323 void
00324 Tokenizer::Print()
00325 {
00326   tok_node *cur_node = &start_node;
00327   int node_index = 0;
00328   int count = 0;
00329 
00330   while (cur_node != NULL) {
00331 
00332     if (cur_node->el[node_index] != NULL) {
00333       printf("Token %d : |%s|\n", count, cur_node->el[node_index]);
00334       count++;
00335     } else {
00336       return;
00337     }
00338 
00339     node_index++;
00340     if (node_index >= TOK_NODE_ELEMENTS) {
00341       cur_node = cur_node->next;
00342       node_index = 0;
00343     }
00344   }
00345 }
00346 
00347 void
00348 Tokenizer::ReUse()
00349 {
00350   tok_node *cur_node = &start_node;
00351 
00352   while (cur_node != NULL) {
00353     if (options & COPY_TOKS) {
00354       for (int i = 0; i < TOK_NODE_ELEMENTS; i++)
00355         ats_free(cur_node->el[i]);
00356     }
00357     memset(cur_node->el, 0, sizeof(char *) * TOK_NODE_ELEMENTS);
00358     cur_node = cur_node->next;
00359   }
00360 
00361   numValidTokens = 0;
00362   add_node = &start_node;
00363   add_index = 0;
00364 }