00001 /** @file 00002 00003 A brief file description 00004 00005 @section license License 00006 00007 Licensed to the Apache Software Foundation (ASF) under one 00008 or more contributor license agreements. See the NOTICE file 00009 distributed with this work for additional information 00010 regarding copyright ownership. The ASF licenses this file 00011 to you under the Apache License, Version 2.0 (the 00012 "License"); you may not use this file except in compliance 00013 with the License. You may obtain a copy of the License at 00014 00015 http://www.apache.org/licenses/LICENSE-2.0 00016 00017 Unless required by applicable law or agreed to in writing, software 00018 distributed under the License is distributed on an "AS IS" BASIS, 00019 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00020 See the License for the specific language governing permissions and 00021 limitations under the License. 00022 */ 00023 00024 /***************************************/ 00025 00026 #ifndef _TOKENIZER_H_ 00027 #define _TOKENIZER_H_ 00028 00029 /**************************************************************************** 00030 * 00031 * Tokenizer.h - A string tokenzier 00032 * 00033 * 00034 * 00035 ****************************************************************************/ 00036 00037 /********************************************************** 00038 * class Tokenizer 00039 * 00040 * Tokenizes a string, and then allows array like access 00041 * 00042 * The delimiters are determined by the string passed to the 00043 * the constructor. 00044 * 00045 * There are three memory options. 00046 * SHARE_TOKS - this modifies the original string passed in 00047 * through Intialize() and shares its space. NULLs 00048 * are inserted into string after each token. Choosing 00049 * this option means the user is reponsible for not 00050 * deallocating the string storage before deallocating 00051 * the tokenizer object 00052 * COPY_TOKS - this option copies the orginial string and 00053 * leaves the original unchanged. The deallocation of the 00054 * original string and the deallocation of the Tokenizer 00055 * object are now independent. 00056 * Note: If neither SHARE_TOKS or COPY_TOKS is selected, COPY_TOKS 00057 * is the default 00058 * ALLOW_EMPTY_TOKENS: If multiple delimiters appear next to each 00059 * other, each delimiter creates a token someof which 00060 * will be zero length. The default is to skip repeated 00061 * delimiters 00062 * 00063 * Tokenizer(const char* StrOfDelimit) - a string that contains 00064 * the delimiters for tokenizing. This string is copied. 00065 * 00066 * Intialize(char* str, TokenizerOpts opt) - Submits a string 00067 * to be tokenized according to the memory options listed above 00068 * 00069 * ReUse() - Allows the object to be reused for a new string 00070 * After ReUse() is called, Initialize() can be called safely 00071 * again 00072 * 00073 * operator[index] - returns a pointer to the number token given 00074 * by index. If index > numTokens-1, NULL is returned. 00075 * Because of way tokens are stored, this is O(n) operation 00076 * It is very fast though for the first 16 tokens and 00077 * is intended to be used on a small number of tokens 00078 * 00079 * iterFirst(tok_iter_state* state) - Returns the first 00080 * token and intializes state argument for subsequent 00081 * calls to iterNext. If no tokens exist, NULL is 00082 * returned 00083 * 00084 * iterNext(tok_iter_state* state) - Returns the next token after 00085 * what arg state returned next last time. Returns NULL if no 00086 * more tokens exists. 00087 * 00088 * Note: To iterate through a list using operator[] takes O(n^2) time 00089 * Using iterFirst, iterNext the running time is O(n), so use 00090 * the iteration where possible 00091 * 00092 * getNumber() - returns the number of tokens 00093 * 00094 * setMaxTokens() - sets the maximum number of tokens. Once maxTokens 00095 * is reached, delimiters are ignored and the 00096 * last token is rest of the string. Negative numbers 00097 * mean no limit on the number of tokens 00098 * 00099 * getMaxTokens() - returns maxTokens. Negative number mean no limit 00100 * 00101 * Print() - Debugging method to print out the tokens 00102 * 00103 *******************************************************************/ 00104 00105 #include "ink_apidefs.h" 00106 00107 #define COPY_TOKS 1 << 0 00108 #define SHARE_TOKS 1 << 1 00109 #define ALLOW_EMPTY_TOKS 1 << 2 00110 00111 #define TOK_NODE_ELEMENTS 16 00112 struct tok_node 00113 { 00114 char *el[TOK_NODE_ELEMENTS]; 00115 tok_node *next; 00116 }; 00117 00118 struct tok_iter_state 00119 { 00120 tok_node *node; 00121 int index; 00122 }; 00123 00124 00125 00126 class Tokenizer 00127 { 00128 public: 00129 inkcoreapi Tokenizer(const char *StrOfDelimiters); 00130 inkcoreapi ~ Tokenizer(); 00131 int Initialize(char *str, int opt); 00132 inkcoreapi int Initialize(const char *str); // Automatically sets option to copy 00133 const char *operator [] (int index); 00134 void setMaxTokens(int max) 00135 { 00136 maxTokens = max; 00137 }; 00138 int getMaxTokens() 00139 { 00140 return maxTokens; 00141 }; 00142 int getNumber(); 00143 void Print(); // Debugging print out 00144 inkcoreapi const char *iterFirst(tok_iter_state * state); 00145 inkcoreapi const char *iterNext(tok_iter_state * state); 00146 private: 00147 Tokenizer & operator=(const Tokenizer &); 00148 Tokenizer(const Tokenizer &); 00149 int isDelimiter(char c); 00150 void addToken(char *startAddr, int length); 00151 void ReUse(); 00152 char *strOfDelimit; 00153 tok_node start_node; 00154 int numValidTokens; 00155 int maxTokens; 00156 int options; 00157 00158 // State about where to add the next token 00159 tok_node *add_node; 00160 int add_index; 00161 }; 00162 00163 #endif