• Main Page
  • Related Pages
  • Namespaces
  • Data Structures
  • Files
  • File List
  • Globals

HostLookup.cc

Go to the documentation of this file.
00001 /** @file
00002 
00003   A brief file description
00004 
00005   @section license License
00006 
00007   Licensed to the Apache Software Foundation (ASF) under one
00008   or more contributor license agreements.  See the NOTICE file
00009   distributed with this work for additional information
00010   regarding copyright ownership.  The ASF licenses this file
00011   to you under the Apache License, Version 2.0 (the
00012   "License"); you may not use this file except in compliance
00013   with the License.  You may obtain a copy of the License at
00014 
00015       http://www.apache.org/licenses/LICENSE-2.0
00016 
00017   Unless required by applicable law or agreed to in writing, software
00018   distributed under the License is distributed on an "AS IS" BASIS,
00019   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00020   See the License for the specific language governing permissions and
00021   limitations under the License.
00022  */
00023 
00024 /*****************************************************************************
00025  *
00026  *  HostLookup.cc - Implementation of a hostname/domainname matcher
00027  *
00028  *
00029  ****************************************************************************/
00030 #include "libts.h"
00031 #include "HostLookup.h"
00032 #include "MatcherUtils.h"
00033 
00034 // bool domaincmp(const char* hostname, const char* domain)
00035 //
00036 //    Returns true if hostname is in domain
00037 //
00038 bool
00039 domaincmp(const char *hostname, const char *domain)
00040 {
00041   ink_assert(hostname != NULL);
00042   ink_assert(domain != NULL);
00043 
00044   const char *host_cur = hostname + strlen(hostname);
00045   const char *domain_cur = domain + strlen(domain);
00046 
00047   // Check to see if were passed emtpy stings for either
00048   //  argument.  Empty strings do not match anything
00049   //
00050   if (domain_cur == domain || host_cur == hostname) {
00051     return false;
00052   }
00053   // Go back to the last character
00054   domain_cur--;
00055   host_cur--;
00056 
00057   // Trailing dots should be ignored since they are optional
00058   //
00059   if (*(domain_cur) == '.') {
00060     domain_cur--;
00061   }
00062   if (*(host_cur) == '.') {
00063     host_cur--;
00064   }
00065   // Walk through both strings backward
00066   while (domain_cur >= domain && host_cur >= hostname) {
00067 
00068     // If we still have characters left on both strings and
00069     //   they do not match, matching fails
00070     //
00071     if (tolower(*domain_cur) != tolower(*host_cur)) {
00072       return false;
00073     }
00074 
00075     domain_cur--;
00076     host_cur--;
00077   };
00078 
00079   // There are three possible cases that could have gotten us
00080   //  here
00081   //
00082   //     Case 1: we ran out of both strings
00083   //     Case 2: we ran out of domain but not hostname
00084   //     Case 3: we ran out of hostname but not domain
00085   //
00086   if (domain_cur < domain) {
00087     if (host_cur < hostname) {
00088       // This covers the case 1
00089       //   ex: example.com matching example.com
00090       return true;
00091     } else {
00092       // This covers case 2 (the most common case):
00093       //   ex: www.example.com matching .com or com
00094       // But we must check that we do match
00095       //   www.inktomi.ecom against com
00096       //
00097       if (*(domain_cur + 1) == '.') {
00098         return true;
00099       } else if (*host_cur == '.') {
00100         return true;
00101       } else {
00102         return false;
00103       }
00104     }
00105   } else if (host_cur < hostname) {
00106     // This covers the case 3 (a very unusual case)
00107     //  ex: example.com needing to match .example.com
00108     if (*domain_cur == '.' && domain_cur == domain) {
00109       return true;
00110     } else {
00111       return false;
00112     }
00113   }
00114 
00115   ink_assert(!"Should not get here");
00116   return false;
00117 }
00118 
00119 // int hostcmp(const char* c1, const char* c2)
00120 //
00121 //   Similar to strcasecmp except that if one string has a
00122 //     trailing '.' and the other one does not
00123 //     then they are equal
00124 //
00125 //   Meant to compare to host names and
00126 //     take in out account that www.example.com
00127 //     and www.example.com. are the same host
00128 //     since the trailing dot is optional
00129 //
00130 int
00131 hostcmp(const char *c1, const char *c2)
00132 {
00133   ink_assert(c1 != NULL);
00134   ink_assert(c2 != NULL);
00135   do {
00136     if (tolower(*c1) < tolower(*c2)) {
00137       if (*c1 == '\0' && *c2 == '.' && *(c2 + 1) == '\0') {
00138         break;
00139       }
00140       return -1;
00141     } else if (tolower(*c1) > tolower(*c2)) {
00142       if (*c2 == '\0' && *c1 == '.' && *(c1 + 1) == '\0') {
00143         break;
00144       }
00145       return 1;
00146     }
00147 
00148     if (*c1 == '\0') {
00149       break;
00150     }
00151     c1++;
00152     c2++;
00153   } while (1);
00154 
00155   return 0;
00156 }
00157 
00158 // static const unsigned char asciiToTable[256]
00159 //
00160 //   Used to Map Legal hostname characters into
00161 //     to indexes used by char_table to
00162 //     index strings
00163 //
00164 //   Legal hostname characters are 0-9, A-Z, a-z and '-'
00165 //   '_' is also included although it is not in the spec (RFC 883)
00166 //
00167 //   Uppercase and lowercase "a-z" both map to same indexes
00168 //     since hostnames are not case sensative
00169 //
00170 //   Illegal characters map to 255
00171 //
00172 static const unsigned char asciiToTable[256] = {
00173   255, 255, 255, 255, 255, 255, 255, 255,       // 0 - 7
00174   255, 255, 255, 255, 255, 255, 255, 255,       // 8 - 15
00175   255, 255, 255, 255, 255, 255, 255, 255,       // 16 - 23
00176   255, 255, 255, 255, 255, 255, 255, 255,       // 24 - 31
00177   255, 255, 255, 255, 255, 255, 255, 255,       // 32 - 39
00178   255, 255, 255, 255, 255, 0, 255, 255, // 40 - 47 ('-')
00179   1, 2, 3, 4, 5, 6, 7, 8,       // 48 - 55 (0-7)
00180   9, 10, 255, 255, 255, 255, 255, 255,  // 56 - 63 (8-9)
00181   255, 11, 12, 13, 14, 15, 16, 17,      // 64 - 71 (A-G)
00182   18, 19, 20, 21, 22, 23, 24, 25,       // 72 - 79 (H-O)
00183   26, 27, 28, 29, 30, 31, 32, 33,       // 80 - 87 (P-W)
00184   34, 35, 36, 255, 255, 255, 255, 37,   // 88 - 95 (X-Z, '_')
00185   255, 11, 12, 13, 14, 15, 16, 17,      // 96 - 103 (a-g)
00186   18, 19, 20, 21, 22, 23, 24, 25,       // 104 - 111 (h-0)
00187   26, 27, 28, 29, 30, 31, 32, 33,       // 112 - 119 (p-w)
00188   34, 35, 36, 255, 255, 255, 255, 255,  // 120 - 127 (x-z)
00189   255, 255, 255, 255, 255, 255, 255, 255,
00190   255, 255, 255, 255, 255, 255, 255, 255,
00191   255, 255, 255, 255, 255, 255, 255, 255,
00192   255, 255, 255, 255, 255, 255, 255, 255,
00193   255, 255, 255, 255, 255, 255, 255, 255,
00194   255, 255, 255, 255, 255, 255, 255, 255,
00195   255, 255, 255, 255, 255, 255, 255, 255,
00196   255, 255, 255, 255, 255, 255, 255, 255,
00197   255, 255, 255, 255, 255, 255, 255, 255,
00198   255, 255, 255, 255, 255, 255, 255, 255,
00199   255, 255, 255, 255, 255, 255, 255, 255,
00200   255, 255, 255, 255, 255, 255, 255, 255,
00201   255, 255, 255, 255, 255, 255, 255, 255,
00202   255, 255, 255, 255, 255, 255, 255, 255,
00203   255, 255, 255, 255, 255, 255, 255, 255,
00204   255, 255, 255, 255, 255, 255, 255, 255
00205 };
00206 
00207 // Number of legal characters in the acssiToTable array
00208 static const int numLegalChars = 38;
00209 
00210 // struct charIndex_el
00211 //
00212 //   Used by class charIndex.  Forms a single level
00213 //    in charIndex tree
00214 //
00215 struct charIndex_el
00216 {
00217   charIndex_el();
00218   ~charIndex_el();
00219   HostBranch *branch_array[numLegalChars];
00220   charIndex_el *next_level[numLegalChars];
00221 };
00222 
00223 charIndex_el::charIndex_el()
00224 {
00225   memset(branch_array, 0, sizeof(branch_array));
00226   memset(next_level, 0, sizeof(next_level));
00227 }
00228 
00229 charIndex_el::~charIndex_el()
00230 {
00231   int i;
00232 
00233   // Recursively delete all the lower levels of the
00234   //   data structure
00235   for (i = 0; i < numLegalChars; i++) {
00236     if (next_level[i] != NULL) {
00237       delete next_level[i];
00238     }
00239   }
00240 }
00241 
00242 // struct charIndexIterInternal
00243 //
00244 //  An internal struct to charIndexIterState
00245 //    Stores the location of an element in
00246 //    class charIndex
00247 //
00248 struct charIndexIterInternal
00249 {
00250   charIndex_el *ptr;
00251   int index;
00252 };
00253 
00254 // Used as a default return element for DynArray in
00255 //   struct charIndexIterState
00256 static charIndexIterInternal default_iter = { NULL, -1 };
00257 
00258 // struct charIndexIterState
00259 //
00260 //    struct for the callee to keep interation state
00261 //      for class charIndex
00262 //
00263 struct charIndexIterState
00264 {
00265   charIndexIterState();
00266 
00267   // Where that level we are in interation
00268   int cur_level;
00269 
00270   // Where we got the last element from
00271   int cur_index;
00272   charIndex_el *cur_el;
00273 
00274   //  Queue of the above levels
00275     DynArray<charIndexIterInternal> q;
00276 };
00277 
00278 charIndexIterState::charIndexIterState():cur_level(-1), cur_index(-1), cur_el(NULL), q(&default_iter, 6)
00279 {
00280 }
00281 
00282 
00283 // class charIndex - A constant time string matcher intended for
00284 //    short strings in a sparsely populated DNS paritition
00285 //
00286 //    Creates a look up table for character in data string
00287 //
00288 //    Mapping from character to table index is done by
00289 //      asciiToTable[]
00290 //
00291 //    The illegalKey hash table is side structure for any
00292 //     entries that contain illegal hostname characters that
00293 //     we can not index into the normal table
00294 //
00295 //    Example: com
00296 //      c maps to 13, o maps to 25, m maps to 23
00297 //
00298 //                             charIndex_el         charIndex_el
00299 //                             -----------         ------------
00300 //                           0 |    |    |         |    |     |
00301 //                           . |    |    |         |    |     |
00302 //    charIndex_el           . |    |    |         |    |     |
00303 //    ----------             . |    |    |         |    |     |
00304 //  0 |   |    |             . |    |    |   |-->23| ptr|  0  |  (ptr is to the
00305 //  . |   |    |   |-------->25| 0  |   -----|     |    |     |   hostBranch for
00306 //  . |   |    |   |         . |    |    |         |    |     |   domain com)
00307 // 13 | 0 |  --|---|         . |    |    |         |    |     |
00308 //  . |   |    |             . |    |    |         |    |     |
00309 //  . |   |    |               |    |    |         |    |     |
00310 //  . |   |    |               |    |    |         |    |     |
00311 //    |   |    |               -----------         -----------|
00312 //    |   |    |
00313 //    |   |    |
00314 //    |   |    |
00315 //    |--------|
00316 //
00317 //
00318 //
00319 class
00320   charIndex
00321 {
00322 public:
00323   charIndex();
00324   ~
00325   charIndex();
00326   void
00327   Insert(const char *match_data, HostBranch * toInsert);
00328   HostBranch *
00329   Lookup(const char *match_data);
00330   HostBranch *
00331   iter_first(charIndexIterState * s);
00332   HostBranch *
00333   iter_next(charIndexIterState * s);
00334 private:
00335   charIndex_el *
00336     root;
00337   InkHashTable *
00338     illegalKey;
00339 };
00340 
00341 charIndex::charIndex():illegalKey(NULL)
00342 {
00343   root = new charIndex_el;
00344 }
00345 
00346 charIndex::~charIndex()
00347 {
00348   InkHashTableIteratorState ht_iter;
00349   InkHashTableEntry *ht_entry = NULL;
00350   HostBranch *tmp;
00351 
00352   delete root;
00353 
00354   // Destroy the illegalKey hashtable if there is one and free
00355   //   up all of its values
00356   if (illegalKey != NULL) {
00357     ht_entry = ink_hash_table_iterator_first(illegalKey, &ht_iter);
00358 
00359     while (ht_entry != NULL) {
00360       tmp = (HostBranch *) ink_hash_table_entry_value(illegalKey, ht_entry);
00361       ink_assert(tmp != NULL);
00362       delete tmp;
00363       ht_entry = ink_hash_table_iterator_next(illegalKey, &ht_iter);
00364     }
00365     ink_hash_table_destroy(illegalKey);
00366   }
00367 }
00368 
00369 // void charIndex::Insert(const char* match_data, HostBranch* toInsert)
00370 //
00371 //   Places a binding for match_data to toInsert into the index
00372 //
00373 void
00374 charIndex::Insert(const char *match_data, HostBranch * toInsert)
00375 {
00376   unsigned char index;
00377   const char *match_start = match_data;
00378   charIndex_el *cur = root;
00379   charIndex_el *next;
00380 
00381   if (*match_data == '\0') {
00382     // Should not happen
00383     ink_assert(0);
00384     return;
00385   }
00386 
00387   while (1) {
00388     index = asciiToTable[(unsigned char) (*match_data)];
00389 
00390     // Check to see if our index into table is for an
00391     //  'illegal' DNS character
00392     if (index == 255) {
00393       // Insert into illgals hash table
00394       if (illegalKey == NULL) {
00395         illegalKey = ink_hash_table_create(InkHashTableKeyType_String);
00396       }
00397       ink_hash_table_insert(illegalKey, (char *) match_start, toInsert);
00398       break;
00399     }
00400 
00401     // Check to see if are at the level we supposed be at
00402     if (*(match_data + 1) == '\0') {
00403       // The slot should always be emtpy, no duplicate
00404       //   keys are allowed
00405       ink_assert(cur->branch_array[index] == NULL);
00406       cur->branch_array[index] = toInsert;
00407       break;
00408     } else {
00409       // We need to find the next level in the table
00410 
00411       next = cur->next_level[index];
00412 
00413       // Check to see if we need to expand the table
00414       if (next == NULL) {
00415         next = new charIndex_el;
00416         cur->next_level[index] = next;
00417       }
00418       cur = next;
00419     }
00420     match_data++;
00421   }
00422 }
00423 
00424 
00425 // HostBranch* charIndex::Lookup(const char* match_data)
00426 //
00427 //  Searches the charIndex on key match_data
00428 //    If there is a binding for match_data, returns a pointer to it
00429 //    otherwise a NULL pointer is returned
00430 //
00431 HostBranch *
00432 charIndex::Lookup(const char *match_data)
00433 {
00434   unsigned char index;
00435   charIndex_el *cur = root;
00436   void *hash_lookup;
00437   const char *match_start = match_data;
00438 
00439   if (root == NULL || *match_data == '\0') {
00440     return NULL;
00441   }
00442 
00443   while (1) {
00444 
00445     index = asciiToTable[(unsigned char) (*match_data)];
00446 
00447     // Check to see if our index into table is for an
00448     //  'illegal' DNS character
00449     if (index == 255) {
00450       if (illegalKey == NULL) {
00451         return NULL;
00452       } else {
00453         if (ink_hash_table_lookup(illegalKey, (char *) match_start, &hash_lookup)) {
00454           return (HostBranch *) hash_lookup;
00455         } else {
00456           return NULL;
00457         }
00458       }
00459     }
00460     // Check to see if we are looking for the next level or
00461     //    a HostBranch
00462     if (*(match_data + 1) == '\0') {
00463       return cur->branch_array[index];
00464     } else {
00465       cur = cur->next_level[index];
00466 
00467       if (cur == NULL) {
00468         return NULL;
00469       }
00470     }
00471 
00472     match_data++;
00473   }
00474 }
00475 
00476 //
00477 // HostBranch* charIndex::iter_next(charIndexIterState* s)
00478 //
00479 //    Initialize iterator state and returns the first element
00480 //     found in the charTable.  If none is found, NULL
00481 //     is returned
00482 //
00483 HostBranch *
00484 charIndex::iter_first(charIndexIterState * s)
00485 {
00486   s->cur_level = 0;
00487   s->cur_index = -1;
00488   s->cur_el = root;
00489 
00490   return iter_next(s);
00491 }
00492 
00493 //
00494 // HostBranch* charIndex::iter_next(charIndexIterState* s)
00495 //
00496 //    Finds the next element in the char index and returns
00497 //      a pointer to it.  If there are no more elements, NULL
00498 //      is returned
00499 //
00500 HostBranch *
00501 charIndex::iter_next(charIndexIterState * s)
00502 {
00503   int index;
00504   charIndex_el *current_el = s->cur_el;
00505   intptr_t level = s->cur_level;
00506   charIndexIterInternal stored_state;
00507   HostBranch *r = NULL;
00508   bool first_element;
00509 
00510   // bool first_element is used to tell if first elemente
00511   //  pointed to by s has already been returned to caller
00512   //  it has unless we are being called from iter_first
00513   if (s->cur_index < 0) {
00514     first_element = false;
00515     index = s->cur_index + 1;
00516   } else {
00517     first_element = true;
00518     index = s->cur_index;
00519   }
00520 
00521   while (1) {
00522 
00523     // Check to see if we need to go back up a level
00524     if (index >= numLegalChars) {
00525       if (level <= 0) {
00526         // No more levels so bail out
00527         break;
00528       } else {
00529         // Go back up to a stored level
00530         stored_state = s->q[level - 1];
00531         ink_assert(stored_state.ptr != NULL);
00532         ink_assert(stored_state.index >= 0);
00533         level--;
00534         current_el = stored_state.ptr;
00535         index = stored_state.index + 1;
00536       }
00537     } else {
00538       // Check to see if there is something to return
00539       //
00540       //  Note: we check for something to return before a descending
00541       //    a level so that when we come back up we will
00542       //    be done with this index
00543       //
00544       if (current_el->branch_array[index] != NULL && first_element == false) {
00545         r = current_el->branch_array[index];
00546         s->cur_level = level;
00547         s->cur_index = index;
00548         s->cur_el = current_el;
00549         break;
00550       } else if (current_el->next_level[index] != NULL) {
00551         // There is a lower level to iterate over, store our
00552         //   current state and descend
00553         stored_state.ptr = current_el;
00554         stored_state.index = index;
00555         s->q(level) = stored_state;
00556         current_el = current_el->next_level[index];
00557         index = 0;
00558         level++;
00559       } else {
00560         // Nothing here so advance to next index
00561         index++;
00562       }
00563     }
00564     first_element = false;
00565   }
00566 
00567   return r;
00568 }
00569 
00570 // class hostArray
00571 //
00572 //   Is a fixed size array for holding HostBrach*
00573 //   Allows only sequential access to data
00574 //
00575 
00576 // Since the only iter state is an index into the
00577 //   array typedef it
00578 typedef int hostArrayIterState;
00579 
00580 class hostArray
00581 {
00582 public:
00583   hostArray();
00584   ~hostArray();
00585   bool Insert(const char *match_data_in, HostBranch * toInsert);
00586   HostBranch *Lookup(const char *match_data_in, bool bNotProcess);
00587   HostBranch *iter_first(hostArrayIterState * s, char **key = NULL);
00588   HostBranch *iter_next(hostArrayIterState * s, char **key = NULL);
00589 private:
00590   int num_el;                   // number of elements currently in the array
00591   HostBranch *branch_array[HOST_ARRAY_MAX];
00592   char *match_data[HOST_ARRAY_MAX];
00593 };
00594 
00595 hostArray::hostArray():num_el(0)
00596 {
00597   memset(branch_array, 0, sizeof(branch_array));
00598   memset(match_data, 0, sizeof(match_data));
00599 }
00600 
00601 hostArray::~hostArray()
00602 {
00603   for (int i = 0; i < num_el; i++) {
00604     ink_assert(branch_array[i] != NULL);
00605     ink_assert(match_data[i] != NULL);
00606     ats_free(match_data[i]);
00607   }
00608 }
00609 
00610 // bool hostArray::Insert(const char* match_data_in, HostBranch* toInsert)
00611 //
00612 //    Places toInsert into the array with key match_data if there
00613 //     is adequate space, in which case true is returned
00614 //    If space is inadequate, false is returned and nothing is inserted
00615 //
00616 bool
00617 hostArray::Insert(const char *match_data_in, HostBranch * toInsert)
00618 {
00619   if (num_el >= HOST_ARRAY_MAX) {
00620     return false;
00621   } else {
00622     branch_array[num_el] = toInsert;
00623     match_data[num_el] = ats_strdup(match_data_in);
00624     num_el++;
00625     return true;
00626   }
00627 }
00628 
00629 // HostBranch* hostArray::Lookup(const char* match_data_in)
00630 //
00631 //   Looks for key match_data_in.  If a binding is found,
00632 //     returns HostBranch* found to the key, otherwise
00633 //     NULL is returned
00634 //
00635 HostBranch *
00636 hostArray::Lookup(const char *match_data_in, bool bNotProcess)
00637 {
00638   HostBranch *r = NULL;
00639   char *pMD;
00640 
00641   for (int i = 0; i < num_el; i++) {
00642     pMD = match_data[i];
00643 
00644     if (bNotProcess && '!' == *pMD) {
00645 
00646       char *cp = ++pMD;
00647       if ('\0' == *cp)
00648         continue;
00649 
00650       if (strcmp(cp, match_data_in) != 0) {
00651         r = branch_array[i];
00652       } else {
00653         continue;
00654       }
00655     }
00656 
00657     else if (strcmp(match_data[i], match_data_in) == 0) {
00658       r = branch_array[i];
00659       break;
00660     }
00661   }
00662   return r;
00663 }
00664 
00665 // HostBranch* hostArray::iter_first(hostArrayIterState* s) {
00666 //
00667 //   Initilizes s and returns the first element or
00668 //     NULL if no elements exist
00669 //
00670 HostBranch *
00671 hostArray::iter_first(hostArrayIterState * s, char **key)
00672 {
00673   *s = -1;
00674   return iter_next(s, key);
00675 }
00676 
00677 // HostBranch* hostArray::iter_next(hostArrayIterState* s) {
00678 //
00679 //    Returns the next element in the hostArray or
00680 //      NULL if none exist
00681 //
00682 HostBranch *
00683 hostArray::iter_next(hostArrayIterState * s, char **key)
00684 {
00685   (*s)++;
00686 
00687   if (*s < num_el) {
00688     if (key != NULL) {
00689       *key = match_data[*s];
00690     }
00691     return branch_array[*s];
00692   } else {
00693     return NULL;
00694   }
00695 }
00696 
00697 // maps enum LeafType to strings
00698 const char *LeafTypeStr[] = {
00699   "Leaf Invalid",
00700   "Host (Partial)",
00701   "Host (Full)",
00702   "Domain (Full)",
00703   "Domain (Partial)"
00704 };
00705 
00706 static int negative_one = -1;
00707 
00708 HostBranch::HostBranch():level(0), type(HOST_TERMINAL), next_level(NULL), leaf_indexs(&negative_one, 1)
00709 {
00710 }
00711 
00712 // HostBranch::~HostBranch()
00713 //
00714 //   Recursive delete all host branches below us
00715 //
00716 HostBranch::~HostBranch()
00717 {
00718 
00719   // Hash Iteration
00720   InkHashTable *ht;
00721   InkHashTableIteratorState ht_iter;
00722   InkHashTableEntry *ht_entry = NULL;
00723 
00724   // charIndex Iteration
00725   charIndexIterState ci_iter;
00726   charIndex *ci;
00727 
00728   // hostArray Iteration
00729   hostArray *ha;
00730   hostArrayIterState ha_iter;
00731 
00732   HostBranch *lower_branch;
00733 
00734   switch (type) {
00735   case HOST_TERMINAL:
00736     ink_assert(next_level == NULL);
00737     break;
00738   case HOST_HASH:
00739     ink_assert(next_level != NULL);
00740     ht = (InkHashTable *) next_level;
00741     ht_entry = ink_hash_table_iterator_first(ht, &ht_iter);
00742 
00743     while (ht_entry != NULL) {
00744       lower_branch = (HostBranch *) ink_hash_table_entry_value(ht, ht_entry);
00745       delete lower_branch;
00746       ht_entry = ink_hash_table_iterator_next(ht, &ht_iter);
00747     }
00748     ink_hash_table_destroy(ht);
00749     break;
00750   case HOST_INDEX:
00751     ink_assert(next_level != NULL);
00752     ci = (charIndex *) next_level;
00753     lower_branch = ci->iter_first(&ci_iter);
00754     while (lower_branch != NULL) {
00755       delete lower_branch;
00756       lower_branch = ci->iter_next(&ci_iter);
00757     }
00758     delete ci;
00759     break;
00760   case HOST_ARRAY:
00761     ink_assert(next_level != NULL);
00762     ha = (hostArray *) next_level;
00763     lower_branch = ha->iter_first(&ha_iter);
00764     while (lower_branch != NULL) {
00765       delete lower_branch;
00766       lower_branch = ha->iter_next(&ha_iter);
00767     }
00768     delete ha;
00769     break;
00770   }
00771 }
00772 
00773 HostLookup::HostLookup(const char *name):
00774 leaf_array(NULL),
00775 array_len(-1),
00776 num_el(-1),
00777 matcher_name(name)
00778 {
00779 
00780   root = new HostBranch;
00781   root->level = 0;
00782   root->type = HOST_TERMINAL;
00783   root->next_level = NULL;
00784 }
00785 
00786 HostLookup::~HostLookup()
00787 {
00788 
00789   if (leaf_array != NULL) {
00790     // Free up the match strings
00791     for (int i = 0; i < num_el; i++) {
00792       ats_free(leaf_array[i].match);
00793     }
00794     delete[]leaf_array;
00795   }
00796 
00797   delete root;
00798 }
00799 
00800 static void
00801 empty_print_fn(void */* opaque_data ATS_UNUSED */)
00802 {
00803 }
00804 
00805 void
00806 HostLookup::Print()
00807 {
00808   Print(empty_print_fn);
00809 }
00810 
00811 void
00812 HostLookup::Print(HostLookupPrintFunc f)
00813 {
00814   PrintHostBranch(root, f);
00815 }
00816 
00817 //
00818 // void HostLookup::PrintHostBranch(HostBranch* hb, HostLookupPrintFunc f)
00819 //
00820 //   Recursively traverse the matching tree rooted at arg hb
00821 //     and print out each element
00822 //
00823 void
00824 HostLookup::PrintHostBranch(HostBranch * hb, HostLookupPrintFunc f)
00825 {
00826 
00827   // Hash iteration
00828   InkHashTable *ht;
00829   InkHashTableIteratorState ht_iter;
00830   InkHashTableEntry *ht_entry = NULL;
00831 
00832   // charIndex Iteration
00833   charIndexIterState ci_iter;
00834   charIndex *ci;
00835 
00836   // hostArray Iteration
00837   hostArray *h_array;
00838   hostArrayIterState ha_iter;
00839 
00840   HostBranch *lower_branch;
00841   intptr_t curIndex;
00842   intptr_t i;                        // Loop var
00843 
00844   for (i = 0; i < hb->leaf_indexs.length(); i++) {
00845     curIndex = hb->leaf_indexs[i];
00846     printf("\t\t%s for %s\n", LeafTypeStr[leaf_array[curIndex].type], leaf_array[curIndex].match);
00847     f(leaf_array[curIndex].opaque_data);
00848   }
00849 
00850   switch (hb->type) {
00851   case HOST_TERMINAL:
00852     ink_assert(hb->next_level == NULL);
00853     break;
00854   case HOST_HASH:
00855     ink_assert(hb->next_level != NULL);
00856     ht = (InkHashTable *) hb->next_level;
00857     ht_entry = ink_hash_table_iterator_first(ht, &ht_iter);
00858 
00859     while (ht_entry != NULL) {
00860       lower_branch = (HostBranch *) ink_hash_table_entry_value(ht, ht_entry);
00861       PrintHostBranch(lower_branch, f);
00862       ht_entry = ink_hash_table_iterator_next(ht, &ht_iter);
00863     }
00864     break;
00865   case HOST_INDEX:
00866     ink_assert(hb->next_level != NULL);
00867     ci = (charIndex *) hb->next_level;
00868     lower_branch = ci->iter_first(&ci_iter);
00869     while (lower_branch != NULL) {
00870       PrintHostBranch(lower_branch, f);
00871       lower_branch = ci->iter_next(&ci_iter);
00872     }
00873     break;
00874   case HOST_ARRAY:
00875     ink_assert(hb->next_level != NULL);
00876     h_array = (hostArray *) hb->next_level;
00877     lower_branch = h_array->iter_first(&ha_iter);
00878     while (lower_branch != NULL) {
00879       PrintHostBranch(lower_branch, f);
00880       lower_branch = h_array->iter_next(&ha_iter);
00881     }
00882     break;
00883   }
00884 }
00885 
00886 //
00887 // HostBranch* HostLookup::TableNewLevel(HostBranch* from, const char* level_data)
00888 //
00889 //    Creates the next level structure in arg from
00890 //       Creates a HostBranch for level_data, inserts it in to the
00891 //         the next_level structure and returns a pointer to the new
00892 //         HostBranch
00893 //
00894 HostBranch *
00895 HostLookup::TableNewLevel(HostBranch * from, const char *level_data)
00896 {
00897   hostArray *new_ha_table;
00898   charIndex *new_ci_table;
00899 
00900   ink_assert(from->type == HOST_TERMINAL);
00901 
00902   // Use the charIndex for high speed matching at the first level of
00903   //   the table.  The first level is short strings, ie: com, edu, jp, fr
00904   if (from->level == 0) {
00905     new_ci_table = new charIndex;
00906     from->type = HOST_INDEX;
00907     from->next_level = new_ci_table;
00908   } else {
00909     new_ha_table = new hostArray;
00910     from->type = HOST_ARRAY;
00911     from->next_level = new_ha_table;
00912   }
00913 
00914   return InsertBranch(from, level_data);
00915 }
00916 
00917 
00918 //
00919 // HostBranch* HostLookup::InsertBranch(HostBranch* insert_to, const char* level_data)
00920 //
00921 //
00922 //    Abstrction to place a new node for level_data below node
00923 //      insert to.  Inserts into any of the data types used by
00924 //      by class HostMatcher
00925 //
00926 HostBranch *
00927 HostLookup::InsertBranch(HostBranch * insert_in, const char *level_data)
00928 {
00929 
00930   // Variables for moving an array into a hash table after it
00931   //   gets too big
00932   //
00933   hostArray *ha;
00934   hostArrayIterState ha_iter;
00935   HostBranch *tmp;
00936   char *key = NULL;
00937   InkHashTable *new_ht;
00938 
00939 
00940   HostBranch *new_branch = new HostBranch;
00941   new_branch->type = HOST_TERMINAL;
00942   new_branch->level = insert_in->level + 1;
00943   new_branch->next_level = NULL;
00944 
00945   switch (insert_in->type) {
00946   case HOST_TERMINAL:
00947     // Should not happen
00948     ink_release_assert(0);
00949     break;
00950   case HOST_HASH:
00951     ink_hash_table_insert((InkHashTable *) insert_in->next_level, (char *) level_data, new_branch);
00952     break;
00953   case HOST_INDEX:
00954     ((charIndex *) insert_in->next_level)->Insert(level_data, new_branch);
00955     break;
00956   case HOST_ARRAY:
00957     if (((hostArray *) insert_in->next_level)->Insert(level_data, new_branch) == false) {
00958 
00959       // The array is out of space, time to move to a hash table
00960       ha = (hostArray *) insert_in->next_level;
00961       new_ht = ink_hash_table_create(InkHashTableKeyType_String);
00962       ink_hash_table_insert(new_ht, (char *) level_data, new_branch);
00963 
00964       // Iterate through the existing elements in the array and
00965       //  stuff them into the hash table
00966       tmp = ha->iter_first(&ha_iter, &key);
00967       ink_assert(tmp != NULL);
00968       while (tmp != NULL) {
00969         ink_assert(key != NULL);
00970         ink_hash_table_insert(new_ht, key, tmp);
00971         tmp = ha->iter_next(&ha_iter, &key);
00972       }
00973 
00974       // Ring out the old, ring in the new
00975       delete ha;
00976       insert_in->next_level = new_ht;
00977       insert_in->type = HOST_HASH;
00978     }
00979     break;
00980 
00981   }
00982 
00983   return new_branch;
00984 }
00985 
00986 // HostBranch* HostLookup::FindNextLevel(HostBranch* from,
00987 //                                       const char* level_data)
00988 //
00989 //   Searches in the branch for  the next level down in the search
00990 //     structure bound to level data
00991 //   If found returns a pointer to the next level HostBranch,
00992 //    otherwise returns NULL
00993 //
00994 HostBranch *
00995 HostLookup::FindNextLevel(HostBranch * from, const char *level_data, bool bNotProcess)
00996 {
00997 
00998   HostBranch *r = NULL;
00999   InkHashTable *hash;
01000   charIndex *ci_table;
01001   hostArray *ha_table;
01002   void *lookup;
01003 
01004   switch (from->type) {
01005   case HOST_TERMINAL:
01006     // Should not happen
01007     ink_assert(0);
01008     return NULL;
01009   case HOST_HASH:
01010     hash = (InkHashTable *) from->next_level;
01011     ink_assert(hash != NULL);
01012     if (ink_hash_table_lookup(hash, (char *) level_data, &lookup)) {
01013       r = (HostBranch *) lookup;
01014     } else {
01015       r = NULL;
01016     }
01017     break;
01018   case HOST_INDEX:
01019     ci_table = (charIndex *) from->next_level;
01020     ink_assert(ci_table != NULL);
01021     r = ci_table->Lookup(level_data);
01022     break;
01023   case HOST_ARRAY:
01024     ha_table = (hostArray *) from->next_level;
01025     ink_assert(ha_table != NULL);
01026     r = ha_table->Lookup(level_data, bNotProcess);
01027     break;
01028   }
01029   return r;
01030 }
01031 
01032 // void HostLookup::TableInsert(const char* match_data, int index)
01033 //
01034 //   Insert the match data, index pair into domain/search table
01035 //
01036 //   arg index is the index into both Data and HostLeaf array for
01037 //    the elements corresponding to match_data
01038 //
01039 void
01040 HostLookup::TableInsert(const char *match_data, int index, bool domain_record)
01041 {
01042   HostBranch *cur = this->root;
01043   HostBranch *next;
01044   char *match_copy = ats_strdup(match_data);
01045   Tokenizer match_tok(".");
01046   int numTok;
01047   int i;
01048 
01049   LowerCaseStr(match_copy);
01050   numTok = match_tok.Initialize(match_copy, SHARE_TOKS);
01051 
01052   // Traverse down the search structure until we either
01053   //       Get beyond the fixed number depth of the host table
01054   //  OR   We reach the level where the match stops
01055   //
01056   for (i = 0; i < HOST_TABLE_DEPTH; i++) {
01057 
01058     // Check to see we need to stop at the current level
01059     if (numTok == cur->level) {
01060       break;
01061     }
01062 
01063     if (cur->next_level == NULL) {
01064       cur = TableNewLevel(cur, match_tok[numTok - i - 1]);
01065     } else {
01066       next = FindNextLevel(cur, match_tok[numTok - i - 1]);
01067       if (next == NULL) {
01068         cur = InsertBranch(cur, match_tok[numTok - i - 1]);
01069       } else {
01070         cur = next;
01071       }
01072     }
01073   }
01074 
01075   // Update the leaf type.  There are three types:
01076   //     HOST_PARTIAL - Indicates that part of the hostname name
01077   //         was not matched by traversin the search structure since
01078   //         it had too elements.  A comparison must be done at the
01079   //         leaf node to make sure we have a match
01080   //     HOST_COMPLETE - Indicates that the entire domain name
01081   //         in the match_data was matched by traversing the search
01082   //         structure, no further comparison is necessary
01083   //
01084   //     DOMAIN_COMPLETE - Indicates that the entire domain name
01085   //         in the match_data was matched by traversing the search
01086   //         structure, no further comparison is necessary
01087   //     DOMAIN_PARTIAL - Indicates that part of the domain name
01088   //         was not matched by traversin the search structure since
01089   //         it had too elements.  A comparison must be done at the
01090   //         leaf node to make sure we have a match
01091   if (domain_record == false) {
01092     if (numTok > HOST_TABLE_DEPTH) {
01093       leaf_array[index].type = HOST_PARTIAL;
01094     } else {
01095       leaf_array[index].type = HOST_COMPLETE;
01096     }
01097   } else {
01098     if (numTok > HOST_TABLE_DEPTH) {
01099       leaf_array[index].type = DOMAIN_PARTIAL;
01100     } else {
01101       leaf_array[index].type = DOMAIN_COMPLETE;
01102     }
01103   }
01104 
01105   // Place the index in to leaf array into the match list for this
01106   //   HOST_BRANCH
01107   cur->leaf_indexs(cur->leaf_indexs.length()) = index;
01108 
01109   ats_free(match_copy);
01110 }
01111 
01112 // bool HostLookup::MatchArray(HostLookupState* s, void**opaque_ptr, DynArray<int>& array,
01113 //                             bool host_done)
01114 //
01115 //  Helper function to iterate throught arg array and update Result
01116 //    for each element in arg array
01117 //
01118 //  host_done should be passed as true if this call represents the all fields
01119 //     in the matched against hostname being consumed.  Example: for www.example.com
01120 //     this would be true for the call matching against the "www", but
01121 //     neither of the prior two fields, "inktomi" and "com"
01122 //
01123 
01124 bool
01125 HostLookup::MatchArray(HostLookupState * s, void **opaque_ptr, DynArray<int>&array, bool host_done)
01126 {
01127   intptr_t index;
01128   intptr_t i;
01129 
01130   for (i = s->array_index + 1; i < array.length(); i++) {
01131     index = array[i];
01132 
01133     switch (leaf_array[index].type) {
01134     case HOST_PARTIAL:
01135       if (hostcmp(s->hostname, leaf_array[index].match) == 0) {
01136         *opaque_ptr = leaf_array[index].opaque_data;
01137         s->array_index = i;
01138         return true;
01139       }
01140       break;
01141     case HOST_COMPLETE:
01142       // We have to have consumed the whole hostname for this to match
01143       //   so that we do not match a rule for "example.com" to
01144       //   "www.example.com
01145       //
01146       if (host_done == true) {
01147         *opaque_ptr = leaf_array[index].opaque_data;
01148         s->array_index = i;
01149         return true;
01150       }
01151       break;
01152     case DOMAIN_PARTIAL:
01153       if (domaincmp(s->hostname, leaf_array[index].match) == false) {
01154         break;
01155       }
01156       // FALL THROUGH
01157     case DOMAIN_COMPLETE:
01158       *opaque_ptr = leaf_array[index].opaque_data;
01159       s->array_index = i;
01160       return true;
01161     case LEAF_INVALID:
01162       // Should not get here
01163       ink_assert(0);
01164       break;
01165     }
01166   }
01167 
01168   s->array_index = i;
01169   return false;
01170 }
01171 
01172 
01173 // bool HostLookup::MatchFirst(const char* host, HostLookupState* s, void** opaque_ptr)
01174 //
01175 //
01176 bool
01177 HostLookup::MatchFirst(const char *host, HostLookupState * s, void **opaque_ptr)
01178 {
01179   char *last_dot = NULL;
01180 
01181   s->cur = root;
01182   s->table_level = 0;
01183   s->array_index = -1;
01184   s->hostname = host ? host : "";
01185   s->host_copy = ats_strdup(s->hostname);
01186   LowerCaseStr(s->host_copy);
01187 
01188   // Find the top level domain in the host copy
01189   s->host_copy_next = s->host_copy;
01190   while (*s->host_copy_next != '\0') {
01191     if (*s->host_copy_next == '.') {
01192       last_dot = s->host_copy_next;
01193     }
01194     s->host_copy_next++;
01195   }
01196 
01197   if (last_dot == NULL) {
01198     // Must be an unqualified hostname, no dots
01199     s->host_copy_next = s->host_copy;
01200   } else {
01201     s->host_copy_next = last_dot + 1;
01202   }
01203 
01204   return MatchNext(s, opaque_ptr);
01205 }
01206 
01207 // bool HostLookup::MatchNext(HostLookupState* s, void** opaque_ptr)
01208 //
01209 //  Searches our tree and updates argresult for each element matching
01210 //    arg hostname
01211 //
01212 bool
01213 HostLookup::MatchNext(HostLookupState * s, void **opaque_ptr)
01214 {
01215   HostBranch *cur = s->cur;
01216 
01217   // Check to see if there is any work to be done
01218   if (num_el <= 0) {
01219     return false;
01220   }
01221 
01222   while (s->table_level <= HOST_TABLE_DEPTH) {
01223 
01224     if (MatchArray(s, opaque_ptr, cur->leaf_indexs, (s->host_copy_next == NULL))) {
01225       return true;
01226     }
01227     // Check to see if we run out of tokens in the hostname
01228     if (s->host_copy_next == NULL) {
01229       break;
01230     }
01231     // Check to see if there are any lower levels
01232     if (cur->type == HOST_TERMINAL) {
01233       break;
01234     }
01235 
01236     cur = FindNextLevel(cur, s->host_copy_next, true);
01237 
01238     if (cur == NULL) {
01239       break;
01240     } else {
01241       s->cur = cur;
01242       s->array_index = -1;
01243       s->table_level++;
01244 
01245       // Find the next part of the hostname to process
01246       if (s->host_copy_next <= s->host_copy) {
01247         // Nothing left
01248         s->host_copy_next = NULL;
01249       } else {
01250 
01251         // Back up to period ahead of us and axe it
01252         s->host_copy_next--;
01253         ink_assert(*s->host_copy_next == '.');
01254         *s->host_copy_next = '\0';
01255 
01256         s->host_copy_next--;
01257 
01258         while (1) {
01259 
01260           if (s->host_copy_next <= s->host_copy) {
01261             s->host_copy_next = s->host_copy;
01262             break;
01263           }
01264           // Check for our stop.  If we hit a period, we want
01265           //  the our portion of the hostname starts one character
01266           //  after it
01267           if (*s->host_copy_next == '.') {
01268             s->host_copy_next++;
01269             break;
01270           }
01271 
01272           s->host_copy_next--;
01273         }
01274       }
01275     }
01276   }
01277 
01278   return false;
01279 }
01280 
01281 // void HostLookup::AllocateSpace(int num_entries)
01282 //
01283 //   Allocate the leaf array structure
01284 //
01285 void
01286 HostLookup::AllocateSpace(int num_entries)
01287 {
01288   // Should not have been allocated before
01289   ink_assert(array_len == -1);
01290 
01291   leaf_array = new HostLeaf[num_entries];
01292   memset(leaf_array, 0, sizeof(HostLeaf) * num_entries);
01293 
01294   array_len = num_entries;
01295   num_el = 0;
01296 }
01297 
01298 // void HostLookup::NewEntry(const char* match_data, bool domain_record, void* opaque_data_in)
01299 //
01300 //   Insert a new element in to the table
01301 //
01302 void
01303 HostLookup::NewEntry(const char *match_data, bool domain_record, void *opaque_data_in)
01304 {
01305 
01306   // Make sure space has been allocated
01307   ink_assert(num_el >= 0);
01308   ink_assert(array_len >= 0);
01309 
01310   // Make sure we do not overrun the array;
01311   ink_assert(num_el < array_len);
01312 
01313   leaf_array[num_el].match = ats_strdup(match_data);
01314   leaf_array[num_el].opaque_data = opaque_data_in;
01315 
01316   if ('!' != *(leaf_array[num_el].match)) {
01317     leaf_array[num_el].len = strlen(match_data);
01318     leaf_array[num_el].isNot = false;
01319   } else {
01320     leaf_array[num_el].len = strlen(match_data) - 1;
01321     leaf_array[num_el].isNot = true;
01322   }
01323 
01324   TableInsert(match_data, num_el, domain_record);
01325   num_el++;
01326 }

Generated by  doxygen 1.7.1