• Main Page
  • Related Pages
  • Namespaces
  • Data Structures
  • Files
  • File List
  • Globals

ControlMatcher.h

Go to the documentation of this file.
00001 /** @file
00002 
00003   A brief file description
00004 
00005   @section license License
00006 
00007   Licensed to the Apache Software Foundation (ASF) under one
00008   or more contributor license agreements.  See the NOTICE file
00009   distributed with this work for additional information
00010   regarding copyright ownership.  The ASF licenses this file
00011   to you under the Apache License, Version 2.0 (the
00012   "License"); you may not use this file except in compliance
00013   with the License.  You may obtain a copy of the License at
00014 
00015       http://www.apache.org/licenses/LICENSE-2.0
00016 
00017   Unless required by applicable law or agreed to in writing, software
00018   distributed under the License is distributed on an "AS IS" BASIS,
00019   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00020   See the License for the specific language governing permissions and
00021   limitations under the License.
00022  */
00023 
00024 /*****************************************************************************
00025  *
00026  *  ControlMatcher.h - Interface to general purpose matcher
00027  *
00028  *
00029  *
00030  *
00031  *  Description:
00032  *
00033  *     The control matcher module provides the ability to lookup arbitrary
00034  *  information specific to a URL and IP address.  The outside
00035  *  world only sees the ControlMatcher class which parses the relevant
00036  *  configuration file and builds the lookup table
00037  *
00038  *     Four types of matches are supported: hostname, domain name, ip address
00039  *  and URL regex.  For these four types, three lookup tables are used.  Regex and
00040  *  ip lookups have their own tables and host and domain lookups share a single
00041  *  table
00042  *
00043  *  Module Purpose & Specifications
00044  *  -------------------------------
00045  *   -  to provide a generic mechanism for matching configuration data
00046  *       against hostname, domain name, ip address and URL regex
00047  *   -  the generic mechanism should require minimum effort to apply it
00048  *       to new features that require per request matching
00049  *   -  for the mechanism to be efficient such that lookups against
00050  *       the tables are not a performance problem when they are both done
00051  *       for every request through the proxy and the set of entries to
00052  *       match against is very large
00053  *
00054  *  Lookup Table Descriptions
00055  *  -------------------------
00056  *
00057  *   regex table - implemented as a linear list of regular expressions to
00058  *       match against
00059  *
00060  *   host/domain table - The host domain table is logically implemented as
00061  *       a tree, broken up at each partition in a hostname.  Three mechanisms
00062  *       are used to move from one level to the next: a hash table, a fixed
00063  *       sized array and a constant time index (class charIndex).  The constant
00064  *       time index is only used to go from the root domain to the first
00065  *       level partition (ie: .com). The fixed array is used for subsequent
00066  *       partitions until the fan out exceeds the array's fixed size at which
00067  *       time, the fixed array is converted to a hash table
00068  *
00069  *   ip table - supports ip ranges.  A single ip address is treated as
00070  *       a range with the same beginning and end address.  The table
00071  *       is divided up into a fixed number of levels, indexed on 8 bit
00072  *       boundaries, starting at the high bit of the address.  Subsequent
00073  *       levels are allocated only when needed.
00074  *
00075  ****************************************************************************/
00076 
00077 //
00078 // IMPORTANT: Instantiating these templates
00079 //
00080 //    The Implementation for these templates appears in
00081 //     ControlMatcher.cc   To get the templates instantiated
00082 //     correctly on all compilers new uses MUST explicitly
00083 //     instantiate the new instance at the bottom of
00084 //     ControlMatcher.cc
00085 //
00086 
00087 #ifndef _CONTROL_MATCHER_H_
00088 #define _CONTROL_MATCHER_H_
00089 
00090 #ifdef HAVE_PCRE_PCRE_H
00091 #include <pcre/pcre.h>
00092 #else
00093 #include <pcre.h>
00094 #endif
00095 
00096 #ifdef HAVE_CTYPE_H
00097 #include <ctype.h>
00098 #endif
00099 
00100 #include "DynArray.h"
00101 #include <ts/IpMap.h>
00102 
00103 #include "ink_defs.h"
00104 #include "HTTP.h"
00105 #include "ink_apidefs.h"
00106 
00107 class HostLookup;
00108 struct _HttpApiInfo;
00109 struct matcher_line;
00110 struct matcher_tags;
00111 
00112 struct RequestData        // Abstract interface: supplies the lookup keys that the matchers' Match() methods receive
00113 {
00114 public:
00115   // First three are the lookup keys to the tables
00116   //  get_ip() can be either client_ip or server_ip
00117   //  depending on how the module user wants to key
00118   //  the table
00119   virtual ~ RequestData()       // virtual so an implementation can be destroyed through a RequestData pointer
00120   {
00121   }
00122   virtual char *get_string() = 0;       // string lookup key; NOTE(review): ownership of the returned buffer is not visible here
00123   virtual const char *get_host() = 0;   // host name lookup key
00124   virtual sockaddr const* get_ip() = 0; // address lookup key (client or server side, see above)
00125 
00126   virtual sockaddr const* get_client_ip() = 0;  // client address; presumably independent of how get_ip() is keyed - confirm
00127 
00128   enum RD_Type                  // tag type returned by data_type()
00129   {
00130     RD_NULL,                    // value returned by the default data_type() below
00131     RD_HTTP,
00132     RD_CONGEST_ENTRY
00133   };
00134 
00135   virtual RD_Type data_type(void) { return RD_NULL; }   // default tag; overridable, but HttpRequestData does not redeclare it in this header
00136 };
00137 
00138 class HttpRequestData:public RequestData  // RequestData implementation whose fields describe one HTTP request
00139 {
00140 public:
00141   inkcoreapi char *get_string();                // the four key accessors are only declared here;
00142   inkcoreapi const char *get_host();            //  their definitions are not in this header
00143   inkcoreapi sockaddr const* get_ip();
00144   inkcoreapi sockaddr const* get_client_ip();
00145 
00146   HttpRequestData()             // starts empty: pointers NULL, xact_start and incoming_port 0, both endpoints zero-filled
00147     : hdr(NULL), hostname_str(NULL), api_info(NULL), xact_start(0), incoming_port(0), tag(NULL)
00148   { 
00149     ink_zero(src_ip);
00150     ink_zero(dest_ip);
00151   }
00152 
00153   HTTPHdr *hdr;                 // HTTP header; NOTE(review): ownership is not visible here (no destructor is declared)
00154   char *hostname_str;           // presumably the host name behind get_host() - confirm against the accessor definitions
00155   _HttpApiInfo *api_info;       // only forward-declared in this header
00156   time_t xact_start;            // presumably the transaction start time - confirm
00157   IpEndpoint src_ip;            // zero-filled by the constructor
00158   IpEndpoint dest_ip;           // zero-filled by the constructor
00159   uint16_t incoming_port;
00160   char *tag;
00161 };
00162 
00163 
00164 template<class Data, class Result> class UrlMatcher {     // url-string table; member functions are implemented in ControlMatcher.cc
00165 public:
00166   UrlMatcher(const char *name, const char *filename);     // presumably kept as matcher_name / file_name for messages - confirm
00167   ~UrlMatcher();
00168   void Match(RequestData * rdata, Result * result);       // looks rdata up; presumably reports through *result - confirm
00169   void AllocateSpace(int num_entries);                    // presumably sizes the arrays below for num_entries entries - confirm
00170   char *NewEntry(matcher_line * line_info);               // NOTE(review): char* return is presumably an error message, NULL on success - confirm
00171   void Print();
00172 
00173   int getNumElements() { return num_el; }
00174   Data *getDataArray() { return data_array; }
00175 
00176 protected:
00177   InkHashTable *url_ht;         // hash table; presumably maps a url string to its slot - confirm
00178   char **url_str;                // array of url strings
00179   int  *url_value;                // array of positions of the url strings
00180   Data *data_array;             // data array.  Corresponds to url_str
00181   int array_len;                // length of the arrays (all three are the same length)
00182   int num_el;                   // number of elements in the table
00183   const char *matcher_name;     // Used for Debug/Warning/Error messages
00184   const char *file_name;        // Used for Debug/Warning/Error messages
00185 };
00186 
00187 
00188 template<class Data, class Result> class RegexMatcher {   // regex table; member functions are implemented in ControlMatcher.cc
00189 public:
00190   RegexMatcher(const char *name, const char *filename);   // presumably kept as matcher_name / file_name for messages - confirm
00191   ~RegexMatcher();
00192   void Match(RequestData * rdata, Result * result);       // not virtual: HostRegexMatcher::Match hides it rather than overriding it
00193   void AllocateSpace(int num_entries);                    // presumably sizes the arrays below for num_entries entries - confirm
00194   char *NewEntry(matcher_line * line_info);               // NOTE(review): char* return is presumably an error message, NULL on success - confirm
00195   void Print();
00196 
00197   int getNumElements() { return num_el; }
00198   Data *getDataArray() { return data_array; }
00199 
00200 protected:
00201   pcre** re_array;              // array of compiled regexes
00202   char **re_str;                // array of uncompiled regex strings
00203   Data *data_array;             // data array.  Corresponds to re_array
00204   int array_len;                // length of the arrays (all three are the same length)
00205   int num_el;                   // number of elements in the table
00206   const char *matcher_name;     // Used for Debug/Warning/Error messages
00207   const char *file_name;        // Used for Debug/Warning/Error messages
00208 };
00209 
00210 template<class Data, class Result> class HostRegexMatcher:public RegexMatcher<Data, Result> {     // reuses RegexMatcher's storage and entry handling
00211 public:
00212   HostRegexMatcher(const char *name, const char *filename);
00213   void Match(RequestData * rdata, Result * result);       // hides the non-virtual RegexMatcher::Match; presumably matches on the host - confirm
00214 };
00215 
00216 template<class Data, class Result> class HostMatcher {    // host/domain table; member functions are implemented in ControlMatcher.cc
00217 public:
00218   HostMatcher(const char *name, const char *filename);    // presumably kept as matcher_name / file_name for messages - confirm
00219   ~HostMatcher();
00220   void Match(RequestData * rdata, Result * result);
00221   void AllocateSpace(int num_entries);                    // presumably sizes data_array for num_entries entries - confirm
00222   char *NewEntry(matcher_line * line_info);               // NOTE(review): char* return is presumably an error message, NULL on success - confirm
00223   void Print();
00224 
00225   int getNumElements() { return num_el; }
00226   Data *getDataArray() { return data_array; }
00227   HostLookup *getHLookup() { return host_lookup; }
00228 
00229 private:
00230   static void PrintFunc(void *opaque_data);       // static, takes an untyped pointer; presumably a per-item print callback - confirm
00231   HostLookup *host_lookup;      // Data structure to do the lookups
00232   Data *data_array;             // array of all data items
00233   int array_len;                // the length of the arrays
00234   int num_el;                   // the number of items in the tree
00235   const char *matcher_name;     // Used for Debug/Warning/Error messages
00236   const char *file_name;        // Used for Debug/Warning/Error messages
00237 };
00238 
00239 template<class Data, class Result> class IpMatcher {      // ip table; note every member, data included, is public (no other access specifier follows)
00240 public:
00241   IpMatcher(const char *name, const char *filename);      // presumably kept as matcher_name / file_name for messages - confirm
00242   ~IpMatcher();
00243   void Match(sockaddr const* ip_addr, RequestData * rdata, Result * result);      // unlike the other matchers, the address is passed explicitly
00244   void AllocateSpace(int num_entries);                    // presumably sizes data_array for num_entries entries - confirm
00245   char *NewEntry(matcher_line * line_info);               // NOTE(review): char* return is presumably an error message, NULL on success - confirm
00246   void Print();
00247 
00248   int getNumElements() { return num_el; }
00249   Data *getDataArray() { return data_array; }
00250 
00251   static void PrintFunc(void *opaque_data);       // static, takes an untyped pointer; presumably a per-item print callback - confirm
00252   IpMap ip_map;                 // Data structure to do lookups
00253   Data *data_array;             // array of the data elements within the table
00254   int array_len;                // size of the arrays
00255   int num_el;                   // number of elements in the table
00256   const char *matcher_name;     // Used for Debug/Warning/Error messages
00257   const char *file_name;        // Used for Debug/Warning/Error messages
00258 };
00259 
00260 
00261 #define ALLOW_HOST_TABLE   (1 << 0)       // ControlMatcher flag bits.  Each value is parenthesized so it
00262 #define ALLOW_IP_TABLE     (1 << 1)       //  expands as a single operand next to any operator
00263 #define ALLOW_REGEX_TABLE  (1 << 2)       //  (e.g. ALLOW_REGEX_TABLE + 1 is 5, not 1 << 3)
00264 #define ALLOW_HOST_REGEX_TABLE (1 << 3)
00265 #define ALLOW_URL_TABLE (1 << 4)
00266 #define DONT_BUILD_TABLE     (1 << 5)     // for testing
00267 
00268 template<class Data, class Result> class ControlMatcher { // front end owning one pointer per table type; implemented in ControlMatcher.cc
00269 public:
00270   // Parameter name must not be deallocated before this
00271   //  object is
00272   ControlMatcher(const char *file_var, const char *name, const matcher_tags * tags,
00273                  int flags_in = (ALLOW_HOST_TABLE | ALLOW_IP_TABLE | ALLOW_REGEX_TABLE |   // default: all five ALLOW_* bits set,
00274                                  ALLOW_HOST_REGEX_TABLE | ALLOW_URL_TABLE));               //  DONT_BUILD_TABLE clear
00275   ~ControlMatcher();
00276   int BuildTable();                     // NOTE(review): meaning of the int return is not visible here - confirm in ControlMatcher.cc
00277   int BuildTableFromString(char *str);  // takes non-const text; NOTE(review): whether str is modified is not visible here
00278   void Match(RequestData * rdata, Result * result);
00279   void Print();
00280 
00281   int getEntryCount() { return m_numEntries; }
00282   HostMatcher<Data, Result> *getHostMatcher() { return hostMatch; }
00283   RegexMatcher<Data, Result> *getReMatcher() { return reMatch; }
00284   UrlMatcher<Data, Result> *getUrlMatcher() { return urlMatch; }
00285   IpMatcher<Data, Result> *getIPMatcher() { return ipMatch; }
00286   HostRegexMatcher<Data, Result> *getHrMatcher() { return hrMatch; }
00287 
00288   //private:  (specifier is commented out, so every member below is public)
00289   RegexMatcher<Data, Result> *reMatch;
00290   UrlMatcher<Data, Result> *urlMatch;
00291   HostMatcher<Data, Result> *hostMatch;
00292   IpMatcher<Data, Result> *ipMatch;
00293   HostRegexMatcher<Data, Result> *hrMatch;
00294   const matcher_tags *config_tags;
00295   char config_file_path[PATH_NAME_MAX];   // fixed-size buffer; presumably the path resolved from file_var - confirm
00296   int flags;                    // presumably a copy of the constructor's flags_in - confirm
00297   int m_numEntries;             // value returned by getEntryCount()
00298   const char *matcher_name;     // Used for Debug/Warning/Error messages
00299 };
00300 
00301 #endif /* _CONTROL_MATCHER_H_ */

Generated by  doxygen 1.7.1