• Main Page
  • Related Pages
  • Namespaces
  • Data Structures
  • Files
  • File List
  • Globals

LogFilter.h

Go to the documentation of this file.
00001 /** @file
00002 
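00003   Declarations of the log filter classes: the abstract LogFilter base, its string, integer and IP specializations, and LogFilterList.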
00004 
00005   @section license License
00006 
00007   Licensed to the Apache Software Foundation (ASF) under one
00008   or more contributor license agreements.  See the NOTICE file
00009   distributed with this work for additional information
00010   regarding copyright ownership.  The ASF licenses this file
00011   to you under the Apache License, Version 2.0 (the
00012   "License"); you may not use this file except in compliance
00013   with the License.  You may obtain a copy of the License at
00014 
00015       http://www.apache.org/licenses/LICENSE-2.0
00016 
00017   Unless required by applicable law or agreed to in writing, software
00018   distributed under the License is distributed on an "AS IS" BASIS,
00019   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00020   See the License for the specific language governing permissions and
00021   limitations under the License.
00022  */
00023 
00024 
00025 #ifndef LOG_FILTER_H
00026 #define LOG_FILTER_H
00027 
00028 #include "libts.h"
00029 #include "IpMap.h"
00030 #include "LogAccess.h"
00031 #include "LogField.h"
00032 #include "LogFormat.h"
00033 
00034 /*-------------------------------------------------------------------------
00035   LogFilter
00036 
00037   This is an abstract base class from which particular filters can be
00038   derived.  Each filter must implement the "toss_this_entry" member
00039   function which, given a LogAccess object, returns true if
00040   the log entry is to be tossed out.
00041   -------------------------------------------------------------------------*/
00042 class LogFilter
00043 {
00044 public:
00045   enum Type
00046   {
00047     INT_FILTER = 0,
00048     STRING_FILTER,
00049     IP_FILTER,
00050     N_TYPES
00051   };
00052 
00053   enum Action
00054   {
00055     REJECT = 0,
00056     ACCEPT,
00057     WIPE_FIELD_VALUE,
00058     N_ACTIONS
00059   };
00060   static const char *ACTION_NAME[];
00061 
00062   // all operators "positive" (i.e., there is no NOMATCH operator anymore)
00063   // because one can specify through the "action" field if the record should
00064   // be kept or tossed away
00065   //
00066   enum Operator
00067   {
00068     MATCH = 0,
00069     CASE_INSENSITIVE_MATCH,
00070     CONTAIN,
00071     CASE_INSENSITIVE_CONTAIN,
00072     N_OPERATORS
00073   };
00074   static const char *OPERATOR_NAME[];
00075 
00076   LogFilter(const char *name, LogField * field, Action action, Operator oper);
00077   virtual ~LogFilter();
00078 
00079   char *name() const { return m_name; }
00080   Type type() const { return m_type; }
00081   size_t get_num_values() const { return m_num_values; };
00082 
00083   virtual bool toss_this_entry(LogAccess * lad) = 0;
00084   virtual bool wipe_this_entry(LogAccess * lad) = 0;
00085   virtual void display(FILE * fd = stdout) = 0;
00086   virtual void display_as_XML(FILE * fd = stdout) = 0;
00087 
00088   void reverse() { m_action = (m_action == REJECT ? ACCEPT : REJECT); }
00089 
00090 protected:
00091   char *m_name;
00092   LogField *m_field;
00093   Action m_action;              // the action this filter takes
00094   Operator m_operator;
00095   Type m_type;
00096   size_t m_num_values;          // the number of comparison values
00097 
00098 public:
00099   LINK(LogFilter, link);      // so we can create a LogFilterList
00100 
00101 private:
00102   // -- member functions that are not allowed --
00103   LogFilter();
00104   LogFilter(const LogFilter & rhs);
00105   LogFilter & operator=(LogFilter & rhs);
00106 };
00107 
00108 /*-------------------------------------------------------------------------
00109   LogFilterString
00110 
00111   Filter for string fields.
00112   -------------------------------------------------------------------------*/
00113 class LogFilterString:public LogFilter
00114 {
00115 public:
00116   LogFilterString(const char *name, LogField * field, Action a, Operator o, char *value);
00117   LogFilterString(const char *name, LogField * field, Action a, Operator o, size_t num_values, char **value);
00118   LogFilterString(const LogFilterString & rhs);
00119   ~LogFilterString();
00120   bool operator==(LogFilterString & rhs);
00121 
00122   bool toss_this_entry(LogAccess * lad);
00123   bool wipe_this_entry(LogAccess * lad);
00124   void display(FILE * fd = stdout);
00125   void display_as_XML(FILE * fd = stdout);
00126 
00127 private:
00128   char **m_value;               // the array of values
00129 
00130   // these are used to speed up case insensitive operations
00131   //
00132   char **m_value_uppercase;     // m_value in all uppercase
00133   size_t *m_length;             // length of m_value string
00134 
00135   void _setValues(size_t n, char **value);
00136 
00137   // note: OperatorFunction's must return 0 (zero) if condition is satisfied
00138   // (as strcmp does)
00139   typedef int (*OperatorFunction) (const char *, const char *);
00140 
00141   static int _isSubstring(const char *s0, const char *s1)
00142   {
00143     // return 0 if s1 is substring of s0 and 1 otherwise
00144     // this reverse behavior is to conform to the behavior of strcmp
00145     // which returns 0 if strings match
00146     return (strstr(s0, s1) == NULL ? 1 : 0);
00147   };
00148 
00149   enum LengthCondition
00150   {
00151     DATA_LENGTH_EQUAL = 0,
00152     DATA_LENGTH_LARGER
00153   };
00154 
00155   inline bool _checkCondition(OperatorFunction f,
00156                               const char *field_value, size_t field_value_length, char **val, LengthCondition lc);
00157 
00158   inline bool _checkConditionAndWipe(OperatorFunction f, char **field_value, size_t field_value_length, char **val,
00159                                      LengthCondition lc);
00160 
00161   // -- member functions that are not allowed --
00162   LogFilterString();
00163   LogFilterString & operator=(LogFilterString & rhs);
00164 };
00165 
00166 /*-------------------------------------------------------------------------
00167   LogFilterInt
00168 
00169   Filter for int fields.
00170   -------------------------------------------------------------------------*/
00171 class LogFilterInt:public LogFilter
00172 {
00173 public:
00174   LogFilterInt(const char *name, LogField * field, Action a, Operator o, int64_t value);
00175     LogFilterInt(const char *name, LogField * field, Action a, Operator o, size_t num_values, int64_t *value);
00176     LogFilterInt(const char *name, LogField * field, Action a, Operator o, char *values);
00177     LogFilterInt(const LogFilterInt & rhs);
00178    ~LogFilterInt();
00179   bool operator==(LogFilterInt & rhs);
00180 
00181   bool toss_this_entry(LogAccess * lad);
00182   bool wipe_this_entry(LogAccess * lad);
00183   void display(FILE * fd = stdout);
00184   void display_as_XML(FILE * fd = stdout);
00185 
00186 private:
00187   int64_t *m_value;            // the array of values
00188 
00189   void _setValues(size_t n, int64_t *value);
00190   int _convertStringToInt(char *val, int64_t *ival, LogFieldAliasMap * map);
00191 
00192   // -- member functions that are not allowed --
00193   LogFilterInt();
00194   LogFilterInt & operator=(LogFilterInt & rhs);
00195 };
00196 
00197 /*-------------------------------------------------------------------------
00198   LogFilterIP
00199   
00200   Filter for IP fields using IpAddr.
00201   -------------------------------------------------------------------------*/
00202 class LogFilterIP:public LogFilter
00203 {
00204 public:
00205   LogFilterIP(const char *name, LogField * field, Action a, Operator o, IpAddr value);
00206   LogFilterIP(const char *name, LogField * field, Action a, Operator o, size_t num_values,  IpAddr* value);
00207   LogFilterIP(const char *name, LogField * field, Action a, Operator o, char *values);
00208   LogFilterIP(const LogFilterIP & rhs);
00209   ~LogFilterIP();
00210 
00211   bool operator==(LogFilterIP & rhs);
00212 
00213   virtual bool toss_this_entry(LogAccess * lad);
00214   virtual bool wipe_this_entry(LogAccess * lad);
00215   void display(FILE * fd = stdout);
00216   void display_as_XML(FILE * fd = stdout);
00217 
00218 private:
00219   IpMap m_map;
00220 
00221   /// Initialization common to all constructors.
00222   void init();
00223 
00224   void displayRanges(FILE* fd);
00225   void displayRange(FILE* fd, IpMap::iterator const& iter);
00226 
00227   // Checks for a match on this filter.
00228   bool is_match(LogAccess* lad);
00229   
00230   // -- member functions that are not allowed --
00231   LogFilterIP();
00232   LogFilterIP & operator=(LogFilterIP & rhs);
00233 };
00234 
00235 bool filters_are_equal(LogFilter * filt1, LogFilter * filt2);
00236 
00237 
00238 /*-------------------------------------------------------------------------
00239   LogFilterList
00240   -------------------------------------------------------------------------*/
00241 class LogFilterList
00242 {
00243 public:
00244   LogFilterList();
00245   ~LogFilterList();
00246   bool operator==(LogFilterList &);
00247 
00248   void add(LogFilter * filter, bool copy = true);
00249   bool toss_this_entry(LogAccess * lad);
00250   bool wipe_this_entry(LogAccess * lad);
00251   LogFilter *find_by_name(char *name);
00252   void clear();
00253 
00254   LogFilter *first() const { return m_filter_list.head; }
00255   LogFilter *next(LogFilter * here) const { return (here->link).next; }
00256 
00257   unsigned count();
00258   void display(FILE * fd = stdout);
00259   void display_as_XML(FILE * fd = stdout);
00260 
00261   bool does_conjunction() const { return m_does_conjunction;  };
00262   void set_conjunction(bool c) { m_does_conjunction = c;  };
00263 
00264 private:
00265   Queue<LogFilter> m_filter_list;
00266 
00267   bool m_does_conjunction;
00268   // If m_does_conjunction = true
00269   // toss_this_entry returns true
00270   // if ANY filter tosses entry away.
00271   // If m_does_conjunction = false,
00272   // toss this entry returns true if
00273   // ALL filters toss away entry
00274 
00275   // -- member functions that are not allowed --
00276   LogFilterList(const LogFilterList & rhs);
00277   LogFilterList & operator=(const LogFilterList & rhs);
00278 };
00279 
00280 
00281 /*-------------------------------------------------------------------------
00282   Inline functions
00283   -------------------------------------------------------------------------*/
00284 
00285 /*-------------------------------------------------------------------------
00286   _checkCondition
00287 
00288   check all values for a matching condition
00289 
00290   the arguments to the function are:
00291 
00292   - a function f of type OperatorFunction that determines if the
00293     condition is true for a single filter value. Note that this function
00294     must return 0 if the condition is true.
00295   - the value of the field from the log record
00296   - the length of this field
00297   - the array of filter values to compare to note that we pass this as an
00298     argument because it can be either m_value or m_value_uppercase
00299   - a LengthCondition argument that determines if the length of the field value
00300     must be equal or larger to the length of the filter value (this is to
00301     compare strings only if really needed
00302     ------------------------------------------------------------------------*/
00303 
00304 inline bool
00305 LogFilterString::_checkCondition(OperatorFunction f,
00306                                  const char *field_value, size_t field_value_length, char **val, LengthCondition lc)
00307 {
00308   bool retVal = false;
00309 
00310   // make single value case a little bit faster by taking it out of loop
00311   //
00312   if (m_num_values == 1) {
00313     switch (lc) {
00314     case DATA_LENGTH_EQUAL:
00315       retVal = (field_value_length == *m_length ? ((*f) (field_value, *val) == 0 ? true : false) : false);
00316       break;
00317     case DATA_LENGTH_LARGER:
00318       retVal = (field_value_length > *m_length ? ((*f) (field_value, *val) == 0 ? true : false) : false);
00319       break;
00320     default:
00321       ink_assert(!"LogFilterString::checkCondition " "unknown LengthCondition");
00322     }
00323   } else {
00324     size_t i;
00325     switch (lc) {
00326     case DATA_LENGTH_EQUAL:
00327       for (i = 0; i < m_num_values; ++i) {
00328         // condition is satisfied if f returns zero
00329         if (field_value_length == m_length[i] && (*f) (field_value, val[i]) == 0) {
00330           retVal = true;
00331           break;
00332         }
00333       }
00334       break;
00335     case DATA_LENGTH_LARGER:
00336       for (i = 0; i < m_num_values; ++i) {
00337         // condition is satisfied if f returns zero
00338         if (field_value_length > m_length[i] && (*f) (field_value, val[i]) == 0) {
00339           retVal = true;
00340           break;
00341         }
00342       }
00343       break;
00344     default:
00345       ink_assert(!"LogFilterString::checkCondition " "unknown LengthCondition");
00346     }
00347   }
00348   return retVal;
00349 }
00350 
/*---------------------------------------------------------------------------
  wipeField : overwrite, in place, the value that follows the first
  occurrence of "field" in the query string of the buffer *dest.

  - dest:  address of the NUL-terminated buffer to modify.  Nothing happens
           if dest or *dest is NULL.
  - field: text to look for.  Nothing happens if it is NULL.

  Only the part of the buffer starting at the first '?' is searched; a
  buffer without '?' is left untouched.  "field" is located with a plain
  substring search, so it is not anchored to a parameter name.  The wiped
  region starts right after the first '=' found at or after the match and
  runs up to (not including) the next '&', or to the end of the string if
  there is none.  Every character of that region is replaced with 'X', so
  the length of the buffer never changes.  If no '=' follows the match the
  buffer is left untouched.

  The overwrite is done directly in the caller's buffer; no temporary copy
  is made, so the stack use does not depend on the length of the input.
--------------------------------------------------------------------------*/
static inline void
wipeField(char **dest, char *field)
{
  if (!dest || !*dest || !field) {
    return;
  }

  char *buf_dest = *dest;

  // restrict the search to the query string
  char *query_param = strchr(buf_dest, '?');
  if (!query_param) {
    return;
  }

  char *match = strstr(query_param, field);
  if (!match) {
    return;
  }

  char *equals = strchr(match, '=');
  if (!equals) {
    return;
  }

  // the value runs from just past '=' to the next '&' or the terminator
  char *value_begin = equals + 1;
  char *value_end = strchr(value_begin, '&');
  if (!value_end) {
    value_end = value_begin + strlen(value_begin);
  }

  memset(value_begin, 'X', (size_t) (value_end - value_begin));
}
00399 
00400 /*-------------------------------------------------------------------------
00401   _checkConditionAndWipe
00402 
00403   check all values for a matching condition and perform wipe action
00404 
00405   the arguments to the function are:
00406 
00407   - a function f of type OperatorFunction that determines if the
00408     condition is true for a single filter value. Note that this function
00409     must return 0 if the condition is true.
00410   - the value of the field from the log record
00411   - the length of this field
00412   - the array of filter values to compare to note that we pass this as an
00413     argument because it can be either m_value or m_value_uppercase
00414   - a LengthCondition argument that determines if the length of the field value
00415     must be equal or larger to the length of the filter value (this is to
00416     compare strings only if really needed
00417     ------------------------------------------------------------------------*/
00418 
00419 inline bool
00420 LogFilterString::_checkConditionAndWipe(OperatorFunction f, char **field_value, size_t field_value_length,
00421                                         char **val, LengthCondition lc)
00422 {
00423   bool retVal = false;
00424 
00425   if (m_action != WIPE_FIELD_VALUE) return false;
00426 
00427   // make single value case a little bit faster by taking it out of loop
00428   //
00429   if (m_num_values == 1) {
00430     switch (lc) {
00431     case DATA_LENGTH_EQUAL:
00432       retVal = (field_value_length == *m_length ? ((*f) (*field_value, *val) == 0 ? true : false) : false);
00433       if (retVal) {
00434         wipeField(field_value, *val);
00435       }
00436       break;
00437     case DATA_LENGTH_LARGER:
00438       retVal = (field_value_length > *m_length ? ((*f) (*field_value, *val) == 0 ? true : false) : false);
00439       if (retVal) {
00440         wipeField(field_value, *val);
00441       }
00442       break;
00443     default:
00444       ink_assert(!"LogFilterString::checkCondition " "unknown LengthCondition");
00445     }
00446   } else {
00447     size_t i;
00448     switch (lc) {
00449     case DATA_LENGTH_EQUAL:
00450       for (i = 0; i < m_num_values; ++i) {
00451         // condition is satisfied if f returns zero
00452         if (field_value_length == m_length[i] && (*f) (*field_value, val[i]) == 0) {
00453           retVal = true;
00454           wipeField(field_value, val[i]);
00455         }
00456       }
00457       break;
00458     case DATA_LENGTH_LARGER:
00459       for (i = 0; i < m_num_values; ++i) {
00460         // condition is satisfied if f returns zero
00461         if (field_value_length > m_length[i] && (*f) (*field_value, val[i]) == 0) {
00462           retVal = true;
00463           wipeField(field_value, val[i]);
00464         }
00465       }
00466       break;
00467     default:
00468       ink_assert(!"LogFilterString::checkConditionAndWipe " "unknown LengthConditionAndWipe");
00469     }
00470   }
00471   return retVal;
00472 }
00473 
00474 #endif

Generated by  doxygen 1.7.1