ApacheTrafficServer: MatcherUtils.cc Source File

Go to the documentation of this file.
00001 /** @file
00002 
00003   A brief file description
00004 
00005   @section license License
00006 
00007   Licensed to the Apache Software Foundation (ASF) under one
00008   or more contributor license agreements.  See the NOTICE file
00009   distributed with this work for additional information
00010   regarding copyright ownership.  The ASF licenses this file
00011   to you under the Apache License, Version 2.0 (the
00012   "License"); you may not use this file except in compliance
00013   with the License.  You may obtain a copy of the License at
00014 
00015       http://www.apache.org/licenses/LICENSE-2.0
00016 
00017   Unless required by applicable law or agreed to in writing, software
00018   distributed under the License is distributed on an "AS IS" BASIS,
00019   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00020   See the License for the specific language governing permissions and
00021   limitations under the License.
00022  */
00023 
00024 /*****************************************************************************
00025  *
00026  *  MatcherUtils.cc - Various helper routines used in ControlMatcher
00027  *                    and ReverseProxy
00028  *
00029  *
00030  ****************************************************************************/
00031 
#include "libts.h"      /* MAGIC_EDITING_TAG */

#include <limits.h>
00033 
00034 // char* readIntoBuffer(const char* file_path, const char* module_name,
00035 //                          int* read_size_ptr)
00036 //
00037 //  Attempts to open and read arg file_path into a buffer allocated
00038 //   off the heap (via malloc() )  Returns a pointer to the buffer
00039 //   is successful and NULL otherwise.
00040 //
00041 //  CALLEE is responsibled for deallocating the buffer via free()
00042 //
00043 char *
00044 readIntoBuffer(const char *file_path, const char *module_name, int *read_size_ptr)
00045 {
00046 
00047   int fd;
00048   struct stat file_info;
00049   char *file_buf;
00050   int read_size = 0;
00051 
00052   if (read_size_ptr != NULL) {
00053     *read_size_ptr = 0;
00054   }
00055   // Open the file for Blocking IO.  We will be reading this
00056   //   at start up and infrequently afterward
00057   if ((fd = open(file_path, O_RDONLY | _O_ATTRIB_NORMAL)) < 0) {
00058     Error("%s Can not open %s file : %s", module_name, file_path, strerror(errno));
00059     return NULL;
00060   }
00061 
00062   if (fstat(fd, &file_info) < 0) {
00063     Error("%s Can not stat %s file : %s", module_name, file_path, strerror(errno));
00064     close(fd);
00065     return NULL;
00066   }
00067 
00068   if (file_info.st_size < 0) {
00069     Error("%s Can not get correct file size for %s file : %" PRId64 "", module_name, file_path, (int64_t) file_info.st_size);
00070     close(fd);
00071     return NULL;
00072   }
00073   // Allocate a buffer large enough to hold the entire file
00074   //   File size should be small and this makes it easy to
00075   //   do two passes on the file
00076   file_buf = (char *)ats_malloc(file_info.st_size + 1);
00077   // Null terminate the buffer so that string operations will work
00078   file_buf[file_info.st_size] = '\0';
00079 
00080   read_size = (file_info.st_size > 0) ? read(fd, file_buf, file_info.st_size) : 0;
00081 
00082   // Check to make sure that we got the whole file
00083   if (read_size < 0) {
00084     Error("%s Read of %s file failed : %s", module_name, file_path, strerror(errno));
00085     ats_free(file_buf);
00086     file_buf = NULL;
00087   } else if (read_size < file_info.st_size) {
00088     // We don't want to signal this error on WIN32 because the sizes
00089     // won't match if the file contains any CR/LF sequence.
00090     Error("%s Only able to read %d bytes out %d for %s file",
00091           module_name, read_size, (int) file_info.st_size, file_path);
00092     file_buf[read_size] = '\0';
00093   }
00094 
00095   if (file_buf && read_size_ptr) {
00096     *read_size_ptr = read_size;
00097   }
00098 
00099   close(fd);
00100 
00101   return file_buf;
00102 }
00103 
// int unescapifyStr(char* buffer)
//
//   Unescapifies a URL in place, without making a copy.
//    The passed in string is modified:
//      %XX  (two hex digits) becomes the byte with that value
//      +    becomes a space
//    A '%' that is not followed by two hex digits is not an
//    escape and is copied through unchanged.
//
//   Returns the length of the resulting string.  Note that
//    "%00" decodes to a NUL byte, so the result can be longer
//    than what strlen() reports.
//
int
unescapifyStr(char *buffer)
{
  char *src = buffer;           // next character to examine
  char *dst = buffer;           // next position to fill; never passes src
  char hex[3];

  hex[2] = '\0';
  while (*src != '\0') {
    // The isxdigit() tests short circuit on the terminating NUL, so
    //   nothing past the end of the string is ever examined
    if (*src == '%' && isxdigit((unsigned char) src[1]) && isxdigit((unsigned char) src[2])) {
      hex[0] = src[1];
      hex[1] = src[2];
      *dst++ = (char) strtol(hex, (char **) NULL, 16);
      src += 3;
    } else if (*src == '+') {
      *dst++ = ' ';
      src++;
    } else {
      *dst++ = *src++;
    }
  }
  *dst = '\0';

  return (int) (dst - buffer);
}
00138 
00139 char const*
00140 ExtractIpRange(char* match_str, in_addr_t* min, in_addr_t* max) {
00141   IpEndpoint ip_min, ip_max;
00142   char const* zret = ExtractIpRange(match_str, &ip_min.sa, &ip_max.sa);
00143   if (0 == zret) { // success
00144     if (ats_is_ip4(&ip_min) && ats_is_ip4(&ip_max)) {
00145       if (min) *min = ntohl(ats_ip4_addr_cast(&ip_min));
00146       if (max) *max = ntohl(ats_ip4_addr_cast(&ip_max));
00147     } else {
00148       zret = "The addresses were not IPv4 addresses.";
00149     }
00150   }
00151   return zret;
00152 }
00153 
00154 //   char* ExtractIpRange(char* match_str, sockaddr* addr1,
00155 //                         sockaddr* addr2)
00156 //
00157 //   Attempts to extract either an Ip Address or an IP Range
00158 //     from match_str.  The range should be two addresses
00159 //     separated by a hyphen and no spaces
00160 //
00161 //   If the extraction is successful, sets addr1 and addr2
00162 //     to the extracted values (in the case of a single
00163 //     address addr2 = addr1) and returns NULL
00164 //
00165 //   If the extraction fails, returns a static string
00166 //     that describes the reason for the error.
00167 //
00168 const char *
00169 ExtractIpRange(char *match_str, sockaddr* addr1, sockaddr* addr2)
00170 {
00171   Tokenizer rangeTok("-/");
00172   bool mask = strchr(match_str, '/') != NULL;
00173   int mask_bits;
00174   int mask_val;
00175   int numToks;
00176   IpEndpoint la1, la2;
00177 
00178   // Extract the IP addresses from match data
00179   numToks = rangeTok.Initialize(match_str, SHARE_TOKS);
00180 
00181   if (numToks < 0) {
00182     return "no IP address given";
00183   } else if (numToks > 2) {
00184     return "malformed IP range";
00185   }
00186 
00187   if (0 != ats_ip_pton(rangeTok[0], &la1.sa)) {
00188     return "malformed IP address";
00189   }
00190 
00191   // Handle a IP range
00192   if (numToks == 2) {
00193 
00194     if (mask) {
00195       if (!ats_is_ip4(&la1)) {
00196         return "Masks supported only for IPv4";
00197       }
00198       // coverity[secure_coding]
00199       if (sscanf(rangeTok[1], "%d", &mask_bits) != 1) {
00200         return "bad mask specification";
00201       }
00202 
00203       if (!(mask_bits >= 0 && mask_bits <= 32)) {
00204         return "invalid mask specification";
00205       }
00206 
00207       if (mask_bits == 32) {
00208         mask_val = 0;
00209       } else {
00210         mask_val = htonl(0xffffffff >> mask_bits);
00211       }
00212       in_addr_t a = ats_ip4_addr_cast(&la1);
00213       ats_ip4_set(&la2, a | mask_val);
00214       ats_ip4_set(&la1, a & (mask_val ^ 0xffffffff));
00215 
00216     } else {
00217       if (0 != ats_ip_pton(rangeTok[1], &la2)) {
00218         return "malformed ip address at range end";
00219       }
00220     }
00221 
00222     if (1 == ats_ip_addr_cmp(&la1.sa, &la2.sa)) {
00223       return "range start greater than range end";
00224     }
00225 
00226     ats_ip_copy(addr2, &la2);
00227   } else {
00228     ats_ip_copy(addr2, &la1);
00229   }
00230 
00231   ats_ip_copy(addr1, &la1);
00232   return NULL;
00233 }
00234 
// char* tokLine(char* buf, char** last, char cont)
//
//  Similar to strtok_r but only tokenizes on '\n'
//   and will return tokens that are empty strings
//
//   buf  - buffer to start tokenizing, or NULL to continue
//           with the buffer from the previous call
//   last - scan state kept between calls; it is left pointing
//           at the final character of the line just returned
//   cont - line continuation character, or '\0' for none.  When
//           a newline directly follows cont both characters are
//           replaced with spaces and the line carries on
//
//  The buffer is modified in place: the newline ending each
//   returned line is overwritten with a NUL.
//
//  Returns the start of the next line, or NULL once the buffer
//   is used up.  Do not call again after NULL has been returned.
//
char *
tokLine(char *buf, char **last, char cont)
{
  char *start;
  char *cur;
  char *prev = NULL;

  if (buf != NULL) {
    start = cur = buf;
    *last = buf;
  } else {
    start = cur = (*last) + 1;
  }

  while (*cur != '\0') {
    if (*cur == '\n') {
      if (cont != '\0' && prev != NULL && *prev == cont) {
        // Continued line: blank out the marker and the newline
        *prev = ' ';
        *cur = ' ';
      } else {
        *cur = '\0';
        *last = cur;
        return start;
      }
    }
    prev = cur++;
  }

  // Return the last line even if it does
  //  not end in a newline.  Comparing against start (rather than
  //  *last + 1) matters on the first call, where *last == buf and
  //  a final line of exactly one character must still be returned
  if (cur > start) {
    *last = cur - 1;
    return start;
  }

  return NULL;
}
00278 
// Printable names for the matcher types.
//   NOTE(review): presumably indexed by the matcher_type enum, so
//   the order here has to follow that declaration - confirm there.
const char *matcher_type_str[] = {
  "invalid",                    // 0
  "host",                       // 1
  "domain",                     // 2
  "ip",                         // 3
  "url_regex",                  // 4
  "url",                        // 5
  "host_regex"                  // 6
};
00288 
// char* processDurationString(char* str, int* seconds)
//
//   Take a duration sting which is composed of
//      digits followed by a unit specifier
//         w - week
//         d - day
//         h - hour
//         m - min
//         s - sec
//    Several such groups may follow each other ("1h30m").
//
//   Trailing digits without a specifier are
//    assumed to be seconds
//
//   On success the total is stored in *seconds and NULL is
//    returned (an empty string yields 0).  On failure a static
//    error string is returned and *seconds is left untouched.
//    A total that does not fit in an int is reported as
//    "Time too big".
//
//   str is only read; it is not modified.
//
const char *
processDurationString(char *str, int *seconds)
{
  int64_t total = 0;            // sum of the completed groups, in seconds
  int64_t number = 0;           // value of the digit run being scanned
  bool have_digits = false;     // digits seen since the last unit
  bool too_big = false;         // some value went past INT_MAX

  if (str == NULL) {
    return "Missing time";
  }

  for (const char *current = str; *current != '\0'; current++) {
    int multiplier;

    if (*current >= '0' && *current <= '9') {
      have_digits = true;
      // Once past INT_MAX stop accumulating so that the 64 bit
      //   arithmetic can not overflow; keep scanning so that syntax
      //   errors are still reported ahead of the size error
      if (!too_big) {
        number = number * 10 + (*current - '0');
        if (number > INT_MAX) {
          too_big = true;
        }
      }
      continue;
    }

    // Make sure there is a time to process
    if (!have_digits) {
      return "Malformed time";
    }

    switch (*current) {
    case 'w':
      multiplier = 7 * 24 * 60 * 60;
      break;
    case 'd':
      multiplier = 24 * 60 * 60;
      break;
    case 'h':
      multiplier = 60 * 60;
      break;
    case 'm':
      multiplier = 60;
      break;
    case 's':
      multiplier = 1;
      break;
    case '-':
      return "Negative time not permitted";
    default:
      return "Invalid time unit specified";
    }

    if (!too_big) {
      total += number * multiplier;
      if (total > INT_MAX) {
        too_big = true;
      }
    }
    number = 0;
    have_digits = false;
  }

  // Read any trailing seconds
  if (have_digits && !too_big) {
    total += number;
    if (total > INT_MAX) {
      too_big = true;
    }
  }

  if (too_big) {
    return "Time too big";
  }

  *seconds = (int) total;
  return NULL;
}
00390 
00391 const matcher_tags http_dest_tags = {
00392   "dest_host", "dest_domain", "dest_ip", "url_regex", "url", "host_regex", true
00393 };
00394 
00395 const matcher_tags ip_allow_tags = {
00396   NULL, NULL, "src_ip", NULL, NULL, NULL, false
00397 };
00398 
00399 const matcher_tags socks_server_tags = {
00400   NULL, NULL, "dest_ip", NULL, NULL, NULL, false
00401 };
00402 
00403 // char* parseConfigLine(char* line, matcher_line* p_line,
00404 //                       const matcher_tags* tags)
00405 //
00406 //   Parse out a config file line suitable for passing to
00407 //    a ControlMatcher object
00408 //
00409 //   If successful, NULL is returned.  If unsuccessful,
00410 //     a static error string is returned
00411 //
00412 const char *
00413 parseConfigLine(char *line, matcher_line *p_line, const matcher_tags * tags)
00414 {
00415   enum pState
00416   {
00417     FIND_LABEL, PARSE_LABEL,
00418     PARSE_VAL, START_PARSE_VAL, CONSUME
00419   };
00420 
00421   pState state = FIND_LABEL;
00422   bool inQuote = false;
00423   char *copyForward = NULL;
00424   char *copyFrom = NULL;
00425   char *s = line;
00426   char *label = NULL;
00427   char *val = NULL;
00428   int num_el = 0;
00429   matcher_type type = MATCH_NONE;
00430 
00431   // Zero out the parsed line structure
00432   memset(p_line, 0, sizeof(matcher_line));
00433 
00434   if (*s == '\0') {
00435     return NULL;
00436   }
00437 
00438   do {
00439 
00440     switch (state) {
00441     case FIND_LABEL:
00442       if (!isspace(*s)) {
00443         state = PARSE_LABEL;
00444         label = s;
00445       }
00446       s++;
00447       break;
00448     case PARSE_LABEL:
00449       if (*s == '=') {
00450         *s = '\0';
00451         state = START_PARSE_VAL;
00452       }
00453       s++;
00454       break;
00455     case START_PARSE_VAL:
00456       // Init state needed for parsing values
00457       copyForward = NULL;
00458       copyFrom = NULL;
00459 
00460       if (*s == '"') {
00461         inQuote = true;
00462         val = s + 1;
00463       } else if (*s == '\\') {
00464         inQuote = false;
00465         val = s + 1;
00466       } else {
00467         inQuote = false;
00468         val = s;
00469 
00470       }
00471 
00472       if (inQuote == false && (isspace(*s) || *(s + 1) == '\0')) {
00473         state = CONSUME;
00474       } else {
00475         state = PARSE_VAL;
00476       }
00477 
00478       s++;
00479       break;
00480     case PARSE_VAL:
00481       if (inQuote == true) {
00482         if (*s == '\\') {
00483           // The next character is esacped
00484           //
00485           // To remove the escaped character
00486           // we need to copy
00487           //  the rest of the entry over it
00488           //  but since we do not know where the
00489           //  end is right now, defer the work
00490           //  into the future
00491 
00492           if (copyForward != NULL) {
00493             // Perform the prior copy forward
00494             int bytesCopy = s - copyFrom;
00495             memcpy(copyForward, copyFrom, s - copyFrom);
00496             ink_assert(bytesCopy > 0);
00497 
00498             copyForward += bytesCopy;
00499             copyFrom = s + 1;
00500           } else {
00501             copyForward = s;
00502             copyFrom = s + 1;
00503           }
00504 
00505           // Scroll past the escape character
00506           s++;
00507 
00508           // Handle the case that places us
00509           //  at the end of the file
00510           if (*s == '\0') {
00511             break;
00512           }
00513         } else if (*s == '"') {
00514           state = CONSUME;
00515           *s = '\0';
00516         }
00517       } else if ((*s == '\\' && ParseRules::is_digit(*(s + 1)))
00518                  || !ParseRules::is_char(*s)) {
00519         // INKqa10511
00520         // traffic server need to handle unicode characters
00521         // right now ignore the entry
00522         return "Unrecognized encoding scheme";
00523       } else if (isspace(*s)) {
00524         state = CONSUME;
00525         *s = '\0';
00526       }
00527 
00528       s++;
00529 
00530       // If we are now at the end of the line,
00531       //   we need to consume final data
00532       if (*s == '\0') {
00533         state = CONSUME;
00534       }
00535       break;
00536     case CONSUME:
00537       break;
00538     }
00539 
00540     if (state == CONSUME) {
00541 
00542       // See if there are any quote copy overs
00543       //   we've pushed into the future
00544       if (copyForward != NULL) {
00545         int toCopy = (s - 1) - copyFrom;
00546         memcpy(copyForward, copyFrom, toCopy);
00547         *(copyForward + toCopy) = '\0';
00548       }
00549 
00550       p_line->line[0][num_el] = label;
00551       p_line->line[1][num_el] = val;
00552       type = MATCH_NONE;
00553 
00554       // Check to see if this the primary specifier we are looking for
00555       if (tags->match_ip && strcasecmp(tags->match_ip, label) == 0) {
00556         type = MATCH_IP;
00557       } else if (tags->match_host && strcasecmp(tags->match_host, label) == 0) {
00558         type = MATCH_HOST;
00559       } else if (tags->match_domain && strcasecmp(tags->match_domain, label) == 0) {
00560         type = MATCH_DOMAIN;
00561       } else if (tags->match_regex && strcasecmp(tags->match_regex, label) == 0) {
00562         type = MATCH_REGEX;
00563       } else if (tags->match_url && strcasecmp(tags->match_url, label) == 0) {
00564         type = MATCH_URL;
00565       } else if (tags->match_host_regex && strcasecmp(tags->match_host_regex, label) == 0) {
00566         type = MATCH_HOST_REGEX;
00567       }
00568       // If this a destination tag, use it
00569       if (type != MATCH_NONE) {
00570         // Check to see if this second destination specifier
00571         if (p_line->type != MATCH_NONE) {
00572           if (tags->dest_error_msg == false) {
00573             return "Muliple Sources Specified";
00574           } else {
00575             return "Muliple Destinations Specified";
00576           }
00577         } else {
00578           p_line->dest_entry = num_el;
00579           p_line->type = type;
00580         }
00581       }
00582       num_el++;
00583 
00584       if (num_el > MATCHER_MAX_TOKENS) {
00585         return "Malformed line: Too many tokens";
00586       }
00587 
00588       state = FIND_LABEL;
00589     }
00590   } while (*s != '\0');
00591 
00592   p_line->num_el = num_el;
00593 
00594   if (state != CONSUME && state != FIND_LABEL) {
00595     return "Malformed entry";
00596   }
00597 
00598   if (!tags->empty() && p_line->type == MATCH_NONE) {
00599     if (tags->dest_error_msg == false) {
00600       return "No source specifier";
00601     } else {
00602       return "No destination specifier";
00603     }
00604   }
00605 
00606   return NULL;
00607 }