ApacheTrafficServer: HttpCompat.cc Source File

Go to the documentation of this file.
00001 /** @file
00002 
00003   A brief file description
00004 
00005   @section license License
00006 
00007   Licensed to the Apache Software Foundation (ASF) under one
00008   or more contributor license agreements.  See the NOTICE file
00009   distributed with this work for additional information
00010   regarding copyright ownership.  The ASF licenses this file
00011   to you under the Apache License, Version 2.0 (the
00012   "License"); you may not use this file except in compliance
00013   with the License.  You may obtain a copy of the License at
00014 
00015       http://www.apache.org/licenses/LICENSE-2.0
00016 
00017   Unless required by applicable law or agreed to in writing, software
00018   distributed under the License is distributed on an "AS IS" BASIS,
00019   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00020   See the License for the specific language governing permissions and
00021   limitations under the License.
00022  */
00023 
00024 #include "libts.h"
00025 #include "HttpCompat.h"
00026 #include "HdrUtils.h"    /* MAGIC_EDITING_TAG */
00027 
00028 
00029 //////////////////////////////////////////////////////////////////////////////
00030 //
00031 //      HttpCompat::parse_tok_list
00032 //
00033 //      Takes a string containing an HTTP list broken on the separator
00034 //      character <sep>, and returns a StrList object containing a
00035 //      dynamically allocated list of elements.  This is essentially a
00036 //      fancy strtok that runs to completion and hands you back all tokens.
00037 //
00038 //      The routine either allocates and copies each string token, or
00039 //      just maintains the point to the raw text token, depending on the
00040 //      mode of the StrList object.
00041 //
00042 //////////////////////////////////////////////////////////////////////////////
00043 
00044 void
00045 HttpCompat::parse_tok_list(StrList * list, int trim_quotes, const char *string, char sep)
00046 {
00047   if (string == NULL)
00048     return;
00049   HttpCompat::parse_tok_list(list, trim_quotes, string, (int) strlen(string), sep);
00050 }
00051 
00052 void
00053 HttpCompat::parse_tok_list(StrList * list, int trim_quotes, const char *string, int len, char sep)
00054 {
00055   int in_quote;
00056   const char quot = '\"';
00057   const char *s, *e, *l, *s_before_skipping_ws;
00058   int index, byte_length, hit_sep;
00059 
00060   if ((string == NULL) || (list == NULL) || (sep == NUL))
00061     return;
00062 
00063   s = string;
00064   l = s + len - 1;
00065   index = 0;
00066 
00067   hit_sep = 0;
00068   s_before_skipping_ws = s;
00069 
00070   while (s <= l) {
00071     //////////////////////////////////////////////////////////
00072     // find the start of the first token, skipping over any //
00073     // whitespace or empty tokens, to leave <s> pointing at //
00074     // a NUL, a character, or a double quote.               //
00075     //////////////////////////////////////////////////////////
00076 
00077     while ((s <= l) && ParseRules::is_ws(*s))
00078       ++s;                      // skip whitespace
00079 
00080     //////////////////////////////////////////////////////////
00081     // if we are pointing at a separator, this was an empty //
00082     // token, so add the empty token, and continue parsing. //
00083     //////////////////////////////////////////////////////////
00084 
00085     if ((s <= l) && (*s == sep)) {
00086       list->append_string(s_before_skipping_ws, 0);
00087       ++index;
00088       s_before_skipping_ws = s + 1;
00089       s = s_before_skipping_ws;
00090       hit_sep = 1;
00091       continue;
00092     }
00093     //////////////////////////////////////////////////////////////////
00094     // at this point, <s> points to EOS, a double quote, or another //
00095     // character --- if EOS, then break out of the loop, and either //
00096     // tack on a final empty token if we had a trailing separator,  //
00097     // or just exit.                                                //
00098     //////////////////////////////////////////////////////////////////
00099 
00100     if (s > l)
00101       break;
00102 
00103     ///////////////////////////////////////////////////////////////////
00104     // we are pointing to the first character of a token now, either //
00105     // a character, or a double quote --- the next step is to scan   //
00106     // for the next separator or end of string, being careful not to //
00107     // include separators inside quotes.                             //
00108     ///////////////////////////////////////////////////////////////////
00109 
00110 #define is_unquoted_separator(c) ((c == sep) && !in_quote)
00111 
00112     if (*s == quot) {
00113       in_quote = 1;
00114       e = s + 1;                // start after quote
00115       if (trim_quotes)
00116         ++s;                    // trim starting quote
00117     } else {
00118       in_quote = 0;
00119       e = s;
00120     }
00121 
00122     while ((e <= l) && !is_unquoted_separator(*e)) {
00123       if (*e == quot) {
00124         in_quote = !in_quote;
00125       }
00126       e++;
00127     }
00128 
00129     ///////////////////////////////////////////////////////////////////////
00130     // we point one char past the last character of string, or an        //
00131     // unquoted separator --- so back up into any previous whitespace or //
00132     // quote, leaving <e> pointed 1 char after the last token character. //
00133     ///////////////////////////////////////////////////////////////////////
00134 
00135     hit_sep = (e <= l);         // must have hit a separator if still inside string
00136 
00137     s_before_skipping_ws = e + 1;       // where to start next time
00138     while ((e > s) && ParseRules::is_ws(*(e - 1)))
00139       --e;                      // eat trailing ws
00140     if ((e > s) && (*(e - 1) == quot) && trim_quotes)
00141       --e;                      // eat trailing quote
00142 
00143     /////////////////////////////////////////////////////////////////////
00144     // now <e> points to the character AFTER the last character of the //
00145     // field, either a separator, a quote, or a NUL (other other char  //
00146     // after the last char in the string.                              //
00147     /////////////////////////////////////////////////////////////////////
00148 
00149     byte_length = (int) (e - s);
00150     ink_assert(byte_length >= 0);
00151 
00152     ///////////////////////////////////////////
00153     // add the text to the list, and move on //
00154     ///////////////////////////////////////////
00155 
00156     list->append_string(s, byte_length);
00157     s = s_before_skipping_ws;   // where to start next time
00158     ++index;
00159   }
00160 
00161   ////////////////////////////////////////////////////////////////////////////
00162   // fall out of loop when at end of string --- three possibilities:        //
00163   //   (1) at end of string after final token ("a,b,c" or "a,b,c   ")       //
00164   //   (2) at end of string after final separator ("a,b,c," or "a,b,c,   ") //
00165   //   (3) at end of string before any tokens ("" or "   ")                 //
00166   // for cases (2) & (3), we want to return an empty token                  //
00167   ////////////////////////////////////////////////////////////////////////////
00168 
00169   if (hit_sep || (index == 0)) {
00170     ink_assert(s == l + 1);
00171     list->append_string(s_before_skipping_ws, 0);
00172     ++index;
00173   }
00174 }
00175 
00176 //////////////////////////////////////////////////////////////////////////////
00177 //
00178 //      bool HttpCompat::lookup_param_in_strlist(
00179 //          StrList *param_list, char *param_name,
00180 //          char *param_val, int param_val_length)
00181 //
00182 //      Takes a list of parameter strings, and searches each parameter list
00183 //      element for the name <param_name>, and if followed by '=' and a value,
00184 //      the value string is stored in <param_val> up to <param_val_length>
00185 //      bytes minus 1 character for trailing NUL.
00186 //
00187 //      This routine can be used to search for charset=XXX, Q=XXX, and other
00188 //      kinds of parameters.  The param list can be constructed using the
00189 //      parse_comma_list and parse_semicolon_list functions.
00190 //
00191 //      The routine returns true if there was a match, false otherwise.
00192 //
00193 //////////////////////////////////////////////////////////////////////////////
00194 
00195 bool
00196 HttpCompat::lookup_param_in_strlist(StrList * param_list,  const char *param_name, char *param_val, int param_val_length)
00197 {
00198   int cnt;
00199   const char *s, *t;
00200   Str *param;
00201   bool is_match;
00202 
00203   for (param = param_list->head; param != NULL; param = param->next) {
00204     /////////////////////////////////////////////////////
00205     // compare this parameter to the target param_name //
00206     /////////////////////////////////////////////////////
00207 
00208     s = param->str;             // source str
00209     t = param_name;             // target str
00210     while (*s && *t && (ParseRules::ink_tolower(*s) == ParseRules::ink_tolower(*t))) {
00211       ++s;
00212       ++t;
00213     }
00214 
00215     ////////////////////////////////////////////////////////////////
00216     // match if target string empty, and if current string empty,  //
00217     // or points to space or '=' character.                       //
00218     ////////////////////////////////////////////////////////////////
00219 
00220     is_match = ((!*t) && ((!*s) || ParseRules::is_ws(*s) || (*s == '=')));
00221 
00222     /////////////////////////////////////////////////////////////
00223     // copy text after '=' into param_val, up to length limits //
00224     /////////////////////////////////////////////////////////////
00225 
00226     if (is_match) {
00227       param_val[0] = '\0';
00228 
00229       while (*s && ParseRules::is_ws(*s))
00230         s++;                    // skip white
00231       if (*s == '=') {
00232         ++s;                    // skip '='
00233         while (*s && ParseRules::is_ws(*s))
00234           s++;                  // skip white
00235 
00236         for (cnt = 0; *s && (cnt < param_val_length - 1); s++, cnt++)
00237           param_val[cnt] = *s;
00238         if (cnt < param_val_length)
00239           param_val[cnt++] = '\0';
00240       }
00241       return (true);
00242     }
00243   }
00244 
00245   return (false);
00246 }
00247 
00248 
00249 //////////////////////////////////////////////////////////////////////////////
00250 //
00251 //      bool HttpCompat::lookup_param_in_semicolon_string(
00252 //          char *semicolon_string, int semicolon_string_len,
00253 //          char *param_name, char *param_val, int param_val_length)
00254 //
00255 //      Takes a semicolon-separated string of parameters, and searches
00256 //      for a parameter named <param_name>, as in lookup_param_in_strlist.
00257 //
00258 //      The routine returns true if there was a match, false otherwise.
00259 //      If multiple parameters will be searched for in the same string,
00260 //      use lookup_param_in_strlist(), so the string is not tokenized
00261 //      multiple times.
00262 //
00263 //////////////////////////////////////////////////////////////////////////////
00264 
00265 bool
00266 HttpCompat::lookup_param_in_semicolon_string(const char *semicolon_string, int semicolon_string_len,
00267                                              const char *param_name, char *param_val, int param_val_length)
00268 {
00269   StrList l;
00270   bool result;
00271 
00272   parse_semicolon_list(&l, semicolon_string, semicolon_string_len);
00273   result = lookup_param_in_strlist(&l, param_name, param_val, param_val_length);
00274   return (result);
00275 }
00276 
00277 
00278 //////////////////////////////////////////////////////////////////////////////
00279 //
00280 //      void HttpCompat::parse_mime_type(
00281 //          char *mime_string, char *type, char *subtype,
00282 //          int type_len, int subtype_len)
00283 //
00284 //      This routine takes a pointer to a MIME type, and decomposes it
00285 //      into type and subtype fields, skipping over spaces, and placing
00286 //      the decomposed values into <type> and <subtype>.  The length
00287 //      fields describe the lengths of the type and subtype buffers,
00288 //      including the trailing NUL characters.
00289 //
00290 //////////////////////////////////////////////////////////////////////////////
00291 
00292 void
00293 HttpCompat::parse_mime_type(const char *mime_string, char *type, char *subtype, int type_len, int subtype_len)
00294 {
00295   const char *s, *e;
00296   char *d;
00297 
00298   *type = *subtype = '\0';
00299 
00300   /////////////////////
00301   // skip whitespace //
00302   /////////////////////
00303 
00304   for (s = mime_string; *s && ParseRules::is_ws(*s); s++);
00305 
00306   ///////////////////////////////////////////////////////////////////////
00307   // scan type (until NUL, out of room, comma/semicolon, space, slash) //
00308   ///////////////////////////////////////////////////////////////////////
00309 
00310   d = type;
00311   e = type + type_len;
00312   while (*s && (d < e - 1) && (!ParseRules::is_ws(*s)) && (*s != ';') && (*s != ',') && (*s != '/')) {
00313     *d++ = *s++;
00314   }
00315   *d++ = '\0';
00316 
00317   //////////////////////////////////////////////////////////////
00318   // skip remainder of text and space, then slash, then space //
00319   //////////////////////////////////////////////////////////////
00320 
00321   while (*s && (*s != ';') && (*s != ',') && (*s != '/'))
00322     ++s;
00323   if (*s == '/')
00324     ++s;
00325   while (*s && ParseRules::is_ws(*s))
00326     ++s;
00327 
00328   //////////////////////////////////////////////////////////////////////////
00329   // scan subtype (until NUL, out of room, comma/semicolon, space, slash) //
00330   //////////////////////////////////////////////////////////////////////////
00331 
00332   d = subtype;
00333   e = subtype + subtype_len;
00334   while (*s && (d < e - 1) && (!ParseRules::is_ws(*s)) && (*s != ';') && (*s != ',') && (*s != '/')) {
00335     *d++ = *s++;
00336   }
00337   *d++ = '\0';
00338 }
00339 
00340 void
00341 HttpCompat::parse_mime_type_with_len(const char *mime_string, int mime_string_len,
00342                                      char *type, char *subtype, int type_len, int subtype_len)
00343 {
00344   const char *s, *s_toofar, *e;
00345   char *d;
00346 
00347   *type = *subtype = '\0';
00348   s_toofar = mime_string + mime_string_len;
00349 
00350   /////////////////////
00351   // skip whitespace //
00352   /////////////////////
00353 
00354   for (s = mime_string; (s < s_toofar) && ParseRules::is_ws(*s); s++);
00355 
00356   ///////////////////////////////////////////////////////////////////////
00357   // scan type (until NUL, out of room, comma/semicolon, space, slash) //
00358   ///////////////////////////////////////////////////////////////////////
00359 
00360   d = type;
00361   e = type + type_len;
00362   while ((s < s_toofar) && (d < e - 1) && (!ParseRules::is_ws(*s)) && (*s != ';') && (*s != ',') && (*s != '/')) {
00363     *d++ = *s++;
00364   }
00365   *d++ = '\0';
00366 
00367   //////////////////////////////////////////////////////////////
00368   // skip remainder of text and space, then slash, then space //
00369   //////////////////////////////////////////////////////////////
00370 
00371   while ((s < s_toofar) && (*s != ';') && (*s != ',') && (*s != '/'))
00372     ++s;
00373   if ((s < s_toofar) && (*s == '/'))
00374     ++s;
00375   while ((s < s_toofar) && ParseRules::is_ws(*s))
00376     ++s;
00377 
00378   //////////////////////////////////////////////////////////////////////////
00379   // scan subtype (until NUL, out of room, comma/semicolon, space, slash) //
00380   //////////////////////////////////////////////////////////////////////////
00381 
00382   d = subtype;
00383   e = subtype + subtype_len;
00384   while ((s < s_toofar) && (d < e - 1) && (!ParseRules::is_ws(*s)) && (*s != ';') && (*s != ',') && (*s != '/')) {
00385     *d++ = *s++;
00386   }
00387   *d++ = '\0';
00388 }
00389 
00390 //////////////////////////////////////////////////////////////////////////////
00391 //
00392 //      bool HttpCompat::do_header_values_match(MIMEField *hv1, MIMEField *hv2)
00393 //
00394 //      This routine takes two HTTP header fields and determines
00395 //      if their values "match", as in section 14.43 of RFC2068:
00396 //
00397 //        "When the cache receives a subsequent request whose Request-URI
00398 //         specifies one or more cache entries including a Vary header, the
00399 //         cache MUST NOT use such a cache entry to construct a response to
00400 //         the new request unless all of the headers named in the cached
00401 //         Vary header are present in the new request, and all of the stored
00402 //         selecting request-headers from the previous request match the
00403 //         corresponding headers in the new request.
00404 //
00405 //         The selecting request-headers from two requests are defined to
00406 //         match if and only if the selecting request-headers in the first
00407 //         request can be transformed to the selecting request-headers in
00408 //         the second request by adding or removing linear whitespace (LWS)
00409 //         at places where this is allowed by the corresponding BNF, and/or
00410 //         combining multiple message-header fields with the same field
00411 //         name following the rules about message headers in section 4.2."
00412 //
00413 //////////////////////////////////////////////////////////////////////////////
00414 bool
00415 HttpCompat::do_header_values_rfc2068_14_43_match(MIMEField * hdr1, MIMEField * hdr2)
00416 {
00417   // If both headers are missing, the headers match.
00418   if (!hdr1 && !hdr2)
00419     return true;
00420 
00421   // If one header is missing, the headers do not match.
00422   if (!hdr1 || !hdr2)
00423     return false;
00424 
00425   // Make sure both headers have the same number of comma-
00426   // separated elements.
00427   HdrCsvIter iter1, iter2;
00428   if (iter1.count_values(hdr1) != iter2.count_values(hdr2))
00429     return false;
00430 
00431   int hdr1_val_len, hdr2_val_len;
00432   const char *hdr1_val = iter1.get_first(hdr1, &hdr1_val_len);
00433   const char *hdr2_val = iter2.get_first(hdr2, &hdr2_val_len);
00434 
00435   while (hdr1_val || hdr2_val) {
00436     if (hdr1_val_len != hdr2_val_len || ParseRules::strncasecmp_eow(hdr1_val, hdr2_val, hdr1_val_len) == false)
00437       return false;
00438 
00439     hdr1_val = iter1.get_next(&hdr1_val_len);
00440     hdr2_val = iter2.get_next(&hdr2_val_len);
00441   }
00442 
00443   return true;
00444 }
00445 
00446 //////////////////////////////////////////////////////////////////////////////
00447 //
00448 //      float HttpCompat::find_Q_param_in_strlist(StrList *strlist);
00449 //
00450 //      Takes a StrList formed from semicolon-parsing a value, and returns
00451 //      the value of the Q directive, or 1.0 by default.
00452 //
00453 //////////////////////////////////////////////////////////////////////////////
00454 
00455 float
00456 HttpCompat::find_Q_param_in_strlist(StrList * strlist)
00457 {
00458   float f, this_q;
00459   char q_string[8];
00460 
00461   this_q = 1.0;
00462   if (HttpCompat::lookup_param_in_strlist(strlist, (char *) "q", q_string, sizeof(q_string))) {
00463     // coverity[secure_coding]
00464     if (sscanf(q_string, "%f", &f) == 1)        // parse q
00465       this_q = (f<0 ? 0 : (f> 1 ? 1 : f));
00466   }
00467 
00468   return (this_q);
00469 }
00470 
00471 //////////////////////////////////////////////////////////////////////////////
00472 //
00473 //      float HttpCompat::match_accept_language
00474 //
00475 //      This routine returns the resulting Q factor from matching the
00476 //      content language tag <lang_str> against the Accept-Language value
00477 //      string <acpt_str>.
00478 //
00479 //      It also returns the index of the particular accept list piece
00480 //      that matches, and the length of the accept list piece that matches,
00481 //      in case you later want to resolve quality ties by position in the
00482 //      list, or by length of match.  In general, you want to sort the
00483 //      results of this call first by chosen Q, then by matching_length
00484 //      (longer is better), then by matching_index (lower is better).
00485 //      The first matching_index value is index 1.
00486 //
00487 //////////////////////////////////////////////////////////////////////////////
00488 
00489 static inline bool
00490 does_language_range_match(const char *pattern, int pattern_len, const char *tag, int tag_len)
00491 {
00492   bool match;
00493 
00494   while (pattern_len && tag_len && (ParseRules::ink_tolower(*pattern) == ParseRules::ink_tolower(*tag))) {
00495     ++pattern;
00496     ++tag;
00497     --pattern_len;
00498     --tag_len;
00499   }
00500 
00501   // matches if range equals tag, or if range is a lang prefix of tag
00502   if ((((pattern_len == 0) && (tag_len == 0)) || ((pattern_len == 0) && (*tag == '-'))))
00503     match = true;
00504   else
00505     match = false;
00506 
00507   return (match);
00508 }
00509 
00510 float
00511 HttpCompat::match_accept_language(const char *lang_str, int lang_len,
00512                                   StrList * acpt_lang_list,
00513                                   int *matching_length, int *matching_index, bool ignore_wildcards)
00514 {
00515   float Q, Q_wild;
00516   Str *a_value;
00517 
00518   Q = -1;                       // will never be returned as -1
00519   Q_wild = -1;                  // will never be returned as -1
00520   int match_count = 0;
00521   int wild_match_count = 0;
00522   int longest_match_len = 0;
00523 
00524   int index = 0;
00525   int Q_index = 0;
00526   int Q_wild_index = 0;
00527 
00528   *matching_index = 0;
00529   *matching_length = 0;
00530 
00531   ///////////////////////////////////////////////////////
00532   // rip the accept string into comma-separated values //
00533   ///////////////////////////////////////////////////////
00534   if (acpt_lang_list->count == 0)
00535     return (0.0);
00536 
00537   ////////////////////////////////////////
00538   // loop over each Accept-Language tag //
00539   ////////////////////////////////////////
00540   for (a_value = acpt_lang_list->head; a_value; a_value = a_value->next) {
00541     ++index;
00542 
00543     if (a_value->len == 0)
00544       continue;                 // blank tag
00545 
00546     ///////////////////////////////////////////////////////////
00547     // now rip the Accept-Language tag into head and Q parts //
00548     ///////////////////////////////////////////////////////////
00549     StrList a_param_list(false);
00550     HttpCompat::parse_semicolon_list(&a_param_list, a_value->str, (int) a_value->len);
00551     if (!a_param_list.head)
00552       continue;
00553 
00554     /////////////////////////////////////////////////////////////////////
00555     // This algorithm is a bit wierd --- the resulting Q factor is     //
00556     // the Q value corresponding to the LONGEST range field that       //
00557     // matched, or if none matched, then the Q value of any asterisk.  //
00558     // Also, if the lang value is "", meaning that no Content-Language //
00559     // was specified, this document matches all accept headers.        //
00560     /////////////////////////////////////////////////////////////////////
00561     const char *atag_str = a_param_list.head->str;
00562     int atag_len = (int) a_param_list.head->len;
00563 
00564     float tq = HttpCompat::find_Q_param_in_strlist(&a_param_list);
00565 
00566     if ((atag_len == 1) && (atag_str[0] == '*'))        // wildcard
00567     {
00568       ++wild_match_count;
00569       if (tq > Q_wild) {
00570         Q_wild = tq;
00571         Q_wild_index = index;
00572       }
00573     } else if (does_language_range_match(atag_str, atag_len, lang_str, lang_len)) {
00574       ++match_count;
00575       if (atag_len > longest_match_len) {
00576         longest_match_len = atag_len;
00577         Q = tq;
00578         Q_index = index;
00579       } else if (atag_len == longest_match_len) // if tie, pick higher Q
00580       {
00581         if (tq > Q) {
00582           Q = tq;
00583           Q_index = index;
00584         }
00585       }
00586     }
00587   }
00588 
00589   if ((ignore_wildcards == false) && wild_match_count && !match_count) {
00590     *matching_index = Q_wild_index;
00591     *matching_length = 1;
00592     return (Q_wild);
00593   } else if (match_count > 0)   // real match
00594   {
00595     *matching_index = Q_index;
00596     *matching_length = longest_match_len;
00597     return (Q);
00598   } else                        // no match
00599   {
00600     *matching_index = 0;
00601     *matching_length = 0;
00602     return (0.0);
00603   }
00604 }
00605 
00606 //////////////////////////////////////////////////////////////////////////////
00607 //
00608 //      float HttpCompat::match_accept_charset
00609 //
00610 //      This routine returns the resulting Q factor from matching the
00611 //      content language tag <lang_str> against the Accept-Language value
00612 //      string <acpt_str>.
00613 //
00614 //      It also returns the index of the particular accept list piece
00615 //      that matches, and the length of the accept list piece that matches,
00616 //      in case you later want to resolve quality ties by position in the
00617 //      list, or by length of match.  In general, you want to sort the
00618 //      results of this call first by chosen Q, then by matching_length
00619 //      (longer is better), then by matching_index (lower is better).
00620 //      The first matching_index value is index 1.
00621 //
00622 //////////////////////////////////////////////////////////////////////////////
00623 
00624 // FIX: not implemented!
00625 
00626 float
00627 HttpCompat::match_accept_charset(const char *charset_str, int charset_len,
00628                                  StrList * acpt_charset_list, int *matching_index, bool ignore_wildcards)
00629 {
00630   float Q, Q_wild;
00631   Str *a_value;
00632 
00633   Q = -1;                       // will never be returned as -1
00634   Q_wild = -1;                  // will never be returned as -1
00635   int match_count = 0;
00636   int wild_match_count = 0;
00637 
00638   int index = 0;
00639   int Q_index = 0;
00640   int Q_wild_index = 0;
00641 
00642   *matching_index = 0;
00643 
00644   ///////////////////////////////////////////////////////
00645   // rip the accept string into comma-separated values //
00646   ///////////////////////////////////////////////////////
00647   if (acpt_charset_list->count == 0)
00648     return (0.0);
00649 
00650   ///////////////////////////////////////
00651   // loop over each Accept-Charset tag //
00652   ///////////////////////////////////////
00653   for (a_value = acpt_charset_list->head; a_value; a_value = a_value->next) {
00654     ++index;
00655     if (a_value->len == 0)
00656       continue;                 // blank tag
00657 
00658     //////////////////////////////////////////////////////////
00659     // now rip the Accept-Charset tag into head and Q parts //
00660     //////////////////////////////////////////////////////////
00661     StrList a_param_list(false);
00662     HttpCompat::parse_semicolon_list(&a_param_list, a_value->str, (int) a_value->len);
00663     if (!a_param_list.head)
00664       continue;
00665 
00666     ///////////////////////////////////////////////////////////////
00667     // see if the Accept-Charset tag matches the current charset //
00668     ///////////////////////////////////////////////////////////////
00669     const char *atag_str = a_param_list.head->str;
00670     int atag_len = (int) a_param_list.head->len;
00671     float tq = HttpCompat::find_Q_param_in_strlist(&a_param_list);
00672 
00673     if ((atag_len == 1) && (atag_str[0] == '*'))        // wildcard
00674     {
00675       ++wild_match_count;
00676       if (tq > Q_wild) {
00677         Q_wild = tq;
00678         Q_wild_index = index;
00679       }
00680     } else if ((atag_len == charset_len) && (strncasecmp(atag_str, charset_str, charset_len) == 0)) {
00681       ++match_count;
00682       if (tq > Q) {
00683         Q = tq;
00684         Q_index = index;
00685       }
00686     }
00687   }
00688 
00689   if ((ignore_wildcards == false) && wild_match_count && !match_count) {
00690     *matching_index = Q_wild_index;
00691     return (Q_wild);
00692   } else if (match_count > 0)   // real match
00693   {
00694     *matching_index = Q_index;
00695     return (Q);
00696   } else                        // no match
00697   {
00698     *matching_index = 0;
00699     return (0.0);
00700   }
00701 }
00702 
00703 const char *
00704 HttpCompat::determine_set_by_language(RawHashTable * table_of_sets,
00705                                       StrList * acpt_language_list, StrList * acpt_charset_list,
00706                                       float *Q_best_ptr, int *La_best_ptr, int *Lc_best_ptr, int *I_best_ptr)
00707 {
00708   float Q, Ql, Qc, Q_best;
00709   int I, Idummy, I_best;
00710   int La, Lc, La_best, Lc_best;
00711   int is_the_default_set;
00712   const char *set_best;
00713 
00714   RawHashTable_Key k1;
00715   RawHashTable_Value v1;
00716   RawHashTable_Binding *b1;
00717   RawHashTable_IteratorState i1;
00718   RawHashTable *table_of_pages;
00719   HttpBodySetRawData *body_set;
00720 
00721   set_best = "default";
00722   Q_best = 0.00001;
00723   La_best = 0;
00724   Lc_best = INT_MAX;
00725   I_best = INT_MAX;
00726 
00727   Debug("body_factory_determine_set", "  INITIAL: [ set_best='%s', Q=%g, La=%d, Lc=%d, I=%d ]",
00728         set_best, Q_best, La_best, Lc_best, I_best);
00729 
00730   // FIX: eliminate this special case (which doesn't work anyway), by properly
00731   //      handling empty lists and empty pieces in match_accept_XXX
00732 
00733   // if no Accept-Language or Accept-Charset, just return default
00734   if ((acpt_language_list->count == 0) && (acpt_charset_list->count == 0)) {
00735     Q_best = 1;
00736     Debug("body_factory_determine_set", "  no constraints => returning '%s'", set_best);
00737     goto done;
00738   }
00739 
00740   if (table_of_sets != NULL) {
00741     ///////////////////////////////////////////
00742     // loop over set->body-types hash table //
00743     ///////////////////////////////////////////
00744 
00745     for (b1 = table_of_sets->firstBinding(&i1); b1 != NULL; b1 = table_of_sets->nextBinding(&i1)) {
00746       k1 = table_of_sets->getKeyFromBinding(b1);
00747       v1 = table_of_sets->getValueFromBinding(b1);
00748       const char *set_name = (const char *) k1;
00749 
00750       body_set = (HttpBodySetRawData *) v1;
00751       table_of_pages = body_set->table_of_pages;
00752 
00753       if ((set_name == NULL) || (table_of_pages == NULL))
00754         continue;
00755 
00756       //////////////////////////////////////////////////////////////////////
00757       // Take this error page language and match it against the           //
00758       // Accept-Language string passed in, to evaluate the match          //
00759       // quality.  Disable wildcard processing so we use "default"        //
00760       // if no set explicitly matches.  We also get back the index        //
00761       // of the match and the length of the match.                        //
00762       //                                                                  //
00763       // We optimize the match in a couple of ways:                       //
00764       //   (a) if Q is better ==> wins, else if tie,                      //
00765       //   (b) if accept tag length La is bigger ==> wins, else if tie,   //
00766       //   (c) if content tag length Lc is smaller ==> wins, else if tie, //
00767       //   (d) if index position I is smaller ==> wins                    //
00768       //////////////////////////////////////////////////////////////////////
00769 
00770       is_the_default_set = (strcmp(set_name, "default") == 0);
00771 
00772       Debug("body_factory_determine_set", "  --- SET: %-8s (Content-Language '%s', Content-Charset '%s')",
00773             set_name, body_set->content_language, body_set->content_charset);
00774 
00775       // if no Accept-Language hdr at all, treat as a wildcard that
00776       // slightly prefers "default".
00777       if (acpt_language_list->count == 0) {
00778         Ql = (is_the_default_set ? 1.0001 : 1.000);
00779         La = 0;
00780         Lc = INT_MAX;
00781         I = 1;
00782         Debug("body_factory_determine_set", "      SET: [%-8s] A-L not present => [ Ql=%g, La=%d, Lc=%d, I=%d ]",
00783               set_name, Ql, La, Lc, I);
00784       } else {
00785         Lc = strlen(body_set->content_language);
00786         Ql = HttpCompat::match_accept_language(body_set->content_language, Lc, acpt_language_list, &La, &I, true);
00787         Debug("body_factory_determine_set", "      SET: [%-8s] A-L match value => [ Ql=%g, La=%d, Lc=%d, I=%d ]",
00788               set_name, Ql, La, Lc, I);
00789       }
00790 
00791       /////////////////////////////////////////////////////////////
00792       // Take this error page charset and match it against the   //
00793       // Accept-Charset string passed in, to evaluate the match  //
00794       // quality.  Disable wildcard processing so that only      //
00795       // explicit values match.  (Many browsers will send along  //
00796       // "*" with all lists, and we really don't want to send    //
00797       // strange character sets for these people --- we'd rather //
00798       // use a more portable "default" set.)  The index value we //
00799       // get back isn't used, because it's a little hard to know //
00800       // how to trade off language indices vs. charset indices.  //
00801       // If someone cares, we could surely work charset indices  //
00802       // into the sorting computation below.                     //
00803       /////////////////////////////////////////////////////////////
00804 
00805       // if no Accept-Charset hdr at all, treat as a wildcard that
00806       // slightly prefers "default".
00807       if (acpt_charset_list->count == 0) {
00808         Qc = (is_the_default_set ? 1.0001 : 1.000);
00809         Idummy = 1;
00810         Debug("body_factory_determine_set", "      SET: [%-8s] A-C not present => [ Qc=%g ]", set_name, Qc);
00811       } else {
00812         Qc = HttpCompat::match_accept_charset(body_set->content_charset, strlen(body_set->content_charset),
00813                                               acpt_charset_list, &Idummy, true);
00814         Debug("body_factory_determine_set", "      SET: [%-8s] A-C match value => [ Qc=%g ]", set_name, Qc);
00815       }
00816 
00817 
00818       /////////////////////////////////////////////////////////////////
00819       // We get back the Q value, the matching field length, and the //
00820       // matching field index.  We sort by largest Q value, but if   //
00821       // there is a Q tie, we sub sort on longer matching length,    //
00822       // and if there is a tie on Q and L, we sub sort on position   //
00823       // index, preferring values earlier in Accept-Language list.   //
00824       /////////////////////////////////////////////////////////////////
00825 
00826       Q = min(Ql, Qc);
00827 
00828       //////////////////////////////////////////////////////////
00829       // normally the Q for default pages should be slightly  //
00830       // less than for normal pages, but default pages should //
00831       // always match to a slight level, in case everything   //
00832       // else doesn't match (matches with Q=0).               //
00833       //////////////////////////////////////////////////////////
00834 
00835       if (is_the_default_set) {
00836         Q = Q + -0.00005;
00837         if (Q < 0.00001)
00838           Q = 0.00001;
00839       }
00840 
00841       Debug("body_factory_determine_set", "      NEW: [ set='%s', Q=%g, La=%d, Lc=%d, I=%d ]", set_name, Q, La, Lc, I);
00842       Debug("body_factory_determine_set", "      OLD: [ set='%s', Q=%g, La=%d, Lc=%d, I=%d ]",
00843             set_best, Q_best, La_best, Lc_best, I_best);
00844 
00845       if (((Q > Q_best)) ||
00846           ((Q == Q_best) && (La > La_best)) ||
00847           ((Q == Q_best) && (La == La_best) && (Lc < Lc_best)) ||
00848           ((Q == Q_best) && (La == La_best) && (Lc == Lc_best) && (I < I_best))) {
00849         Q_best = Q;
00850         La_best = La;
00851         Lc_best = Lc;
00852         I_best = I;
00853         set_best = set_name;
00854 
00855         Debug("body_factory_determine_set", "   WINNER: [ set_best='%s', Q=%g, La=%d, Lc=%d, I=%d ]",
00856               set_best, Q_best, La_best, Lc_best, I_best);
00857       } else {
00858         Debug("body_factory_determine_set", "    LOSER: [ set_best='%s', Q=%g, La=%d, Lc=%d, I=%d ]",
00859               set_best, Q_best, La_best, Lc_best, I_best);
00860       }
00861     }
00862   }
00863 
00864 done:
00865 
00866   *Q_best_ptr = Q_best;
00867   *La_best_ptr = La_best;
00868   *Lc_best_ptr = Lc_best;
00869   *I_best_ptr = I_best;
00870   return (set_best);
00871 }