• Main Page
  • Related Pages
  • Namespaces
  • Data Structures
  • Files
  • File List
  • Globals

HttpTransactCache.cc

Go to the documentation of this file.
00001 /** @file
00002 
00003   HTTP cache alternate selection: entity-tag comparison helpers and Accept-* header quality-of-match scoring.
00004 
00005   @section license License
00006 
00007   Licensed to the Apache Software Foundation (ASF) under one
00008   or more contributor license agreements.  See the NOTICE file
00009   distributed with this work for additional information
00010   regarding copyright ownership.  The ASF licenses this file
00011   to you under the Apache License, Version 2.0 (the
00012   "License"); you may not use this file except in compliance
00013   with the License.  You may obtain a copy of the License at
00014 
00015       http://www.apache.org/licenses/LICENSE-2.0
00016 
00017   Unless required by applicable law or agreed to in writing, software
00018   distributed under the License is distributed on an "AS IS" BASIS,
00019   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00020   See the License for the specific language governing permissions and
00021   limitations under the License.
00022  */
00023 
00024 #include "libts.h"
00025 
00026 #include "HttpTransact.h"
00027 #include "HttpTransactHeaders.h"
00028 #include "HttpTransactCache.h"
00029 #include "time.h"
00030 #include "HTTP.h"
00031 #include "HttpCompat.h"
00032 #include "Error.h"
00033 #include "InkErrno.h"
00034 
00035 ClassAllocator<CacheLookupHttpConfig> CacheLookupHttpConfigAllocator("CacheLookupHttpConfigAllocator"); // File-scope ClassAllocator instance for CacheLookupHttpConfig objects; the string is the name handed to the allocator.
00036 
00037 CacheLookupHttpConfig global_cache_lookup_config; // Sentinel config: SelectFromAlternates() compares its argument against this object's address and, when they match, returns alternate 0 without scoring (the ICP bypass).
00038 
/**
  Locate the opaque part of an entity tag inside a raw field value.

  A leading weak-validator marker ("W/") is skipped. If what follows
  starts with a double quote, that quote is skipped as well and the tag
  ends just before the next double quote; when no closing quote exists
  the tag runs to the end of the field.

  @param raw_tag_field raw tag text; only raw_tag_field_len bytes are read.
  @param raw_tag_field_len number of bytes available in raw_tag_field.
  @param length receives the length of the stripped tag.
  @return pointer into raw_tag_field at the first byte of the stripped tag.

*/
inline static const char *
find_etag(const char *raw_tag_field, int raw_tag_field_len, int *length)
{
  const char *const field_end = raw_tag_field + raw_tag_field_len;
  const char *tag = raw_tag_field;

  // Step over the weak-validator marker when present.
  if (raw_tag_field_len >= 2 && tag[0] == 'W' && tag[1] == '/') {
    tag += 2;
  }

  int tag_len = field_end - tag;

  if (tag < field_end && tag[0] == '"') {
    // Drop the opening quote ...
    tag += 1;
    tag_len -= 1;

    // ... and cut the tag at the closing quote if there is one.
    const char *closing = (const char *) memchr(tag, '"', tag_len);
    if (closing != NULL) {
      tag_len = closing - tag;
    }
  }

  *length = tag_len;
  return tag;
}
00068 
00069 /**
00070   Match an etag raw_tag_field with a list of tags in the comma-separated
00071   string field_to_match, using strong rules.
00072 
00073 */
00074 inline static bool
00075 do_strings_match_strongly(const char *raw_tag_field,
00076                           int raw_tag_field_len, const char *comma_sep_tag_list, int comma_sep_tag_list_len)
00077 {
00078   StrList tag_list;
00079   const char *etag_start;
00080   int n, etag_length;
00081 
00082 
00083   // Can never match a weak tag with a strong compare
00084   if ((raw_tag_field_len >= 2) && (raw_tag_field[0] == 'W' && raw_tag_field[1] == '/')) {
00085     return false;
00086   }
00087   // Find the unalterated tag
00088   etag_start = find_etag(raw_tag_field, raw_tag_field_len, &etag_length);
00089 
00090   // Rip the field list into a comma-separated field list
00091   HttpCompat::parse_comma_list(&tag_list, comma_sep_tag_list, comma_sep_tag_list_len);
00092 
00093   // Loop over all the tags in the tag list
00094   for (Str * tag = tag_list.head; tag; tag = tag->next) {
00095     // If field is "*", then we got a match
00096     if ((tag->len == 1) && (tag->str[0] == '*'))
00097       return true;
00098 
00099     n = 0;
00100 
00101     if (((int) (tag->len - n) == etag_length) && (strncmp(etag_start, tag->str + n, etag_length) == 0)) {
00102       return true;
00103     }
00104   }
00105 
00106   return false;
00107 }
00108 
00109 /**
00110   Match an etag raw_tag_field with a list of tags in the comma-separated
00111   string field_to_match, using weak rules.
00112 
00113 */
00114 inline static bool
00115 do_strings_match_weakly(const char *raw_tag_field,
00116                         int raw_tag_field_len, const char *comma_sep_tag_list, int comma_sep_tag_list_len)
00117 {
00118   StrList tag_list;
00119   const char *etag_start;
00120   const char *cur_tag;
00121   int etag_length, cur_tag_len;
00122 
00123   // Find the unalterated tag
00124   etag_start = find_etag(raw_tag_field, raw_tag_field_len, &etag_length);
00125 
00126   // Rip the field list into a comma-separated field list
00127   HttpCompat::parse_comma_list(&tag_list, comma_sep_tag_list, comma_sep_tag_list_len);
00128 
00129   for (Str * tag = tag_list.head; tag; tag = tag->next) {
00130     // If field is "*", then we got a match
00131     if ((tag->len == 1) && (tag->str[0] == '*'))
00132       return true;
00133 
00134     // strip off the leading 'W/' and quotation marks from the
00135     // current tag, then compare for equality with above tag.
00136     cur_tag = find_etag(tag->str, tag->len, &cur_tag_len);
00137     if ((cur_tag_len == etag_length) && (strncmp(cur_tag, etag_start, cur_tag_len) == 0))
00138       return true;
00139   }
00140   return false;
00141 }
00142 
00143 inline static bool
00144 is_asterisk(char *s)
00145 {
00146   return ((s[0] == '*') && (s[1] == NUL));
00147 }
00148 
00149 inline static bool
00150 is_empty(char *s)
00151 {
00152   return (s[0] == NUL);
00153 }
00154 
00155 /**
00156   Given a set of alternates, select the best match.
00157 
00158   The current school of thought: quality 1st, freshness 2nd.  Loop through
00159   alternates and find the one with the highest quality factor. Then
00160   determine if it is fresh enough. If not, find the next best match. In
00161   keeping with "quality is job 1", subsequent matches will only be
00162   considered if their quality is equal to the quality of the first match.
00163 
00164   @return index in cache alternates vector.
00165 
00166 */
00167 int
00168 HttpTransactCache::SelectFromAlternates(CacheHTTPInfoVector * cache_vector,
00169                                         HTTPHdr * client_request, CacheLookupHttpConfig * http_config_params)
00170 {
00171   time_t current_age, best_age = NUM_SECONDS_IN_ONE_YEAR;
00172   time_t t_now = 0;
00173   int best_index = -1;
00174   float best_Q = -1.0;
00175   float unacceptable_Q = 0.0;
00176 
00177   int alt_count = cache_vector->count();
00178   if (alt_count == 0) {
00179     return -1;
00180   }
00181 
00182 
00183   Debug("http_match", "[SelectFromAlternates] # alternates = %d", alt_count);
00184   Debug("http_seq", "[SelectFromAlternates] %d alternates for this cached doc", alt_count);
00185   if (diags->on("http_alts")) {
00186     ACQUIRE_PRINT_LOCK()
00187       fprintf(stderr, "[alts] There are %d alternates for this request header.\n", alt_count);
00188     RELEASE_PRINT_LOCK()
00189   }
00190   // used by ICP to bypass this function
00191   if (http_config_params == &global_cache_lookup_config)
00192     return 0;
00193 
00194   if (!client_request->valid()) {
00195     return 0;
00196   }
00197   // so that plugins can make cache reads for http
00198   // docs to check if the doc exists in the cache
00199   if (!client_request->valid()) {
00200     return 0;
00201   }
00202 
00203   for (int i = 0; i < alt_count; i++) {
00204     float Q;
00205     CacheHTTPInfo *obj = cache_vector->get(i);
00206     HTTPHdr *cached_request = obj->request_get();
00207     HTTPHdr *cached_response = obj->response_get();
00208 
00209     if (!(obj->object_key_get() == zero_key)) {
00210       ink_assert(cached_request->valid());
00211       ink_assert(cached_response->valid());
00212 
00213       Q = calculate_quality_of_match(http_config_params, client_request, cached_request, cached_response);
00214 
00215       if (alt_count > 1) {
00216         if (t_now == 0)
00217           t_now = ink_cluster_time();
00218         current_age = HttpTransactHeaders::calculate_document_age(obj->request_sent_time_get(),
00219                                                                   obj->response_received_time_get(),
00220                                                                   cached_response, cached_response->get_date(), t_now);
00221         // Overflow?
00222         if (current_age < 0)
00223           current_age = NUM_SECONDS_IN_ONE_YEAR; // TODO: Should we make a different define for "max cache age" ?
00224       } else {
00225         current_age = (time_t) 0;
00226       }
00227 
00228       if (diags->on("http_alts")) {
00229         fprintf(stderr, "[alts] ---- alternate #%d (Q = %g) has these request/response hdrs:\n", i + 1, Q);
00230         char b[4096];
00231         int used, tmp, offset;
00232         int done;
00233 
00234         offset = 0;
00235         do {
00236           used = 0;
00237           tmp = offset;
00238           done = cached_request->print(b, sizeof(b) - 1, &used, &tmp);
00239           offset += used;
00240           b[used] = '\0';
00241           fprintf(stderr, "%s", b);
00242         } while (!done);
00243 
00244         offset = 0;
00245         do {
00246           used = 0;
00247           tmp = offset;
00248           done = cached_response->print(b, sizeof(b) - 1, &used, &tmp);
00249           offset += used;
00250           b[used] = '\0';
00251           fprintf(stderr, "%s", b);
00252         } while (!done);
00253       }
00254 
00255       if ((Q > best_Q) || ((Q == best_Q) && (current_age <= best_age))) {
00256         best_Q = Q;
00257         best_age = current_age;
00258         best_index = i;
00259       }
00260     }
00261   }
00262   Debug("http_seq", "[SelectFromAlternates] Chosen alternate # %d", best_index);
00263   if (diags->on("http_alts")) {
00264     ACQUIRE_PRINT_LOCK()
00265       fprintf(stderr, "[alts] and the winner is alternate number %d\n", best_index + 1);
00266     RELEASE_PRINT_LOCK()
00267   }
00268 
00269   if ((best_index != -1) && (best_Q > unacceptable_Q)) {
00270     return best_index;
00271   } else {
00272     return -1;
00273   }
00274 }
00275 
00276 /**
00277   For cached req/res and incoming req, return quality of match.
00278 
00279   The current school of thought: quality 1st, freshness 2nd.  This
00280   function takes a user agent request client_request and the two headers
00281   for a cached object (obj_client_request and obj_origin_server_response),
00282   and returns a floating point number for how well the object matches
00283   the client's request.
00284 
00285   Two factors currently affect a match: Accept headers, which filter and
00286   sort the matches, and Vary headers, which constrain whether a dynamic
00287   document matches a request.
00288 
00289   Note: According to the specs, specific matching takes precedence over
00290   wildcard matching. For example, listed in precedence: text/html;q=0.5,
00291   text/ascii, image/'*', '*'/'*'. So, ideally, in choosing between
00292   alternates, we should given preference to those which matched
00293   specifically over those which matched with wildcards.
00294 
00295   @return quality (-1: no match, 0..1: poor..good).
00296 
00297 */
00298 float
00299 HttpTransactCache::calculate_quality_of_match(CacheLookupHttpConfig * http_config_param,
00300                                               HTTPHdr * client_request,
00301                                               HTTPHdr * obj_client_request,
00302                                               HTTPHdr * obj_origin_server_response)
00303 {
00304   // For PURGE requests, any alternate is good really.
00305   if (client_request->method_get_wksidx() == HTTP_WKSIDX_PURGE)
00306     return (float)1.0;
00307 
00308   // Now calculate a quality based on all sorts of logic
00309   float q[4], Q;
00310   MIMEField *accept_field;
00311   MIMEField *cached_accept_field;
00312   MIMEField *content_field;
00313 
00314   // vary_skip_mask is used as a bitmask, 0b01 or 0b11 depending on the presence of Vary.
00315   // This allows us to AND each of the four configs against it; Table:
00316   //
00317   //   Conf   Mask          Conf   Mask         Conf   Mask
00318   //   ----   ----          ----   ----         ----   ----
00319   //    00  &  01 == false   01  &  01 == true   10  &  01 == false
00320   //    00  &  11 == false   01  &  11 == true   10  &  11 == true
00321   //
00322   // A true value means the check for that config can be skipped. Note: from a users
00323   // perspective, the configs are simply 0, 1 or 2.
00324   unsigned int vary_skip_mask = obj_origin_server_response->presence(MIME_PRESENCE_VARY) ? 1 : 3;
00325 
00326   // Make debug output happy
00327   q[1] = (q[2] = (q[3] = -2.0));
00328 
00329   // This content_field is used for a couple of headers, so get it first
00330   content_field = obj_origin_server_response->field_find(MIME_FIELD_CONTENT_TYPE, MIME_LEN_CONTENT_TYPE);
00331 
00332   // Accept: header
00333   if (http_config_param->ignore_accept_mismatch & vary_skip_mask) {
00334     // Ignore it
00335     q[0] = 1.0;
00336   } else {
00337     accept_field = client_request->field_find(MIME_FIELD_ACCEPT, MIME_LEN_ACCEPT);
00338 
00339     // A NULL Accept or a NULL Content-Type field are perfect matches.
00340     if (content_field == NULL || accept_field == NULL) {
00341       q[0] = 1.0; // TODO: Why should this not be 1.001 ?? // leif
00342     } else {
00343       q[0] = calculate_quality_of_accept_match(accept_field, content_field);
00344     }
00345   }
00346 
00347   if (q[0] >= 0.0) {
00348     // Accept-Charset: header
00349     if (http_config_param->ignore_accept_charset_mismatch & vary_skip_mask) {
00350       // Ignore it
00351       q[1] = 1.0;
00352     } else {
00353       accept_field = client_request->field_find(MIME_FIELD_ACCEPT_CHARSET, MIME_LEN_ACCEPT_CHARSET);
00354       cached_accept_field = obj_client_request->field_find(MIME_FIELD_ACCEPT_CHARSET, MIME_LEN_ACCEPT_CHARSET);
00355 
00356       // absence in both requests counts as exact match
00357       if (accept_field == NULL && cached_accept_field == NULL) {
00358         Debug("http_alternate", "Exact match for ACCEPT CHARSET (not in request nor cache)");
00359         q[1] = 1.001; //slightly higher weight to this guy
00360       } else {
00361         q[1] = calculate_quality_of_accept_charset_match(accept_field, content_field, cached_accept_field);
00362       }
00363     }
00364 
00365     if (q[1] >= 0.0) {
00366       // Accept-Encoding: header
00367       if (http_config_param->ignore_accept_encoding_mismatch & vary_skip_mask) {
00368         // Ignore it
00369         q[2] = 1.0;
00370       } else {
00371         accept_field = client_request->field_find(MIME_FIELD_ACCEPT_ENCODING, MIME_LEN_ACCEPT_ENCODING);
00372         content_field = obj_origin_server_response->field_find(MIME_FIELD_CONTENT_ENCODING, MIME_LEN_CONTENT_ENCODING);
00373         cached_accept_field = obj_client_request->field_find(MIME_FIELD_ACCEPT_ENCODING, MIME_LEN_ACCEPT_ENCODING);
00374 
00375         // absence in both requests counts as exact match
00376         if (accept_field == NULL && cached_accept_field == NULL) {
00377           Debug("http_alternate", "Exact match for ACCEPT ENCODING (not in request nor cache)");
00378           q[2] = 1.001; //slightly higher weight to this guy
00379         } else {
00380           q[2] = calculate_quality_of_accept_encoding_match(accept_field, content_field, cached_accept_field);
00381         }
00382       }
00383 
00384       if (q[2] >= 0.0) {
00385         // Accept-Language: header
00386         if (http_config_param->ignore_accept_language_mismatch & vary_skip_mask) {
00387           // Ignore it
00388           q[3] = 1.0;
00389         } else {
00390           accept_field = client_request->field_find(MIME_FIELD_ACCEPT_LANGUAGE, MIME_LEN_ACCEPT_LANGUAGE);
00391           content_field = obj_origin_server_response->field_find(MIME_FIELD_CONTENT_LANGUAGE, MIME_LEN_CONTENT_LANGUAGE);
00392           cached_accept_field = obj_client_request->field_find(MIME_FIELD_ACCEPT_LANGUAGE, MIME_LEN_ACCEPT_LANGUAGE);
00393 
00394           // absence in both requests counts as exact match
00395           if (accept_field == NULL && cached_accept_field == NULL) {
00396             Debug("http_alternate", "Exact match for ACCEPT LANGUAGE (not in request nor cache)");
00397             q[3] = 1.001; //slightly higher weight to this guy
00398           } else {
00399             q[3] = calculate_quality_of_accept_language_match(accept_field, content_field, cached_accept_field);
00400           }
00401         }
00402       }
00403     }
00404   }
00405 
00406   // final quality is minimum Q, or -1, if some match failed //
00407   Q = ((q[0] < 0) || (q[1] < 0) || (q[2] < 0) || (q[3] < 0)) ? -1.0 : q[0] * q[1] * q[2] * q[3];
00408 
00409   Debug("http_match", "    CalcQualityOfMatch: Accept match = %g", q[0]);
00410   Debug("http_seq", "    CalcQualityOfMatch: Accept match = %g", q[0]);
00411   Debug("http_alternate", "Content-Type and Accept %f", q[0]);
00412 
00413   Debug("http_match", "    CalcQualityOfMatch: AcceptCharset match = %g", q[1]);
00414   Debug("http_seq", "    CalcQualityOfMatch: AcceptCharset match = %g", q[1]);
00415   Debug("http_alternate", "Content-Type and Accept-Charset %f", q[1]);
00416 
00417   Debug("http_match", "    CalcQualityOfMatch: AcceptEncoding match = %g", q[2]);
00418   Debug("http_seq", "    CalcQualityOfMatch: AcceptEncoding match = %g", q[2]);
00419   Debug("http_alternate", "Content-Encoding and Accept-Encoding %f", q[2]);
00420 
00421   Debug("http_match", "    CalcQualityOfMatch: AcceptLanguage match = %g", q[3]);
00422   Debug("http_seq", "    CalcQualityOfMatch: AcceptLanguage match = %g", q[3]);
00423   Debug("http_alternate", "Content-Language and Accept-Language %f", q[3]);
00424 
00425   Debug("http_alternate", "Mult's Quality Factor: %f", Q);
00426   Debug("http_alternate", "----------End of Alternate----------");
00427 
00428   int force_alt = 0;
00429 
00430   if (Q > 0.0) {
00431     APIHook *hook;
00432     HttpAltInfo info;
00433     float qvalue;
00434 
00435     hook = http_global_hooks->get(TS_HTTP_SELECT_ALT_HOOK);
00436     if (hook) {
00437       info.m_client_req.copy_shallow(client_request);
00438       info.m_cached_req.copy_shallow(obj_client_request);
00439       info.m_cached_resp.copy_shallow(obj_origin_server_response);
00440       qvalue = 1.0;
00441 
00442       while (hook) {
00443         info.m_qvalue = 1.0;
00444         hook->invoke(TS_EVENT_HTTP_SELECT_ALT, &info);
00445         hook = hook->m_link.next;
00446         if (info.m_qvalue < 0.0) {
00447           info.m_qvalue = 0.0;
00448         } else if (info.m_qvalue > 1.0) {
00449           if (info.m_qvalue == FLT_MAX)
00450             force_alt = 1;
00451           info.m_qvalue = 1.0;
00452         }
00453         qvalue *= info.m_qvalue;
00454       }
00455       Q *= qvalue;
00456 
00457       // Clear out any SDK allocated values from the
00458       //   hdr handles
00459       info.m_client_req.clear();
00460       info.m_cached_req.clear();
00461       info.m_cached_resp.clear();
00462     }
00463   }
00464 
00465   if (Q >= 0.0 && !force_alt ) {                 // make sense to check 'variability' only if Q >= 0.0
00466     // set quality to -1, if cached copy would vary for this request //
00467     Variability_t variability = CalcVariability(http_config_param, client_request,
00468                                                 obj_client_request, obj_origin_server_response);
00469 
00470     if (variability != VARIABILITY_NONE) {
00471       Q = -1.0;
00472     }
00473 
00474     Debug("http_match", "    CalcQualityOfMatch: CalcVariability says variability = %d",
00475           (variability != VARIABILITY_NONE));
00476     Debug("http_seq", "    CalcQualityOfMatch: CalcVariability says variability = %d",
00477           (variability != VARIABILITY_NONE));
00478     Debug("http_match", "    CalcQualityOfMatch: Returning final Q = %g", Q);
00479     Debug("http_seq", "    CalcQualityOfMatch: Returning final Q = %g", Q);
00480   }
00481 
00482   return Q;
00483 }
00484 
00485 /**
00486   Match request Accept with response Content-Type.
00487 
00488   If the Accept field mime-type value is *, do not attempt to match,
00489   but note the q value for the wildcard match. If the type is not *,
00490   but the subtype is * and the Accept type and Content type match,
00491   again do not attempt to match, but note the q value. If neither of
00492   these two cases, match, keeping track of the highest q value for the
00493   matches. At the end of the loop over the Accept header field values,
00494   if the highest q value is -1.0 (there was no specific match), if there
00495   was a wildcard subtype match, set the q value to the wildcard subtype q
00496   value. If there is still no match, and there is a wildcard type match,
00497   set the q value to the wildcard type q value.
00498 
00499   We allow no Content-Type headers in responses to match with quality 1.0.
00500 
00501   @return quality (-1: no match, 0..1: poor..good).
00502 
00503 */
00504 static inline bool
00505 do_content_types_match(char *type1, char *subtype1, char *type2, char *subtype2)
00506 {
00507   return ((is_asterisk(type1) ||
00508            is_empty(type1) ||
00509            (strcasecmp(type1, type2) == 0)) &&
00510           (is_asterisk(subtype1) || is_empty(subtype1) || (strcasecmp(subtype1, subtype2) == 0)));
00511 }
00512 
00513 float
00514 HttpTransactCache::calculate_quality_of_accept_match(MIMEField * accept_field, MIMEField * content_field)
00515 {
00516   float q = -1.0;
00517   const char *c_raw, *a_raw;
00518   int c_raw_len, a_raw_len;
00519   char c_type[32], c_subtype[32];
00520   Str *a_value;
00521   StrList c_param_list, a_values_list;
00522   bool wildcard_type_present = false;
00523   bool wildcard_subtype_present = false;
00524   float wildcard_type_q = 1.0;
00525   float wildcard_subtype_q = 1.0;
00526 
00527   ink_assert((accept_field != NULL) && (content_field != NULL));
00528 
00529   // Extract the content-type field value before the semicolon.
00530   // This has to be done just once because assuming single
00531   // content-type in document. If more than one content
00532   // type, will have to do as in content-language, content-
00533   // encoding matching where we loop over both accept and
00534   // content-type fields.
00535 
00536   c_raw = content_field->value_get(&c_raw_len);
00537   HttpCompat::parse_semicolon_list(&c_param_list, c_raw, c_raw_len);
00538   Str *c_param = c_param_list.head;
00539 
00540   if (!c_param) {
00541     return (1.0);
00542   }
00543   // Parse the type and subtype of the Content-Type field.
00544   HttpCompat::parse_mime_type(c_param->str, c_type, c_subtype, sizeof(c_type), sizeof(c_subtype));
00545 
00546   // Now loop over Accept field values.
00547   // TODO: Should we check the return value (count) from this?
00548   accept_field->value_get_comma_list(&a_values_list);
00549 
00550   for (a_value = a_values_list.head; a_value; a_value = a_value->next) {
00551     // Get the raw string to the current comma-sep Accept field value
00552     a_raw = a_value->str;
00553     a_raw_len = a_value->len;
00554 
00555     // Extract the field value before the semicolon
00556     StrList a_param_list;
00557     HttpCompat::parse_semicolon_list(&a_param_list, a_raw, a_raw_len);
00558 
00559     // Read the next type/subtype media-range
00560     Str *a_param = a_param_list.head;
00561     if (!a_param)
00562       continue;
00563 
00564     // Parse the type and subtype of the Accept field
00565     char a_type[32], a_subtype[32];
00566     HttpCompat::parse_mime_type(a_param->str, a_type, a_subtype, sizeof(a_type), sizeof(a_subtype));
00567 
00568 //      printf("matching Content-type; '%s/%s' with Accept value '%s/%s'\n",
00569 //             c_type,c_subtype,a_type,a_subtype);
00570 
00571     // Is there a wildcard in the type or subtype?
00572     if (is_asterisk(a_type)) {
00573       wildcard_type_present = true;
00574       wildcard_type_q = HttpCompat::find_Q_param_in_strlist(&a_param_list);
00575     } else if (is_asterisk(a_subtype) && (strcasecmp(a_type, c_type) == 0)) {
00576       wildcard_subtype_present = true;
00577       wildcard_subtype_q = HttpCompat::find_Q_param_in_strlist(&a_param_list);
00578     } else {
00579 
00580       // No wildcard. Do explicit matching of accept and content values.
00581       if (do_content_types_match(a_type, a_subtype, c_type, c_subtype)) {
00582 
00583         float tq;
00584         tq = HttpCompat::find_Q_param_in_strlist(&a_param_list);
00585         q = (tq > q ? tq : q);
00586       }
00587     }
00588   }
00589 
00590   // At this point either there is an explicit match, in
00591   // which case q will not be -1.0 and will be returned.
00592   // If there was no explicit match, but the accept field
00593   // had wildcards, return the wildcard match q value.
00594 
00595   // No explicit match, but wildcard subtype match
00596   if ((q == -1.0) && (wildcard_subtype_present == true)) {
00597     q = wildcard_subtype_q;
00598   }
00599   // No explicit match, but wildcard type match.
00600   if ((q == -1.0) && (wildcard_type_present == true)) {
00601     q = wildcard_type_q;
00602   }
00603   return (q);
00604 }
00605 
00606 /**
00607   Match request Accept-Charset with response Content-Type.
00608 
00609   Extract the response charset from the Content-Type field - the charset
00610   is after the semicolon. Loop through the charsets in the request's
00611   Accept-Charset field. If the Accept-Charset value is a wildcard, do not
00612   attempt to match. Otherwise match and note the highest q value. If after
00613   the loop the q value is -1, indicating no match, then if Accept-Charset
00614   had a wildcard, allow it to match - setting q to the wildcard q value.
00615   If there is still no match and the Content-Type was the default charset,
00616   allow a match with a q value of 1.0.
00617 
00618   We allow no Content-Type headers in responses to match with quality 1.0.
00619 
00620   @return quality (-1: no match, 0..1: poor..good).
00621 
00622 */
00623 static inline bool
00624 does_charset_match(char *charset1, char *charset2)
00625 {
00626   return (is_asterisk(charset1) || is_empty(charset1) || (strcasecmp(charset1, charset2) == 0));
00627 }
00628 
00629 
00630 float
00631 HttpTransactCache::calculate_quality_of_accept_charset_match(MIMEField * accept_field,
00632                                                              MIMEField * content_field, MIMEField * cached_accept_field)
00633 {
00634   float q = -1.0;
00635   const char *c_raw, *a_raw, *ca_raw;
00636   int c_raw_len, a_raw_len, ca_raw_len;
00637   StrList a_values_list;
00638   Str *a_value;
00639   char c_charset[128];
00640   char *a_charset;
00641   int a_charset_len;
00642   const char *default_charset = "utf-8";
00643   bool wildcard_present = false;
00644   float wildcard_q = 1.0;
00645 
00646   // prefer exact matches
00647   if (accept_field && cached_accept_field) {
00648     a_raw = accept_field->value_get(&a_raw_len);
00649     ca_raw = cached_accept_field->value_get(&ca_raw_len);
00650     if (a_raw && ca_raw && a_raw_len == ca_raw_len && !strncmp(a_raw, ca_raw, a_raw_len)) {
00651       Debug("http_alternate", "Exact match for ACCEPT CHARSET");
00652       return (float) 1.001;     //slightly higher weight to this guy
00653     }
00654   }
00655   // return match if either ac or ct is missing
00656   // this check is different from accept-encoding
00657   if (accept_field == NULL || content_field == NULL) {
00658     return (float) 1.0;
00659   }
00660   // get the charset of this content-type //
00661   c_raw = content_field->value_get(&c_raw_len);
00662   if (!HttpCompat::lookup_param_in_semicolon_string(c_raw, c_raw_len, "charset", c_charset, sizeof(c_charset) - 1)) {
00663     ink_strlcpy(c_charset, default_charset, sizeof(c_charset));
00664   }
00665   // Now loop over Accept-Charset field values.
00666   // TODO: Should we check the return value (count) from this?
00667   accept_field->value_get_comma_list(&a_values_list);
00668 
00669   for (a_value = a_values_list.head; a_value; a_value = a_value->next) {
00670     // Get the raw string to the current comma-sep Accept-Charset field value
00671     a_raw = a_value->str;
00672     a_raw_len = a_value->len;
00673 
00674     // Extract the field value before the semicolon
00675     StrList a_param_list(true); // FIXME: copies & NUL-terminates strings
00676     HttpCompat::parse_semicolon_list(&a_param_list, a_raw, a_raw_len);
00677 
00678     if (a_param_list.head) {
00679       a_charset = (char *) a_param_list.head->str;
00680       a_charset_len = a_param_list.head->len;
00681     } else
00682       continue;
00683 
00684 //      printf("matching Content-type; '%s' with Accept-Charset value '%s'\n",
00685 //             c_charset,a_charset);
00686 
00687     // dont match wildcards //
00688     if ((a_charset_len == 1) && (a_charset[0] == '*')) {
00689       wildcard_present = true;
00690       wildcard_q = HttpCompat::find_Q_param_in_strlist(&a_param_list);
00691     } else {
00692       // if type matches, get the Q factor //
00693       if (does_charset_match(a_charset, c_charset)) {
00694         float tq;
00695         tq = HttpCompat::find_Q_param_in_strlist(&a_param_list);
00696         q = (tq > q ? tq : q);
00697       }
00698     }
00699   }
00700 
00701   // if no match and wildcard present, allow match //
00702   if ((q == -1.0) && (wildcard_present == true)) {
00703     q = wildcard_q;
00704   }
00705   // if no match, still allow default_charset //
00706   if ((q == -1) && (strcasecmp(c_charset, default_charset) == 0)) {
00707     q = 1.0;
00708   }
00709   return (q);
00710 }
00711 
00712 /**
00713   Match request Accept-Encoding with response Content-Encoding.
00714 
00715   First determine if the cached document has identity encoding. This
00716   can be the case if the document has no Content-Encoding header field
00717   or if the Content-Encoding field explicitly lists "identity". Then,
00718   if there is no Accept-Encoding header and the cached response uses
00719   identity encoding return a match. If there is no Accept-Encoding header
00720   and the cached document uses some other form of encoding, also return
00721   a match, albeit one with a slightly lower q value (0.999).
00722 
00723   If none of the above cases occurs, compare Content-Encoding with
00724   Accept-Encoding, by looping over the Content-Encoding values (there
00725   may be more than one, since a document may be gzipped, followed by
00726   compressed, etc.). If any of the Content-Encoding values are not in
00727   the Accept-Encoding header, exit the loop. Before exiting, if there
00728   has not been a match, match a wildcard in the Accept-Encoding field
00729   and if still no match, match an identity encoding - this may happen
00730   if the request did not list "identity" in the Accept-Encoding field,
00731   but the response listed it in the Content-Encoding field. In this last
00732   case, match with a q value of 0.001.
00733 
00734   The return values are:
00735     - -1.0: Doesn't match
00736     - 0.999: No Accept-Encoding header, and Content-Encoding does not list
00737       "identity".
00738     - 0.001: Accept-Encoding was not empty, but Content-Encoding was
00739       either empty or explicitly listed "identity".
00740     - 0.0..1.0: Matches with a quality between 0 (poor) and 1 (good).
00741 
00742   @return quality (-1: no match, 0..1: poor..good).
00743 
00744 */
00745 static inline bool
00746 does_encoding_match(char *enc1, const char *enc2)
00747 {
00748   if (is_asterisk(enc1) || ((strcasecmp(enc1, enc2)) == 0))
00749     return true;
00750 
00751   //rfc2616,sec3.5: applications SHOULD consider "x-gzip" and "x-compress" to be
00752   //                equivalent to "gzip" and "compress" respectively
00753   if ((!strcasecmp(enc1, "gzip") && !strcasecmp(enc2, "x-gzip")) ||
00754       (!strcasecmp(enc1, "x-gzip") && !strcasecmp(enc2, "gzip")) ||
00755       (!strcasecmp(enc1, "compress") && !strcasecmp(enc2, "x-compress")) ||
00756       (!strcasecmp(enc1, "x-compress") && !strcasecmp(enc2, "compress"))
00757     ) {
00758     return true;
00759   }
00760 
00761   return false;
00762 }
00763 
00764 ContentEncoding
00765 HttpTransactCache::match_gzip(MIMEField * accept_field)
00766 {
00767   Str *a_value;
00768   const char *a_raw;
00769   StrList a_values_list;
00770   if (!accept_field) {
00771     return NO_GZIP;
00772   }
00773   // TODO: Should we check the return value (count) here?
00774   accept_field->value_get_comma_list(&a_values_list);
00775 
00776   for (a_value = a_values_list.head; a_value; a_value = a_value->next) {
00777     char *a_encoding = NULL;
00778     StrList a_param_list;
00779     a_raw = a_value->str;
00780     HttpCompat::parse_semicolon_list(&a_param_list, a_raw);
00781     if (a_param_list.head)
00782       a_encoding = (char *) a_param_list.head->str;
00783     else
00784       continue;
00785     float q;
00786     q = HttpCompat::find_Q_param_in_strlist(&a_param_list);
00787     if (q != 0 && does_encoding_match(a_encoding, "gzip")) {
00788       return GZIP;
00789     }
00790   }
00791   return NO_GZIP;
00792 }
00793 
00794 // TODO: This used to take a length for c_raw, but that was never used, so removed it from the prototype.
00795 static inline bool
00796 match_accept_content_encoding(const char *c_raw,
00797                               MIMEField * accept_field, bool * wildcard_present, float *wildcard_q, float *q)
00798 {
00799   Str *a_value;
00800   const char *a_raw;
00801   StrList a_values_list;
00802 
00803   if (!accept_field) {
00804     return false;
00805   }
00806   // loop over Accept-Encoding elements, looking for match //
00807   // TODO: Should we check the return value (count) here?
00808   accept_field->value_get_comma_list(&a_values_list);
00809 
00810   for (a_value = a_values_list.head; a_value; a_value = a_value->next) {
00811     char *a_encoding = NULL;
00812     StrList a_param_list;
00813 
00814     // Get the raw string to the current comma-sep Accept-Charset field value
00815     a_raw = a_value->str;
00816 
00817     // break Accept-Encoding piece into semi-colon separated parts //
00818     HttpCompat::parse_semicolon_list(&a_param_list, a_raw);
00819     if (a_param_list.head)
00820       a_encoding = (char *) a_param_list.head->str;
00821     else
00822       continue;
00823 
00824     if (is_asterisk(a_encoding)) {
00825       *wildcard_present = true;
00826       *wildcard_q = HttpCompat::find_Q_param_in_strlist(&a_param_list);
00827       return true;
00828     } else if (does_encoding_match(a_encoding, c_raw)) {
00829       // if type matches, get the Q factor //
00830       float tq;
00831       tq = HttpCompat::find_Q_param_in_strlist(&a_param_list);
00832       *q = (tq > *q ? tq : *q);
00833 
00834       return true;
00835     } else {
00836       // so this c_raw value did not match this a_raw value. big deal.
00837     }
00838   }
00839   return false;
00840 }
00841 
00842 float
00843 HttpTransactCache::calculate_quality_of_accept_encoding_match(MIMEField * accept_field,
00844                                                               MIMEField * content_field,
00845                                                               MIMEField * cached_accept_field)
00846 {
00847 
00848   float q = -1.0;
00849   bool is_identity_encoding = false;
00850   const char *c_encoding;
00851   int c_encoding_len;
00852   bool wildcard_present = false;
00853   float wildcard_q = 1.0;
00854   StrList c_values_list;
00855   Str *c_value;
00856   const char *a_raw, *ca_raw;
00857   int a_raw_len, ca_raw_len;
00858 
00859 
00860   // prefer exact matches
00861   if (accept_field && cached_accept_field) {
00862     a_raw = accept_field->value_get(&a_raw_len);
00863     ca_raw = cached_accept_field->value_get(&ca_raw_len);
00864     if (a_raw && ca_raw && a_raw_len == ca_raw_len && !strncmp(a_raw, ca_raw, a_raw_len)) {
00865       Debug("http_alternate", "Exact match for ACCEPT ENCODING");
00866       return (float) 1.001;     //slightly higher weight to this guy
00867     }
00868   }
00869   // return match if both ae and ce are missing
00870   // this check is different from accept charset
00871   if (accept_field == NULL && content_field == NULL) {
00872     return (float) 1.0;
00873   }
00874   // if no Content-Encoding, treat as "identity" //
00875   if (!content_field) {
00876     Debug("http_match", "[calculate_quality_accept_encoding_match]: " "response hdr does not have content-encoding.");
00877     is_identity_encoding = true;
00878   } else {
00879     // TODO: Should we check the return value (count) here?
00880     content_field->value_get_comma_list(&c_values_list);
00881 
00882     content_field->value_get(&c_encoding_len);
00883     if (c_encoding_len == 0) {
00884       is_identity_encoding = true;
00885     } else {
00886       // does this document have the identity encoding? //
00887       for (c_value = c_values_list.head; c_value; c_value = c_value->next) {
00888         c_encoding = c_value->str;
00889         c_encoding_len = c_value->len;
00890         if ((c_encoding_len >= 8) && (strncasecmp(c_encoding, "identity", 8) == 0)) {
00891           is_identity_encoding = true;
00892           break;
00893         }
00894       }
00895     }
00896   }
00897 
00898   ///////////////////////////////////////////////////////////////////////
00899   // if no Accept-Encoding header, only match identity                 //
00900   //   The 1.1 spec says servers MAY assume that clients will accept   //
00901   //   any encoding if no header is sent.  Unforntunately, this does   //
00902   //   not work 1.0 clients & is particularly thorny when the proxy    //
00903   //   created the enconding as the result of a transform.  Http 1.1   //
00904   //   purists would say that if proxy encodes something it's really   //
00905   //   a transfer-encoding and not a content-encoding but again this   //
00906   //   causes problems with 1.0 clients                                //
00907   ///////////////////////////////////////////////////////////////////////
00908   if (!accept_field) {
00909     if (is_identity_encoding) {
00910       if (!cached_accept_field) {
00911         return ((float) 1.0);
00912       } else {
00913         return ((float) 0.001);
00914       }
00915     } else {
00916       return ((float) -1.0);
00917     }
00918   }
00919 
00920   // handle special case where no content-encoding in response, but
00921   // request has an accept-encoding header, possibly with the identity
00922   // field, with a q value;
00923   if (!content_field) {
00924     if (!match_accept_content_encoding("identity",
00925                                        accept_field, &wildcard_present, &wildcard_q, &q)) {
00926 
00927       // CE was not returned, and AE does not have identity
00928       if (match_gzip(accept_field) == GZIP && match_gzip(cached_accept_field) == GZIP) {
00929         return (float) 1.0;
00930       }
00931       goto encoding_wildcard;
00932     }
00933     //use q from identity match
00934 
00935   } else {
00936     // "Accept-encoding must correctly handle multiple content encoding"
00937     // The combined quality factor is the product of all quality factors.
00938     // (Note that there may be other possible choice, eg, min(),
00939     // but I think multiplication is the best.)
00940     // For example, if "content-encoding: a, b", and quality factors
00941     // of a and b (in accept-encoding header) are q_a and q_b, resp,
00942     // then the combined quality factor is (q_a * q_b).
00943     // If any one of the content-encoding is not matched,
00944     // then the q value will not be changed.
00945     float combined_q = 1.0;
00946     for (c_value = c_values_list.head; c_value; c_value = c_value->next) {
00947       float this_q = -1.0;
00948       if (!match_accept_content_encoding(c_value->str,
00949                                          accept_field, &wildcard_present, &wildcard_q, &this_q)) {
00950         goto encoding_wildcard;
00951       }
00952       combined_q *= this_q;
00953     }
00954     q = combined_q;
00955   }
00956 
00957 encoding_wildcard:
00958   // match the wildcard now //
00959   if ((q == -1.0) && (wildcard_present == true)) {
00960     q = wildcard_q;
00961   }
00962   /////////////////////////////////////////////////////////////////////////
00963   // there was an Accept-Encoding, but it didn't match anything, at      //
00964   // any quality level --- if this is an identity-coded document, that's //
00965   // still okay, but otherwise, this is just not a match at all.         //
00966   /////////////////////////////////////////////////////////////////////////
00967   if ((q == -1.0) && is_identity_encoding) {
00968     if (match_gzip(accept_field) == GZIP) {
00969       if (match_gzip(cached_accept_field) == GZIP) {
00970         return (float) 1.0;
00971       } else {
00972         // always try to fetch GZIP content if we have not tried sending AE before
00973         return (float) -1.0;
00974       }
00975     } else if (cached_accept_field && match_gzip(cached_accept_field) != GZIP) {
00976       return (float) 0.001;
00977     } else {
00978       return (float) -1.0;
00979     }
00980   }
00981 //      q = (float)-1.0;
00982   return (q);
00983 }
00984 
00985 /**
00986   Match request Accept-Language with response Content-Language.
00987 
00988   Language matching is a little more complicated because of "ranges".
00989   First, no Accept-Language header or no Content-Language headers match
00990   with q of 1. Otherwise, loop over Content-Languages. If there is a
00991   match with a language in the Accept-Language field, keep track of
00992   how many characters were in the value. The q value for the longest
00993   range is returned. If there was no explicit match or a mismatch,
00994   try wildcard matching.
00995 
00996   @return quality (-1: no match, 0..1: poor..good).
00997 
00998 */
00999 static inline bool
01000 does_language_range_match(const char *range1, const char *range2)
01001 {
01002   while (*range1 && *range2 && (ParseRules::ink_tolower(*range1) == ParseRules::ink_tolower(*range2))) {
01003     range1 += 1;
01004     range2 += 1;
01005   }
01006 
01007   // matches if range equals tag, or if range is a lang prefix of tag
01008   if ((((*range1 == NUL) && (*range2 == NUL)) || ((*range1 == NUL) && (*range2 == '-')))) {
01009     return true;
01010   }
01011 
01012   return false;
01013 }
01014 
01015 static inline bool
01016 match_accept_content_language(const char *c_raw,
01017                               MIMEField * accept_field,
01018                               bool * wildcard_present,
01019                               float *wildcard_q, float *q, int *a_range_length)
01020 {
01021   const char *a_raw;
01022   int a_raw_len;
01023   StrList a_values_list;
01024   Str *a_value;
01025 
01026   ink_assert(accept_field != NULL);
01027 
01028   // loop over each language-range pattern //
01029   // TODO: Should we check the return value (count) here?
01030   accept_field->value_get_comma_list(&a_values_list);
01031 
01032   for (a_value = a_values_list.head; a_value; a_value = a_value->next) {
01033     a_raw = a_value->str;
01034     a_raw_len = a_value->len;
01035 
01036     char *a_range;
01037     StrList a_param_list;
01038 
01039     HttpCompat::parse_semicolon_list(&a_param_list, a_raw, a_raw_len);
01040     float tq = HttpCompat::find_Q_param_in_strlist(&a_param_list);
01041 
01042     /////////////////////////////////////////////////////////////////////
01043     // This algorithm is a bit wierd --- the resulting Q factor is     //
01044     // the Q value corresponding to the LONGEST range field that       //
01045     // matched, or if none matched, then the Q value of any asterisk.  //
01046     // Also, if the lang value is "", meaning that no Content-Language //
01047     // was specified, this document matches all accept headers.        //
01048     /////////////////////////////////////////////////////////////////////
01049     if (a_param_list.head) {
01050       a_range = (char *) a_param_list.head->str;
01051       *a_range_length = a_param_list.head->len;
01052     } else {
01053       continue;
01054     }
01055 
01056     if (is_asterisk(a_range)) {
01057       *wildcard_present = true;
01058       *wildcard_q = HttpCompat::find_Q_param_in_strlist(&a_param_list);
01059       return true;
01060     } else if (does_language_range_match(a_range, c_raw)) {
01061       *q = tq;
01062 // This is disabled, so removed max_a_range_length from prototype
01063 //          if (*a_range_length > *max_a_range_length) {
01064 //              *q = tq;
01065 //              *max_a_range_length = *a_range_length;
01066 //          }
01067       return true;
01068     } else {
01069     }
01070   }
01071 
01072   return false;
01073 }
01074 
01075 
01076 // FIX: This code is icky, and i suspect wrong in places, particularly
01077 //      because parts of match_accept_content_language are commented out.
01078 //      It looks like lots of hacks were done.  The code should probably
01079 //      be updated to use the code in HttpCompat::match_accept_language.
01080 
01081 float
01082 HttpTransactCache::calculate_quality_of_accept_language_match(MIMEField * accept_field,
01083                                                               MIMEField * content_field,
01084                                                               MIMEField * cached_accept_field)
01085 {
01086   float q = -1.0;
01087   int a_range_length;
01088   bool wildcard_present = false;
01089   float wildcard_q = 1.0;
01090   float min_q = 1.0;
01091   bool match_found = false;
01092   StrList c_values_list;
01093   Str *c_value;
01094   const char *c_raw, *a_raw, *ca_raw;
01095   int a_raw_len, ca_raw_len;
01096 
01097   // Bug 2393700 prefer exact matches
01098   if (accept_field && cached_accept_field) {
01099     a_raw = accept_field->value_get(&a_raw_len);
01100     ca_raw = cached_accept_field->value_get(&ca_raw_len);
01101     if (a_raw && ca_raw && a_raw_len == ca_raw_len && !strncmp(a_raw, ca_raw, a_raw_len)) {
01102       Debug("http_alternate", "Exact match for ACCEPT LANGUAGE");
01103       return (float) 1.001;     //slightly higher weight to this guy
01104     }
01105   }
01106 
01107   if (!accept_field) {
01108     return (1.0);
01109   }
01110   // handle special case where no content-language in response, but
01111   // request has an accept-language header, possibly with the identity
01112   // field, with a q value;
01113 
01114   if (!content_field) {
01115     if (match_accept_content_language("identity",
01116                                       accept_field,
01117                                       &wildcard_present, &wildcard_q, &q, &a_range_length)) {
01118       goto language_wildcard;
01119     }
01120     Debug("http_match", "[calculate_quality_accept_language_match]: " "response hdr does not have content-language.");
01121     return (1.0);
01122   }
01123 
01124   // loop over content languages //
01125   // TODO: Should we check the return value (count) here?
01126   content_field->value_get_comma_list(&c_values_list);
01127   for (c_value = c_values_list.head; c_value; c_value = c_value->next) {
01128     c_raw = c_value->str;
01129 
01130     // get Content-Language value //
01131     if (match_accept_content_language(c_raw,
01132                                       accept_field,
01133                                       &wildcard_present, &wildcard_q, &q, &a_range_length)) {
01134       min_q = (min_q < q ? min_q : q);
01135       match_found = true;
01136     }
01137   }
01138   if (match_found) {
01139     q = min_q;
01140   } else {
01141     q = -1.0;
01142   }
01143 
01144 language_wildcard:
01145   // match the wildcard now //
01146   if ((q == -1.0) && (wildcard_present == true)) {
01147     q = wildcard_q;
01148   }
01149   return (q);
01150 }
01151 
01152 /**
01153   If the cached object contains a Vary header, then the object only
01154   matches if ALL of the headers named in Vary are present in the new
01155   request, and these match the headers in the stored request.  We relax
01156   this rule to allow matches if neither the current nor original client
01157   headers contained a varying header. This is different from what is
01158   stated in the specs.
01159 
01160 */
01161 Variability_t
01162 HttpTransactCache::CalcVariability(CacheLookupHttpConfig * http_config_params, HTTPHdr * client_request,
01163                                    HTTPHdr * obj_client_request, HTTPHdr * obj_origin_server_response)
01164 {
01165   ink_assert(http_config_params != NULL);
01166   ink_assert(client_request != NULL);
01167   ink_assert(obj_client_request != NULL);
01168   ink_assert(obj_origin_server_response != NULL);
01169 
01170   Variability_t variability = VARIABILITY_NONE;
01171   if (http_config_params->cache_enable_default_vary_headers ||
01172       obj_origin_server_response->presence(MIME_PRESENCE_VARY)) {
01173     ///////////////////////////////////////////////////////////////////////
01174     // If the origin server sent a Vary header in the response, use that //
01175     // Vary, otherwise use the default. Ivry adds: However if the origin //
01176     // server was a non-compliant 1.1 and did not send a Vary header,    //
01177     // treat as 1.0 with no Vary header.                                 //
01178     ///////////////////////////////////////////////////////////////////////
01179     StrList vary_list;
01180     int num_vary_values = obj_origin_server_response->value_get_comma_list(MIME_FIELD_VARY, MIME_LEN_VARY, &vary_list);
01181 
01182     if (num_vary_values <= 0) {   // no vary hdr, so use defaults if enabled
01183       const char *vary_values = NULL;
01184       const char *content_type;
01185       int content_type_len;
01186       char type[32], subtype[32];
01187 
01188       content_type = obj_origin_server_response->value_get(MIME_FIELD_CONTENT_TYPE,
01189                                                            MIME_LEN_CONTENT_TYPE, &content_type_len);
01190 
01191       if (content_type) {
01192         HttpCompat::parse_mime_type_with_len(content_type, content_type_len, type, subtype, sizeof(type),
01193                                              sizeof(subtype));
01194       } else {
01195         type[0] = '\0';
01196         subtype[0] = '\0';
01197       }
01198 
01199       Debug("http_match", "      type = '%s', subtype = '%s'", type, subtype);
01200 
01201       if (http_config_params->cache_enable_default_vary_headers) {
01202         if (strcasecmp(type, "text") == 0) {
01203           Debug("http_match", "      Using default text vary headers");
01204           vary_values = http_config_params->cache_vary_default_text;
01205         } else if (strcasecmp(type, "image") == 0) {
01206           Debug("http_match", "      Using default image vary headers");
01207           vary_values = http_config_params->cache_vary_default_images;
01208         } else {
01209           Debug("http_match", "      Using default other vary headers");
01210           vary_values = http_config_params->cache_vary_default_other;
01211         }
01212       }
01213       // convert the comma-sep string from the config var into a list
01214       HttpCompat::parse_comma_list(&vary_list, (vary_values ? vary_values : ""));
01215     }
01216 
01217     if (is_debug_tag_set("http_match") && (vary_list.head)) {
01218       Debug("http_match", "Vary list of %d elements", vary_list.count);
01219       vary_list.dump(stderr);
01220     }
01221 
01222     // for each field that varies, see if current & original hdrs match //
01223     for (Str *field = vary_list.head; field != NULL; field = field->next) {
01224       if (field->len == 0)
01225         continue;
01226 
01227       /////////////////////////////////////////////////////////////
01228       // If the field name is unhandled, we should probably do a //
01229       // string comparison on the values of this extension field //
01230       // but currently we just treat it equivalent to a '*'.     //
01231       /////////////////////////////////////////////////////////////
01232 
01233       Debug("http_match", "Vary: %s", field->str);
01234       if (((field->str[0] == '*') && (field->str[1] == NUL))) {
01235         Debug("http_match", "Wildcard variability --- object not served from cache\n");
01236         variability = VARIABILITY_ALL;
01237         break;
01238       }
01239       ////////////////////////////////////////////////////////////////////////////////////////
01240       // Special case: if 'proxy.config.http.global_user_agent_header' set                  //
01241       // we should ignore Vary: User-Agent even if 'proxy.config.cache.vary_on_user_agent'  //
01242       // is 1. Actually the 'proxy.config.cache.vary_on_user_agent' is useless in such case //
01243       ///////////////////////////////////////////////////////////////////////////////////////
01244       if (http_config_params->cache_global_user_agent_header &&
01245           !strcasecmp((char *) field->str, "User-Agent"))
01246         continue;
01247 
01248       // Disable Vary mismatch checking for Accept-Encoding.  This is only safe to
01249       // set if you are promising to fix any Accept-Encoding/Content-Encoding mismatches.
01250       if (http_config_params->ignore_accept_encoding_mismatch && 
01251           !strcasecmp((char *) field->str, "Accept-Encoding"))
01252         continue;
01253 
01254       ///////////////////////////////////////////////////////////////////
01255       // Take the current vary field and look up the headers in        //
01256       // the current client, and the original client.  The cached      //
01257       // object varies unless BOTH the current client and the original //
01258       // client contain the header, and the header values are equal.   //
01259       // We relax this to allow a match if NEITHER have the header.    //
01260       //                                                               //
01261       // While header "equality" appears to be header-specific, the    //
01262       // RFC2068 spec implies that matching only needs to account for  //
01263       // differences in whitespace and support for multiple headers    //
01264       // with the same name.  Case is presumably also insignificant.   //
01265       // Other variations (such as q=1 vs. a field with no q factor)   //
01266       // mean that the values DO NOT match.                            //
01267       ///////////////////////////////////////////////////////////////////
01268 
01269       ink_assert(strlen(field->str) == field->len);
01270 
01271       char *field_name_str = (char *) hdrtoken_string_to_wks(field->str, field->len);
01272       if (field_name_str == NULL)
01273         field_name_str = (char *) field->str;
01274 
01275       MIMEField *cached_hdr_field = obj_client_request->field_find(field_name_str, field->len);
01276       MIMEField *current_hdr_field = client_request->field_find(field_name_str, field->len);
01277 
01278       // Header values match? //
01279       if (!HttpCompat::do_header_values_rfc2068_14_43_match(cached_hdr_field, current_hdr_field)) {
01280         variability = VARIABILITY_SOME;
01281         break;
01282       }
01283     }
01284   }
01285 
01286   return variability;
01287 }
01288 
01289 /**
01290   If the request has If-modified-since or If-none-match,
01291   HTTP_STATUS_NOT_MODIFIED is returned if both or the existing one
01292   (if only one exists) fails; otherwise, the response's status code
01293   is returned.
01294 
01295   If the request has If-unmodified-since or If-match,
01296   HTTP_STATUS_PRECONDITION_FAILED is returned if one fails; otherwise,
01297   the response's status code is returned.
01298 
01299   If the request is a RANGE request with If-range,
01300   HTTP_STATUS_RANGE_NOT_SATISFIABLE is returned if the If-range condition
01301   is not satisfied (or fails); that means the document is changed and
01302   the whole document should be returned with 200 status code. Otherwise,
01303   the response's status code is returned.
01304 
01305   @return status code: HTTP_STATUS_NOT_MODIFIED,
01306     HTTP_STATUS_PRECONDITION_FAILED, or HTTP_STATUS_RANGE_NOT_SATISFIABLE.
01307 
01308 */
01309 HTTPStatus
01310 HttpTransactCache::match_response_to_request_conditionals(HTTPHdr * request, HTTPHdr * response)
01311 {
01312   HTTPStatus response_code = HTTP_STATUS_NONE;
01313 
01314   ink_assert(response->status_get() != HTTP_STATUS_NOT_MODIFIED);
01315   ink_assert(response->status_get() != HTTP_STATUS_PRECONDITION_FAILED);
01316   ink_assert(response->status_get() != HTTP_STATUS_RANGE_NOT_SATISFIABLE);
01317 
01318 
01319   if (!(request->presence(MIME_PRESENCE_IF_MODIFIED_SINCE |
01320                           MIME_PRESENCE_IF_NONE_MATCH |
01321                           MIME_PRESENCE_IF_UNMODIFIED_SINCE | MIME_PRESENCE_IF_MATCH | MIME_PRESENCE_RANGE))) {
01322     return response->status_get();
01323   }
01324   // return NOT_MODIFIED only if both If-modified-since and If-none-match fail
01325 
01326   // If-Modified-Since //
01327   if (request->presence(MIME_PRESENCE_IF_MODIFIED_SINCE)) {
01328     // lm_value is zero if Last-modified not exists
01329     ink_time_t lm_value = response->get_last_modified();
01330 
01331     // we won't return NOT_MODIFIED if Last-modified not exists
01332     if ((lm_value == 0) || (request->get_if_modified_since() < lm_value)) {
01333       return response->status_get();
01334     } else {
01335       // we cannot return NOT_MODIFIED yet, need to check If-none-match
01336       response_code = HTTP_STATUS_NOT_MODIFIED;
01337 
01338       if (!request->presence(MIME_PRESENCE_IF_NONE_MATCH)) {
01339         return response_code;
01340       }
01341     }
01342   }
01343 
01344   // If-None-Match: may match weakly //
01345   if (request->presence(MIME_PRESENCE_IF_NONE_MATCH)) {
01346     int raw_etags_len, comma_sep_tag_list_len;
01347     const char *raw_etags = response->value_get(MIME_FIELD_ETAG, MIME_LEN_ETAG, &raw_etags_len);
01348     const char *comma_sep_tag_list = NULL;
01349 
01350     if (raw_etags) {
01351       comma_sep_tag_list = request->value_get(MIME_FIELD_IF_NONE_MATCH,
01352                                               MIME_LEN_IF_NONE_MATCH, &comma_sep_tag_list_len);
01353     } else {
01354       // no Etag in the response, so there is nothing to match
01355       // against those in If-none-match
01356       goto L1;
01357     }
01358 
01359     if (!comma_sep_tag_list) {
01360       comma_sep_tag_list = "";
01361       comma_sep_tag_list_len = 0;
01362     }
01363 
01364     if (!raw_etags) {
01365       raw_etags = "";
01366       raw_etags_len = 0;
01367     }
01368     ////////////////////////////////////////////////////////////////////////
01369     // If we have an etag and a if-none-match, we are talking to someone  //
01370     // who is doing a 1.1 revalidate. Since this is a GET request with no //
01371     // sub-ranges, we can do a weak validation.                           //
01372     ////////////////////////////////////////////////////////////////////////
01373     if (do_strings_match_weakly(raw_etags, raw_etags_len, comma_sep_tag_list, comma_sep_tag_list_len)) {
01374       // the response already failed If-modified-since (if one exists)
01375       return HTTP_STATUS_NOT_MODIFIED;
01376     } else {
01377       return response->status_get();
01378     }
01379   }
01380 
01381 L1:
01382   // There is no If-none-match, and If-modified-since failed,
01383   // so return NOT_MODIFIED
01384   if (response_code != HTTP_STATUS_NONE) {
01385     return response_code;
01386   }
01387 
01388   // return PRECONDITIONAL_FAILED if either If-unmodified-since
01389   // or If-match fails
01390   // BUT, return the original response code only if both pass
01391 
01392   // If-Unmodified-Since //
01393   if (request->presence(MIME_PRESENCE_IF_UNMODIFIED_SINCE)) {
01394     // lm_value is zero if Last-modified not exists
01395     ink_time_t lm_value = response->get_last_modified();
01396 
01397     // Condition fails if Last-modified not exists
01398     if ((request->get_if_unmodified_since() < lm_value) || (lm_value == 0)) {
01399       return HTTP_STATUS_PRECONDITION_FAILED;
01400     } else {
01401       // we cannot return yet, need to check If-match
01402       response_code = response->status_get();
01403     }
01404   }
01405 
01406   // If-Match: must match strongly //
01407   if (request->presence(MIME_PRESENCE_IF_MATCH)) {
01408     int raw_etags_len, comma_sep_tag_list_len;
01409     const char *raw_etags = response->value_get(MIME_FIELD_ETAG, MIME_LEN_ETAG, &raw_etags_len);
01410     const char *comma_sep_tag_list = NULL;
01411 
01412     if (raw_etags) {
01413       comma_sep_tag_list = request->value_get(MIME_FIELD_IF_MATCH, MIME_LEN_IF_MATCH, &comma_sep_tag_list_len);
01414     }
01415 
01416     if (!comma_sep_tag_list) {
01417       comma_sep_tag_list = "";
01418       comma_sep_tag_list_len = 0;
01419     }
01420 
01421     if (!raw_etags) {
01422       raw_etags = "";
01423       raw_etags_len = 0;
01424     }
01425 
01426     if (do_strings_match_strongly(raw_etags, raw_etags_len, comma_sep_tag_list, comma_sep_tag_list_len)) {
01427       // at the point, the response passed both If-unmodified-since
01428       // and If-match, so we can return the original response code
01429       return response->status_get();
01430     } else {
01431       return HTTP_STATUS_PRECONDITION_FAILED;
01432     }
01433   }
01434   // There is no If-match, and If-unmodified-since passed,
01435   // so return the original response code
01436   if (response_code != HTTP_STATUS_NONE) {
01437     return response_code;
01438   }
01439 
01440   // Handling If-Range header:
01441   // As Range && If-Range don't occur often, we want to put the
01442   // If-Range code in the end
01443   if (request->presence(MIME_PRESENCE_RANGE) && request->presence(MIME_PRESENCE_IF_RANGE)) {
01444     int raw_len, comma_sep_list_len;
01445 
01446     const char *if_value = request->value_get(MIME_FIELD_IF_RANGE,
01447                                               MIME_LEN_IF_RANGE,
01448                                               &comma_sep_list_len);
01449 
01450     // this is an ETag, similar to If-Match
01451     if (!if_value || if_value[0] == '"' || (comma_sep_list_len > 1 && if_value[1] == '/')) {
01452       if (!if_value) {
01453         if_value = "";
01454         comma_sep_list_len = 0;
01455       }
01456 
01457       const char *raw_etags = response->value_get(MIME_FIELD_ETAG, MIME_LEN_ETAG, &raw_len);
01458 
01459       if (!raw_etags) {
01460         raw_etags = "";
01461         raw_len = 0;
01462       }
01463 
01464       if (do_strings_match_strongly(raw_etags, raw_len, if_value, comma_sep_list_len)) {
01465         return response->status_get();
01466       } else {
01467         return HTTP_STATUS_RANGE_NOT_SATISFIABLE;
01468       }
01469     }
01470     // this a Date, similar to If-Unmodified-Since
01471     else {
01472       // lm_value is zero if Last-modified not exists
01473       ink_time_t lm_value = response->get_last_modified();
01474 
01475       // condition fails if Last-modified not exists
01476       if ((request->get_if_range_date() < lm_value) || (lm_value == 0)) {
01477         return HTTP_STATUS_RANGE_NOT_SATISFIABLE;
01478       } else {
01479         return response->status_get();
01480       }
01481     }
01482   }
01483 
01484   return response->status_get();
01485 }
01486 
01487 
01488 /*---------------------------------------------------
01489  *        class CacheLookupHttpConfig
01490  *---------------------------------------------------*/
01491 int
01492 CacheLookupHttpConfig::marshal_length()
01493 {
01494   int len = (int) sizeof(int32_t);
01495   len += (cache_vary_default_text ? strlen(cache_vary_default_text) + 1 : 1);
01496   len += (cache_vary_default_images ? strlen(cache_vary_default_images) + 1 : 1);
01497   len += (cache_vary_default_other ? strlen(cache_vary_default_other) + 1 : 1);
01498   return len;
01499 }
01500 
01501 int
01502 CacheLookupHttpConfig::marshal(char *buf, int length)
01503 {
01504   int32_t i32_tmp;
01505   char *p = buf;
01506   int len;
01507 
01508   if ((length -= sizeof(int32_t)) < 0)
01509     return -1;
01510 
01511   i32_tmp = (int32_t) cache_enable_default_vary_headers;
01512   memcpy(p, &i32_tmp, sizeof(int32_t));
01513   p += sizeof(int32_t);
01514 
01515   len = (cache_vary_default_text ? strlen(cache_vary_default_text) + 1 : 1);
01516   if ((length -= len) < 0)
01517     return -1;
01518   ink_strlcpy(p, (cache_vary_default_text ? cache_vary_default_text : ""), length);
01519   p += len;
01520 
01521   len = (cache_vary_default_images ? strlen(cache_vary_default_images) + 1 : 1);
01522   if ((length -= len) < 0)
01523     return -1;
01524   ink_strlcpy(p, (cache_vary_default_images ? cache_vary_default_images : ""), length);
01525   p += len;
01526 
01527   len = (cache_vary_default_other ? strlen(cache_vary_default_other) + 1 : 1);
01528   if ((length -= len) < 0)
01529     return -1;
01530   ink_strlcpy(p, (cache_vary_default_other ? cache_vary_default_other : ""), length);
01531   p += len;
01532 
01533   return (p - buf);
01534 }
01535 
01536 int
01537 CacheLookupHttpConfig::unmarshal(Arena * arena, const char *buf, int buflen)
01538 {
01539   const char *p = buf;
01540   int length = buflen;
01541   int len;
01542   int32_t i32_tmp;
01543 
01544   if ((length -= sizeof(int32_t)) < 0)
01545     return -1;
01546 
01547   memcpy(&i32_tmp, p, sizeof(int32_t));
01548   cache_enable_default_vary_headers = (bool) i32_tmp;
01549   p += sizeof(int32_t);
01550 
01551   len = strlen(p) + 1;
01552   if ((length -= len) < 0)
01553     return -1;
01554   cache_vary_default_text = arena->str_store(((len == 2) ? "" : p), len - 1);
01555   p += len;
01556 
01557   len = strlen(p) + 1;
01558   if ((length -= len) < 0)
01559     return -1;
01560   cache_vary_default_images = arena->str_store(((len == 2) ? "" : p), len - 1);
01561   p += len;
01562 
01563   len = strlen(p) + 1;
01564   if ((length -= len) < 0)
01565     return -1;
01566   cache_vary_default_other = arena->str_store(((len == 2) ? "" : p), len - 1);
01567   p += len;
01568 
01569   return (p - buf);
01570 }

Generated by  doxygen 1.7.1