• Main Page
  • Related Pages
  • Namespaces
  • Data Structures
  • Files
  • File List
  • Globals

URL.cc

Go to the documentation of this file.
00001 /** @file
00002 
00003   A brief file description
00004 
00005   @section license License
00006 
00007   Licensed to the Apache Software Foundation (ASF) under one
00008   or more contributor license agreements.  See the NOTICE file
00009   distributed with this work for additional information
00010   regarding copyright ownership.  The ASF licenses this file
00011   to you under the Apache License, Version 2.0 (the
00012   "License"); you may not use this file except in compliance
00013   with the License.  You may obtain a copy of the License at
00014 
00015       http://www.apache.org/licenses/LICENSE-2.0
00016 
00017   Unless required by applicable law or agreed to in writing, software
00018   distributed under the License is distributed on an "AS IS" BASIS,
00019   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00020   See the License for the specific language governing permissions and
00021   limitations under the License.
00022  */
00023 
00024 #include <assert.h>
00025 #include <new>
00026 #include "libts.h"
00027 #include "URL.h"
00028 #include "MIME.h"
00029 #include "HTTP.h"
00030 #include "Diags.h"
00031 
00032 const char *URL_SCHEME_FILE;
00033 const char *URL_SCHEME_FTP;
00034 const char *URL_SCHEME_GOPHER;
00035 const char *URL_SCHEME_HTTP;
00036 const char *URL_SCHEME_HTTPS;
00037 const char *URL_SCHEME_WSS;
00038 const char *URL_SCHEME_WS;
00039 const char *URL_SCHEME_MAILTO;
00040 const char *URL_SCHEME_NEWS;
00041 const char *URL_SCHEME_NNTP;
00042 const char *URL_SCHEME_PROSPERO;
00043 const char *URL_SCHEME_TELNET;
00044 const char *URL_SCHEME_TUNNEL;
00045 const char *URL_SCHEME_WAIS;
00046 const char *URL_SCHEME_PNM;
00047 const char *URL_SCHEME_RTSP;
00048 const char *URL_SCHEME_RTSPU;
00049 const char *URL_SCHEME_MMS;
00050 const char *URL_SCHEME_MMSU;
00051 const char *URL_SCHEME_MMST;
00052 
00053 int URL_WKSIDX_FILE;
00054 int URL_WKSIDX_FTP;
00055 int URL_WKSIDX_GOPHER;
00056 int URL_WKSIDX_HTTP;
00057 int URL_WKSIDX_HTTPS;
00058 int URL_WKSIDX_WS;
00059 int URL_WKSIDX_WSS;
00060 int URL_WKSIDX_MAILTO;
00061 int URL_WKSIDX_NEWS;
00062 int URL_WKSIDX_NNTP;
00063 int URL_WKSIDX_PROSPERO;
00064 int URL_WKSIDX_TELNET;
00065 int URL_WKSIDX_TUNNEL;
00066 int URL_WKSIDX_WAIS;
00067 int URL_WKSIDX_PNM;
00068 int URL_WKSIDX_RTSP;
00069 int URL_WKSIDX_RTSPU;
00070 int URL_WKSIDX_MMS;
00071 int URL_WKSIDX_MMSU;
00072 int URL_WKSIDX_MMST;
00073 
00074 int URL_LEN_FILE;
00075 int URL_LEN_FTP;
00076 int URL_LEN_GOPHER;
00077 int URL_LEN_HTTP;
00078 int URL_LEN_HTTPS;
00079 int URL_LEN_WS;
00080 int URL_LEN_WSS;
00081 int URL_LEN_MAILTO;
00082 int URL_LEN_NEWS;
00083 int URL_LEN_NNTP;
00084 int URL_LEN_PROSPERO;
00085 int URL_LEN_TELNET;
00086 int URL_LEN_TUNNEL;
00087 int URL_LEN_WAIS;
00088 int URL_LEN_PNM;
00089 int URL_LEN_RTSP;
00090 int URL_LEN_RTSPU;
00091 int URL_LEN_MMS;
00092 int URL_LEN_MMSU;
00093 int URL_LEN_MMST;
00094 
00095 int url_hash_method = 0;
00096 
00097 
00098 /*-------------------------------------------------------------------------
00099   -------------------------------------------------------------------------*/
00100 URLHashContext::HashType URLHashContext::Setting = URLHashContext::MMH;
00101 
00102 URLHashContext::URLHashContext() {
00103   switch (Setting) {
00104   case UNSPECIFIED:
00105   case MD5:
00106     new(_obj) MD5Context;
00107     break;
00108   case MMH:
00109     new(_obj) MMHContext;
00110     break;
00111   default: ink_assert("Invalid global URL hash context");
00112   };
00113 }
00114 
00115 void
00116 url_init()
00117 {
00118   static int init = 1;
00119 
00120   if (init) {
00121     init = 0;
00122 
00123     hdrtoken_init();
00124 
00125     URL_SCHEME_FILE = hdrtoken_string_to_wks("file");
00126     URL_SCHEME_FTP = hdrtoken_string_to_wks("ftp");
00127     URL_SCHEME_GOPHER = hdrtoken_string_to_wks("gopher");
00128     URL_SCHEME_HTTP = hdrtoken_string_to_wks("http");
00129     URL_SCHEME_HTTPS = hdrtoken_string_to_wks("https");
00130     URL_SCHEME_WSS = hdrtoken_string_to_wks("wss");
00131     URL_SCHEME_WS = hdrtoken_string_to_wks("ws");
00132     URL_SCHEME_MAILTO = hdrtoken_string_to_wks("mailto");
00133     URL_SCHEME_NEWS = hdrtoken_string_to_wks("news");
00134     URL_SCHEME_NNTP = hdrtoken_string_to_wks("nntp");
00135     URL_SCHEME_PROSPERO = hdrtoken_string_to_wks("prospero");
00136     URL_SCHEME_TELNET = hdrtoken_string_to_wks("telnet");
00137     URL_SCHEME_TUNNEL = hdrtoken_string_to_wks("tunnel");
00138     URL_SCHEME_WAIS = hdrtoken_string_to_wks("wais");
00139     URL_SCHEME_PNM = hdrtoken_string_to_wks("pnm");
00140     URL_SCHEME_RTSP = hdrtoken_string_to_wks("rtsp");
00141     URL_SCHEME_RTSPU = hdrtoken_string_to_wks("rtspu");
00142     URL_SCHEME_MMS = hdrtoken_string_to_wks("mms");
00143     URL_SCHEME_MMSU = hdrtoken_string_to_wks("mmsu");
00144     URL_SCHEME_MMST = hdrtoken_string_to_wks("mmst");
00145     
00146     ink_assert(URL_SCHEME_FILE && 
00147       URL_SCHEME_FTP &&
00148       URL_SCHEME_GOPHER && 
00149       URL_SCHEME_HTTP && 
00150       URL_SCHEME_HTTPS && 
00151       URL_SCHEME_WS &&
00152       URL_SCHEME_WSS &&
00153       URL_SCHEME_MAILTO && 
00154       URL_SCHEME_NEWS && 
00155       URL_SCHEME_NNTP && 
00156       URL_SCHEME_PROSPERO && 
00157       URL_SCHEME_TELNET &&
00158       URL_SCHEME_TUNNEL &&
00159       URL_SCHEME_WAIS &&
00160       URL_SCHEME_PNM &&
00161       URL_SCHEME_RTSP && 
00162       URL_SCHEME_RTSPU && 
00163       URL_SCHEME_MMS &&
00164       URL_SCHEME_MMSU && 
00165       URL_SCHEME_MMST
00166     );
00167     
00168     URL_WKSIDX_FILE = hdrtoken_wks_to_index(URL_SCHEME_FILE);
00169     URL_WKSIDX_FTP = hdrtoken_wks_to_index(URL_SCHEME_FTP);
00170     URL_WKSIDX_GOPHER = hdrtoken_wks_to_index(URL_SCHEME_GOPHER);
00171     URL_WKSIDX_HTTP = hdrtoken_wks_to_index(URL_SCHEME_HTTP);
00172     URL_WKSIDX_HTTPS = hdrtoken_wks_to_index(URL_SCHEME_HTTPS);
00173     URL_WKSIDX_WS = hdrtoken_wks_to_index(URL_SCHEME_WS);
00174     URL_WKSIDX_WSS = hdrtoken_wks_to_index(URL_SCHEME_WSS);
00175     URL_WKSIDX_MAILTO = hdrtoken_wks_to_index(URL_SCHEME_MAILTO);
00176     URL_WKSIDX_NEWS = hdrtoken_wks_to_index(URL_SCHEME_NEWS);
00177     URL_WKSIDX_NNTP = hdrtoken_wks_to_index(URL_SCHEME_NNTP);
00178     URL_WKSIDX_PROSPERO = hdrtoken_wks_to_index(URL_SCHEME_PROSPERO);
00179     URL_WKSIDX_TELNET = hdrtoken_wks_to_index(URL_SCHEME_TELNET);
00180     URL_WKSIDX_TUNNEL = hdrtoken_wks_to_index(URL_SCHEME_TUNNEL);
00181     URL_WKSIDX_WAIS = hdrtoken_wks_to_index(URL_SCHEME_WAIS);
00182     URL_WKSIDX_PNM = hdrtoken_wks_to_index(URL_SCHEME_PNM);
00183     URL_WKSIDX_RTSP = hdrtoken_wks_to_index(URL_SCHEME_RTSP);
00184     URL_WKSIDX_RTSPU = hdrtoken_wks_to_index(URL_SCHEME_RTSPU);
00185     URL_WKSIDX_MMS = hdrtoken_wks_to_index(URL_SCHEME_MMS);
00186     URL_WKSIDX_MMSU = hdrtoken_wks_to_index(URL_SCHEME_MMSU);
00187     URL_WKSIDX_MMST = hdrtoken_wks_to_index(URL_SCHEME_MMST);
00188 
00189     URL_LEN_FILE = hdrtoken_wks_to_length(URL_SCHEME_FILE);
00190     URL_LEN_FTP = hdrtoken_wks_to_length(URL_SCHEME_FTP);
00191     URL_LEN_GOPHER = hdrtoken_wks_to_length(URL_SCHEME_GOPHER);
00192     URL_LEN_HTTP = hdrtoken_wks_to_length(URL_SCHEME_HTTP);
00193     URL_LEN_HTTPS = hdrtoken_wks_to_length(URL_SCHEME_HTTPS);
00194     URL_LEN_WS = hdrtoken_wks_to_length(URL_SCHEME_WS);
00195     URL_LEN_WSS = hdrtoken_wks_to_length(URL_SCHEME_WSS);
00196     URL_LEN_MAILTO = hdrtoken_wks_to_length(URL_SCHEME_MAILTO);
00197     URL_LEN_NEWS = hdrtoken_wks_to_length(URL_SCHEME_NEWS);
00198     URL_LEN_NNTP = hdrtoken_wks_to_length(URL_SCHEME_NNTP);
00199     URL_LEN_PROSPERO = hdrtoken_wks_to_length(URL_SCHEME_PROSPERO);
00200     URL_LEN_TELNET = hdrtoken_wks_to_length(URL_SCHEME_TELNET);
00201     URL_LEN_TUNNEL = hdrtoken_wks_to_length(URL_SCHEME_TUNNEL);
00202     URL_LEN_WAIS = hdrtoken_wks_to_length(URL_SCHEME_WAIS);
00203     URL_LEN_PNM = hdrtoken_wks_to_length(URL_SCHEME_PNM);
00204     URL_LEN_RTSP = hdrtoken_wks_to_length(URL_SCHEME_RTSP);
00205     URL_LEN_RTSPU = hdrtoken_wks_to_length(URL_SCHEME_RTSPU);
00206     URL_LEN_MMS = hdrtoken_wks_to_length(URL_SCHEME_MMS);
00207     URL_LEN_MMSU = hdrtoken_wks_to_length(URL_SCHEME_MMSU);
00208     URL_LEN_MMST = hdrtoken_wks_to_length(URL_SCHEME_MMST);
00209 
00210     ink_assert(URLHashContext::OBJ_SIZE >= sizeof(MD5Context));
00211     ink_assert(URLHashContext::OBJ_SIZE >= sizeof(MMHContext));
00212 
00213   }
00214 }
00215 
00216 /*-------------------------------------------------------------------------
00217   -------------------------------------------------------------------------*/
00218 
00219 /***********************************************************************
00220  *                                                                     *
00221  *             U R L    C R E A T I O N    A N D    C O P Y            *
00222  *                                                                     *
00223  ***********************************************************************/
00224 
00225 URLImpl *
00226 url_create(HdrHeap * heap)
00227 {
00228   URLImpl *url;
00229 
00230   url = (URLImpl *) heap->allocate_obj(sizeof(URLImpl), HDR_HEAP_OBJ_URL);
00231   obj_clear_data((HdrHeapObjImpl *) url);
00232   url->m_url_type = URL_TYPE_NONE;
00233   url->m_scheme_wks_idx = -1;
00234   url_clear_string_ref(url);
00235   return url;
00236 }
00237 
00238 /*-------------------------------------------------------------------------
00239   -------------------------------------------------------------------------*/
00240 
00241 void
00242 url_clear(URLImpl * url_impl)
00243 {
00244   obj_clear_data((HdrHeapObjImpl *) url_impl);
00245   url_impl->m_url_type = URL_TYPE_NONE;
00246   url_impl->m_scheme_wks_idx = -1;
00247 }
00248 
00249 /*-------------------------------------------------------------------------
00250   -------------------------------------------------------------------------*/
00251 
00252 URLImpl *
00253 url_copy(URLImpl * s_url, HdrHeap * s_heap, HdrHeap * d_heap, bool inherit_strs)
00254 {
00255   URLImpl *d_url = url_create(d_heap);
00256   url_copy_onto(s_url, s_heap, d_url, d_heap, inherit_strs);
00257   return d_url;
00258 }
00259 
00260 /*-------------------------------------------------------------------------
00261   -------------------------------------------------------------------------*/
00262 
00263 void
00264 url_copy_onto(URLImpl * s_url, HdrHeap * s_heap, URLImpl * d_url, HdrHeap * d_heap, bool inherit_strs)
00265 {
00266   if (s_url != d_url) {
00267     obj_copy_data((HdrHeapObjImpl *) s_url, (HdrHeapObjImpl *) d_url);
00268     if (inherit_strs && (s_heap != d_heap))
00269       d_heap->inherit_string_heaps(s_heap);
00270   }
00271 }
00272 
00273 /*-------------------------------------------------------------------------
00274   -------------------------------------------------------------------------*/
00275 
00276 void
00277 url_nuke_proxy_stuff(URLImpl * d_url)
00278 {
00279   d_url->m_len_scheme = 0;
00280   d_url->m_len_user = 0;
00281   d_url->m_len_password = 0;
00282   d_url->m_len_host = 0;
00283   d_url->m_len_port = 0;
00284 
00285   d_url->m_ptr_scheme = NULL;
00286   d_url->m_ptr_user = NULL;
00287   d_url->m_ptr_password = NULL;
00288   d_url->m_ptr_host = NULL;
00289   d_url->m_ptr_port = NULL;
00290 
00291   d_url->m_scheme_wks_idx = -1;
00292   d_url->m_port = 0;
00293 }
00294 
00295 /*-------------------------------------------------------------------------
00296   -------------------------------------------------------------------------*/
00297 
00298 /**
00299   This routine is like url_copy_onto, but clears the
00300   scheme/host/user/pass/port components, resulting in a server-style URL.
00301 
00302 */
00303 void
00304 url_copy_onto_as_server_url(URLImpl * s_url, HdrHeap * s_heap, URLImpl * d_url, HdrHeap * d_heap, bool inherit_strs)
00305 {
00306   url_nuke_proxy_stuff(d_url);
00307 
00308   d_url->m_ptr_path = s_url->m_ptr_path;
00309   d_url->m_ptr_params = s_url->m_ptr_params;
00310   d_url->m_ptr_query = s_url->m_ptr_query;
00311   d_url->m_ptr_fragment = s_url->m_ptr_fragment;
00312   url_clear_string_ref(d_url);
00313 
00314   d_url->m_len_path = s_url->m_len_path;
00315   d_url->m_len_params = s_url->m_len_params;
00316   d_url->m_len_query = s_url->m_len_query;
00317   d_url->m_len_fragment = s_url->m_len_fragment;
00318 
00319   d_url->m_url_type = s_url->m_url_type;
00320   d_url->m_type_code = s_url->m_type_code;
00321 
00322   if (inherit_strs && (s_heap != d_heap))
00323     d_heap->inherit_string_heaps(s_heap);
00324 }
00325 
00326 /*-------------------------------------------------------------------------
00327   -------------------------------------------------------------------------*/
00328 
00329 /***********************************************************************
00330  *                                                                     *
00331  *                        M A R S H A L I N G                          *
00332  *                                                                     *
00333  ***********************************************************************/
/**
  Marshal the component string pointers of this URL.

  HDR_MARSHAL_STR is applied to each of the nine component pointers (scheme
  through fragment) with the supplied translation table. The cached printed
  string is not marshaled; its line is commented out below.

  @param str_xlate translation table passed to HDR_MARSHAL_STR.
  @param num_xlate number of entries in @a str_xlate.
  @return 0 after the last component has been processed.
    NOTE(review): HDR_MARSHAL_STR is defined elsewhere and may return early
    on failure -- confirm against its definition.
*/
int
URLImpl::marshal(MarshalXlate * str_xlate, int num_xlate)
{

  HDR_MARSHAL_STR(m_ptr_scheme, str_xlate, num_xlate);
  HDR_MARSHAL_STR(m_ptr_user, str_xlate, num_xlate);
  HDR_MARSHAL_STR(m_ptr_password, str_xlate, num_xlate);
  HDR_MARSHAL_STR(m_ptr_host, str_xlate, num_xlate);
  HDR_MARSHAL_STR(m_ptr_port, str_xlate, num_xlate);
  HDR_MARSHAL_STR(m_ptr_path, str_xlate, num_xlate);
  HDR_MARSHAL_STR(m_ptr_params, str_xlate, num_xlate);
  HDR_MARSHAL_STR(m_ptr_query, str_xlate, num_xlate);
  HDR_MARSHAL_STR(m_ptr_fragment, str_xlate, num_xlate);
//    HDR_MARSHAL_STR(m_ptr_printed_string, str_xlate, num_xlate);
  return 0;
}
00350 
/**
  Unmarshal the component string pointers of this URL.

  HDR_UNMARSHAL_STR is applied, with @a offset, to the same nine component
  pointers that marshal() processes. The cached printed string is not
  touched; its line is commented out below.

  @param offset value handed to HDR_UNMARSHAL_STR for every pointer
    (presumably the relocation delta of the unmarshaled block -- confirm
    against the macro definition).
*/
void
URLImpl::unmarshal(intptr_t offset)
{
  HDR_UNMARSHAL_STR(m_ptr_scheme, offset);
  HDR_UNMARSHAL_STR(m_ptr_user, offset);
  HDR_UNMARSHAL_STR(m_ptr_password, offset);
  HDR_UNMARSHAL_STR(m_ptr_host, offset);
  HDR_UNMARSHAL_STR(m_ptr_port, offset);
  HDR_UNMARSHAL_STR(m_ptr_path, offset);
  HDR_UNMARSHAL_STR(m_ptr_params, offset);
  HDR_UNMARSHAL_STR(m_ptr_query, offset);
  HDR_UNMARSHAL_STR(m_ptr_fragment, offset);
//    HDR_UNMARSHAL_STR(m_ptr_printed_string, offset);
}
00365 
/**
  Apply HDR_MOVE_STR to every string owned by this URL.

  Unlike marshal()/unmarshal(), the cached printed string is included here.

  @param new_heap destination string heap. It is not named explicitly in
    the body; NOTE(review): HDR_MOVE_STR presumably picks up @a new_heap by
    name -- confirm against the macro definition.
*/
void
URLImpl::move_strings(HdrStrHeap * new_heap)
{
  HDR_MOVE_STR(m_ptr_scheme, m_len_scheme);
  HDR_MOVE_STR(m_ptr_user, m_len_user);
  HDR_MOVE_STR(m_ptr_password, m_len_password);
  HDR_MOVE_STR(m_ptr_host, m_len_host);
  HDR_MOVE_STR(m_ptr_port, m_len_port);
  HDR_MOVE_STR(m_ptr_path, m_len_path);
  HDR_MOVE_STR(m_ptr_params, m_len_params);
  HDR_MOVE_STR(m_ptr_query, m_len_query);
  HDR_MOVE_STR(m_ptr_fragment, m_len_fragment);
  HDR_MOVE_STR(m_ptr_printed_string, m_len_printed_string);
}
00380 
00381 size_t
00382 URLImpl::strings_length()
00383 {
00384   size_t ret = 0;
00385 
00386   ret += m_len_scheme;
00387   ret += m_len_user;
00388   ret += m_len_password;
00389   ret += m_len_host;
00390   ret += m_len_port;
00391   ret += m_len_path;
00392   ret += m_len_params;
00393   ret += m_len_query;
00394   ret += m_len_fragment;
00395   ret += m_len_printed_string;
00396   return ret;
00397 }
00398 
/**
  Run CHECK_STR on each component string of this URL against @a heaps.

  The cached printed string is not checked; its line is commented out below.

  @param heaps heap descriptors handed to CHECK_STR.
  @param num_heaps number of entries in @a heaps.
*/
void
URLImpl::check_strings(HeapCheck * heaps, int num_heaps)
{
  CHECK_STR(m_ptr_scheme, m_len_scheme, heaps, num_heaps);
  CHECK_STR(m_ptr_user, m_len_user, heaps, num_heaps);
  CHECK_STR(m_ptr_password, m_len_password, heaps, num_heaps);
  CHECK_STR(m_ptr_host, m_len_host, heaps, num_heaps);
  CHECK_STR(m_ptr_port, m_len_port, heaps, num_heaps);
  CHECK_STR(m_ptr_path, m_len_path, heaps, num_heaps);
  CHECK_STR(m_ptr_params, m_len_params, heaps, num_heaps);
  CHECK_STR(m_ptr_query, m_len_query, heaps, num_heaps);
  CHECK_STR(m_ptr_fragment, m_len_fragment, heaps, num_heaps);
//    CHECK_STR(m_ptr_printed_string, m_len_printed_string, heaps, num_heaps);
}
00413 
00414 /***********************************************************************
00415  *                                                                     *
00416  *                               S E T                                 *
00417  *                                                                     *
00418  ***********************************************************************/
00419 
00420 const char *
00421 url_scheme_set(HdrHeap * heap, URLImpl * url, const char *scheme_str, int scheme_wks_idx, int length, bool copy_string)
00422 {
00423   const char *scheme_wks;
00424   url_called_set(url);
00425   if (length == 0)
00426     scheme_str = NULL;
00427 
00428   mime_str_u16_set(heap, scheme_str, length, &(url->m_ptr_scheme), &(url->m_len_scheme), copy_string);
00429 
00430   url->m_scheme_wks_idx = scheme_wks_idx;
00431   if (scheme_wks_idx >= 0)
00432     scheme_wks = hdrtoken_index_to_wks(scheme_wks_idx);
00433   else
00434     scheme_wks = NULL;
00435 
00436   if (scheme_wks == URL_SCHEME_HTTP || scheme_wks == URL_SCHEME_WS)
00437     url->m_url_type = URL_TYPE_HTTP;
00438   else if (scheme_wks == URL_SCHEME_HTTPS || scheme_wks == URL_SCHEME_WSS)
00439     url->m_url_type = URL_TYPE_HTTPS;
00440   else
00441     url->m_url_type = URL_TYPE_HTTP;
00442 
00443   return scheme_wks;          // tokenized string or NULL if not well known
00444 }
00445 
00446 /*-------------------------------------------------------------------------
00447   -------------------------------------------------------------------------*/
00448 
00449 void
00450 url_user_set(HdrHeap * heap, URLImpl * url, const char *value, int length, bool copy_string)
00451 {
00452   url_called_set(url);
00453   if (length == 0)
00454     value = NULL;
00455   mime_str_u16_set(heap, value, length, &(url->m_ptr_user), &(url->m_len_user), copy_string);
00456 }
00457 
00458 /*-------------------------------------------------------------------------
00459   -------------------------------------------------------------------------*/
00460 
00461 void
00462 url_password_set(HdrHeap * heap, URLImpl * url, const char *value, int length, bool copy_string)
00463 {
00464   url_called_set(url);
00465   if (length == 0)
00466     value = NULL;
00467   mime_str_u16_set(heap, value, length, &(url->m_ptr_password), &(url->m_len_password), copy_string);
00468 }
00469 
00470 /*-------------------------------------------------------------------------
00471   -------------------------------------------------------------------------*/
00472 
00473 void
00474 url_host_set(HdrHeap * heap, URLImpl * url, const char *value, int length, bool copy_string)
00475 {
00476   url_called_set(url);
00477   if (length == 0) value = NULL;
00478   mime_str_u16_set(heap, value, length, &(url->m_ptr_host), &(url->m_len_host), copy_string);
00479 }
00480 
00481 /*-------------------------------------------------------------------------
00482   -------------------------------------------------------------------------*/
00483 
00484 void
00485 url_port_set(HdrHeap * heap, URLImpl * url, const char *value, int length, bool copy_string)
00486 {
00487   url_called_set(url);
00488   if (length == 0)
00489     value = NULL;
00490   mime_str_u16_set(heap, value, length, &(url->m_ptr_port), &(url->m_len_port), copy_string);
00491 
00492   url->m_port = 0;
00493   for (int i = 0; i < length; i++) {
00494     if (!ParseRules::is_digit(value[i]))
00495       break;
00496     url->m_port = url->m_port * 10 + (value[i] - '0');
00497   }
00498 }
00499 
00500 /*-------------------------------------------------------------------------
00501   -------------------------------------------------------------------------*/
00502 
00503 void
00504 url_port_set(HdrHeap * heap, URLImpl * url, unsigned int port)
00505 {
00506   url_called_set(url);
00507   if (port > 0) {
00508     char value[6];
00509     int length;
00510 
00511     length = ink_fast_itoa(port, value, sizeof(value));
00512     mime_str_u16_set(heap, value, length, &(url->m_ptr_port), &(url->m_len_port), true);
00513   } else {
00514     mime_str_u16_set(heap, NULL, 0, &(url->m_ptr_port), &(url->m_len_port), true);
00515   }
00516   url->m_port = port;
00517 }
00518 
00519 /*-------------------------------------------------------------------------
00520   -------------------------------------------------------------------------*/
00521 
00522 void
00523 url_path_set(HdrHeap * heap, URLImpl * url, const char *value, int length, bool copy_string)
00524 {
00525   url_called_set(url);
00526   if (length == 0)
00527     value = NULL;
00528   mime_str_u16_set(heap, value, length, &(url->m_ptr_path), &(url->m_len_path), copy_string);
00529 }
00530 
00531 /*-------------------------------------------------------------------------
00532   -------------------------------------------------------------------------*/
00533 
00534 // empties params/query/fragment component
00535 // url_{params|query|fragment}_set()
00536 
00537 void
00538 url_params_set(HdrHeap * heap, URLImpl * url, const char *value, int length, bool copy_string)
00539 {
00540   url_called_set(url);
00541   mime_str_u16_set(heap, value, length, &(url->m_ptr_params), &(url->m_len_params), copy_string);
00542 }
00543 
00544 /*-------------------------------------------------------------------------
00545   -------------------------------------------------------------------------*/
00546 
00547 void
00548 url_query_set(HdrHeap * heap, URLImpl * url, const char *value, int length, bool copy_string)
00549 {
00550   url_called_set(url);
00551   mime_str_u16_set(heap, value, length, &(url->m_ptr_query), &(url->m_len_query), copy_string);
00552 }
00553 
00554 /*-------------------------------------------------------------------------
00555   -------------------------------------------------------------------------*/
00556 
00557 void
00558 url_fragment_set(HdrHeap * heap, URLImpl * url, const char *value, int length, bool copy_string)
00559 {
00560   url_called_set(url);
00561   mime_str_u16_set(heap, value, length, &(url->m_ptr_fragment), &(url->m_len_fragment), copy_string);
00562 }
00563 
00564 /*-------------------------------------------------------------------------
00565   -------------------------------------------------------------------------*/
00566 
00567 void
00568 url_type_set(URLImpl * url, unsigned int typecode)
00569 {
00570   url_called_set(url);
00571   url->m_type_code = typecode;
00572 }
00573 
00574 /*-------------------------------------------------------------------------
00575   -------------------------------------------------------------------------*/
00576 
00577 /***********************************************************************
00578  *                                                                     *
00579  *                               G E T                                 *
00580  *                                                                     *
00581  ***********************************************************************/
00582 
00583 /*-------------------------------------------------------------------------
00584   -------------------------------------------------------------------------*/
00585 
00586 void
00587 url_called_set(URLImpl * url)
00588 {
00589   url->m_clean = !url->m_ptr_printed_string;
00590 }
00591 
00592 void
00593 url_clear_string_ref(URLImpl * url)
00594 {
00595   if (url->m_ptr_printed_string) {
00596     url->m_len_printed_string = 0;
00597     url->m_ptr_printed_string = NULL;
00598     url->m_clean = true;
00599   }
00600   return;
00601 }
00602 
00603 char *
00604 url_string_get_ref(HdrHeap * heap, URLImpl * url, int *length)
00605 {
00606   if (!url)
00607     return NULL;
00608 
00609   if (url->m_ptr_printed_string && url->m_clean) {
00610     if (length)
00611       *length = url->m_len_printed_string;
00612     return (char *) url->m_ptr_printed_string;
00613   } else {                      //either not clean or never printed
00614     int len = url_length_get(url);
00615     char *buf;
00616     int index = 0;
00617     int offset = 0;
00618 
00619     /* stuff alloc'd here gets gc'd on HdrHeap::destroy() */
00620     buf = heap->allocate_str(len + 1);
00621     url_print(url, buf, len, &index, &offset);
00622     buf[len] = '\0';
00623 
00624     if (length) {
00625       *length = len;
00626     }
00627     url->m_clean = true;          //reset since we have url_print()'ed again
00628     url->m_len_printed_string = len;
00629     url->m_ptr_printed_string = buf;
00630     return buf;
00631   }
00632 }
00633 
00634 char *
00635 url_string_get(URLImpl * url, Arena * arena, int *length, HdrHeap * heap)
00636 {
00637   int len = url_length_get(url);
00638   char *buf;
00639   char *buf2;
00640   int index = 0;
00641   int offset = 0;
00642 
00643   buf = arena ? arena->str_alloc(len) : (char *)ats_malloc(len + 1);
00644 
00645   url_print(url, buf, len, &index, &offset);
00646   buf[len] = '\0';
00647 
00648   /* see string_get_ref() */
00649   if (heap) {
00650     buf2 = heap->allocate_str(len + 1);
00651     memcpy(buf2, buf, len);
00652     buf2[len] = '\0';
00653     url->m_clean = true;          //reset since we have url_print()'ed again
00654     url->m_len_printed_string = len;
00655     url->m_ptr_printed_string = buf2;
00656   }
00657 
00658   if (length) {
00659     *length = len;
00660   }
00661   return buf;
00662 }
00663 
00664 /*-------------------------------------------------------------------------
00665   -------------------------------------------------------------------------*/
00666 
00667 char *
00668 url_string_get_buf(URLImpl * url, char *dstbuf, int dstbuf_size, int *length)
00669 {
00670   int len = url_length_get(url);
00671   int index = 0;
00672   int offset = 0;
00673   char *buf = 0;
00674 
00675   if (dstbuf && dstbuf_size > 0) {
00676     buf = dstbuf;
00677     if (len >= dstbuf_size)
00678       len = dstbuf_size - 1;
00679     url_print(url, dstbuf, len, &index, &offset);
00680     buf[len] = 0;
00681 
00682     if (length)
00683       *length = len;
00684   }
00685   return buf;
00686 }
00687 
00688 /*-------------------------------------------------------------------------
00689   -------------------------------------------------------------------------*/
00690 
00691 const char *
00692 url_scheme_get(URLImpl * url, int *length)
00693 {
00694   const char *str;
00695 
00696   if (url->m_scheme_wks_idx >= 0) {
00697     str = hdrtoken_index_to_wks(url->m_scheme_wks_idx);
00698     *length = hdrtoken_index_to_length(url->m_scheme_wks_idx);
00699   } else {
00700     str = url->m_ptr_scheme;
00701     *length = url->m_len_scheme;
00702   }
00703   return str;
00704 }
00705 
00706 /*-------------------------------------------------------------------------
00707   -------------------------------------------------------------------------*/
00708 
00709 const char *
00710 url_user_get(URLImpl * url, int *length)
00711 {
00712   *length = url->m_len_user;
00713   return url->m_ptr_user;
00714 }
00715 
00716 /*-------------------------------------------------------------------------
00717   -------------------------------------------------------------------------*/
00718 
00719 const char *
00720 url_password_get(URLImpl * url, int *length)
00721 {
00722   *length = url->m_len_password;
00723   return url->m_ptr_password;
00724 }
00725 
00726 /*-------------------------------------------------------------------------
00727   -------------------------------------------------------------------------*/
00728 
00729 const char *
00730 url_host_get(URLImpl * url, int *length)
00731 {
00732   *length = url->m_len_host;
00733   return url->m_ptr_host;
00734 }
00735 
00736 /*-------------------------------------------------------------------------
00737   -------------------------------------------------------------------------*/
00738 
00739 int
00740 url_port_get(URLImpl * url)
00741 {
00742   return url->m_port;
00743 }
00744 
00745 /*-------------------------------------------------------------------------
00746   -------------------------------------------------------------------------*/
00747 
00748 const char *
00749 url_path_get(URLImpl * url, int *length)
00750 {
00751   *length = url->m_len_path;
00752   return url->m_ptr_path;
00753 }
00754 
00755 /*-------------------------------------------------------------------------
00756   -------------------------------------------------------------------------*/
00757 
00758 const char *
00759 url_params_get(URLImpl * url, int *length)
00760 {
00761   *length = url->m_len_params;
00762   return url->m_ptr_params;
00763 }
00764 
00765 /*-------------------------------------------------------------------------
00766   -------------------------------------------------------------------------*/
00767 
00768 const char *
00769 url_query_get(URLImpl * url, int *length)
00770 {
00771   *length = url->m_len_query;
00772   return url->m_ptr_query;
00773 }
00774 
00775 /*-------------------------------------------------------------------------
00776   -------------------------------------------------------------------------*/
00777 
00778 const char *
00779 url_fragment_get(URLImpl * url, int *length)
00780 {
00781   *length = url->m_len_fragment;
00782   return url->m_ptr_fragment;
00783 }
00784 
00785 /*-------------------------------------------------------------------------
00786   -------------------------------------------------------------------------*/
00787 
00788 int
00789 url_type_get(URLImpl * url)
00790 {
00791   return url->m_type_code;
00792 }
00793 
00794 /*-------------------------------------------------------------------------
00795   -------------------------------------------------------------------------*/
00796 
00797 /***********************************************************************
00798  *                                                                     *
00799  *               U R L    S T R I N G    F U N C T I O N S             *
00800  *                                                                     *
00801  ***********************************************************************/
00802 
00803 /*-------------------------------------------------------------------------
00804   -------------------------------------------------------------------------*/
00805 
00806 int
00807 url_length_get(URLImpl * url)
00808 {
00809   int length = 0;
00810 
00811   if (url->m_ptr_scheme) {
00812     if ((url->m_scheme_wks_idx >= 0) && (hdrtoken_index_to_wks(url->m_scheme_wks_idx) == URL_SCHEME_FILE))
00813       length += url->m_len_scheme + 1;  // +1 for ":"
00814     else
00815       length += url->m_len_scheme + 3;  // +3 for "://"
00816   }
00817 
00818   if (url->m_ptr_user) {
00819     length += url->m_len_user + 1;      // +1 for "@"
00820     if (url->m_ptr_password)
00821       length += url->m_len_password + 1;        // +1 for ":"
00822   }
00823 
00824   if (url->m_ptr_host) {
00825     length += url->m_len_host;
00826     if (url->m_ptr_port && url->m_port)
00827       length += url->m_len_port + 1;    // +1 for ":"
00828   }
00829 
00830   if (url->m_ptr_path) {
00831     length += url->m_len_path + 1;      // +1 for /
00832   }
00833   else {
00834     length += 1;                // +1 for /
00835   }
00836 
00837   if (url->m_ptr_params && url->m_len_params > 0) {
00838     length += url->m_len_params + 1;  // +1 for ";"
00839   }
00840 
00841   if (url->m_ptr_query && url->m_len_query > 0) {
00842     length += url->m_len_query + 1;   // +1 for "?"
00843   }
00844 
00845   if (url->m_ptr_fragment && url->m_len_fragment > 0) {
00846     length += url->m_len_fragment + 1;        // +1 for "#"
00847   }
00848 
00849   return length;
00850 }
00851 
00852 /*-------------------------------------------------------------------------
00853   -------------------------------------------------------------------------*/
00854 
00855 char *
00856 url_to_string(URLImpl * url, Arena * arena, int *length)
00857 {
00858   int len;
00859   int idx;
00860   char *str;
00861 
00862   len = url_length_get(url) + 1;
00863 
00864   if (length)
00865     *length = len;
00866 
00867   if (arena)
00868     str = arena->str_alloc(len);
00869   else
00870     str = (char *)ats_malloc(len + 1);
00871 
00872   idx = 0;
00873 
00874   if (url->m_ptr_scheme) {
00875     memcpy(&str[idx], url->m_ptr_scheme, url->m_len_scheme);
00876     idx += url->m_len_scheme;
00877     if ((url->m_scheme_wks_idx >= 0) && (hdrtoken_index_to_wks(url->m_scheme_wks_idx) == URL_SCHEME_FILE)) {
00878       str[idx++] = ':';
00879     } else {
00880       str[idx++] = ':';
00881       str[idx++] = '/';
00882       str[idx++] = '/';
00883     }
00884   }
00885 
00886   if (url->m_ptr_user) {
00887     memcpy(&str[idx], url->m_ptr_user, url->m_len_user);
00888     idx += url->m_len_user;
00889     if (url->m_ptr_password) {
00890       str[idx++] = ':';
00891       memcpy(&str[idx], url->m_ptr_password, url->m_len_password);
00892       idx += url->m_len_password;
00893     }
00894     str[idx++] = '@';
00895   }
00896 
00897   if (url->m_ptr_host) {
00898     memcpy(&str[idx], url->m_ptr_host, url->m_len_host);
00899     idx += url->m_len_host;
00900     if (url->m_ptr_port != 0) {
00901       str[idx++] = ':';
00902       memcpy(&str[idx], url->m_ptr_port, url->m_len_port);
00903       idx += url->m_len_port;
00904     }
00905   }
00906 
00907   memcpy(&str[idx], url->m_ptr_path, url->m_len_path);
00908   idx += url->m_len_path;
00909 
00910   if (url->m_ptr_params && url->m_len_params > 0) {
00911     str[idx++] = ';';
00912     memcpy(&str[idx], url->m_ptr_params, url->m_len_params);
00913     idx += url->m_len_params;
00914   }
00915 
00916   if (url->m_ptr_query && url->m_len_query > 0) {
00917     str[idx++] = '?';
00918     memcpy(&str[idx], url->m_ptr_query, url->m_len_query);
00919     idx += url->m_len_query;
00920   }
00921 
00922   if (url->m_ptr_fragment && url->m_len_fragment > 0) {
00923     str[idx++] = '#';
00924     memcpy(&str[idx], url->m_ptr_fragment, url->m_len_fragment);
00925     idx += url->m_len_fragment;
00926   }
00927 
00928   str[idx++] = '\0';
00929 
00930   ink_release_assert(idx == len);
00931 
00932   return str;
00933 }
00934 
00935 /*-------------------------------------------------------------------------
00936   -------------------------------------------------------------------------*/
00937 
00938 /***********************************************************************
00939  *                                                                     *
00940  *                     E S C A P E - H A N D L I N G                   *
00941  *                                                                     *
00942  ***********************************************************************/
00943 
00944 void
00945 unescape_str(char *&buf, char *buf_e, const char *&str, const char *str_e, int &state)
00946 {
00947   int copy_len;
00948   char *first_pct;
00949   int buf_len = (int) (buf_e - buf);
00950   int str_len = (int) (str_e - str);
00951   int min_len = (int) (str_len < buf_len ? str_len : buf_len);
00952 
00953   first_pct = ink_memcpy_until_char(buf, (char *) str, min_len, '%');
00954   copy_len = (int) (first_pct - str);
00955   str += copy_len;
00956   buf += copy_len;
00957   if (copy_len == min_len)
00958     return;
00959 
00960   while (str < str_e && (buf != buf_e)) {
00961     switch (state) {
00962     case 0:
00963       if (str[0] == '%') {
00964         str += 1;
00965         state = 1;
00966       } else {
00967         *buf++ = str[0];
00968         str += 1;
00969       }
00970       break;
00971     case 1:
00972       if (ParseRules::is_hex(str[0])) {
00973         str += 1;
00974         state = 2;
00975       } else {
00976         *buf++ = str[-1];
00977         state = 0;
00978       }
00979       break;
00980     case 2:
00981       if (ParseRules::is_hex(str[0])) {
00982         int tmp;
00983 
00984         if (ParseRules::is_alpha(str[-1])) {
00985           tmp = (ParseRules::ink_toupper(str[-1]) - 'A' + 10) * 16;
00986         } else {
00987           tmp = (str[-1] - '0') * 16;
00988         }
00989         if (ParseRules::is_alpha(str[0])) {
00990           tmp += (ParseRules::ink_toupper(str[0]) - 'A' + 10);
00991         } else {
00992           tmp += str[0] - '0';
00993         }
00994 
00995         *buf++ = tmp;
00996         str += 1;
00997         state = 0;
00998       } else {
00999         *buf++ = str[-2];
01000         state = 3;
01001       }
01002       break;
01003     case 3:
01004       *buf++ = str[-1];
01005       state = 0;
01006       break;
01007     }
01008   }
01009 }
01010 
01011 /*-------------------------------------------------------------------------
01012   -------------------------------------------------------------------------*/
01013 
01014 void
01015 unescape_str_tolower(char *&buf, char *end, const char *&str, const char *str_e, int &state)
01016 {
01017   while (str < str_e && (buf != end)) {
01018     switch (state) {
01019     case 0:
01020       if (str[0] == '%') {
01021         str += 1;
01022         state = 1;
01023       } else {
01024         *buf++ = ParseRules::ink_tolower(str[0]);
01025         str += 1;
01026       }
01027       break;
01028     case 1:
01029       if (ParseRules::is_hex(str[0])) {
01030         str += 1;
01031         state = 2;
01032       } else {
01033         *buf++ = ParseRules::ink_tolower(str[-1]);
01034         state = 0;
01035       }
01036       break;
01037     case 2:
01038       if (ParseRules::is_hex(str[0])) {
01039         int tmp;
01040 
01041         if (ParseRules::is_alpha(str[-1])) {
01042           tmp = (ParseRules::ink_toupper(str[-1]) - 'A' + 10) * 16;
01043         } else {
01044           tmp = (str[-1] - '0') * 16;
01045         }
01046         if (ParseRules::is_alpha(str[0])) {
01047           tmp += (ParseRules::ink_toupper(str[0]) - 'A' + 10);
01048         } else {
01049           tmp += str[0] - '0';
01050         }
01051 
01052         *buf++ = tmp;
01053         str += 1;
01054         state = 0;
01055       } else {
01056         *buf++ = ParseRules::ink_tolower(str[-2]);
01057         state = 3;
01058       }
01059       break;
01060     case 3:
01061       *buf++ = ParseRules::ink_tolower(str[-1]);
01062       state = 0;
01063       break;
01064     }
01065   }
01066 }
01067 
01068 /*-------------------------------------------------------------------------
01069   -------------------------------------------------------------------------*/
01070 
01071 char *
01072 url_unescapify(Arena * arena, const char *str, int length)
01073 {
01074   char *buffer;
01075   char *t, *e;
01076   int s;
01077 
01078   if (length == -1)
01079     length = (int) strlen(str);
01080 
01081   buffer = arena->str_alloc(length);
01082   t = buffer;
01083   e = buffer + length;
01084   s = 0;
01085 
01086   unescape_str(t, e, str, str + length, s);
01087   *t = '\0';
01088 
01089   return buffer;
01090 }
01091 
01092 /*-------------------------------------------------------------------------
01093   -------------------------------------------------------------------------*/
01094 
01095 /***********************************************************************
01096  *                                                                     *
01097  *                            P A R S I N G                            *
01098  *                                                                     *
01099  ***********************************************************************/
01100 
// Advance the local cursor 'cur' by one character and jump to 'label'
// when that reaches the local 'end'. Written as a single statement, so
// it must be followed by a semicolon at the point of use.
#define GETNEXT(label)   \
  do {                   \
    cur += 1;            \
    if (cur >= end) {    \
      goto label;        \
    }                    \
  } while (0)
01107 
01108 MIMEParseResult
01109 url_parse_scheme(HdrHeap * heap, URLImpl * url, const char **start, const char *end, bool copy_strings_p)
01110 {
01111   const char *cur = *start;
01112   const char *scheme_wks;
01113   const char *scheme_start = NULL;
01114   const char *scheme_end = NULL;
01115   int scheme_wks_idx;
01116 
01117   while(' ' == *cur && ++cur < end)
01118     ;
01119   if (cur < end) {
01120     scheme_start = scheme_end = cur;
01121     // special case 'http:' for performance
01122     if ((end - cur >= 5) && (((cur[0] ^ 'h') | (cur[1] ^ 't') | (cur[2] ^ 't') | (cur[3] ^ 'p') | (cur[4] ^ ':')) == 0)) {
01123       scheme_end = cur + 4;                   // point to colon
01124       url_scheme_set(heap, url, scheme_start, URL_WKSIDX_HTTP, 4, copy_strings_p);
01125     } else if ('/' != *cur) {
01126       // For forward transparent mode, the URL for the method can just be a path,
01127       // so don't scan that for a scheme, as we could find a false positive if there
01128       // is a URL in the parameters (which is legal).
01129       while (':' != *cur && ++cur < end)
01130         ;
01131       if (cur < end) { // found a colon
01132         scheme_wks_idx = hdrtoken_tokenize(scheme_start, cur - scheme_start, &scheme_wks);
01133     
01134         /*  Distinguish between a scheme only and a username by looking past the colon. If it is missing
01135             or it's a slash, presume scheme. Otherwise it's a username with a password.
01136         */
01137         if ((scheme_wks_idx > 0 && hdrtoken_wks_to_token_type(scheme_wks) == HDRTOKEN_TYPE_SCHEME) || // known scheme
01138            (cur >= end-1 || cur[1] == '/')) // no more data or slash past colon
01139         {
01140           scheme_end = cur;
01141           url_scheme_set(heap, url, scheme_start, scheme_wks_idx, scheme_end - scheme_start, copy_strings_p);
01142         }
01143       }
01144     }
01145     *start = scheme_end;
01146     return PARSE_CONT;
01147   }
01148   return PARSE_ERROR; // no non-whitespace found
01149 }
01150 
01151 MIMEParseResult
01152 url_parse(HdrHeap * heap, URLImpl * url, const char **start, const char *end, bool copy_strings_p)
01153 {
01154   MIMEParseResult zret = url_parse_scheme(heap, url, start, end, copy_strings_p);
01155   return PARSE_CONT == zret ? url_parse_http(heap, url, start, end, copy_strings_p) : zret;
01156 }
01157 
01158 MIMEParseResult
01159 url_parse_no_path_component_breakdown(HdrHeap * heap, URLImpl * url, const char **start, const char *end, bool copy_strings_p)
01160 {
01161   MIMEParseResult zret = url_parse_scheme(heap, url, start, end, copy_strings_p);
01162   return PARSE_CONT == zret ? url_parse_http_no_path_component_breakdown(heap, url, start, end, copy_strings_p) : zret;
01163 }
01164 
01165 /**
01166   Parse internet URL.
01167 
01168   @verbatim
01169   [://][user[:password]@]host[:port]
01170 
01171   some.place/
01172   some.place:80/
01173   foo@some.place:80/
01174   foo:bar@some.place:80/
01175   foo:bar@some.place/
01176   foo:42@some.place/
01177   @endverbatim
01178 
01179 */
01180 
01181 MIMEParseResult
01182 url_parse_internet(HdrHeap* heap, URLImpl* url,
01183                        char const ** start, char const *end,
01184                        bool copy_strings_p)
01185 {
01186   char const* cur = *start;
01187   char const* base; // Base for host/port field.
01188   char const* bracket = 0; // marker for open bracket, if any.
01189   ts::ConstBuffer user, passw, host, port;
01190   static size_t const MAX_COLON = 8; // max # of valid colons.
01191   size_t n_colon = 0;
01192   char const* last_colon = 0; // pointer to last colon seen.
01193 
01194   // Do a quick check for "://"
01195   if (end - cur > 3 &&
01196       (((':' ^ *cur) | ('/' ^ cur[1]) | ('/' ^ cur[2])) == 0)) {
01197     cur += 3;
01198   } else if (':' == *cur && (++cur >= end ||
01199                              ('/' == *cur && (++cur >= end ||
01200                                               ('/' == *cur && ++cur >= end))))) {
01201     return PARSE_ERROR;
01202   }
01203   base = cur;
01204   // skipped leading stuff, start real parsing.
01205   while (cur < end) {
01206     // Note: Each case is responsible for incrementing @a cur if
01207     // appropriate!
01208     switch (*cur) {
01209     case ']' : // address close
01210       if (0 == bracket || n_colon >= MAX_COLON)
01211         return PARSE_ERROR;
01212       ++cur;
01213       /* We keep the brackets because there are too many other places
01214          that depend on them and it's too painful to keep track if
01215          they should be used. I thought about being clever with
01216          stripping brackets from non-IPv6 content but that gets ugly
01217          as well. Just not worth it.
01218        */
01219       host.set(bracket, cur);
01220       // Spec requires This constitute the entire host so the next
01221       // character must be missing (EOS), slash, or colon.
01222       if (cur >= end || '/' == *cur) { // done which is OK
01223         last_colon = 0;
01224         break;
01225       } else if (':' != *cur) { // otherwise it must be a colon
01226         return PARSE_ERROR;
01227       }
01228       /* We want to prevent more than 1 colon following so we set @a
01229          n_colon appropriately.
01230       */
01231       n_colon = MAX_COLON - 1;
01232       // FALL THROUGH
01233     case ':' : // track colons, fail if too many.
01234       if (++n_colon > MAX_COLON)
01235         return PARSE_ERROR;
01236       last_colon = cur;
01237       ++cur;
01238       break;
01239     case '@' : // user/password marker.
01240       if (user || n_colon > 1)
01241         return PARSE_ERROR; // we already got one, or too many colons.
01242       if (n_colon) {
01243         user.set(base, last_colon);
01244         passw.set(last_colon+1, cur);
01245         n_colon= 0;
01246         last_colon = 0;
01247       } else {
01248         user.set(base, cur);
01249       }
01250       ++cur;
01251       base = cur;
01252       break;
01253     case '[' : // address open
01254       if (bracket || base != cur) // must be first char in field
01255         return PARSE_ERROR;
01256       bracket = cur; // location and flag.
01257       ++cur;
01258       break;
01259     case '/' : // we're done with this phase.
01260       end = cur; // cause loop exit
01261       break;
01262     default:
01263       ++cur;
01264       break;
01265     };
01266   }
01267   // Time to pick up the pieces. At this pointer cur._ptr is the first
01268   // character past the parse area.
01269 
01270   if (user) {
01271     url_user_set(heap, url, user._ptr, user._size, copy_strings_p);
01272     if (passw)
01273       url_password_set(heap, url, passw._ptr, passw._size, copy_strings_p);
01274   }
01275 
01276   // @a host not set means no brackets to mark explicit host.
01277   if (!host) {
01278     if (1 == n_colon || MAX_COLON == n_colon) { // presume port.
01279       host.set(base, last_colon);
01280     } else { // it's all host.
01281       host.set(base, cur);
01282       last_colon = 0; // prevent port setting.
01283     }
01284   }
01285   if (host._size)
01286     url_host_set(heap, url, host._ptr, host._size, copy_strings_p);
01287   
01288   if (last_colon) {
01289     ink_assert(n_colon);
01290     port.set(last_colon+1, cur);
01291     if (!port._size)
01292       return PARSE_ERROR; // colon w/o port value.
01293     url_port_set(heap, url, port._ptr, port._size, copy_strings_p);
01294   }
01295   if ('/' == *cur) ++cur; // must do this after filling in host/port.
01296   *start = cur;
01297   return PARSE_DONE;
01298 }
01299 /*-------------------------------------------------------------------------
01300   -------------------------------------------------------------------------*/
01301 
01302 // empties params/query/fragment component
01303 
01304 MIMEParseResult
01305 url_parse_http(HdrHeap * heap, URLImpl * url, const char **start, const char *end, bool copy_strings)
01306 {
01307   MIMEParseResult err;
01308   const char *cur;
01309   const char *path_start = NULL;
01310   const char *path_end = NULL;
01311   const char *params_start = NULL;
01312   const char *params_end = NULL;
01313   const char *query_start = NULL;
01314   const char *query_end = NULL;
01315   const char *fragment_start = NULL;
01316   const char *fragment_end = NULL;
01317   char mask;
01318 
01319   err = url_parse_internet(heap, url, start, end, copy_strings);
01320   if (err < 0)
01321     return err;
01322 
01323   cur = *start;
01324   if (*start == end)
01325     goto done;
01326 
01327   path_start = cur;
01328   mask = ';' & '?' & '#';
01329 parse_path2:
01330   if ((*cur & mask) == mask) {
01331     if (*cur == ';') {
01332       path_end = cur;
01333       goto parse_params1;
01334     }
01335     if (*cur == '?') {
01336       path_end = cur;
01337       goto parse_query1;
01338     }
01339     if (*cur == '#') {
01340       path_end = cur;
01341       goto parse_fragment1;
01342     }
01343   } else {
01344     ink_assert((*cur != ';') && (*cur != '?') && (*cur != '#'));
01345   }
01346   GETNEXT(done);
01347   goto parse_path2;
01348 
01349 parse_params1:
01350   params_start = cur + 1;
01351   GETNEXT(done);
01352 parse_params2:
01353   if (*cur == '?') {
01354     params_end = cur;
01355     goto parse_query1;
01356   }
01357   if (*cur == '#') {
01358     params_end = cur;
01359     goto parse_fragment1;
01360   }
01361   GETNEXT(done);
01362   goto parse_params2;
01363 
01364 parse_query1:
01365   query_start = cur + 1;
01366   GETNEXT(done);
01367 parse_query2:
01368   if (*cur == '#') {
01369     query_end = cur;
01370     goto parse_fragment1;
01371   }
01372   GETNEXT(done);
01373   goto parse_query2;
01374 
01375 parse_fragment1:
01376   fragment_start = cur + 1;
01377   GETNEXT(done);
01378   fragment_end = end;
01379 
01380 done:
01381   if (path_start) {
01382     if (!path_end)
01383       path_end = cur;
01384     url_path_set(heap, url, path_start, path_end - path_start, copy_strings);
01385   }
01386   if (params_start) {
01387     if (!params_end)
01388       params_end = cur;
01389     url_params_set(heap, url, params_start, params_end - params_start, copy_strings);
01390   }
01391   if (query_start) {
01392     if (!query_end)
01393       query_end = cur;
01394     url_query_set(heap, url, query_start, query_end - query_start, copy_strings);
01395   }
01396   if (fragment_start) {
01397     if (!fragment_end)
01398       fragment_end = cur;
01399     url_fragment_set(heap, url, fragment_start, fragment_end - fragment_start, copy_strings);
01400   }
01401 
01402   *start = cur;
01403   return PARSE_DONE;
01404 }
01405 
01406 MIMEParseResult
01407 url_parse_http_no_path_component_breakdown(HdrHeap * heap,
01408                                            URLImpl * url, const char **start, const char *end, bool copy_strings)
01409 {
01410   const char *cur = *start;
01411   char const* host_end;
01412 
01413   // Do a quick check for "://" - our only format check.
01414   if (end - cur > 3 &&
01415       (((':' ^ *cur) | ('/' ^ cur[1]) | ('/' ^ cur[2])) == 0)) {
01416     cur += 3;
01417   } else if (':' == *cur && (++cur >= end ||
01418                              ('/' == *cur && (++cur >= end ||
01419                                               ('/' == *cur && ++cur >= end))))) {
01420     return PARSE_ERROR;
01421   }
01422 
01423   // Grab everything until EOS or slash.
01424   char const* base = cur;
01425   cur = static_cast<char const*>(memchr(cur, '/', end - cur));
01426   if (cur) {
01427     host_end = cur;
01428     ++cur;
01429   } else {
01430     host_end = cur = end;
01431   }
01432 
01433   // Did we find something for the host?
01434   if (base != host_end) {
01435     char const* port = 0;
01436     int port_len = 0;
01437 
01438     // Check for port. Search from the end stopping on the first non-digit
01439     // or more than 5 digits and a delimiter.
01440     port = host_end - 1;
01441     char const* port_limit = host_end - 6;
01442     if (port_limit < base) port_limit = base; // don't go past start.
01443     while (port >= port_limit && isdigit(*port))
01444       --port;
01445     // A port if we're still in the host area and we found a ':' as
01446     // the immediately preceeding character.
01447     if (port >= base && ':' == *port) {
01448       port_len = host_end - port - 1; // must compute this first.
01449       host_end = port; // then point at colon.
01450       ++port; // drop colon from port.
01451       url_port_set(heap, url, port, port_len, copy_strings);
01452     }
01453     // Now we can set the host.
01454     url_host_set(heap, url, base, host_end - base, copy_strings);
01455   }
01456 
01457   // path is anything that's left.
01458   if (cur < end) {
01459     url_path_set(heap, url, cur, end - cur, copy_strings);
01460     cur = end;
01461   }
01462   *start = cur;
01463   return PARSE_DONE;
01464 }
01465 
01466 /*-------------------------------------------------------------------------
01467   -------------------------------------------------------------------------*/
01468 
01469 /***********************************************************************
01470  *                                                                     *
01471  *                           P R I N T I N G                           *
01472  *                                                                     *
01473  ***********************************************************************/
01474 
01475 int
01476 url_print(URLImpl * url, char *buf_start, int buf_length, int *buf_index_inout, int *buf_chars_to_skip_inout)
01477 {
01478 #define TRY(x)  if (!x) return 0
01479 
01480   if (url->m_ptr_scheme) {
01481     TRY(mime_mem_print(url->m_ptr_scheme, url->m_len_scheme,
01482                        buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01483     // [amc] Why is "file:" special cased to be wrong?
01484 //    if ((url->m_scheme_wks_idx >= 0) && (hdrtoken_index_to_wks(url->m_scheme_wks_idx) == URL_SCHEME_FILE)) {
01485 //      TRY(mime_mem_print(":", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01486 //    } else {
01487       TRY(mime_mem_print("://", 3, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01488 //    }
01489   }
01490 
01491   if (url->m_ptr_user) {
01492     TRY(mime_mem_print(url->m_ptr_user, url->m_len_user,
01493                        buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01494     if (url->m_ptr_password) {
01495       TRY(mime_mem_print(":", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01496       TRY(mime_mem_print(url->m_ptr_password, url->m_len_password,
01497                          buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01498     }
01499     TRY(mime_mem_print("@", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01500   }
01501 
01502   if (url->m_ptr_host) {
01503     // Force brackets for IPv6. Note colon must occur in first 5 characters.
01504     // But it can be less (e.g. "::1").
01505     int n = url->m_len_host;
01506     bool bracket_p = '[' != *url->m_ptr_host && (0 != memchr(url->m_ptr_host, ':', n > 5 ? 5 : n));
01507     if (bracket_p)
01508       TRY(mime_mem_print("[", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01509     TRY(mime_mem_print(url->m_ptr_host, url->m_len_host,
01510                        buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01511     if (bracket_p)
01512       TRY(mime_mem_print("]", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01513     if (url->m_ptr_port && url->m_port) {
01514       TRY(mime_mem_print(":", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01515       TRY(mime_mem_print(url->m_ptr_port, url->m_len_port,
01516                          buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01517     }
01518   }
01519 
01520   TRY(mime_mem_print("/", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01521 
01522   if (url->m_ptr_path) {
01523     TRY(mime_mem_print(url->m_ptr_path, url->m_len_path, buf_start,
01524                        buf_length, buf_index_inout, buf_chars_to_skip_inout));
01525   }
01526 
01527   if (url->m_ptr_params && url->m_len_params > 0) {
01528     TRY(mime_mem_print(";", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01529     TRY(mime_mem_print(url->m_ptr_params, url->m_len_params,
01530                        buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01531   }
01532 
01533   if (url->m_ptr_query && url->m_len_query > 0) {
01534     TRY(mime_mem_print("?", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01535     TRY(mime_mem_print(url->m_ptr_query, url->m_len_query,
01536                        buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01537   }
01538 
01539   if (url->m_ptr_fragment && url->m_len_fragment > 0) {
01540     TRY(mime_mem_print("#", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01541     TRY(mime_mem_print(url->m_ptr_fragment, url->m_len_fragment,
01542                        buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01543   }
01544 
01545   return 1;
01546 
01547 #undef TRY
01548 }
01549 
01550 void
01551 url_describe(HdrHeapObjImpl * raw, bool /* recurse ATS_UNUSED */)
01552 {
01553   URLImpl *obj = (URLImpl *) raw;
01554 
01555   Debug("http", "[URLTYPE: %d, SWKSIDX: %d,\n", obj->m_url_type, obj->m_scheme_wks_idx);
01556   Debug("http", "\tSCHEME: \"%.*s\", SCHEME_LEN: %d,\n",
01557         obj->m_len_scheme, (obj->m_ptr_scheme ? obj->m_ptr_scheme : "NULL"), obj->m_len_scheme);
01558   Debug("http", "\tUSER: \"%.*s\", USER_LEN: %d,\n",
01559         obj->m_len_user, (obj->m_ptr_user ? obj->m_ptr_user : "NULL"), obj->m_len_user);
01560   Debug("http", "\tPASSWORD: \"%.*s\", PASSWORD_LEN: %d,\n",
01561         obj->m_len_password, (obj->m_ptr_password ? obj->m_ptr_password : "NULL"), obj->m_len_password);
01562   Debug("http", "\tHOST: \"%.*s\", HOST_LEN: %d,\n",
01563         obj->m_len_host, (obj->m_ptr_host ? obj->m_ptr_host : "NULL"), obj->m_len_host);
01564   Debug("http", "\tPORT: \"%.*s\", PORT_LEN: %d, PORT_NUM: %d\n",
01565         obj->m_len_port, (obj->m_ptr_port ? obj->m_ptr_port : "NULL"), obj->m_len_port, obj->m_port);
01566   Debug("http", "\tPATH: \"%.*s\", PATH_LEN: %d,\n",
01567         obj->m_len_path, (obj->m_ptr_path ? obj->m_ptr_path : "NULL"), obj->m_len_path);
01568   Debug("http", "\tPARAMS: \"%.*s\", PARAMS_LEN: %d,\n",
01569         obj->m_len_params, (obj->m_ptr_params ? obj->m_ptr_params : "NULL"), obj->m_len_params);
01570   Debug("http", "\tQUERY: \"%.*s\", QUERY_LEN: %d,\n",
01571         obj->m_len_query, (obj->m_ptr_query ? obj->m_ptr_query : "NULL"), obj->m_len_query);
01572   Debug("http", "\tFRAGMENT: \"%.*s\", FRAGMENT_LEN: %d]\n",
01573         obj->m_len_fragment, (obj->m_ptr_fragment ? obj->m_ptr_fragment : "NULL"), obj->m_len_fragment);
01574 }
01575 
01576 
01577 /*-------------------------------------------------------------------------
01578   -------------------------------------------------------------------------*/
01579 
01580 /***********************************************************************
01581  *                                                                     *
01582  *                        U R L    D I G E S T S                       *
01583  *                                                                     *
01584  ***********************************************************************/
01585 
01586 static inline void
01587 memcpy_tolower(char *d, const char *s, int n)
01588 {
01589   while (n--) {
01590     *d = ParseRules::ink_tolower(*s);
01591     s++;
01592     d++;
01593   }
01594 }
01595 
01596 
01597 #define BUFSIZE 512
01598 
01599 // fast path for MD5, HTTP, no user/password/params/query,
01600 // no buffer overflow, no unescaping needed
01601 
01602 static inline void
01603 url_MD5_get_fast(URLImpl * url, CryptoContext& ctx, CryptoHash* hash)
01604 {
01605   char buffer[BUFSIZE];
01606   char *p;
01607 
01608   p = buffer;
01609   memcpy_tolower(p, url->m_ptr_scheme, url->m_len_scheme);
01610   p += url->m_len_scheme;
01611   *p++ = ':';
01612   *p++ = '/';
01613   *p++ = '/';
01614   // no user
01615   *p++ = ':';
01616   // no password
01617   *p++ = '@';
01618   memcpy_tolower(p, url->m_ptr_host, url->m_len_host);
01619   p += url->m_len_host;
01620   *p++ = '/';
01621   memcpy(p, url->m_ptr_path, url->m_len_path);
01622   p += url->m_len_path;
01623   *p++ = ';';
01624   // no params
01625   *p++ = '?';
01626   // no query
01627 
01628   ink_assert(sizeof(url->m_port) == 2);
01629   uint16_t port = (uint16_t) url_canonicalize_port(url->m_url_type, url->m_port);
01630   *p++ = ((char *) &port)[0];
01631   *p++ = ((char *) &port)[1];
01632 
01633   ctx.update(buffer, p - buffer);
01634   ctx.finalize(hash);
01635 }
01636 
01637 
01638 static inline void
01639 url_MD5_get_general(URLImpl * url, CryptoContext& ctx, CryptoHash& hash)
01640 {
01641   char buffer[BUFSIZE];
01642   char *p, *e;
01643   const char *strs[13], *ends[13];
01644   const char *t;
01645   in_port_t port;
01646   int i, s;
01647 
01648   strs[0] = url->m_ptr_scheme;
01649   strs[1] = "://";
01650   strs[2] = url->m_ptr_user;
01651   strs[3] = ":";
01652   strs[4] = url->m_ptr_password;
01653   strs[5] = "@";
01654   strs[6] = url->m_ptr_host;
01655   strs[7] = "/";
01656   strs[8] = url->m_ptr_path;
01657 
01658   ends[0] = strs[0] + url->m_len_scheme;
01659   ends[1] = strs[1] + 3;
01660   ends[2] = strs[2] + url->m_len_user;
01661   ends[3] = strs[3] + 1;
01662   ends[4] = strs[4] + url->m_len_password;
01663   ends[5] = strs[5] + 1;
01664   ends[6] = strs[6] + url->m_len_host;
01665   ends[7] = strs[7] + 1;
01666   ends[8] = strs[8] + url->m_len_path;
01667 
01668   strs[9] = ";";
01669   strs[10] = url->m_ptr_params;
01670   strs[11] = "?";
01671   strs[12] = url->m_ptr_query;
01672   ends[9] = strs[9] + 1;
01673   ends[10] = strs[10] + url->m_len_params;
01674   ends[11] = strs[11] + 1;
01675   ends[12] = strs[12] + url->m_len_query;
01676 
01677   p = buffer;
01678   e = buffer + BUFSIZE;
01679 
01680   for (i = 0; i < 13; i++) {
01681     if (strs[i]) {
01682       t = strs[i];
01683       s = 0;
01684 
01685       while (t < ends[i]) {
01686         if ((i == 0) || (i == 6)) {           // scheme and host
01687           unescape_str_tolower(p, e, t, ends[i], s);
01688         } else {
01689           unescape_str(p, e, t, ends[i], s);
01690         }
01691 
01692         if (p == e) {
01693           ctx.update(buffer, BUFSIZE);
01694           p = buffer;
01695         }
01696       }
01697     }
01698   }
01699 
01700   if (p != buffer) ctx.update(buffer, p-buffer);
01701 
01702   port = url_canonicalize_port(url->m_url_type, url->m_port);
01703 
01704   ctx.update(&port, sizeof(port));
01705   ctx.finalize(hash);
01706 }
01707 
01708 void
01709 url_MD5_get(URLImpl * url, CryptoHash* hash)
01710 {
01711   URLHashContext ctx;
01712   if ((url_hash_method != 0) &&
01713       (url->m_url_type == URL_TYPE_HTTP) &&
01714       ((url->m_len_user + url->m_len_password + url->m_len_params + url->m_len_query) == 0) &&
01715       (3 + 1 + 1 + 1 + 1 + 1 + 2 +
01716        url->m_len_scheme +
01717        url->m_len_host +
01718        url->m_len_path < BUFSIZE) &&
01719       (memchr(url->m_ptr_host, '%', url->m_len_host) == NULL) &&
01720       (memchr(url->m_ptr_path, '%', url->m_len_path) == NULL)) {
01721     url_MD5_get_fast(url, ctx, hash);
01722 #ifdef DEBUG
01723     CryptoHash md5_general;
01724     url_MD5_get_general(url, ctx, md5_general);
01725     ink_assert(*hash == md5_general);
01726 #endif
01727   } else {
01728     url_MD5_get_general(url, ctx, *hash);
01729   }
01730 }
01731 
01732 #undef BUFSIZE
01733 
01734 /*-------------------------------------------------------------------------
01735   -------------------------------------------------------------------------*/
01736 
01737 void
01738 url_host_MD5_get(URLImpl * url, INK_MD5 * md5)
01739 {
01740   MD5Context ctx;
01741 
01742   if (url->m_ptr_scheme) {
01743     ctx.update(url->m_ptr_scheme, url->m_len_scheme);
01744   }
01745 
01746   ctx.update("://", 3);
01747 
01748   if (url->m_ptr_host) {
01749     ctx.update(url->m_ptr_host, url->m_len_host);
01750   }
01751 
01752   ctx.update(":", 1);
01753 
01754   // [amc] Why is this <int> and not <in_port_t>?
01755   // Especially since it's in_port_t for url_MD5_get.
01756   int port = url_canonicalize_port(url->m_url_type, url->m_port);
01757   ctx.update(&port, sizeof(port));
01758   ctx.finalize(*md5);
01759 }

Generated by  doxygen 1.7.1