00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 
00023 
00024 #include <assert.h>
00025 #include <new>
00026 #include "libts.h"
00027 #include "URL.h"
00028 #include "MIME.h"
00029 #include "HTTP.h"
00030 #include "Diags.h"
00031 
// Well-known-string pointers for each supported URL scheme.
// All of them start out null and are assigned in url_init() from
// hdrtoken_string_to_wks(); code in this file compares scheme pointers
// by identity against these values.
00032 const char *URL_SCHEME_FILE;
00033 const char *URL_SCHEME_FTP;
00034 const char *URL_SCHEME_GOPHER;
00035 const char *URL_SCHEME_HTTP;
00036 const char *URL_SCHEME_HTTPS;
00037 const char *URL_SCHEME_WSS;
00038 const char *URL_SCHEME_WS;
00039 const char *URL_SCHEME_MAILTO;
00040 const char *URL_SCHEME_NEWS;
00041 const char *URL_SCHEME_NNTP;
00042 const char *URL_SCHEME_PROSPERO;
00043 const char *URL_SCHEME_TELNET;
00044 const char *URL_SCHEME_TUNNEL;
00045 const char *URL_SCHEME_WAIS;
00046 const char *URL_SCHEME_PNM;
00047 const char *URL_SCHEME_RTSP;
00048 const char *URL_SCHEME_RTSPU;
00049 const char *URL_SCHEME_MMS;
00050 const char *URL_SCHEME_MMSU;
00051 const char *URL_SCHEME_MMST;
00052 
// Token index of each scheme, filled in by url_init() with
// hdrtoken_wks_to_index() applied to the matching URL_SCHEME_* pointer.
00053 int URL_WKSIDX_FILE;
00054 int URL_WKSIDX_FTP;
00055 int URL_WKSIDX_GOPHER;
00056 int URL_WKSIDX_HTTP;
00057 int URL_WKSIDX_HTTPS;
00058 int URL_WKSIDX_WS;
00059 int URL_WKSIDX_WSS;
00060 int URL_WKSIDX_MAILTO;
00061 int URL_WKSIDX_NEWS;
00062 int URL_WKSIDX_NNTP;
00063 int URL_WKSIDX_PROSPERO;
00064 int URL_WKSIDX_TELNET;
00065 int URL_WKSIDX_TUNNEL;
00066 int URL_WKSIDX_WAIS;
00067 int URL_WKSIDX_PNM;
00068 int URL_WKSIDX_RTSP;
00069 int URL_WKSIDX_RTSPU;
00070 int URL_WKSIDX_MMS;
00071 int URL_WKSIDX_MMSU;
00072 int URL_WKSIDX_MMST;
00073 
// Length of each scheme string, filled in by url_init() with
// hdrtoken_wks_to_length() applied to the matching URL_SCHEME_* pointer.
00074 int URL_LEN_FILE;
00075 int URL_LEN_FTP;
00076 int URL_LEN_GOPHER;
00077 int URL_LEN_HTTP;
00078 int URL_LEN_HTTPS;
00079 int URL_LEN_WS;
00080 int URL_LEN_WSS;
00081 int URL_LEN_MAILTO;
00082 int URL_LEN_NEWS;
00083 int URL_LEN_NNTP;
00084 int URL_LEN_PROSPERO;
00085 int URL_LEN_TELNET;
00086 int URL_LEN_TUNNEL;
00087 int URL_LEN_WAIS;
00088 int URL_LEN_PNM;
00089 int URL_LEN_RTSP;
00090 int URL_LEN_RTSPU;
00091 int URL_LEN_MMS;
00092 int URL_LEN_MMSU;
00093 int URL_LEN_MMST;
00094 
// Global selector initialised to 0. NOTE(review): no reader is visible in
// this part of the file; confirm its meaning against the users elsewhere.
00095 int url_hash_method = 0;
00096 
00097 
00098 
00099 
00100 URLHashContext::HashType URLHashContext::Setting = URLHashContext::MMH;
00101 
00102 URLHashContext::URLHashContext() {
00103   switch (Setting) {
00104   case UNSPECIFIED:
00105   case MD5:
00106     new(_obj) MD5Context;
00107     break;
00108   case MMH:
00109     new(_obj) MMHContext;
00110     break;
00111   default: ink_assert("Invalid global URL hash context");
00112   };
00113 }
00114 
00115 void
00116 url_init()
00117 {
00118   static int init = 1;
00119 
00120   if (init) {
00121     init = 0;
00122 
00123     hdrtoken_init();
00124 
00125     URL_SCHEME_FILE = hdrtoken_string_to_wks("file");
00126     URL_SCHEME_FTP = hdrtoken_string_to_wks("ftp");
00127     URL_SCHEME_GOPHER = hdrtoken_string_to_wks("gopher");
00128     URL_SCHEME_HTTP = hdrtoken_string_to_wks("http");
00129     URL_SCHEME_HTTPS = hdrtoken_string_to_wks("https");
00130     URL_SCHEME_WSS = hdrtoken_string_to_wks("wss");
00131     URL_SCHEME_WS = hdrtoken_string_to_wks("ws");
00132     URL_SCHEME_MAILTO = hdrtoken_string_to_wks("mailto");
00133     URL_SCHEME_NEWS = hdrtoken_string_to_wks("news");
00134     URL_SCHEME_NNTP = hdrtoken_string_to_wks("nntp");
00135     URL_SCHEME_PROSPERO = hdrtoken_string_to_wks("prospero");
00136     URL_SCHEME_TELNET = hdrtoken_string_to_wks("telnet");
00137     URL_SCHEME_TUNNEL = hdrtoken_string_to_wks("tunnel");
00138     URL_SCHEME_WAIS = hdrtoken_string_to_wks("wais");
00139     URL_SCHEME_PNM = hdrtoken_string_to_wks("pnm");
00140     URL_SCHEME_RTSP = hdrtoken_string_to_wks("rtsp");
00141     URL_SCHEME_RTSPU = hdrtoken_string_to_wks("rtspu");
00142     URL_SCHEME_MMS = hdrtoken_string_to_wks("mms");
00143     URL_SCHEME_MMSU = hdrtoken_string_to_wks("mmsu");
00144     URL_SCHEME_MMST = hdrtoken_string_to_wks("mmst");
00145     
00146     ink_assert(URL_SCHEME_FILE && 
00147       URL_SCHEME_FTP &&
00148       URL_SCHEME_GOPHER && 
00149       URL_SCHEME_HTTP && 
00150       URL_SCHEME_HTTPS && 
00151       URL_SCHEME_WS &&
00152       URL_SCHEME_WSS &&
00153       URL_SCHEME_MAILTO && 
00154       URL_SCHEME_NEWS && 
00155       URL_SCHEME_NNTP && 
00156       URL_SCHEME_PROSPERO && 
00157       URL_SCHEME_TELNET &&
00158       URL_SCHEME_TUNNEL &&
00159       URL_SCHEME_WAIS &&
00160       URL_SCHEME_PNM &&
00161       URL_SCHEME_RTSP && 
00162       URL_SCHEME_RTSPU && 
00163       URL_SCHEME_MMS &&
00164       URL_SCHEME_MMSU && 
00165       URL_SCHEME_MMST
00166     );
00167     
00168     URL_WKSIDX_FILE = hdrtoken_wks_to_index(URL_SCHEME_FILE);
00169     URL_WKSIDX_FTP = hdrtoken_wks_to_index(URL_SCHEME_FTP);
00170     URL_WKSIDX_GOPHER = hdrtoken_wks_to_index(URL_SCHEME_GOPHER);
00171     URL_WKSIDX_HTTP = hdrtoken_wks_to_index(URL_SCHEME_HTTP);
00172     URL_WKSIDX_HTTPS = hdrtoken_wks_to_index(URL_SCHEME_HTTPS);
00173     URL_WKSIDX_WS = hdrtoken_wks_to_index(URL_SCHEME_WS);
00174     URL_WKSIDX_WSS = hdrtoken_wks_to_index(URL_SCHEME_WSS);
00175     URL_WKSIDX_MAILTO = hdrtoken_wks_to_index(URL_SCHEME_MAILTO);
00176     URL_WKSIDX_NEWS = hdrtoken_wks_to_index(URL_SCHEME_NEWS);
00177     URL_WKSIDX_NNTP = hdrtoken_wks_to_index(URL_SCHEME_NNTP);
00178     URL_WKSIDX_PROSPERO = hdrtoken_wks_to_index(URL_SCHEME_PROSPERO);
00179     URL_WKSIDX_TELNET = hdrtoken_wks_to_index(URL_SCHEME_TELNET);
00180     URL_WKSIDX_TUNNEL = hdrtoken_wks_to_index(URL_SCHEME_TUNNEL);
00181     URL_WKSIDX_WAIS = hdrtoken_wks_to_index(URL_SCHEME_WAIS);
00182     URL_WKSIDX_PNM = hdrtoken_wks_to_index(URL_SCHEME_PNM);
00183     URL_WKSIDX_RTSP = hdrtoken_wks_to_index(URL_SCHEME_RTSP);
00184     URL_WKSIDX_RTSPU = hdrtoken_wks_to_index(URL_SCHEME_RTSPU);
00185     URL_WKSIDX_MMS = hdrtoken_wks_to_index(URL_SCHEME_MMS);
00186     URL_WKSIDX_MMSU = hdrtoken_wks_to_index(URL_SCHEME_MMSU);
00187     URL_WKSIDX_MMST = hdrtoken_wks_to_index(URL_SCHEME_MMST);
00188 
00189     URL_LEN_FILE = hdrtoken_wks_to_length(URL_SCHEME_FILE);
00190     URL_LEN_FTP = hdrtoken_wks_to_length(URL_SCHEME_FTP);
00191     URL_LEN_GOPHER = hdrtoken_wks_to_length(URL_SCHEME_GOPHER);
00192     URL_LEN_HTTP = hdrtoken_wks_to_length(URL_SCHEME_HTTP);
00193     URL_LEN_HTTPS = hdrtoken_wks_to_length(URL_SCHEME_HTTPS);
00194     URL_LEN_WS = hdrtoken_wks_to_length(URL_SCHEME_WS);
00195     URL_LEN_WSS = hdrtoken_wks_to_length(URL_SCHEME_WSS);
00196     URL_LEN_MAILTO = hdrtoken_wks_to_length(URL_SCHEME_MAILTO);
00197     URL_LEN_NEWS = hdrtoken_wks_to_length(URL_SCHEME_NEWS);
00198     URL_LEN_NNTP = hdrtoken_wks_to_length(URL_SCHEME_NNTP);
00199     URL_LEN_PROSPERO = hdrtoken_wks_to_length(URL_SCHEME_PROSPERO);
00200     URL_LEN_TELNET = hdrtoken_wks_to_length(URL_SCHEME_TELNET);
00201     URL_LEN_TUNNEL = hdrtoken_wks_to_length(URL_SCHEME_TUNNEL);
00202     URL_LEN_WAIS = hdrtoken_wks_to_length(URL_SCHEME_WAIS);
00203     URL_LEN_PNM = hdrtoken_wks_to_length(URL_SCHEME_PNM);
00204     URL_LEN_RTSP = hdrtoken_wks_to_length(URL_SCHEME_RTSP);
00205     URL_LEN_RTSPU = hdrtoken_wks_to_length(URL_SCHEME_RTSPU);
00206     URL_LEN_MMS = hdrtoken_wks_to_length(URL_SCHEME_MMS);
00207     URL_LEN_MMSU = hdrtoken_wks_to_length(URL_SCHEME_MMSU);
00208     URL_LEN_MMST = hdrtoken_wks_to_length(URL_SCHEME_MMST);
00209 
00210     ink_assert(URLHashContext::OBJ_SIZE >= sizeof(MD5Context));
00211     ink_assert(URLHashContext::OBJ_SIZE >= sizeof(MMHContext));
00212 
00213   }
00214 }
00215 
00216 
00217 
00218 
00219 
00220 
00221 
00222 
00223 
00224 
00225 URLImpl *
00226 url_create(HdrHeap * heap)
00227 {
00228   URLImpl *url;
00229 
00230   url = (URLImpl *) heap->allocate_obj(sizeof(URLImpl), HDR_HEAP_OBJ_URL);
00231   obj_clear_data((HdrHeapObjImpl *) url);
00232   url->m_url_type = URL_TYPE_NONE;
00233   url->m_scheme_wks_idx = -1;
00234   url_clear_string_ref(url);
00235   return url;
00236 }
00237 
00238 
00239 
00240 
00241 void
00242 url_clear(URLImpl * url_impl)
00243 {
00244   obj_clear_data((HdrHeapObjImpl *) url_impl);
00245   url_impl->m_url_type = URL_TYPE_NONE;
00246   url_impl->m_scheme_wks_idx = -1;
00247 }
00248 
00249 
00250 
00251 
00252 URLImpl *
00253 url_copy(URLImpl * s_url, HdrHeap * s_heap, HdrHeap * d_heap, bool inherit_strs)
00254 {
00255   URLImpl *d_url = url_create(d_heap);
00256   url_copy_onto(s_url, s_heap, d_url, d_heap, inherit_strs);
00257   return d_url;
00258 }
00259 
00260 
00261 
00262 
00263 void
00264 url_copy_onto(URLImpl * s_url, HdrHeap * s_heap, URLImpl * d_url, HdrHeap * d_heap, bool inherit_strs)
00265 {
00266   if (s_url != d_url) {
00267     obj_copy_data((HdrHeapObjImpl *) s_url, (HdrHeapObjImpl *) d_url);
00268     if (inherit_strs && (s_heap != d_heap))
00269       d_heap->inherit_string_heaps(s_heap);
00270   }
00271 }
00272 
00273 
00274 
00275 
00276 void
00277 url_nuke_proxy_stuff(URLImpl * d_url)
00278 {
00279   d_url->m_len_scheme = 0;
00280   d_url->m_len_user = 0;
00281   d_url->m_len_password = 0;
00282   d_url->m_len_host = 0;
00283   d_url->m_len_port = 0;
00284 
00285   d_url->m_ptr_scheme = NULL;
00286   d_url->m_ptr_user = NULL;
00287   d_url->m_ptr_password = NULL;
00288   d_url->m_ptr_host = NULL;
00289   d_url->m_ptr_port = NULL;
00290 
00291   d_url->m_scheme_wks_idx = -1;
00292   d_url->m_port = 0;
00293 }
00294 
00295 
00296 
00297 
00298 
00299 
00300 
00301 
00302 
00303 void
00304 url_copy_onto_as_server_url(URLImpl * s_url, HdrHeap * s_heap, URLImpl * d_url, HdrHeap * d_heap, bool inherit_strs)
00305 {
00306   url_nuke_proxy_stuff(d_url);
00307 
00308   d_url->m_ptr_path = s_url->m_ptr_path;
00309   d_url->m_ptr_params = s_url->m_ptr_params;
00310   d_url->m_ptr_query = s_url->m_ptr_query;
00311   d_url->m_ptr_fragment = s_url->m_ptr_fragment;
00312   url_clear_string_ref(d_url);
00313 
00314   d_url->m_len_path = s_url->m_len_path;
00315   d_url->m_len_params = s_url->m_len_params;
00316   d_url->m_len_query = s_url->m_len_query;
00317   d_url->m_len_fragment = s_url->m_len_fragment;
00318 
00319   d_url->m_url_type = s_url->m_url_type;
00320   d_url->m_type_code = s_url->m_type_code;
00321 
00322   if (inherit_strs && (s_heap != d_heap))
00323     d_heap->inherit_string_heaps(s_heap);
00324 }
00325 
00326 
00327 
00328 
00329 
00330 
00331 
00332 
00333 
/**
 * Apply HDR_MARSHAL_STR to each of the nine URL component string pointers
 * (scheme, user, password, host, port, path, params, query, fragment).
 * The cached printed string pointer is not part of the list.
 *
 * @param str_xlate Translation table handed to every HDR_MARSHAL_STR call.
 * @param num_xlate Number of entries in @a str_xlate.
 * @return 0 when control reaches the end of the function.
 *         NOTE(review): HDR_MARSHAL_STR is defined elsewhere and may contain
 *         an early return on failure - confirm against its definition.
 */
00334 int
00335 URLImpl::marshal(MarshalXlate * str_xlate, int num_xlate)
00336 {
00337 
00338   HDR_MARSHAL_STR(m_ptr_scheme, str_xlate, num_xlate);
00339   HDR_MARSHAL_STR(m_ptr_user, str_xlate, num_xlate);
00340   HDR_MARSHAL_STR(m_ptr_password, str_xlate, num_xlate);
00341   HDR_MARSHAL_STR(m_ptr_host, str_xlate, num_xlate);
00342   HDR_MARSHAL_STR(m_ptr_port, str_xlate, num_xlate);
00343   HDR_MARSHAL_STR(m_ptr_path, str_xlate, num_xlate);
00344   HDR_MARSHAL_STR(m_ptr_params, str_xlate, num_xlate);
00345   HDR_MARSHAL_STR(m_ptr_query, str_xlate, num_xlate);
00346   HDR_MARSHAL_STR(m_ptr_fragment, str_xlate, num_xlate);
00347 
00348   return 0;
00349 }
00350 
/**
 * Apply HDR_UNMARSHAL_STR with @a offset to the same nine component string
 * pointers that marshal() handles; the cached printed string pointer is
 * not touched.
 *
 * @param offset Value handed to every HDR_UNMARSHAL_STR call.
 *               NOTE(review): presumably the relocation delta added back to
 *               each stored pointer - confirm against the macro definition.
 */
00351 void
00352 URLImpl::unmarshal(intptr_t offset)
00353 {
00354   HDR_UNMARSHAL_STR(m_ptr_scheme, offset);
00355   HDR_UNMARSHAL_STR(m_ptr_user, offset);
00356   HDR_UNMARSHAL_STR(m_ptr_password, offset);
00357   HDR_UNMARSHAL_STR(m_ptr_host, offset);
00358   HDR_UNMARSHAL_STR(m_ptr_port, offset);
00359   HDR_UNMARSHAL_STR(m_ptr_path, offset);
00360   HDR_UNMARSHAL_STR(m_ptr_params, offset);
00361   HDR_UNMARSHAL_STR(m_ptr_query, offset);
00362   HDR_UNMARSHAL_STR(m_ptr_fragment, offset);
00363 
00364 }
00365 
/**
 * Apply HDR_MOVE_STR to every string this object references: the nine URL
 * components and, unlike marshal()/unmarshal(), the cached printed string.
 *
 * @param new_heap Destination string heap.  It is not named in the calls
 *                 below; NOTE(review): HDR_MOVE_STR presumably picks up
 *                 @a new_heap by name - confirm against the macro.
 */
00366 void
00367 URLImpl::move_strings(HdrStrHeap * new_heap)
00368 {
00369   HDR_MOVE_STR(m_ptr_scheme, m_len_scheme);
00370   HDR_MOVE_STR(m_ptr_user, m_len_user);
00371   HDR_MOVE_STR(m_ptr_password, m_len_password);
00372   HDR_MOVE_STR(m_ptr_host, m_len_host);
00373   HDR_MOVE_STR(m_ptr_port, m_len_port);
00374   HDR_MOVE_STR(m_ptr_path, m_len_path);
00375   HDR_MOVE_STR(m_ptr_params, m_len_params);
00376   HDR_MOVE_STR(m_ptr_query, m_len_query);
00377   HDR_MOVE_STR(m_ptr_fragment, m_len_fragment);
00378   HDR_MOVE_STR(m_ptr_printed_string, m_len_printed_string);
00379 }
00380 
00381 size_t
00382 URLImpl::strings_length()
00383 {
00384   size_t ret = 0;
00385 
00386   ret += m_len_scheme;
00387   ret += m_len_user;
00388   ret += m_len_password;
00389   ret += m_len_host;
00390   ret += m_len_port;
00391   ret += m_len_path;
00392   ret += m_len_params;
00393   ret += m_len_query;
00394   ret += m_len_fragment;
00395   ret += m_len_printed_string;
00396   return ret;
00397 }
00398 
/**
 * Apply CHECK_STR to each of the nine component pointer/length pairs.
 * The cached printed string is not checked here.
 *
 * @param heaps     Heap descriptions handed to every CHECK_STR call.
 * @param num_heaps Number of entries in @a heaps.
 *
 * NOTE(review): CHECK_STR is defined elsewhere; presumably it verifies that
 * each string lies inside one of @a heaps - confirm against the macro.
 */
00399 void
00400 URLImpl::check_strings(HeapCheck * heaps, int num_heaps)
00401 {
00402   CHECK_STR(m_ptr_scheme, m_len_scheme, heaps, num_heaps);
00403   CHECK_STR(m_ptr_user, m_len_user, heaps, num_heaps);
00404   CHECK_STR(m_ptr_password, m_len_password, heaps, num_heaps);
00405   CHECK_STR(m_ptr_host, m_len_host, heaps, num_heaps);
00406   CHECK_STR(m_ptr_port, m_len_port, heaps, num_heaps);
00407   CHECK_STR(m_ptr_path, m_len_path, heaps, num_heaps);
00408   CHECK_STR(m_ptr_params, m_len_params, heaps, num_heaps);
00409   CHECK_STR(m_ptr_query, m_len_query, heaps, num_heaps);
00410   CHECK_STR(m_ptr_fragment, m_len_fragment, heaps, num_heaps);
00411 
00412 }
00413 
00414 
00415 
00416 
00417 
00418 
00419 
00420 const char *
00421 url_scheme_set(HdrHeap * heap, URLImpl * url, const char *scheme_str, int scheme_wks_idx, int length, bool copy_string)
00422 {
00423   const char *scheme_wks;
00424   url_called_set(url);
00425   if (length == 0)
00426     scheme_str = NULL;
00427 
00428   mime_str_u16_set(heap, scheme_str, length, &(url->m_ptr_scheme), &(url->m_len_scheme), copy_string);
00429 
00430   url->m_scheme_wks_idx = scheme_wks_idx;
00431   if (scheme_wks_idx >= 0)
00432     scheme_wks = hdrtoken_index_to_wks(scheme_wks_idx);
00433   else
00434     scheme_wks = NULL;
00435 
00436   if (scheme_wks == URL_SCHEME_HTTP || scheme_wks == URL_SCHEME_WS)
00437     url->m_url_type = URL_TYPE_HTTP;
00438   else if (scheme_wks == URL_SCHEME_HTTPS || scheme_wks == URL_SCHEME_WSS)
00439     url->m_url_type = URL_TYPE_HTTPS;
00440   else
00441     url->m_url_type = URL_TYPE_HTTP;
00442 
00443   return scheme_wks;          
00444 }
00445 
00446 
00447 
00448 
00449 void
00450 url_user_set(HdrHeap * heap, URLImpl * url, const char *value, int length, bool copy_string)
00451 {
00452   url_called_set(url);
00453   if (length == 0)
00454     value = NULL;
00455   mime_str_u16_set(heap, value, length, &(url->m_ptr_user), &(url->m_len_user), copy_string);
00456 }
00457 
00458 
00459 
00460 
00461 void
00462 url_password_set(HdrHeap * heap, URLImpl * url, const char *value, int length, bool copy_string)
00463 {
00464   url_called_set(url);
00465   if (length == 0)
00466     value = NULL;
00467   mime_str_u16_set(heap, value, length, &(url->m_ptr_password), &(url->m_len_password), copy_string);
00468 }
00469 
00470 
00471 
00472 
00473 void
00474 url_host_set(HdrHeap * heap, URLImpl * url, const char *value, int length, bool copy_string)
00475 {
00476   url_called_set(url);
00477   if (length == 0) value = NULL;
00478   mime_str_u16_set(heap, value, length, &(url->m_ptr_host), &(url->m_len_host), copy_string);
00479 }
00480 
00481 
00482 
00483 
00484 void
00485 url_port_set(HdrHeap * heap, URLImpl * url, const char *value, int length, bool copy_string)
00486 {
00487   url_called_set(url);
00488   if (length == 0)
00489     value = NULL;
00490   mime_str_u16_set(heap, value, length, &(url->m_ptr_port), &(url->m_len_port), copy_string);
00491 
00492   url->m_port = 0;
00493   for (int i = 0; i < length; i++) {
00494     if (!ParseRules::is_digit(value[i]))
00495       break;
00496     url->m_port = url->m_port * 10 + (value[i] - '0');
00497   }
00498 }
00499 
00500 
00501 
00502 
00503 void
00504 url_port_set(HdrHeap * heap, URLImpl * url, unsigned int port)
00505 {
00506   url_called_set(url);
00507   if (port > 0) {
00508     char value[6];
00509     int length;
00510 
00511     length = ink_fast_itoa(port, value, sizeof(value));
00512     mime_str_u16_set(heap, value, length, &(url->m_ptr_port), &(url->m_len_port), true);
00513   } else {
00514     mime_str_u16_set(heap, NULL, 0, &(url->m_ptr_port), &(url->m_len_port), true);
00515   }
00516   url->m_port = port;
00517 }
00518 
00519 
00520 
00521 
00522 void
00523 url_path_set(HdrHeap * heap, URLImpl * url, const char *value, int length, bool copy_string)
00524 {
00525   url_called_set(url);
00526   if (length == 0)
00527     value = NULL;
00528   mime_str_u16_set(heap, value, length, &(url->m_ptr_path), &(url->m_len_path), copy_string);
00529 }
00530 
00531 
00532 
00533 
00534 
00535 
00536 
00537 void
00538 url_params_set(HdrHeap * heap, URLImpl * url, const char *value, int length, bool copy_string)
00539 {
00540   url_called_set(url);
00541   mime_str_u16_set(heap, value, length, &(url->m_ptr_params), &(url->m_len_params), copy_string);
00542 }
00543 
00544 
00545 
00546 
00547 void
00548 url_query_set(HdrHeap * heap, URLImpl * url, const char *value, int length, bool copy_string)
00549 {
00550   url_called_set(url);
00551   mime_str_u16_set(heap, value, length, &(url->m_ptr_query), &(url->m_len_query), copy_string);
00552 }
00553 
00554 
00555 
00556 
00557 void
00558 url_fragment_set(HdrHeap * heap, URLImpl * url, const char *value, int length, bool copy_string)
00559 {
00560   url_called_set(url);
00561   mime_str_u16_set(heap, value, length, &(url->m_ptr_fragment), &(url->m_len_fragment), copy_string);
00562 }
00563 
00564 
00565 
00566 
00567 void
00568 url_type_set(URLImpl * url, unsigned int typecode)
00569 {
00570   url_called_set(url);
00571   url->m_type_code = typecode;
00572 }
00573 
00574 
00575 
00576 
00577 
00578 
00579 
00580 
00581 
00582 
00583 
00584 
00585 
00586 void
00587 url_called_set(URLImpl * url)
00588 {
00589   url->m_clean = !url->m_ptr_printed_string;
00590 }
00591 
00592 void
00593 url_clear_string_ref(URLImpl * url)
00594 {
00595   if (url->m_ptr_printed_string) {
00596     url->m_len_printed_string = 0;
00597     url->m_ptr_printed_string = NULL;
00598     url->m_clean = true;
00599   }
00600   return;
00601 }
00602 
00603 char *
00604 url_string_get_ref(HdrHeap * heap, URLImpl * url, int *length)
00605 {
00606   if (!url)
00607     return NULL;
00608 
00609   if (url->m_ptr_printed_string && url->m_clean) {
00610     if (length)
00611       *length = url->m_len_printed_string;
00612     return (char *) url->m_ptr_printed_string;
00613   } else {                      
00614     int len = url_length_get(url);
00615     char *buf;
00616     int index = 0;
00617     int offset = 0;
00618 
00619     
00620     buf = heap->allocate_str(len + 1);
00621     url_print(url, buf, len, &index, &offset);
00622     buf[len] = '\0';
00623 
00624     if (length) {
00625       *length = len;
00626     }
00627     url->m_clean = true;          
00628     url->m_len_printed_string = len;
00629     url->m_ptr_printed_string = buf;
00630     return buf;
00631   }
00632 }
00633 
00634 char *
00635 url_string_get(URLImpl * url, Arena * arena, int *length, HdrHeap * heap)
00636 {
00637   int len = url_length_get(url);
00638   char *buf;
00639   char *buf2;
00640   int index = 0;
00641   int offset = 0;
00642 
00643   buf = arena ? arena->str_alloc(len) : (char *)ats_malloc(len + 1);
00644 
00645   url_print(url, buf, len, &index, &offset);
00646   buf[len] = '\0';
00647 
00648   
00649   if (heap) {
00650     buf2 = heap->allocate_str(len + 1);
00651     memcpy(buf2, buf, len);
00652     buf2[len] = '\0';
00653     url->m_clean = true;          
00654     url->m_len_printed_string = len;
00655     url->m_ptr_printed_string = buf2;
00656   }
00657 
00658   if (length) {
00659     *length = len;
00660   }
00661   return buf;
00662 }
00663 
00664 
00665 
00666 
00667 char *
00668 url_string_get_buf(URLImpl * url, char *dstbuf, int dstbuf_size, int *length)
00669 {
00670   int len = url_length_get(url);
00671   int index = 0;
00672   int offset = 0;
00673   char *buf = 0;
00674 
00675   if (dstbuf && dstbuf_size > 0) {
00676     buf = dstbuf;
00677     if (len >= dstbuf_size)
00678       len = dstbuf_size - 1;
00679     url_print(url, dstbuf, len, &index, &offset);
00680     buf[len] = 0;
00681 
00682     if (length)
00683       *length = len;
00684   }
00685   return buf;
00686 }
00687 
00688 
00689 
00690 
00691 const char *
00692 url_scheme_get(URLImpl * url, int *length)
00693 {
00694   const char *str;
00695 
00696   if (url->m_scheme_wks_idx >= 0) {
00697     str = hdrtoken_index_to_wks(url->m_scheme_wks_idx);
00698     *length = hdrtoken_index_to_length(url->m_scheme_wks_idx);
00699   } else {
00700     str = url->m_ptr_scheme;
00701     *length = url->m_len_scheme;
00702   }
00703   return str;
00704 }
00705 
00706 
00707 
00708 
00709 const char *
00710 url_user_get(URLImpl * url, int *length)
00711 {
00712   *length = url->m_len_user;
00713   return url->m_ptr_user;
00714 }
00715 
00716 
00717 
00718 
00719 const char *
00720 url_password_get(URLImpl * url, int *length)
00721 {
00722   *length = url->m_len_password;
00723   return url->m_ptr_password;
00724 }
00725 
00726 
00727 
00728 
00729 const char *
00730 url_host_get(URLImpl * url, int *length)
00731 {
00732   *length = url->m_len_host;
00733   return url->m_ptr_host;
00734 }
00735 
00736 
00737 
00738 
00739 int
00740 url_port_get(URLImpl * url)
00741 {
00742   return url->m_port;
00743 }
00744 
00745 
00746 
00747 
00748 const char *
00749 url_path_get(URLImpl * url, int *length)
00750 {
00751   *length = url->m_len_path;
00752   return url->m_ptr_path;
00753 }
00754 
00755 
00756 
00757 
00758 const char *
00759 url_params_get(URLImpl * url, int *length)
00760 {
00761   *length = url->m_len_params;
00762   return url->m_ptr_params;
00763 }
00764 
00765 
00766 
00767 
00768 const char *
00769 url_query_get(URLImpl * url, int *length)
00770 {
00771   *length = url->m_len_query;
00772   return url->m_ptr_query;
00773 }
00774 
00775 
00776 
00777 
00778 const char *
00779 url_fragment_get(URLImpl * url, int *length)
00780 {
00781   *length = url->m_len_fragment;
00782   return url->m_ptr_fragment;
00783 }
00784 
00785 
00786 
00787 
00788 int
00789 url_type_get(URLImpl * url)
00790 {
00791   return url->m_type_code;
00792 }
00793 
00794 
00795 
00796 
00797 
00798 
00799 
00800 
00801 
00802 
00803 
00804 
00805 
00806 int
00807 url_length_get(URLImpl * url)
00808 {
00809   int length = 0;
00810 
00811   if (url->m_ptr_scheme) {
00812     if ((url->m_scheme_wks_idx >= 0) && (hdrtoken_index_to_wks(url->m_scheme_wks_idx) == URL_SCHEME_FILE))
00813       length += url->m_len_scheme + 1;  
00814     else
00815       length += url->m_len_scheme + 3;  
00816   }
00817 
00818   if (url->m_ptr_user) {
00819     length += url->m_len_user + 1;      
00820     if (url->m_ptr_password)
00821       length += url->m_len_password + 1;        
00822   }
00823 
00824   if (url->m_ptr_host) {
00825     length += url->m_len_host;
00826     if (url->m_ptr_port && url->m_port)
00827       length += url->m_len_port + 1;    
00828   }
00829 
00830   if (url->m_ptr_path) {
00831     length += url->m_len_path + 1;      
00832   }
00833   else {
00834     length += 1;                
00835   }
00836 
00837   if (url->m_ptr_params && url->m_len_params > 0) {
00838     length += url->m_len_params + 1;  
00839   }
00840 
00841   if (url->m_ptr_query && url->m_len_query > 0) {
00842     length += url->m_len_query + 1;   
00843   }
00844 
00845   if (url->m_ptr_fragment && url->m_len_fragment > 0) {
00846     length += url->m_len_fragment + 1;        
00847   }
00848 
00849   return length;
00850 }
00851 
00852 
00853 
00854 
00855 char *
00856 url_to_string(URLImpl * url, Arena * arena, int *length)
00857 {
00858   int len;
00859   int idx;
00860   char *str;
00861 
00862   len = url_length_get(url) + 1;
00863 
00864   if (length)
00865     *length = len;
00866 
00867   if (arena)
00868     str = arena->str_alloc(len);
00869   else
00870     str = (char *)ats_malloc(len + 1);
00871 
00872   idx = 0;
00873 
00874   if (url->m_ptr_scheme) {
00875     memcpy(&str[idx], url->m_ptr_scheme, url->m_len_scheme);
00876     idx += url->m_len_scheme;
00877     if ((url->m_scheme_wks_idx >= 0) && (hdrtoken_index_to_wks(url->m_scheme_wks_idx) == URL_SCHEME_FILE)) {
00878       str[idx++] = ':';
00879     } else {
00880       str[idx++] = ':';
00881       str[idx++] = '/';
00882       str[idx++] = '/';
00883     }
00884   }
00885 
00886   if (url->m_ptr_user) {
00887     memcpy(&str[idx], url->m_ptr_user, url->m_len_user);
00888     idx += url->m_len_user;
00889     if (url->m_ptr_password) {
00890       str[idx++] = ':';
00891       memcpy(&str[idx], url->m_ptr_password, url->m_len_password);
00892       idx += url->m_len_password;
00893     }
00894     str[idx++] = '@';
00895   }
00896 
00897   if (url->m_ptr_host) {
00898     memcpy(&str[idx], url->m_ptr_host, url->m_len_host);
00899     idx += url->m_len_host;
00900     if (url->m_ptr_port != 0) {
00901       str[idx++] = ':';
00902       memcpy(&str[idx], url->m_ptr_port, url->m_len_port);
00903       idx += url->m_len_port;
00904     }
00905   }
00906 
00907   memcpy(&str[idx], url->m_ptr_path, url->m_len_path);
00908   idx += url->m_len_path;
00909 
00910   if (url->m_ptr_params && url->m_len_params > 0) {
00911     str[idx++] = ';';
00912     memcpy(&str[idx], url->m_ptr_params, url->m_len_params);
00913     idx += url->m_len_params;
00914   }
00915 
00916   if (url->m_ptr_query && url->m_len_query > 0) {
00917     str[idx++] = '?';
00918     memcpy(&str[idx], url->m_ptr_query, url->m_len_query);
00919     idx += url->m_len_query;
00920   }
00921 
00922   if (url->m_ptr_fragment && url->m_len_fragment > 0) {
00923     str[idx++] = '#';
00924     memcpy(&str[idx], url->m_ptr_fragment, url->m_len_fragment);
00925     idx += url->m_len_fragment;
00926   }
00927 
00928   str[idx++] = '\0';
00929 
00930   ink_release_assert(idx == len);
00931 
00932   return str;
00933 }
00934 
00935 
00936 
00937 
00938 
00939 
00940 
00941 
00942 
00943 
/**
 * Copy bytes from [str, str_e) to [buf, buf_e), replacing each "%XY" with
 * the byte whose hexadecimal value is XY.  A '%' that is not followed by
 * two hex digits is copied through literally.
 *
 * @param buf   In/out write cursor; advanced past every byte written.
 * @param buf_e End of the output buffer.
 * @param str   In/out read cursor; advanced past every byte consumed.
 * @param str_e End of the input.
 * @param state In/out decoder state, so an escape may span two calls:
 *              0 = plain text, 1 = '%' consumed, 2 = '%' and one hex digit
 *              consumed, 3 = escape was invalid after one hex digit, the
 *              '%' has been written and the hex digit is still pending.
 *
 * The loop stops when either the input is exhausted or the output is full.
 * States 1-3 look back at already consumed input through str[-1]/str[-2].
 */
00944 void
00945 unescape_str(char *&buf, char *buf_e, const char *&str, const char *str_e, int &state)
00946 {
00947   int copy_len;
00948   char *first_pct;
00949   int buf_len = (int) (buf_e - buf);
00950   int str_len = (int) (str_e - str);
00951   int min_len = (int) (str_len < buf_len ? str_len : buf_len);
00952 
  // Bulk-copy the leading run before handing over to the state machine.
  // NOTE(review): ink_memcpy_until_char is defined elsewhere; the arithmetic
  // below treats its result as a position relative to str (how far the copy
  // got before '%' or min_len) - confirm.  This step runs regardless of the
  // incoming state.
00953   first_pct = ink_memcpy_until_char(buf, (char *) str, min_len, '%');
00954   copy_len = (int) (first_pct - str);
00955   str += copy_len;
00956   buf += copy_len;
  // Everything that fits was copied without meeting a '%': done.
00957   if (copy_len == min_len)
00958     return;
00959 
00960   while (str < str_e && (buf != buf_e)) {
00961     switch (state) {
00962     case 0:
      // Plain text: copy through, or start an escape on '%'.
00963       if (str[0] == '%') {
00964         str += 1;
00965         state = 1;
00966       } else {
00967         *buf++ = str[0];
00968         str += 1;
00969       }
00970       break;
00971     case 1:
      // After '%': a hex digit continues the escape; anything else means the
      // '%' was literal, so emit it (str[-1]) and re-examine this byte.
00972       if (ParseRules::is_hex(str[0])) {
00973         str += 1;
00974         state = 2;
00975       } else {
00976         *buf++ = str[-1];
00977         state = 0;
00978       }
00979       break;
00980     case 2:
      // After '%' + hex digit: a second hex digit completes the escape.
00981       if (ParseRules::is_hex(str[0])) {
00982         int tmp;
00983 
        // High nibble from the previous byte, low nibble from this one.
00984         if (ParseRules::is_alpha(str[-1])) {
00985           tmp = (ParseRules::ink_toupper(str[-1]) - 'A' + 10) * 16;
00986         } else {
00987           tmp = (str[-1] - '0') * 16;
00988         }
00989         if (ParseRules::is_alpha(str[0])) {
00990           tmp += (ParseRules::ink_toupper(str[0]) - 'A' + 10);
00991         } else {
00992           tmp += str[0] - '0';
00993         }
00994 
00995         *buf++ = tmp;
00996         str += 1;
00997         state = 0;
00998       } else {
        // Not an escape after all: emit the '%' now (str[-2]); the hex digit
        // is emitted by state 3 because the output may fill up in between.
00999         *buf++ = str[-2];
01000         state = 3;
01001       }
01002       break;
01003     case 3:
      // Emit the pending hex digit (str[-1]) and resume plain-text handling
      // with the current byte still unread.
01004       *buf++ = str[-1];
01005       state = 0;
01006       break;
01007     }
01008   }
01009 }
01010 
01011 
01012 
01013 
/**
 * Like unescape_str(), but every byte copied literally is passed through
 * ParseRules::ink_tolower() first.  Bytes produced by decoding a "%XY"
 * escape are written unchanged.  There is no bulk-copy fast path here;
 * all input goes through the state machine.
 *
 * @param buf   In/out write cursor; advanced past every byte written.
 * @param end   End of the output buffer.
 * @param str   In/out read cursor; advanced past every byte consumed.
 * @param str_e End of the input.
 * @param state In/out decoder state: 0 = plain text, 1 = '%' consumed,
 *              2 = '%' and one hex digit consumed, 3 = invalid escape,
 *              '%' already written and the hex digit still pending.
 */
01014 void
01015 unescape_str_tolower(char *&buf, char *end, const char *&str, const char *str_e, int &state)
01016 {
01017   while (str < str_e && (buf != end)) {
01018     switch (state) {
01019     case 0:
      // Plain text: lower-case and copy, or start an escape on '%'.
01020       if (str[0] == '%') {
01021         str += 1;
01022         state = 1;
01023       } else {
01024         *buf++ = ParseRules::ink_tolower(str[0]);
01025         str += 1;
01026       }
01027       break;
01028     case 1:
      // After '%': a non-hex byte means the '%' was literal; emit it and
      // re-examine the current byte in state 0.
01029       if (ParseRules::is_hex(str[0])) {
01030         str += 1;
01031         state = 2;
01032       } else {
01033         *buf++ = ParseRules::ink_tolower(str[-1]);
01034         state = 0;
01035       }
01036       break;
01037     case 2:
      // After '%' + hex digit: decode when a second hex digit follows.
01038       if (ParseRules::is_hex(str[0])) {
01039         int tmp;
01040 
01041         if (ParseRules::is_alpha(str[-1])) {
01042           tmp = (ParseRules::ink_toupper(str[-1]) - 'A' + 10) * 16;
01043         } else {
01044           tmp = (str[-1] - '0') * 16;
01045         }
01046         if (ParseRules::is_alpha(str[0])) {
01047           tmp += (ParseRules::ink_toupper(str[0]) - 'A' + 10);
01048         } else {
01049           tmp += str[0] - '0';
01050         }
01051 
        // The decoded byte is stored as-is, without lower-casing.
01052         *buf++ = tmp;
01053         str += 1;
01054         state = 0;
01055       } else {
        // Invalid escape: emit the '%' (str[-2]) now, the digit in state 3.
01056         *buf++ = ParseRules::ink_tolower(str[-2]);
01057         state = 3;
01058       }
01059       break;
01060     case 3:
      // Emit the pending (lower-cased) hex digit, then resume plain text.
01061       *buf++ = ParseRules::ink_tolower(str[-1]);
01062       state = 0;
01063       break;
01064     }
01065   }
01066 }
01067 
01068 
01069 
01070 
01071 char *
01072 url_unescapify(Arena * arena, const char *str, int length)
01073 {
01074   char *buffer;
01075   char *t, *e;
01076   int s;
01077 
01078   if (length == -1)
01079     length = (int) strlen(str);
01080 
01081   buffer = arena->str_alloc(length);
01082   t = buffer;
01083   e = buffer + length;
01084   s = 0;
01085 
01086   unescape_str(t, e, str, str + length, s);
01087   *t = '\0';
01088 
01089   return buffer;
01090 }
01091 
01092 
01093 
01094 
01095 
01096 
01097 
01098 
01099 
01100 
// Advance the parse cursor by one byte and jump to `label` once the cursor
// has reached the end of input.  The macro expects variables named `cur`
// and `end` to be in scope at the point of use.  It expands to a plain
// brace block, not a do { } while (0) statement.
01101 #define GETNEXT(label) { \
01102     cur += 1;            \
01103     if (cur >= end) {    \
01104         goto label;      \
01105     }                    \
01106 }
01107 
01108 MIMEParseResult
01109 url_parse_scheme(HdrHeap * heap, URLImpl * url, const char **start, const char *end, bool copy_strings_p)
01110 {
01111   const char *cur = *start;
01112   const char *scheme_wks;
01113   const char *scheme_start = NULL;
01114   const char *scheme_end = NULL;
01115   int scheme_wks_idx;
01116 
01117   while(' ' == *cur && ++cur < end)
01118     ;
01119   if (cur < end) {
01120     scheme_start = scheme_end = cur;
01121     
01122     if ((end - cur >= 5) && (((cur[0] ^ 'h') | (cur[1] ^ 't') | (cur[2] ^ 't') | (cur[3] ^ 'p') | (cur[4] ^ ':')) == 0)) {
01123       scheme_end = cur + 4;                   
01124       url_scheme_set(heap, url, scheme_start, URL_WKSIDX_HTTP, 4, copy_strings_p);
01125     } else if ('/' != *cur) {
01126       
01127       
01128       
01129       while (':' != *cur && ++cur < end)
01130         ;
01131       if (cur < end) { 
01132         scheme_wks_idx = hdrtoken_tokenize(scheme_start, cur - scheme_start, &scheme_wks);
01133     
01134         
01135 
01136 
01137         if ((scheme_wks_idx > 0 && hdrtoken_wks_to_token_type(scheme_wks) == HDRTOKEN_TYPE_SCHEME) || 
01138            (cur >= end-1 || cur[1] == '/')) 
01139         {
01140           scheme_end = cur;
01141           url_scheme_set(heap, url, scheme_start, scheme_wks_idx, scheme_end - scheme_start, copy_strings_p);
01142         }
01143       }
01144     }
01145     *start = scheme_end;
01146     return PARSE_CONT;
01147   }
01148   return PARSE_ERROR; 
01149 }
01150 
01151 MIMEParseResult
01152 url_parse(HdrHeap * heap, URLImpl * url, const char **start, const char *end, bool copy_strings_p)
01153 {
01154   MIMEParseResult zret = url_parse_scheme(heap, url, start, end, copy_strings_p);
01155   return PARSE_CONT == zret ? url_parse_http(heap, url, start, end, copy_strings_p) : zret;
01156 }
01157 
01158 MIMEParseResult
01159 url_parse_no_path_component_breakdown(HdrHeap * heap, URLImpl * url, const char **start, const char *end, bool copy_strings_p)
01160 {
01161   MIMEParseResult zret = url_parse_scheme(heap, url, start, end, copy_strings_p);
01162   return PARSE_CONT == zret ? url_parse_http_no_path_component_breakdown(heap, url, start, end, copy_strings_p) : zret;
01163 }
01164 
01165 
01166 
01167 
01168 
01169 
01170 
01171 
01172 
01173 
01174 
01175 
01176 
01177 
01178 
01179 
01180 
01181 MIMEParseResult
01182 url_parse_internet(HdrHeap* heap, URLImpl* url,
01183                        char const ** start, char const *end,
01184                        bool copy_strings_p)
01185 {
01186   char const* cur = *start;
01187   char const* base; 
01188   char const* bracket = 0; 
01189   ts::ConstBuffer user, passw, host, port;
01190   static size_t const MAX_COLON = 8; 
01191   size_t n_colon = 0;
01192   char const* last_colon = 0; 
01193 
01194   
01195   if (end - cur > 3 &&
01196       (((':' ^ *cur) | ('/' ^ cur[1]) | ('/' ^ cur[2])) == 0)) {
01197     cur += 3;
01198   } else if (':' == *cur && (++cur >= end ||
01199                              ('/' == *cur && (++cur >= end ||
01200                                               ('/' == *cur && ++cur >= end))))) {
01201     return PARSE_ERROR;
01202   }
01203   base = cur;
01204   
01205   while (cur < end) {
01206     
01207     
01208     switch (*cur) {
01209     case ']' : 
01210       if (0 == bracket || n_colon >= MAX_COLON)
01211         return PARSE_ERROR;
01212       ++cur;
01213       
01214 
01215 
01216 
01217 
01218 
01219       host.set(bracket, cur);
01220       
01221       
01222       if (cur >= end || '/' == *cur) { 
01223         last_colon = 0;
01224         break;
01225       } else if (':' != *cur) { 
01226         return PARSE_ERROR;
01227       }
01228       
01229 
01230 
01231       n_colon = MAX_COLON - 1;
01232       
01233     case ':' : 
01234       if (++n_colon > MAX_COLON)
01235         return PARSE_ERROR;
01236       last_colon = cur;
01237       ++cur;
01238       break;
01239     case '@' : 
01240       if (user || n_colon > 1)
01241         return PARSE_ERROR; 
01242       if (n_colon) {
01243         user.set(base, last_colon);
01244         passw.set(last_colon+1, cur);
01245         n_colon= 0;
01246         last_colon = 0;
01247       } else {
01248         user.set(base, cur);
01249       }
01250       ++cur;
01251       base = cur;
01252       break;
01253     case '[' : 
01254       if (bracket || base != cur) 
01255         return PARSE_ERROR;
01256       bracket = cur; 
01257       ++cur;
01258       break;
01259     case '/' : 
01260       end = cur; 
01261       break;
01262     default:
01263       ++cur;
01264       break;
01265     };
01266   }
01267   
01268   
01269 
01270   if (user) {
01271     url_user_set(heap, url, user._ptr, user._size, copy_strings_p);
01272     if (passw)
01273       url_password_set(heap, url, passw._ptr, passw._size, copy_strings_p);
01274   }
01275 
01276   
01277   if (!host) {
01278     if (1 == n_colon || MAX_COLON == n_colon) { 
01279       host.set(base, last_colon);
01280     } else { 
01281       host.set(base, cur);
01282       last_colon = 0; 
01283     }
01284   }
01285   if (host._size)
01286     url_host_set(heap, url, host._ptr, host._size, copy_strings_p);
01287   
01288   if (last_colon) {
01289     ink_assert(n_colon);
01290     port.set(last_colon+1, cur);
01291     if (!port._size)
01292       return PARSE_ERROR; 
01293     url_port_set(heap, url, port._ptr, port._size, copy_strings_p);
01294   }
01295   if ('/' == *cur) ++cur; 
01296   *start = cur;
01297   return PARSE_DONE;
01298 }
01299 
01300 
01301 
01302 
01303 
01304 MIMEParseResult
01305 url_parse_http(HdrHeap * heap, URLImpl * url, const char **start, const char *end, bool copy_strings)
01306 {
01307   MIMEParseResult err;
01308   const char *cur;
01309   const char *path_start = NULL;
01310   const char *path_end = NULL;
01311   const char *params_start = NULL;
01312   const char *params_end = NULL;
01313   const char *query_start = NULL;
01314   const char *query_end = NULL;
01315   const char *fragment_start = NULL;
01316   const char *fragment_end = NULL;
01317   char mask;
01318 
01319   err = url_parse_internet(heap, url, start, end, copy_strings);
01320   if (err < 0)
01321     return err;
01322 
01323   cur = *start;
01324   if (*start == end)
01325     goto done;
01326 
01327   path_start = cur;
01328   mask = ';' & '?' & '#';
01329 parse_path2:
01330   if ((*cur & mask) == mask) {
01331     if (*cur == ';') {
01332       path_end = cur;
01333       goto parse_params1;
01334     }
01335     if (*cur == '?') {
01336       path_end = cur;
01337       goto parse_query1;
01338     }
01339     if (*cur == '#') {
01340       path_end = cur;
01341       goto parse_fragment1;
01342     }
01343   } else {
01344     ink_assert((*cur != ';') && (*cur != '?') && (*cur != '#'));
01345   }
01346   GETNEXT(done);
01347   goto parse_path2;
01348 
01349 parse_params1:
01350   params_start = cur + 1;
01351   GETNEXT(done);
01352 parse_params2:
01353   if (*cur == '?') {
01354     params_end = cur;
01355     goto parse_query1;
01356   }
01357   if (*cur == '#') {
01358     params_end = cur;
01359     goto parse_fragment1;
01360   }
01361   GETNEXT(done);
01362   goto parse_params2;
01363 
01364 parse_query1:
01365   query_start = cur + 1;
01366   GETNEXT(done);
01367 parse_query2:
01368   if (*cur == '#') {
01369     query_end = cur;
01370     goto parse_fragment1;
01371   }
01372   GETNEXT(done);
01373   goto parse_query2;
01374 
01375 parse_fragment1:
01376   fragment_start = cur + 1;
01377   GETNEXT(done);
01378   fragment_end = end;
01379 
01380 done:
01381   if (path_start) {
01382     if (!path_end)
01383       path_end = cur;
01384     url_path_set(heap, url, path_start, path_end - path_start, copy_strings);
01385   }
01386   if (params_start) {
01387     if (!params_end)
01388       params_end = cur;
01389     url_params_set(heap, url, params_start, params_end - params_start, copy_strings);
01390   }
01391   if (query_start) {
01392     if (!query_end)
01393       query_end = cur;
01394     url_query_set(heap, url, query_start, query_end - query_start, copy_strings);
01395   }
01396   if (fragment_start) {
01397     if (!fragment_end)
01398       fragment_end = cur;
01399     url_fragment_set(heap, url, fragment_start, fragment_end - fragment_start, copy_strings);
01400   }
01401 
01402   *start = cur;
01403   return PARSE_DONE;
01404 }
01405 
01406 MIMEParseResult
01407 url_parse_http_no_path_component_breakdown(HdrHeap * heap,
01408                                            URLImpl * url, const char **start, const char *end, bool copy_strings)
01409 {
01410   const char *cur = *start;
01411   char const* host_end;
01412 
01413   
01414   if (end - cur > 3 &&
01415       (((':' ^ *cur) | ('/' ^ cur[1]) | ('/' ^ cur[2])) == 0)) {
01416     cur += 3;
01417   } else if (':' == *cur && (++cur >= end ||
01418                              ('/' == *cur && (++cur >= end ||
01419                                               ('/' == *cur && ++cur >= end))))) {
01420     return PARSE_ERROR;
01421   }
01422 
01423   
01424   char const* base = cur;
01425   cur = static_cast<char const*>(memchr(cur, '/', end - cur));
01426   if (cur) {
01427     host_end = cur;
01428     ++cur;
01429   } else {
01430     host_end = cur = end;
01431   }
01432 
01433   
01434   if (base != host_end) {
01435     char const* port = 0;
01436     int port_len = 0;
01437 
01438     
01439     
01440     port = host_end - 1;
01441     char const* port_limit = host_end - 6;
01442     if (port_limit < base) port_limit = base; 
01443     while (port >= port_limit && isdigit(*port))
01444       --port;
01445     
01446     
01447     if (port >= base && ':' == *port) {
01448       port_len = host_end - port - 1; 
01449       host_end = port; 
01450       ++port; 
01451       url_port_set(heap, url, port, port_len, copy_strings);
01452     }
01453     
01454     url_host_set(heap, url, base, host_end - base, copy_strings);
01455   }
01456 
01457   
01458   if (cur < end) {
01459     url_path_set(heap, url, cur, end - cur, copy_strings);
01460     cur = end;
01461   }
01462   *start = cur;
01463   return PARSE_DONE;
01464 }
01465 
01466 
01467 
01468 
01469 
01470 
01471 
01472 
01473 
01474 
01475 int
01476 url_print(URLImpl * url, char *buf_start, int buf_length, int *buf_index_inout, int *buf_chars_to_skip_inout)
01477 {
01478 #define TRY(x)  if (!x) return 0
01479 
01480   if (url->m_ptr_scheme) {
01481     TRY(mime_mem_print(url->m_ptr_scheme, url->m_len_scheme,
01482                        buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01483     
01484 
01485 
01486 
01487       TRY(mime_mem_print("://", 3, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01488 
01489   }
01490 
01491   if (url->m_ptr_user) {
01492     TRY(mime_mem_print(url->m_ptr_user, url->m_len_user,
01493                        buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01494     if (url->m_ptr_password) {
01495       TRY(mime_mem_print(":", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01496       TRY(mime_mem_print(url->m_ptr_password, url->m_len_password,
01497                          buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01498     }
01499     TRY(mime_mem_print("@", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01500   }
01501 
01502   if (url->m_ptr_host) {
01503     
01504     
01505     int n = url->m_len_host;
01506     bool bracket_p = '[' != *url->m_ptr_host && (0 != memchr(url->m_ptr_host, ':', n > 5 ? 5 : n));
01507     if (bracket_p)
01508       TRY(mime_mem_print("[", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01509     TRY(mime_mem_print(url->m_ptr_host, url->m_len_host,
01510                        buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01511     if (bracket_p)
01512       TRY(mime_mem_print("]", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01513     if (url->m_ptr_port && url->m_port) {
01514       TRY(mime_mem_print(":", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01515       TRY(mime_mem_print(url->m_ptr_port, url->m_len_port,
01516                          buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01517     }
01518   }
01519 
01520   TRY(mime_mem_print("/", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01521 
01522   if (url->m_ptr_path) {
01523     TRY(mime_mem_print(url->m_ptr_path, url->m_len_path, buf_start,
01524                        buf_length, buf_index_inout, buf_chars_to_skip_inout));
01525   }
01526 
01527   if (url->m_ptr_params && url->m_len_params > 0) {
01528     TRY(mime_mem_print(";", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01529     TRY(mime_mem_print(url->m_ptr_params, url->m_len_params,
01530                        buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01531   }
01532 
01533   if (url->m_ptr_query && url->m_len_query > 0) {
01534     TRY(mime_mem_print("?", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01535     TRY(mime_mem_print(url->m_ptr_query, url->m_len_query,
01536                        buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01537   }
01538 
01539   if (url->m_ptr_fragment && url->m_len_fragment > 0) {
01540     TRY(mime_mem_print("#", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01541     TRY(mime_mem_print(url->m_ptr_fragment, url->m_len_fragment,
01542                        buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01543   }
01544 
01545   return 1;
01546 
01547 #undef TRY
01548 }
01549 
01550 void
01551 url_describe(HdrHeapObjImpl * raw, bool )
01552 {
01553   URLImpl *obj = (URLImpl *) raw;
01554 
01555   Debug("http", "[URLTYPE: %d, SWKSIDX: %d,\n", obj->m_url_type, obj->m_scheme_wks_idx);
01556   Debug("http", "\tSCHEME: \"%.*s\", SCHEME_LEN: %d,\n",
01557         obj->m_len_scheme, (obj->m_ptr_scheme ? obj->m_ptr_scheme : "NULL"), obj->m_len_scheme);
01558   Debug("http", "\tUSER: \"%.*s\", USER_LEN: %d,\n",
01559         obj->m_len_user, (obj->m_ptr_user ? obj->m_ptr_user : "NULL"), obj->m_len_user);
01560   Debug("http", "\tPASSWORD: \"%.*s\", PASSWORD_LEN: %d,\n",
01561         obj->m_len_password, (obj->m_ptr_password ? obj->m_ptr_password : "NULL"), obj->m_len_password);
01562   Debug("http", "\tHOST: \"%.*s\", HOST_LEN: %d,\n",
01563         obj->m_len_host, (obj->m_ptr_host ? obj->m_ptr_host : "NULL"), obj->m_len_host);
01564   Debug("http", "\tPORT: \"%.*s\", PORT_LEN: %d, PORT_NUM: %d\n",
01565         obj->m_len_port, (obj->m_ptr_port ? obj->m_ptr_port : "NULL"), obj->m_len_port, obj->m_port);
01566   Debug("http", "\tPATH: \"%.*s\", PATH_LEN: %d,\n",
01567         obj->m_len_path, (obj->m_ptr_path ? obj->m_ptr_path : "NULL"), obj->m_len_path);
01568   Debug("http", "\tPARAMS: \"%.*s\", PARAMS_LEN: %d,\n",
01569         obj->m_len_params, (obj->m_ptr_params ? obj->m_ptr_params : "NULL"), obj->m_len_params);
01570   Debug("http", "\tQUERY: \"%.*s\", QUERY_LEN: %d,\n",
01571         obj->m_len_query, (obj->m_ptr_query ? obj->m_ptr_query : "NULL"), obj->m_len_query);
01572   Debug("http", "\tFRAGMENT: \"%.*s\", FRAGMENT_LEN: %d]\n",
01573         obj->m_len_fragment, (obj->m_ptr_fragment ? obj->m_ptr_fragment : "NULL"), obj->m_len_fragment);
01574 }
01575 
01576 
01577 
01578 
01579 
01580 
01581 
01582 
01583 
01584 
01585 
01586 static inline void
01587 memcpy_tolower(char *d, const char *s, int n)
01588 {
01589   while (n--) {
01590     *d = ParseRules::ink_tolower(*s);
01591     s++;
01592     d++;
01593   }
01594 }
01595 
01596 
01597 #define BUFSIZE 512
01598 
01599 
01600 
01601 
01602 static inline void
01603 url_MD5_get_fast(URLImpl * url, CryptoContext& ctx, CryptoHash* hash)
01604 {
01605   char buffer[BUFSIZE];
01606   char *p;
01607 
01608   p = buffer;
01609   memcpy_tolower(p, url->m_ptr_scheme, url->m_len_scheme);
01610   p += url->m_len_scheme;
01611   *p++ = ':';
01612   *p++ = '/';
01613   *p++ = '/';
01614   
01615   *p++ = ':';
01616   
01617   *p++ = '@';
01618   memcpy_tolower(p, url->m_ptr_host, url->m_len_host);
01619   p += url->m_len_host;
01620   *p++ = '/';
01621   memcpy(p, url->m_ptr_path, url->m_len_path);
01622   p += url->m_len_path;
01623   *p++ = ';';
01624   
01625   *p++ = '?';
01626   
01627 
01628   ink_assert(sizeof(url->m_port) == 2);
01629   uint16_t port = (uint16_t) url_canonicalize_port(url->m_url_type, url->m_port);
01630   *p++ = ((char *) &port)[0];
01631   *p++ = ((char *) &port)[1];
01632 
01633   ctx.update(buffer, p - buffer);
01634   ctx.finalize(hash);
01635 }
01636 
01637 
01638 static inline void
01639 url_MD5_get_general(URLImpl * url, CryptoContext& ctx, CryptoHash& hash)
01640 {
01641   char buffer[BUFSIZE];
01642   char *p, *e;
01643   const char *strs[13], *ends[13];
01644   const char *t;
01645   in_port_t port;
01646   int i, s;
01647 
01648   strs[0] = url->m_ptr_scheme;
01649   strs[1] = "://";
01650   strs[2] = url->m_ptr_user;
01651   strs[3] = ":";
01652   strs[4] = url->m_ptr_password;
01653   strs[5] = "@";
01654   strs[6] = url->m_ptr_host;
01655   strs[7] = "/";
01656   strs[8] = url->m_ptr_path;
01657 
01658   ends[0] = strs[0] + url->m_len_scheme;
01659   ends[1] = strs[1] + 3;
01660   ends[2] = strs[2] + url->m_len_user;
01661   ends[3] = strs[3] + 1;
01662   ends[4] = strs[4] + url->m_len_password;
01663   ends[5] = strs[5] + 1;
01664   ends[6] = strs[6] + url->m_len_host;
01665   ends[7] = strs[7] + 1;
01666   ends[8] = strs[8] + url->m_len_path;
01667 
01668   strs[9] = ";";
01669   strs[10] = url->m_ptr_params;
01670   strs[11] = "?";
01671   strs[12] = url->m_ptr_query;
01672   ends[9] = strs[9] + 1;
01673   ends[10] = strs[10] + url->m_len_params;
01674   ends[11] = strs[11] + 1;
01675   ends[12] = strs[12] + url->m_len_query;
01676 
01677   p = buffer;
01678   e = buffer + BUFSIZE;
01679 
01680   for (i = 0; i < 13; i++) {
01681     if (strs[i]) {
01682       t = strs[i];
01683       s = 0;
01684 
01685       while (t < ends[i]) {
01686         if ((i == 0) || (i == 6)) {           
01687           unescape_str_tolower(p, e, t, ends[i], s);
01688         } else {
01689           unescape_str(p, e, t, ends[i], s);
01690         }
01691 
01692         if (p == e) {
01693           ctx.update(buffer, BUFSIZE);
01694           p = buffer;
01695         }
01696       }
01697     }
01698   }
01699 
01700   if (p != buffer) ctx.update(buffer, p-buffer);
01701 
01702   port = url_canonicalize_port(url->m_url_type, url->m_port);
01703 
01704   ctx.update(&port, sizeof(port));
01705   ctx.finalize(hash);
01706 }
01707 
01708 void
01709 url_MD5_get(URLImpl * url, CryptoHash* hash)
01710 {
01711   URLHashContext ctx;
01712   if ((url_hash_method != 0) &&
01713       (url->m_url_type == URL_TYPE_HTTP) &&
01714       ((url->m_len_user + url->m_len_password + url->m_len_params + url->m_len_query) == 0) &&
01715       (3 + 1 + 1 + 1 + 1 + 1 + 2 +
01716        url->m_len_scheme +
01717        url->m_len_host +
01718        url->m_len_path < BUFSIZE) &&
01719       (memchr(url->m_ptr_host, '%', url->m_len_host) == NULL) &&
01720       (memchr(url->m_ptr_path, '%', url->m_len_path) == NULL)) {
01721     url_MD5_get_fast(url, ctx, hash);
01722 #ifdef DEBUG
01723     CryptoHash md5_general;
01724     url_MD5_get_general(url, ctx, md5_general);
01725     ink_assert(*hash == md5_general);
01726 #endif
01727   } else {
01728     url_MD5_get_general(url, ctx, *hash);
01729   }
01730 }
01731 
01732 #undef BUFSIZE
01733 
01734 
01735 
01736 
01737 void
01738 url_host_MD5_get(URLImpl * url, INK_MD5 * md5)
01739 {
01740   MD5Context ctx;
01741 
01742   if (url->m_ptr_scheme) {
01743     ctx.update(url->m_ptr_scheme, url->m_len_scheme);
01744   }
01745 
01746   ctx.update("://", 3);
01747 
01748   if (url->m_ptr_host) {
01749     ctx.update(url->m_ptr_host, url->m_len_host);
01750   }
01751 
01752   ctx.update(":", 1);
01753 
01754   
01755   
01756   int port = url_canonicalize_port(url->m_url_type, url->m_port);
01757   ctx.update(&port, sizeof(port));
01758   ctx.finalize(*md5);
01759 }