00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #include <assert.h>
00025 #include <new>
00026 #include "libts.h"
00027 #include "URL.h"
00028 #include "MIME.h"
00029 #include "HTTP.h"
00030 #include "Diags.h"
00031
00032 const char *URL_SCHEME_FILE;
00033 const char *URL_SCHEME_FTP;
00034 const char *URL_SCHEME_GOPHER;
00035 const char *URL_SCHEME_HTTP;
00036 const char *URL_SCHEME_HTTPS;
00037 const char *URL_SCHEME_WSS;
00038 const char *URL_SCHEME_WS;
00039 const char *URL_SCHEME_MAILTO;
00040 const char *URL_SCHEME_NEWS;
00041 const char *URL_SCHEME_NNTP;
00042 const char *URL_SCHEME_PROSPERO;
00043 const char *URL_SCHEME_TELNET;
00044 const char *URL_SCHEME_TUNNEL;
00045 const char *URL_SCHEME_WAIS;
00046 const char *URL_SCHEME_PNM;
00047 const char *URL_SCHEME_RTSP;
00048 const char *URL_SCHEME_RTSPU;
00049 const char *URL_SCHEME_MMS;
00050 const char *URL_SCHEME_MMSU;
00051 const char *URL_SCHEME_MMST;
00052
00053 int URL_WKSIDX_FILE;
00054 int URL_WKSIDX_FTP;
00055 int URL_WKSIDX_GOPHER;
00056 int URL_WKSIDX_HTTP;
00057 int URL_WKSIDX_HTTPS;
00058 int URL_WKSIDX_WS;
00059 int URL_WKSIDX_WSS;
00060 int URL_WKSIDX_MAILTO;
00061 int URL_WKSIDX_NEWS;
00062 int URL_WKSIDX_NNTP;
00063 int URL_WKSIDX_PROSPERO;
00064 int URL_WKSIDX_TELNET;
00065 int URL_WKSIDX_TUNNEL;
00066 int URL_WKSIDX_WAIS;
00067 int URL_WKSIDX_PNM;
00068 int URL_WKSIDX_RTSP;
00069 int URL_WKSIDX_RTSPU;
00070 int URL_WKSIDX_MMS;
00071 int URL_WKSIDX_MMSU;
00072 int URL_WKSIDX_MMST;
00073
00074 int URL_LEN_FILE;
00075 int URL_LEN_FTP;
00076 int URL_LEN_GOPHER;
00077 int URL_LEN_HTTP;
00078 int URL_LEN_HTTPS;
00079 int URL_LEN_WS;
00080 int URL_LEN_WSS;
00081 int URL_LEN_MAILTO;
00082 int URL_LEN_NEWS;
00083 int URL_LEN_NNTP;
00084 int URL_LEN_PROSPERO;
00085 int URL_LEN_TELNET;
00086 int URL_LEN_TUNNEL;
00087 int URL_LEN_WAIS;
00088 int URL_LEN_PNM;
00089 int URL_LEN_RTSP;
00090 int URL_LEN_RTSPU;
00091 int URL_LEN_MMS;
00092 int URL_LEN_MMSU;
00093 int URL_LEN_MMST;
00094
00095 int url_hash_method = 0;
00096
00097
00098
00099
00100 URLHashContext::HashType URLHashContext::Setting = URLHashContext::MMH;
00101
00102 URLHashContext::URLHashContext() {
00103 switch (Setting) {
00104 case UNSPECIFIED:
00105 case MD5:
00106 new(_obj) MD5Context;
00107 break;
00108 case MMH:
00109 new(_obj) MMHContext;
00110 break;
00111 default: ink_assert("Invalid global URL hash context");
00112 };
00113 }
00114
00115 void
00116 url_init()
00117 {
00118 static int init = 1;
00119
00120 if (init) {
00121 init = 0;
00122
00123 hdrtoken_init();
00124
00125 URL_SCHEME_FILE = hdrtoken_string_to_wks("file");
00126 URL_SCHEME_FTP = hdrtoken_string_to_wks("ftp");
00127 URL_SCHEME_GOPHER = hdrtoken_string_to_wks("gopher");
00128 URL_SCHEME_HTTP = hdrtoken_string_to_wks("http");
00129 URL_SCHEME_HTTPS = hdrtoken_string_to_wks("https");
00130 URL_SCHEME_WSS = hdrtoken_string_to_wks("wss");
00131 URL_SCHEME_WS = hdrtoken_string_to_wks("ws");
00132 URL_SCHEME_MAILTO = hdrtoken_string_to_wks("mailto");
00133 URL_SCHEME_NEWS = hdrtoken_string_to_wks("news");
00134 URL_SCHEME_NNTP = hdrtoken_string_to_wks("nntp");
00135 URL_SCHEME_PROSPERO = hdrtoken_string_to_wks("prospero");
00136 URL_SCHEME_TELNET = hdrtoken_string_to_wks("telnet");
00137 URL_SCHEME_TUNNEL = hdrtoken_string_to_wks("tunnel");
00138 URL_SCHEME_WAIS = hdrtoken_string_to_wks("wais");
00139 URL_SCHEME_PNM = hdrtoken_string_to_wks("pnm");
00140 URL_SCHEME_RTSP = hdrtoken_string_to_wks("rtsp");
00141 URL_SCHEME_RTSPU = hdrtoken_string_to_wks("rtspu");
00142 URL_SCHEME_MMS = hdrtoken_string_to_wks("mms");
00143 URL_SCHEME_MMSU = hdrtoken_string_to_wks("mmsu");
00144 URL_SCHEME_MMST = hdrtoken_string_to_wks("mmst");
00145
00146 ink_assert(URL_SCHEME_FILE &&
00147 URL_SCHEME_FTP &&
00148 URL_SCHEME_GOPHER &&
00149 URL_SCHEME_HTTP &&
00150 URL_SCHEME_HTTPS &&
00151 URL_SCHEME_WS &&
00152 URL_SCHEME_WSS &&
00153 URL_SCHEME_MAILTO &&
00154 URL_SCHEME_NEWS &&
00155 URL_SCHEME_NNTP &&
00156 URL_SCHEME_PROSPERO &&
00157 URL_SCHEME_TELNET &&
00158 URL_SCHEME_TUNNEL &&
00159 URL_SCHEME_WAIS &&
00160 URL_SCHEME_PNM &&
00161 URL_SCHEME_RTSP &&
00162 URL_SCHEME_RTSPU &&
00163 URL_SCHEME_MMS &&
00164 URL_SCHEME_MMSU &&
00165 URL_SCHEME_MMST
00166 );
00167
00168 URL_WKSIDX_FILE = hdrtoken_wks_to_index(URL_SCHEME_FILE);
00169 URL_WKSIDX_FTP = hdrtoken_wks_to_index(URL_SCHEME_FTP);
00170 URL_WKSIDX_GOPHER = hdrtoken_wks_to_index(URL_SCHEME_GOPHER);
00171 URL_WKSIDX_HTTP = hdrtoken_wks_to_index(URL_SCHEME_HTTP);
00172 URL_WKSIDX_HTTPS = hdrtoken_wks_to_index(URL_SCHEME_HTTPS);
00173 URL_WKSIDX_WS = hdrtoken_wks_to_index(URL_SCHEME_WS);
00174 URL_WKSIDX_WSS = hdrtoken_wks_to_index(URL_SCHEME_WSS);
00175 URL_WKSIDX_MAILTO = hdrtoken_wks_to_index(URL_SCHEME_MAILTO);
00176 URL_WKSIDX_NEWS = hdrtoken_wks_to_index(URL_SCHEME_NEWS);
00177 URL_WKSIDX_NNTP = hdrtoken_wks_to_index(URL_SCHEME_NNTP);
00178 URL_WKSIDX_PROSPERO = hdrtoken_wks_to_index(URL_SCHEME_PROSPERO);
00179 URL_WKSIDX_TELNET = hdrtoken_wks_to_index(URL_SCHEME_TELNET);
00180 URL_WKSIDX_TUNNEL = hdrtoken_wks_to_index(URL_SCHEME_TUNNEL);
00181 URL_WKSIDX_WAIS = hdrtoken_wks_to_index(URL_SCHEME_WAIS);
00182 URL_WKSIDX_PNM = hdrtoken_wks_to_index(URL_SCHEME_PNM);
00183 URL_WKSIDX_RTSP = hdrtoken_wks_to_index(URL_SCHEME_RTSP);
00184 URL_WKSIDX_RTSPU = hdrtoken_wks_to_index(URL_SCHEME_RTSPU);
00185 URL_WKSIDX_MMS = hdrtoken_wks_to_index(URL_SCHEME_MMS);
00186 URL_WKSIDX_MMSU = hdrtoken_wks_to_index(URL_SCHEME_MMSU);
00187 URL_WKSIDX_MMST = hdrtoken_wks_to_index(URL_SCHEME_MMST);
00188
00189 URL_LEN_FILE = hdrtoken_wks_to_length(URL_SCHEME_FILE);
00190 URL_LEN_FTP = hdrtoken_wks_to_length(URL_SCHEME_FTP);
00191 URL_LEN_GOPHER = hdrtoken_wks_to_length(URL_SCHEME_GOPHER);
00192 URL_LEN_HTTP = hdrtoken_wks_to_length(URL_SCHEME_HTTP);
00193 URL_LEN_HTTPS = hdrtoken_wks_to_length(URL_SCHEME_HTTPS);
00194 URL_LEN_WS = hdrtoken_wks_to_length(URL_SCHEME_WS);
00195 URL_LEN_WSS = hdrtoken_wks_to_length(URL_SCHEME_WSS);
00196 URL_LEN_MAILTO = hdrtoken_wks_to_length(URL_SCHEME_MAILTO);
00197 URL_LEN_NEWS = hdrtoken_wks_to_length(URL_SCHEME_NEWS);
00198 URL_LEN_NNTP = hdrtoken_wks_to_length(URL_SCHEME_NNTP);
00199 URL_LEN_PROSPERO = hdrtoken_wks_to_length(URL_SCHEME_PROSPERO);
00200 URL_LEN_TELNET = hdrtoken_wks_to_length(URL_SCHEME_TELNET);
00201 URL_LEN_TUNNEL = hdrtoken_wks_to_length(URL_SCHEME_TUNNEL);
00202 URL_LEN_WAIS = hdrtoken_wks_to_length(URL_SCHEME_WAIS);
00203 URL_LEN_PNM = hdrtoken_wks_to_length(URL_SCHEME_PNM);
00204 URL_LEN_RTSP = hdrtoken_wks_to_length(URL_SCHEME_RTSP);
00205 URL_LEN_RTSPU = hdrtoken_wks_to_length(URL_SCHEME_RTSPU);
00206 URL_LEN_MMS = hdrtoken_wks_to_length(URL_SCHEME_MMS);
00207 URL_LEN_MMSU = hdrtoken_wks_to_length(URL_SCHEME_MMSU);
00208 URL_LEN_MMST = hdrtoken_wks_to_length(URL_SCHEME_MMST);
00209
00210 ink_assert(URLHashContext::OBJ_SIZE >= sizeof(MD5Context));
00211 ink_assert(URLHashContext::OBJ_SIZE >= sizeof(MMHContext));
00212
00213 }
00214 }
00215
00216
00217
00218
00219
00220
00221
00222
00223
00224
00225 URLImpl *
00226 url_create(HdrHeap * heap)
00227 {
00228 URLImpl *url;
00229
00230 url = (URLImpl *) heap->allocate_obj(sizeof(URLImpl), HDR_HEAP_OBJ_URL);
00231 obj_clear_data((HdrHeapObjImpl *) url);
00232 url->m_url_type = URL_TYPE_NONE;
00233 url->m_scheme_wks_idx = -1;
00234 url_clear_string_ref(url);
00235 return url;
00236 }
00237
00238
00239
00240
00241 void
00242 url_clear(URLImpl * url_impl)
00243 {
00244 obj_clear_data((HdrHeapObjImpl *) url_impl);
00245 url_impl->m_url_type = URL_TYPE_NONE;
00246 url_impl->m_scheme_wks_idx = -1;
00247 }
00248
00249
00250
00251
00252 URLImpl *
00253 url_copy(URLImpl * s_url, HdrHeap * s_heap, HdrHeap * d_heap, bool inherit_strs)
00254 {
00255 URLImpl *d_url = url_create(d_heap);
00256 url_copy_onto(s_url, s_heap, d_url, d_heap, inherit_strs);
00257 return d_url;
00258 }
00259
00260
00261
00262
00263 void
00264 url_copy_onto(URLImpl * s_url, HdrHeap * s_heap, URLImpl * d_url, HdrHeap * d_heap, bool inherit_strs)
00265 {
00266 if (s_url != d_url) {
00267 obj_copy_data((HdrHeapObjImpl *) s_url, (HdrHeapObjImpl *) d_url);
00268 if (inherit_strs && (s_heap != d_heap))
00269 d_heap->inherit_string_heaps(s_heap);
00270 }
00271 }
00272
00273
00274
00275
00276 void
00277 url_nuke_proxy_stuff(URLImpl * d_url)
00278 {
00279 d_url->m_len_scheme = 0;
00280 d_url->m_len_user = 0;
00281 d_url->m_len_password = 0;
00282 d_url->m_len_host = 0;
00283 d_url->m_len_port = 0;
00284
00285 d_url->m_ptr_scheme = NULL;
00286 d_url->m_ptr_user = NULL;
00287 d_url->m_ptr_password = NULL;
00288 d_url->m_ptr_host = NULL;
00289 d_url->m_ptr_port = NULL;
00290
00291 d_url->m_scheme_wks_idx = -1;
00292 d_url->m_port = 0;
00293 }
00294
00295
00296
00297
00298
00299
00300
00301
00302
00303 void
00304 url_copy_onto_as_server_url(URLImpl * s_url, HdrHeap * s_heap, URLImpl * d_url, HdrHeap * d_heap, bool inherit_strs)
00305 {
00306 url_nuke_proxy_stuff(d_url);
00307
00308 d_url->m_ptr_path = s_url->m_ptr_path;
00309 d_url->m_ptr_params = s_url->m_ptr_params;
00310 d_url->m_ptr_query = s_url->m_ptr_query;
00311 d_url->m_ptr_fragment = s_url->m_ptr_fragment;
00312 url_clear_string_ref(d_url);
00313
00314 d_url->m_len_path = s_url->m_len_path;
00315 d_url->m_len_params = s_url->m_len_params;
00316 d_url->m_len_query = s_url->m_len_query;
00317 d_url->m_len_fragment = s_url->m_len_fragment;
00318
00319 d_url->m_url_type = s_url->m_url_type;
00320 d_url->m_type_code = s_url->m_type_code;
00321
00322 if (inherit_strs && (s_heap != d_heap))
00323 d_heap->inherit_string_heaps(s_heap);
00324 }
00325
00326
00327
00328
00329
00330
00331
00332
00333
00334 int
00335 URLImpl::marshal(MarshalXlate * str_xlate, int num_xlate)
00336 {
00337
00338 HDR_MARSHAL_STR(m_ptr_scheme, str_xlate, num_xlate);
00339 HDR_MARSHAL_STR(m_ptr_user, str_xlate, num_xlate);
00340 HDR_MARSHAL_STR(m_ptr_password, str_xlate, num_xlate);
00341 HDR_MARSHAL_STR(m_ptr_host, str_xlate, num_xlate);
00342 HDR_MARSHAL_STR(m_ptr_port, str_xlate, num_xlate);
00343 HDR_MARSHAL_STR(m_ptr_path, str_xlate, num_xlate);
00344 HDR_MARSHAL_STR(m_ptr_params, str_xlate, num_xlate);
00345 HDR_MARSHAL_STR(m_ptr_query, str_xlate, num_xlate);
00346 HDR_MARSHAL_STR(m_ptr_fragment, str_xlate, num_xlate);
00347
00348 return 0;
00349 }
00350
00351 void
00352 URLImpl::unmarshal(intptr_t offset)
00353 {
00354 HDR_UNMARSHAL_STR(m_ptr_scheme, offset);
00355 HDR_UNMARSHAL_STR(m_ptr_user, offset);
00356 HDR_UNMARSHAL_STR(m_ptr_password, offset);
00357 HDR_UNMARSHAL_STR(m_ptr_host, offset);
00358 HDR_UNMARSHAL_STR(m_ptr_port, offset);
00359 HDR_UNMARSHAL_STR(m_ptr_path, offset);
00360 HDR_UNMARSHAL_STR(m_ptr_params, offset);
00361 HDR_UNMARSHAL_STR(m_ptr_query, offset);
00362 HDR_UNMARSHAL_STR(m_ptr_fragment, offset);
00363
00364 }
00365
00366 void
00367 URLImpl::move_strings(HdrStrHeap * new_heap)
00368 {
00369 HDR_MOVE_STR(m_ptr_scheme, m_len_scheme);
00370 HDR_MOVE_STR(m_ptr_user, m_len_user);
00371 HDR_MOVE_STR(m_ptr_password, m_len_password);
00372 HDR_MOVE_STR(m_ptr_host, m_len_host);
00373 HDR_MOVE_STR(m_ptr_port, m_len_port);
00374 HDR_MOVE_STR(m_ptr_path, m_len_path);
00375 HDR_MOVE_STR(m_ptr_params, m_len_params);
00376 HDR_MOVE_STR(m_ptr_query, m_len_query);
00377 HDR_MOVE_STR(m_ptr_fragment, m_len_fragment);
00378 HDR_MOVE_STR(m_ptr_printed_string, m_len_printed_string);
00379 }
00380
00381 size_t
00382 URLImpl::strings_length()
00383 {
00384 size_t ret = 0;
00385
00386 ret += m_len_scheme;
00387 ret += m_len_user;
00388 ret += m_len_password;
00389 ret += m_len_host;
00390 ret += m_len_port;
00391 ret += m_len_path;
00392 ret += m_len_params;
00393 ret += m_len_query;
00394 ret += m_len_fragment;
00395 ret += m_len_printed_string;
00396 return ret;
00397 }
00398
00399 void
00400 URLImpl::check_strings(HeapCheck * heaps, int num_heaps)
00401 {
00402 CHECK_STR(m_ptr_scheme, m_len_scheme, heaps, num_heaps);
00403 CHECK_STR(m_ptr_user, m_len_user, heaps, num_heaps);
00404 CHECK_STR(m_ptr_password, m_len_password, heaps, num_heaps);
00405 CHECK_STR(m_ptr_host, m_len_host, heaps, num_heaps);
00406 CHECK_STR(m_ptr_port, m_len_port, heaps, num_heaps);
00407 CHECK_STR(m_ptr_path, m_len_path, heaps, num_heaps);
00408 CHECK_STR(m_ptr_params, m_len_params, heaps, num_heaps);
00409 CHECK_STR(m_ptr_query, m_len_query, heaps, num_heaps);
00410 CHECK_STR(m_ptr_fragment, m_len_fragment, heaps, num_heaps);
00411
00412 }
00413
00414
00415
00416
00417
00418
00419
00420 const char *
00421 url_scheme_set(HdrHeap * heap, URLImpl * url, const char *scheme_str, int scheme_wks_idx, int length, bool copy_string)
00422 {
00423 const char *scheme_wks;
00424 url_called_set(url);
00425 if (length == 0)
00426 scheme_str = NULL;
00427
00428 mime_str_u16_set(heap, scheme_str, length, &(url->m_ptr_scheme), &(url->m_len_scheme), copy_string);
00429
00430 url->m_scheme_wks_idx = scheme_wks_idx;
00431 if (scheme_wks_idx >= 0)
00432 scheme_wks = hdrtoken_index_to_wks(scheme_wks_idx);
00433 else
00434 scheme_wks = NULL;
00435
00436 if (scheme_wks == URL_SCHEME_HTTP || scheme_wks == URL_SCHEME_WS)
00437 url->m_url_type = URL_TYPE_HTTP;
00438 else if (scheme_wks == URL_SCHEME_HTTPS || scheme_wks == URL_SCHEME_WSS)
00439 url->m_url_type = URL_TYPE_HTTPS;
00440 else
00441 url->m_url_type = URL_TYPE_HTTP;
00442
00443 return scheme_wks;
00444 }
00445
00446
00447
00448
00449 void
00450 url_user_set(HdrHeap * heap, URLImpl * url, const char *value, int length, bool copy_string)
00451 {
00452 url_called_set(url);
00453 if (length == 0)
00454 value = NULL;
00455 mime_str_u16_set(heap, value, length, &(url->m_ptr_user), &(url->m_len_user), copy_string);
00456 }
00457
00458
00459
00460
00461 void
00462 url_password_set(HdrHeap * heap, URLImpl * url, const char *value, int length, bool copy_string)
00463 {
00464 url_called_set(url);
00465 if (length == 0)
00466 value = NULL;
00467 mime_str_u16_set(heap, value, length, &(url->m_ptr_password), &(url->m_len_password), copy_string);
00468 }
00469
00470
00471
00472
00473 void
00474 url_host_set(HdrHeap * heap, URLImpl * url, const char *value, int length, bool copy_string)
00475 {
00476 url_called_set(url);
00477 if (length == 0) value = NULL;
00478 mime_str_u16_set(heap, value, length, &(url->m_ptr_host), &(url->m_len_host), copy_string);
00479 }
00480
00481
00482
00483
00484 void
00485 url_port_set(HdrHeap * heap, URLImpl * url, const char *value, int length, bool copy_string)
00486 {
00487 url_called_set(url);
00488 if (length == 0)
00489 value = NULL;
00490 mime_str_u16_set(heap, value, length, &(url->m_ptr_port), &(url->m_len_port), copy_string);
00491
00492 url->m_port = 0;
00493 for (int i = 0; i < length; i++) {
00494 if (!ParseRules::is_digit(value[i]))
00495 break;
00496 url->m_port = url->m_port * 10 + (value[i] - '0');
00497 }
00498 }
00499
00500
00501
00502
00503 void
00504 url_port_set(HdrHeap * heap, URLImpl * url, unsigned int port)
00505 {
00506 url_called_set(url);
00507 if (port > 0) {
00508 char value[6];
00509 int length;
00510
00511 length = ink_fast_itoa(port, value, sizeof(value));
00512 mime_str_u16_set(heap, value, length, &(url->m_ptr_port), &(url->m_len_port), true);
00513 } else {
00514 mime_str_u16_set(heap, NULL, 0, &(url->m_ptr_port), &(url->m_len_port), true);
00515 }
00516 url->m_port = port;
00517 }
00518
00519
00520
00521
00522 void
00523 url_path_set(HdrHeap * heap, URLImpl * url, const char *value, int length, bool copy_string)
00524 {
00525 url_called_set(url);
00526 if (length == 0)
00527 value = NULL;
00528 mime_str_u16_set(heap, value, length, &(url->m_ptr_path), &(url->m_len_path), copy_string);
00529 }
00530
00531
00532
00533
00534
00535
00536
00537 void
00538 url_params_set(HdrHeap * heap, URLImpl * url, const char *value, int length, bool copy_string)
00539 {
00540 url_called_set(url);
00541 mime_str_u16_set(heap, value, length, &(url->m_ptr_params), &(url->m_len_params), copy_string);
00542 }
00543
00544
00545
00546
00547 void
00548 url_query_set(HdrHeap * heap, URLImpl * url, const char *value, int length, bool copy_string)
00549 {
00550 url_called_set(url);
00551 mime_str_u16_set(heap, value, length, &(url->m_ptr_query), &(url->m_len_query), copy_string);
00552 }
00553
00554
00555
00556
00557 void
00558 url_fragment_set(HdrHeap * heap, URLImpl * url, const char *value, int length, bool copy_string)
00559 {
00560 url_called_set(url);
00561 mime_str_u16_set(heap, value, length, &(url->m_ptr_fragment), &(url->m_len_fragment), copy_string);
00562 }
00563
00564
00565
00566
00567 void
00568 url_type_set(URLImpl * url, unsigned int typecode)
00569 {
00570 url_called_set(url);
00571 url->m_type_code = typecode;
00572 }
00573
00574
00575
00576
00577
00578
00579
00580
00581
00582
00583
00584
00585
00586 void
00587 url_called_set(URLImpl * url)
00588 {
00589 url->m_clean = !url->m_ptr_printed_string;
00590 }
00591
00592 void
00593 url_clear_string_ref(URLImpl * url)
00594 {
00595 if (url->m_ptr_printed_string) {
00596 url->m_len_printed_string = 0;
00597 url->m_ptr_printed_string = NULL;
00598 url->m_clean = true;
00599 }
00600 return;
00601 }
00602
00603 char *
00604 url_string_get_ref(HdrHeap * heap, URLImpl * url, int *length)
00605 {
00606 if (!url)
00607 return NULL;
00608
00609 if (url->m_ptr_printed_string && url->m_clean) {
00610 if (length)
00611 *length = url->m_len_printed_string;
00612 return (char *) url->m_ptr_printed_string;
00613 } else {
00614 int len = url_length_get(url);
00615 char *buf;
00616 int index = 0;
00617 int offset = 0;
00618
00619
00620 buf = heap->allocate_str(len + 1);
00621 url_print(url, buf, len, &index, &offset);
00622 buf[len] = '\0';
00623
00624 if (length) {
00625 *length = len;
00626 }
00627 url->m_clean = true;
00628 url->m_len_printed_string = len;
00629 url->m_ptr_printed_string = buf;
00630 return buf;
00631 }
00632 }
00633
00634 char *
00635 url_string_get(URLImpl * url, Arena * arena, int *length, HdrHeap * heap)
00636 {
00637 int len = url_length_get(url);
00638 char *buf;
00639 char *buf2;
00640 int index = 0;
00641 int offset = 0;
00642
00643 buf = arena ? arena->str_alloc(len) : (char *)ats_malloc(len + 1);
00644
00645 url_print(url, buf, len, &index, &offset);
00646 buf[len] = '\0';
00647
00648
00649 if (heap) {
00650 buf2 = heap->allocate_str(len + 1);
00651 memcpy(buf2, buf, len);
00652 buf2[len] = '\0';
00653 url->m_clean = true;
00654 url->m_len_printed_string = len;
00655 url->m_ptr_printed_string = buf2;
00656 }
00657
00658 if (length) {
00659 *length = len;
00660 }
00661 return buf;
00662 }
00663
00664
00665
00666
00667 char *
00668 url_string_get_buf(URLImpl * url, char *dstbuf, int dstbuf_size, int *length)
00669 {
00670 int len = url_length_get(url);
00671 int index = 0;
00672 int offset = 0;
00673 char *buf = 0;
00674
00675 if (dstbuf && dstbuf_size > 0) {
00676 buf = dstbuf;
00677 if (len >= dstbuf_size)
00678 len = dstbuf_size - 1;
00679 url_print(url, dstbuf, len, &index, &offset);
00680 buf[len] = 0;
00681
00682 if (length)
00683 *length = len;
00684 }
00685 return buf;
00686 }
00687
00688
00689
00690
00691 const char *
00692 url_scheme_get(URLImpl * url, int *length)
00693 {
00694 const char *str;
00695
00696 if (url->m_scheme_wks_idx >= 0) {
00697 str = hdrtoken_index_to_wks(url->m_scheme_wks_idx);
00698 *length = hdrtoken_index_to_length(url->m_scheme_wks_idx);
00699 } else {
00700 str = url->m_ptr_scheme;
00701 *length = url->m_len_scheme;
00702 }
00703 return str;
00704 }
00705
00706
00707
00708
00709 const char *
00710 url_user_get(URLImpl * url, int *length)
00711 {
00712 *length = url->m_len_user;
00713 return url->m_ptr_user;
00714 }
00715
00716
00717
00718
00719 const char *
00720 url_password_get(URLImpl * url, int *length)
00721 {
00722 *length = url->m_len_password;
00723 return url->m_ptr_password;
00724 }
00725
00726
00727
00728
00729 const char *
00730 url_host_get(URLImpl * url, int *length)
00731 {
00732 *length = url->m_len_host;
00733 return url->m_ptr_host;
00734 }
00735
00736
00737
00738
00739 int
00740 url_port_get(URLImpl * url)
00741 {
00742 return url->m_port;
00743 }
00744
00745
00746
00747
00748 const char *
00749 url_path_get(URLImpl * url, int *length)
00750 {
00751 *length = url->m_len_path;
00752 return url->m_ptr_path;
00753 }
00754
00755
00756
00757
00758 const char *
00759 url_params_get(URLImpl * url, int *length)
00760 {
00761 *length = url->m_len_params;
00762 return url->m_ptr_params;
00763 }
00764
00765
00766
00767
00768 const char *
00769 url_query_get(URLImpl * url, int *length)
00770 {
00771 *length = url->m_len_query;
00772 return url->m_ptr_query;
00773 }
00774
00775
00776
00777
00778 const char *
00779 url_fragment_get(URLImpl * url, int *length)
00780 {
00781 *length = url->m_len_fragment;
00782 return url->m_ptr_fragment;
00783 }
00784
00785
00786
00787
00788 int
00789 url_type_get(URLImpl * url)
00790 {
00791 return url->m_type_code;
00792 }
00793
00794
00795
00796
00797
00798
00799
00800
00801
00802
00803
00804
00805
00806 int
00807 url_length_get(URLImpl * url)
00808 {
00809 int length = 0;
00810
00811 if (url->m_ptr_scheme) {
00812 if ((url->m_scheme_wks_idx >= 0) && (hdrtoken_index_to_wks(url->m_scheme_wks_idx) == URL_SCHEME_FILE))
00813 length += url->m_len_scheme + 1;
00814 else
00815 length += url->m_len_scheme + 3;
00816 }
00817
00818 if (url->m_ptr_user) {
00819 length += url->m_len_user + 1;
00820 if (url->m_ptr_password)
00821 length += url->m_len_password + 1;
00822 }
00823
00824 if (url->m_ptr_host) {
00825 length += url->m_len_host;
00826 if (url->m_ptr_port && url->m_port)
00827 length += url->m_len_port + 1;
00828 }
00829
00830 if (url->m_ptr_path) {
00831 length += url->m_len_path + 1;
00832 }
00833 else {
00834 length += 1;
00835 }
00836
00837 if (url->m_ptr_params && url->m_len_params > 0) {
00838 length += url->m_len_params + 1;
00839 }
00840
00841 if (url->m_ptr_query && url->m_len_query > 0) {
00842 length += url->m_len_query + 1;
00843 }
00844
00845 if (url->m_ptr_fragment && url->m_len_fragment > 0) {
00846 length += url->m_len_fragment + 1;
00847 }
00848
00849 return length;
00850 }
00851
00852
00853
00854
00855 char *
00856 url_to_string(URLImpl * url, Arena * arena, int *length)
00857 {
00858 int len;
00859 int idx;
00860 char *str;
00861
00862 len = url_length_get(url) + 1;
00863
00864 if (length)
00865 *length = len;
00866
00867 if (arena)
00868 str = arena->str_alloc(len);
00869 else
00870 str = (char *)ats_malloc(len + 1);
00871
00872 idx = 0;
00873
00874 if (url->m_ptr_scheme) {
00875 memcpy(&str[idx], url->m_ptr_scheme, url->m_len_scheme);
00876 idx += url->m_len_scheme;
00877 if ((url->m_scheme_wks_idx >= 0) && (hdrtoken_index_to_wks(url->m_scheme_wks_idx) == URL_SCHEME_FILE)) {
00878 str[idx++] = ':';
00879 } else {
00880 str[idx++] = ':';
00881 str[idx++] = '/';
00882 str[idx++] = '/';
00883 }
00884 }
00885
00886 if (url->m_ptr_user) {
00887 memcpy(&str[idx], url->m_ptr_user, url->m_len_user);
00888 idx += url->m_len_user;
00889 if (url->m_ptr_password) {
00890 str[idx++] = ':';
00891 memcpy(&str[idx], url->m_ptr_password, url->m_len_password);
00892 idx += url->m_len_password;
00893 }
00894 str[idx++] = '@';
00895 }
00896
00897 if (url->m_ptr_host) {
00898 memcpy(&str[idx], url->m_ptr_host, url->m_len_host);
00899 idx += url->m_len_host;
00900 if (url->m_ptr_port != 0) {
00901 str[idx++] = ':';
00902 memcpy(&str[idx], url->m_ptr_port, url->m_len_port);
00903 idx += url->m_len_port;
00904 }
00905 }
00906
00907 memcpy(&str[idx], url->m_ptr_path, url->m_len_path);
00908 idx += url->m_len_path;
00909
00910 if (url->m_ptr_params && url->m_len_params > 0) {
00911 str[idx++] = ';';
00912 memcpy(&str[idx], url->m_ptr_params, url->m_len_params);
00913 idx += url->m_len_params;
00914 }
00915
00916 if (url->m_ptr_query && url->m_len_query > 0) {
00917 str[idx++] = '?';
00918 memcpy(&str[idx], url->m_ptr_query, url->m_len_query);
00919 idx += url->m_len_query;
00920 }
00921
00922 if (url->m_ptr_fragment && url->m_len_fragment > 0) {
00923 str[idx++] = '#';
00924 memcpy(&str[idx], url->m_ptr_fragment, url->m_len_fragment);
00925 idx += url->m_len_fragment;
00926 }
00927
00928 str[idx++] = '\0';
00929
00930 ink_release_assert(idx == len);
00931
00932 return str;
00933 }
00934
00935
00936
00937
00938
00939
00940
00941
00942
00943
00944 void
00945 unescape_str(char *&buf, char *buf_e, const char *&str, const char *str_e, int &state)
00946 {
00947 int copy_len;
00948 char *first_pct;
00949 int buf_len = (int) (buf_e - buf);
00950 int str_len = (int) (str_e - str);
00951 int min_len = (int) (str_len < buf_len ? str_len : buf_len);
00952
00953 first_pct = ink_memcpy_until_char(buf, (char *) str, min_len, '%');
00954 copy_len = (int) (first_pct - str);
00955 str += copy_len;
00956 buf += copy_len;
00957 if (copy_len == min_len)
00958 return;
00959
00960 while (str < str_e && (buf != buf_e)) {
00961 switch (state) {
00962 case 0:
00963 if (str[0] == '%') {
00964 str += 1;
00965 state = 1;
00966 } else {
00967 *buf++ = str[0];
00968 str += 1;
00969 }
00970 break;
00971 case 1:
00972 if (ParseRules::is_hex(str[0])) {
00973 str += 1;
00974 state = 2;
00975 } else {
00976 *buf++ = str[-1];
00977 state = 0;
00978 }
00979 break;
00980 case 2:
00981 if (ParseRules::is_hex(str[0])) {
00982 int tmp;
00983
00984 if (ParseRules::is_alpha(str[-1])) {
00985 tmp = (ParseRules::ink_toupper(str[-1]) - 'A' + 10) * 16;
00986 } else {
00987 tmp = (str[-1] - '0') * 16;
00988 }
00989 if (ParseRules::is_alpha(str[0])) {
00990 tmp += (ParseRules::ink_toupper(str[0]) - 'A' + 10);
00991 } else {
00992 tmp += str[0] - '0';
00993 }
00994
00995 *buf++ = tmp;
00996 str += 1;
00997 state = 0;
00998 } else {
00999 *buf++ = str[-2];
01000 state = 3;
01001 }
01002 break;
01003 case 3:
01004 *buf++ = str[-1];
01005 state = 0;
01006 break;
01007 }
01008 }
01009 }
01010
01011
01012
01013
01014 void
01015 unescape_str_tolower(char *&buf, char *end, const char *&str, const char *str_e, int &state)
01016 {
01017 while (str < str_e && (buf != end)) {
01018 switch (state) {
01019 case 0:
01020 if (str[0] == '%') {
01021 str += 1;
01022 state = 1;
01023 } else {
01024 *buf++ = ParseRules::ink_tolower(str[0]);
01025 str += 1;
01026 }
01027 break;
01028 case 1:
01029 if (ParseRules::is_hex(str[0])) {
01030 str += 1;
01031 state = 2;
01032 } else {
01033 *buf++ = ParseRules::ink_tolower(str[-1]);
01034 state = 0;
01035 }
01036 break;
01037 case 2:
01038 if (ParseRules::is_hex(str[0])) {
01039 int tmp;
01040
01041 if (ParseRules::is_alpha(str[-1])) {
01042 tmp = (ParseRules::ink_toupper(str[-1]) - 'A' + 10) * 16;
01043 } else {
01044 tmp = (str[-1] - '0') * 16;
01045 }
01046 if (ParseRules::is_alpha(str[0])) {
01047 tmp += (ParseRules::ink_toupper(str[0]) - 'A' + 10);
01048 } else {
01049 tmp += str[0] - '0';
01050 }
01051
01052 *buf++ = tmp;
01053 str += 1;
01054 state = 0;
01055 } else {
01056 *buf++ = ParseRules::ink_tolower(str[-2]);
01057 state = 3;
01058 }
01059 break;
01060 case 3:
01061 *buf++ = ParseRules::ink_tolower(str[-1]);
01062 state = 0;
01063 break;
01064 }
01065 }
01066 }
01067
01068
01069
01070
01071 char *
01072 url_unescapify(Arena * arena, const char *str, int length)
01073 {
01074 char *buffer;
01075 char *t, *e;
01076 int s;
01077
01078 if (length == -1)
01079 length = (int) strlen(str);
01080
01081 buffer = arena->str_alloc(length);
01082 t = buffer;
01083 e = buffer + length;
01084 s = 0;
01085
01086 unescape_str(t, e, str, str + length, s);
01087 *t = '\0';
01088
01089 return buffer;
01090 }
01091
01092
01093
01094
01095
01096
01097
01098
01099
01100
// Advance the parser cursor `cur` by one character and jump to `label` once
// the cursor has reached `end`. Expects locals named `cur` and `end` at the
// point of use.
#define GETNEXT(label) { \
    if (++cur >= end) {  \
      goto label;        \
    }                    \
  }
01107
01108 MIMEParseResult
01109 url_parse_scheme(HdrHeap * heap, URLImpl * url, const char **start, const char *end, bool copy_strings_p)
01110 {
01111 const char *cur = *start;
01112 const char *scheme_wks;
01113 const char *scheme_start = NULL;
01114 const char *scheme_end = NULL;
01115 int scheme_wks_idx;
01116
01117 while(' ' == *cur && ++cur < end)
01118 ;
01119 if (cur < end) {
01120 scheme_start = scheme_end = cur;
01121
01122 if ((end - cur >= 5) && (((cur[0] ^ 'h') | (cur[1] ^ 't') | (cur[2] ^ 't') | (cur[3] ^ 'p') | (cur[4] ^ ':')) == 0)) {
01123 scheme_end = cur + 4;
01124 url_scheme_set(heap, url, scheme_start, URL_WKSIDX_HTTP, 4, copy_strings_p);
01125 } else if ('/' != *cur) {
01126
01127
01128
01129 while (':' != *cur && ++cur < end)
01130 ;
01131 if (cur < end) {
01132 scheme_wks_idx = hdrtoken_tokenize(scheme_start, cur - scheme_start, &scheme_wks);
01133
01134
01135
01136
01137 if ((scheme_wks_idx > 0 && hdrtoken_wks_to_token_type(scheme_wks) == HDRTOKEN_TYPE_SCHEME) ||
01138 (cur >= end-1 || cur[1] == '/'))
01139 {
01140 scheme_end = cur;
01141 url_scheme_set(heap, url, scheme_start, scheme_wks_idx, scheme_end - scheme_start, copy_strings_p);
01142 }
01143 }
01144 }
01145 *start = scheme_end;
01146 return PARSE_CONT;
01147 }
01148 return PARSE_ERROR;
01149 }
01150
01151 MIMEParseResult
01152 url_parse(HdrHeap * heap, URLImpl * url, const char **start, const char *end, bool copy_strings_p)
01153 {
01154 MIMEParseResult zret = url_parse_scheme(heap, url, start, end, copy_strings_p);
01155 return PARSE_CONT == zret ? url_parse_http(heap, url, start, end, copy_strings_p) : zret;
01156 }
01157
01158 MIMEParseResult
01159 url_parse_no_path_component_breakdown(HdrHeap * heap, URLImpl * url, const char **start, const char *end, bool copy_strings_p)
01160 {
01161 MIMEParseResult zret = url_parse_scheme(heap, url, start, end, copy_strings_p);
01162 return PARSE_CONT == zret ? url_parse_http_no_path_component_breakdown(heap, url, start, end, copy_strings_p) : zret;
01163 }
01164
01165
01166
01167
01168
01169
01170
01171
01172
01173
01174
01175
01176
01177
01178
01179
01180
01181 MIMEParseResult
01182 url_parse_internet(HdrHeap* heap, URLImpl* url,
01183 char const ** start, char const *end,
01184 bool copy_strings_p)
01185 {
01186 char const* cur = *start;
01187 char const* base;
01188 char const* bracket = 0;
01189 ts::ConstBuffer user, passw, host, port;
01190 static size_t const MAX_COLON = 8;
01191 size_t n_colon = 0;
01192 char const* last_colon = 0;
01193
01194
01195 if (end - cur > 3 &&
01196 (((':' ^ *cur) | ('/' ^ cur[1]) | ('/' ^ cur[2])) == 0)) {
01197 cur += 3;
01198 } else if (':' == *cur && (++cur >= end ||
01199 ('/' == *cur && (++cur >= end ||
01200 ('/' == *cur && ++cur >= end))))) {
01201 return PARSE_ERROR;
01202 }
01203 base = cur;
01204
01205 while (cur < end) {
01206
01207
01208 switch (*cur) {
01209 case ']' :
01210 if (0 == bracket || n_colon >= MAX_COLON)
01211 return PARSE_ERROR;
01212 ++cur;
01213
01214
01215
01216
01217
01218
01219 host.set(bracket, cur);
01220
01221
01222 if (cur >= end || '/' == *cur) {
01223 last_colon = 0;
01224 break;
01225 } else if (':' != *cur) {
01226 return PARSE_ERROR;
01227 }
01228
01229
01230
01231 n_colon = MAX_COLON - 1;
01232
01233 case ':' :
01234 if (++n_colon > MAX_COLON)
01235 return PARSE_ERROR;
01236 last_colon = cur;
01237 ++cur;
01238 break;
01239 case '@' :
01240 if (user || n_colon > 1)
01241 return PARSE_ERROR;
01242 if (n_colon) {
01243 user.set(base, last_colon);
01244 passw.set(last_colon+1, cur);
01245 n_colon= 0;
01246 last_colon = 0;
01247 } else {
01248 user.set(base, cur);
01249 }
01250 ++cur;
01251 base = cur;
01252 break;
01253 case '[' :
01254 if (bracket || base != cur)
01255 return PARSE_ERROR;
01256 bracket = cur;
01257 ++cur;
01258 break;
01259 case '/' :
01260 end = cur;
01261 break;
01262 default:
01263 ++cur;
01264 break;
01265 };
01266 }
01267
01268
01269
01270 if (user) {
01271 url_user_set(heap, url, user._ptr, user._size, copy_strings_p);
01272 if (passw)
01273 url_password_set(heap, url, passw._ptr, passw._size, copy_strings_p);
01274 }
01275
01276
01277 if (!host) {
01278 if (1 == n_colon || MAX_COLON == n_colon) {
01279 host.set(base, last_colon);
01280 } else {
01281 host.set(base, cur);
01282 last_colon = 0;
01283 }
01284 }
01285 if (host._size)
01286 url_host_set(heap, url, host._ptr, host._size, copy_strings_p);
01287
01288 if (last_colon) {
01289 ink_assert(n_colon);
01290 port.set(last_colon+1, cur);
01291 if (!port._size)
01292 return PARSE_ERROR;
01293 url_port_set(heap, url, port._ptr, port._size, copy_strings_p);
01294 }
01295 if ('/' == *cur) ++cur;
01296 *start = cur;
01297 return PARSE_DONE;
01298 }
01299
01300
01301
01302
01303
01304 MIMEParseResult
01305 url_parse_http(HdrHeap * heap, URLImpl * url, const char **start, const char *end, bool copy_strings)
01306 {
01307 MIMEParseResult err;
01308 const char *cur;
01309 const char *path_start = NULL;
01310 const char *path_end = NULL;
01311 const char *params_start = NULL;
01312 const char *params_end = NULL;
01313 const char *query_start = NULL;
01314 const char *query_end = NULL;
01315 const char *fragment_start = NULL;
01316 const char *fragment_end = NULL;
01317 char mask;
01318
01319 err = url_parse_internet(heap, url, start, end, copy_strings);
01320 if (err < 0)
01321 return err;
01322
01323 cur = *start;
01324 if (*start == end)
01325 goto done;
01326
01327 path_start = cur;
01328 mask = ';' & '?' & '#';
01329 parse_path2:
01330 if ((*cur & mask) == mask) {
01331 if (*cur == ';') {
01332 path_end = cur;
01333 goto parse_params1;
01334 }
01335 if (*cur == '?') {
01336 path_end = cur;
01337 goto parse_query1;
01338 }
01339 if (*cur == '#') {
01340 path_end = cur;
01341 goto parse_fragment1;
01342 }
01343 } else {
01344 ink_assert((*cur != ';') && (*cur != '?') && (*cur != '#'));
01345 }
01346 GETNEXT(done);
01347 goto parse_path2;
01348
01349 parse_params1:
01350 params_start = cur + 1;
01351 GETNEXT(done);
01352 parse_params2:
01353 if (*cur == '?') {
01354 params_end = cur;
01355 goto parse_query1;
01356 }
01357 if (*cur == '#') {
01358 params_end = cur;
01359 goto parse_fragment1;
01360 }
01361 GETNEXT(done);
01362 goto parse_params2;
01363
01364 parse_query1:
01365 query_start = cur + 1;
01366 GETNEXT(done);
01367 parse_query2:
01368 if (*cur == '#') {
01369 query_end = cur;
01370 goto parse_fragment1;
01371 }
01372 GETNEXT(done);
01373 goto parse_query2;
01374
01375 parse_fragment1:
01376 fragment_start = cur + 1;
01377 GETNEXT(done);
01378 fragment_end = end;
01379
01380 done:
01381 if (path_start) {
01382 if (!path_end)
01383 path_end = cur;
01384 url_path_set(heap, url, path_start, path_end - path_start, copy_strings);
01385 }
01386 if (params_start) {
01387 if (!params_end)
01388 params_end = cur;
01389 url_params_set(heap, url, params_start, params_end - params_start, copy_strings);
01390 }
01391 if (query_start) {
01392 if (!query_end)
01393 query_end = cur;
01394 url_query_set(heap, url, query_start, query_end - query_start, copy_strings);
01395 }
01396 if (fragment_start) {
01397 if (!fragment_end)
01398 fragment_end = cur;
01399 url_fragment_set(heap, url, fragment_start, fragment_end - fragment_start, copy_strings);
01400 }
01401
01402 *start = cur;
01403 return PARSE_DONE;
01404 }
01405
01406 MIMEParseResult
01407 url_parse_http_no_path_component_breakdown(HdrHeap * heap,
01408 URLImpl * url, const char **start, const char *end, bool copy_strings)
01409 {
01410 const char *cur = *start;
01411 char const* host_end;
01412
01413
01414 if (end - cur > 3 &&
01415 (((':' ^ *cur) | ('/' ^ cur[1]) | ('/' ^ cur[2])) == 0)) {
01416 cur += 3;
01417 } else if (':' == *cur && (++cur >= end ||
01418 ('/' == *cur && (++cur >= end ||
01419 ('/' == *cur && ++cur >= end))))) {
01420 return PARSE_ERROR;
01421 }
01422
01423
01424 char const* base = cur;
01425 cur = static_cast<char const*>(memchr(cur, '/', end - cur));
01426 if (cur) {
01427 host_end = cur;
01428 ++cur;
01429 } else {
01430 host_end = cur = end;
01431 }
01432
01433
01434 if (base != host_end) {
01435 char const* port = 0;
01436 int port_len = 0;
01437
01438
01439
01440 port = host_end - 1;
01441 char const* port_limit = host_end - 6;
01442 if (port_limit < base) port_limit = base;
01443 while (port >= port_limit && isdigit(*port))
01444 --port;
01445
01446
01447 if (port >= base && ':' == *port) {
01448 port_len = host_end - port - 1;
01449 host_end = port;
01450 ++port;
01451 url_port_set(heap, url, port, port_len, copy_strings);
01452 }
01453
01454 url_host_set(heap, url, base, host_end - base, copy_strings);
01455 }
01456
01457
01458 if (cur < end) {
01459 url_path_set(heap, url, cur, end - cur, copy_strings);
01460 cur = end;
01461 }
01462 *start = cur;
01463 return PARSE_DONE;
01464 }
01465
01466
01467
01468
01469
01470
01471
01472
01473
01474
01475 int
01476 url_print(URLImpl * url, char *buf_start, int buf_length, int *buf_index_inout, int *buf_chars_to_skip_inout)
01477 {
01478 #define TRY(x) if (!x) return 0
01479
01480 if (url->m_ptr_scheme) {
01481 TRY(mime_mem_print(url->m_ptr_scheme, url->m_len_scheme,
01482 buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01483
01484
01485
01486
01487 TRY(mime_mem_print("://", 3, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01488
01489 }
01490
01491 if (url->m_ptr_user) {
01492 TRY(mime_mem_print(url->m_ptr_user, url->m_len_user,
01493 buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01494 if (url->m_ptr_password) {
01495 TRY(mime_mem_print(":", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01496 TRY(mime_mem_print(url->m_ptr_password, url->m_len_password,
01497 buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01498 }
01499 TRY(mime_mem_print("@", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01500 }
01501
01502 if (url->m_ptr_host) {
01503
01504
01505 int n = url->m_len_host;
01506 bool bracket_p = '[' != *url->m_ptr_host && (0 != memchr(url->m_ptr_host, ':', n > 5 ? 5 : n));
01507 if (bracket_p)
01508 TRY(mime_mem_print("[", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01509 TRY(mime_mem_print(url->m_ptr_host, url->m_len_host,
01510 buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01511 if (bracket_p)
01512 TRY(mime_mem_print("]", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01513 if (url->m_ptr_port && url->m_port) {
01514 TRY(mime_mem_print(":", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01515 TRY(mime_mem_print(url->m_ptr_port, url->m_len_port,
01516 buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01517 }
01518 }
01519
01520 TRY(mime_mem_print("/", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01521
01522 if (url->m_ptr_path) {
01523 TRY(mime_mem_print(url->m_ptr_path, url->m_len_path, buf_start,
01524 buf_length, buf_index_inout, buf_chars_to_skip_inout));
01525 }
01526
01527 if (url->m_ptr_params && url->m_len_params > 0) {
01528 TRY(mime_mem_print(";", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01529 TRY(mime_mem_print(url->m_ptr_params, url->m_len_params,
01530 buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01531 }
01532
01533 if (url->m_ptr_query && url->m_len_query > 0) {
01534 TRY(mime_mem_print("?", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01535 TRY(mime_mem_print(url->m_ptr_query, url->m_len_query,
01536 buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01537 }
01538
01539 if (url->m_ptr_fragment && url->m_len_fragment > 0) {
01540 TRY(mime_mem_print("#", 1, buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01541 TRY(mime_mem_print(url->m_ptr_fragment, url->m_len_fragment,
01542 buf_start, buf_length, buf_index_inout, buf_chars_to_skip_inout));
01543 }
01544
01545 return 1;
01546
01547 #undef TRY
01548 }
01549
01550 void
01551 url_describe(HdrHeapObjImpl * raw, bool )
01552 {
01553 URLImpl *obj = (URLImpl *) raw;
01554
01555 Debug("http", "[URLTYPE: %d, SWKSIDX: %d,\n", obj->m_url_type, obj->m_scheme_wks_idx);
01556 Debug("http", "\tSCHEME: \"%.*s\", SCHEME_LEN: %d,\n",
01557 obj->m_len_scheme, (obj->m_ptr_scheme ? obj->m_ptr_scheme : "NULL"), obj->m_len_scheme);
01558 Debug("http", "\tUSER: \"%.*s\", USER_LEN: %d,\n",
01559 obj->m_len_user, (obj->m_ptr_user ? obj->m_ptr_user : "NULL"), obj->m_len_user);
01560 Debug("http", "\tPASSWORD: \"%.*s\", PASSWORD_LEN: %d,\n",
01561 obj->m_len_password, (obj->m_ptr_password ? obj->m_ptr_password : "NULL"), obj->m_len_password);
01562 Debug("http", "\tHOST: \"%.*s\", HOST_LEN: %d,\n",
01563 obj->m_len_host, (obj->m_ptr_host ? obj->m_ptr_host : "NULL"), obj->m_len_host);
01564 Debug("http", "\tPORT: \"%.*s\", PORT_LEN: %d, PORT_NUM: %d\n",
01565 obj->m_len_port, (obj->m_ptr_port ? obj->m_ptr_port : "NULL"), obj->m_len_port, obj->m_port);
01566 Debug("http", "\tPATH: \"%.*s\", PATH_LEN: %d,\n",
01567 obj->m_len_path, (obj->m_ptr_path ? obj->m_ptr_path : "NULL"), obj->m_len_path);
01568 Debug("http", "\tPARAMS: \"%.*s\", PARAMS_LEN: %d,\n",
01569 obj->m_len_params, (obj->m_ptr_params ? obj->m_ptr_params : "NULL"), obj->m_len_params);
01570 Debug("http", "\tQUERY: \"%.*s\", QUERY_LEN: %d,\n",
01571 obj->m_len_query, (obj->m_ptr_query ? obj->m_ptr_query : "NULL"), obj->m_len_query);
01572 Debug("http", "\tFRAGMENT: \"%.*s\", FRAGMENT_LEN: %d]\n",
01573 obj->m_len_fragment, (obj->m_ptr_fragment ? obj->m_ptr_fragment : "NULL"), obj->m_len_fragment);
01574 }
01575
01576
01577
01578
01579
01580
01581
01582
01583
01584
01585
01586 static inline void
01587 memcpy_tolower(char *d, const char *s, int n)
01588 {
01589 while (n--) {
01590 *d = ParseRules::ink_tolower(*s);
01591 s++;
01592 d++;
01593 }
01594 }
01595
01596
01597 #define BUFSIZE 512
01598
01599
01600
01601
01602 static inline void
01603 url_MD5_get_fast(URLImpl * url, CryptoContext& ctx, CryptoHash* hash)
01604 {
01605 char buffer[BUFSIZE];
01606 char *p;
01607
01608 p = buffer;
01609 memcpy_tolower(p, url->m_ptr_scheme, url->m_len_scheme);
01610 p += url->m_len_scheme;
01611 *p++ = ':';
01612 *p++ = '/';
01613 *p++ = '/';
01614
01615 *p++ = ':';
01616
01617 *p++ = '@';
01618 memcpy_tolower(p, url->m_ptr_host, url->m_len_host);
01619 p += url->m_len_host;
01620 *p++ = '/';
01621 memcpy(p, url->m_ptr_path, url->m_len_path);
01622 p += url->m_len_path;
01623 *p++ = ';';
01624
01625 *p++ = '?';
01626
01627
01628 ink_assert(sizeof(url->m_port) == 2);
01629 uint16_t port = (uint16_t) url_canonicalize_port(url->m_url_type, url->m_port);
01630 *p++ = ((char *) &port)[0];
01631 *p++ = ((char *) &port)[1];
01632
01633 ctx.update(buffer, p - buffer);
01634 ctx.finalize(hash);
01635 }
01636
01637
01638 static inline void
01639 url_MD5_get_general(URLImpl * url, CryptoContext& ctx, CryptoHash& hash)
01640 {
01641 char buffer[BUFSIZE];
01642 char *p, *e;
01643 const char *strs[13], *ends[13];
01644 const char *t;
01645 in_port_t port;
01646 int i, s;
01647
01648 strs[0] = url->m_ptr_scheme;
01649 strs[1] = "://";
01650 strs[2] = url->m_ptr_user;
01651 strs[3] = ":";
01652 strs[4] = url->m_ptr_password;
01653 strs[5] = "@";
01654 strs[6] = url->m_ptr_host;
01655 strs[7] = "/";
01656 strs[8] = url->m_ptr_path;
01657
01658 ends[0] = strs[0] + url->m_len_scheme;
01659 ends[1] = strs[1] + 3;
01660 ends[2] = strs[2] + url->m_len_user;
01661 ends[3] = strs[3] + 1;
01662 ends[4] = strs[4] + url->m_len_password;
01663 ends[5] = strs[5] + 1;
01664 ends[6] = strs[6] + url->m_len_host;
01665 ends[7] = strs[7] + 1;
01666 ends[8] = strs[8] + url->m_len_path;
01667
01668 strs[9] = ";";
01669 strs[10] = url->m_ptr_params;
01670 strs[11] = "?";
01671 strs[12] = url->m_ptr_query;
01672 ends[9] = strs[9] + 1;
01673 ends[10] = strs[10] + url->m_len_params;
01674 ends[11] = strs[11] + 1;
01675 ends[12] = strs[12] + url->m_len_query;
01676
01677 p = buffer;
01678 e = buffer + BUFSIZE;
01679
01680 for (i = 0; i < 13; i++) {
01681 if (strs[i]) {
01682 t = strs[i];
01683 s = 0;
01684
01685 while (t < ends[i]) {
01686 if ((i == 0) || (i == 6)) {
01687 unescape_str_tolower(p, e, t, ends[i], s);
01688 } else {
01689 unescape_str(p, e, t, ends[i], s);
01690 }
01691
01692 if (p == e) {
01693 ctx.update(buffer, BUFSIZE);
01694 p = buffer;
01695 }
01696 }
01697 }
01698 }
01699
01700 if (p != buffer) ctx.update(buffer, p-buffer);
01701
01702 port = url_canonicalize_port(url->m_url_type, url->m_port);
01703
01704 ctx.update(&port, sizeof(port));
01705 ctx.finalize(hash);
01706 }
01707
01708 void
01709 url_MD5_get(URLImpl * url, CryptoHash* hash)
01710 {
01711 URLHashContext ctx;
01712 if ((url_hash_method != 0) &&
01713 (url->m_url_type == URL_TYPE_HTTP) &&
01714 ((url->m_len_user + url->m_len_password + url->m_len_params + url->m_len_query) == 0) &&
01715 (3 + 1 + 1 + 1 + 1 + 1 + 2 +
01716 url->m_len_scheme +
01717 url->m_len_host +
01718 url->m_len_path < BUFSIZE) &&
01719 (memchr(url->m_ptr_host, '%', url->m_len_host) == NULL) &&
01720 (memchr(url->m_ptr_path, '%', url->m_len_path) == NULL)) {
01721 url_MD5_get_fast(url, ctx, hash);
01722 #ifdef DEBUG
01723 CryptoHash md5_general;
01724 url_MD5_get_general(url, ctx, md5_general);
01725 ink_assert(*hash == md5_general);
01726 #endif
01727 } else {
01728 url_MD5_get_general(url, ctx, *hash);
01729 }
01730 }
01731
01732 #undef BUFSIZE
01733
01734
01735
01736
01737 void
01738 url_host_MD5_get(URLImpl * url, INK_MD5 * md5)
01739 {
01740 MD5Context ctx;
01741
01742 if (url->m_ptr_scheme) {
01743 ctx.update(url->m_ptr_scheme, url->m_len_scheme);
01744 }
01745
01746 ctx.update("://", 3);
01747
01748 if (url->m_ptr_host) {
01749 ctx.update(url->m_ptr_host, url->m_len_host);
01750 }
01751
01752 ctx.update(":", 1);
01753
01754
01755
01756 int port = url_canonicalize_port(url->m_url_type, url->m_port);
01757 ctx.update(&port, sizeof(port));
01758 ctx.finalize(*md5);
01759 }