00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #include "libts.h"
00025
00026 #include "Main.h"
00027 #include "Update.h"
00028 #include "ProxyConfig.h"
00029 #include "StatSystem.h"
00030 #include "HttpUpdateSM.h"
00031 #include "HttpDebugNames.h"
00032 #include "URL.h"
00033 #include "HdrUtils.h"
00034 #include <records/I_RecHttp.h>
00035 #include "I_Layout.h"
00036
00037 RecRawStatBlock *update_rsb;
00038
00039 #define UpdateEstablishStaticConfigInteger(_ix,_n) \
00040 REC_EstablishStaticConfigInteger(_ix,_n); \
00041
00042 #define UPDATE_INCREMENT_DYN_STAT(x) \
00043 RecIncrRawStat(update_rsb, mutex->thread_holding, (int) x, 1);
00044 #define UPDATE_DECREMENT_DYN_STAT(x) \
00045 RecIncrRawStat(update_rsb, mutex->thread_holding, (int) x, -1);
00046 #define UPDATE_READ_DYN_STAT(x, C, S) \
00047 RecGetRawStatCount(update_rsb, (int) x, &C); \
00048 RecGetRawStatSum(update_rsb, (int) x, &S);
00049
00050 #define UPDATE_CLEAR_DYN_STAT(x) \
00051 do { \
00052 RecSetRawStatSum(update_rsb, x, 0); \
00053 RecSetRawStatCount(update_rsb, x, 0); \
00054 } while (0);
00055
00056 #define UPDATE_ConfigReadInteger REC_ConfigReadInteger
00057 #define UPDATE_ConfigReadString REC_ConfigReadString
00058 #define UPDATE_RegisterConfigUpdateFunc REC_RegisterConfigUpdateFunc
00059
00060
00061
00062
00063
// Fixed text used by UpdateEntry::BuildHttpRequest() to assemble a request.
static const char *const GET_METHOD = "GET ";
static const char *const HTTP_VERSION = " HTTP/1.0";
static const char *const REQUEST_TERMINATOR = "\r\n\r\n";
static const char *const TERMINATOR = "\r\n";

// HTML comment delimiters (not referenced in the visible part of this file).
static const char *const HTML_COMMENT_TAG = "!--";
static const char *const HTML_COMMENT_END = "-->";

// Size of the line / request buffers used in this file.
static const int MAX_LINE_LENGTH = (32 * 1024);

// Cached strlen() of the constants above; filled in by UpdateManager::start().
static int len_GET_METHOD = 0;
static int len_HTTP_VERSION = 0;
static int len_REQUEST_TERMINATOR = 0;
static int len_TERMINATOR = 0;
00078
00079 struct html_tag update_allowable_html_tags[] = {
00080 {"a", "href"},
00081 {"img", "src"},
00082 {"img", "href"},
00083 {"body", "background"},
00084 {"frame", "src"},
00085 {"iframe", "src"},
00086 {"fig", "src"},
00087 {"overlay", "src"},
00088 {"applet", "code"},
00089 {"script", "src"},
00090 {"embed", "src"},
00091 {"bgsound", "src"},
00092 {"area", "href"},
00093 {"base", "href"},
00094 {"meta", "content"},
00095 {NULL, NULL}
00096 };
00097
// One URL scheme prefix (including the trailing ':') and its cached length.
struct schemes_descriptor
{
  const char *tag;  // scheme text, e.g. "http:"; NULL marks the end of a table
  int tag_len;      // strlen(tag); filled in by the init_*_schemes() functions
};
00103
00104 struct schemes_descriptor proto_schemes[] = {
00105 {"cid:", 0},
00106 {"clsid:", 0},
00107 {"file:", 0},
00108 {"finger:", 0},
00109 {"ftp:", 0},
00110 {"gopher:", 0},
00111 {"hdl:", 0},
00112 {"http:", 0},
00113 {"https:", 0},
00114 {"ilu:", 0},
00115 {"ior:", 0},
00116 {"irc:", 0},
00117 {"java:", 0},
00118 {"javascript:", 0},
00119 {"lifn:", 0},
00120 {"mailto:", 0},
00121 {"mid:", 0},
00122 {"news:", 0},
00123 {"path:", 0},
00124 {"prospero:", 0},
00125 {"rlogin:", 0},
00126 {"service:", 0},
00127 {"shttp:", 0},
00128 {"snews:", 0},
00129 {"stanf:", 0},
00130 {"telnet:", 0},
00131 {"tn3270:", 0},
00132 {"wais:", 0},
00133 {"whois++:", 0},
00134 {NULL, 0}
00135 };
00136
00137 struct schemes_descriptor supported_proto_schemes[] = {
00138 {"http:",},
00139 {NULL, 0}
00140 };
00141
// Counter that UpdateEntry::Init() atomically increments to assign entry ids.
static int global_id = 1;
00143
00144 void
00145 init_proto_schemes()
00146 {
00147 int n;
00148 for (n = 0; proto_schemes[n].tag; ++n) {
00149 proto_schemes[n].tag_len = strlen(proto_schemes[n].tag);
00150 }
00151 }
00152
00153 void
00154 init_supported_proto_schemes()
00155 {
00156 int n;
00157 for (n = 0; supported_proto_schemes[n].tag; ++n) {
00158 supported_proto_schemes[n].tag_len = strlen(supported_proto_schemes[n].tag);
00159 }
00160 }
00161
00162
00163
00164
00165
00166
00167 UpdateConfigParams::UpdateConfigParams():
00168 _enabled(0), _immediate_update(0), _retry_count(0),
00169 _retry_interval(0), _concurrent_updates(0), _max_update_state_machines(0), _memory_use_in_mb(0)
00170 {
00171 }
00172
00173 UpdateConfigParams::UpdateConfigParams(UpdateConfigParams & p)
00174 {
00175 _enabled = p._enabled;
00176 _immediate_update = p._immediate_update;
00177 _retry_count = p._retry_count;
00178 _retry_interval = p._retry_interval;
00179 _concurrent_updates = p._concurrent_updates;
00180 _max_update_state_machines = p._max_update_state_machines;
00181 _memory_use_in_mb = p._memory_use_in_mb;
00182 }
00183
00184 UpdateConfigParams::~UpdateConfigParams()
00185 {
00186 }
00187
00188 UpdateConfigParams & UpdateConfigParams::operator=(UpdateConfigParams & p)
00189 {
00190 _enabled = p._enabled;
00191 _immediate_update = p._immediate_update;
00192 _retry_count = p._retry_count;
00193 _retry_interval = p._retry_interval;
00194 _concurrent_updates = p._concurrent_updates;
00195 _max_update_state_machines = p._max_update_state_machines;
00196 _memory_use_in_mb = p._memory_use_in_mb;
00197 return *this;
00198 }
00199
00200 int
00201 UpdateConfigParams::operator==(UpdateConfigParams & p)
00202 {
00203 if (_enabled != p._enabled)
00204 return 0;
00205 if (_immediate_update != p._immediate_update)
00206 return 0;
00207 if (_retry_count != p._retry_count)
00208 return 0;
00209 if (_retry_interval != p._retry_interval)
00210 return 0;
00211 if (_concurrent_updates != p._concurrent_updates)
00212 return 0;
00213 if (_max_update_state_machines != p._max_update_state_machines)
00214 return 0;
00215 if (_memory_use_in_mb != p._memory_use_in_mb)
00216 return 0;
00217 return 1;
00218 }
00219
00220
00221
00222
00223
00224
00225 UpdateEntry::UpdateEntry():_group_link(0), _hash_link(0), _id(0), _url(0),
00226 _URLhandle(), _terminal_url(0),
00227 _request_headers(0), _num_request_headers(0),
00228 _http_hdr(0),
00229 _offset_hour(0), _interval(0), _max_depth(0), _start_time(0), _expired(0), _scheme_index(-1), _update_event_status(0)
00230 {
00231 http_parser_init(&_http_parser);
00232 }
00233
00234 UpdateEntry::~UpdateEntry()
00235 {
00236 ats_free(_url);
00237 _url = NULL;
00238
00239 if (_URLhandle.valid()) {
00240 _URLhandle.destroy();
00241 }
00242
00243 ats_free(_request_headers);
00244 _request_headers = NULL;
00245
00246
00247 if (_http_hdr && _http_hdr->valid()) {
00248 _http_hdr->destroy();
00249 delete _http_hdr;
00250 _http_hdr = NULL;
00251 }
00252 _indirect_list = NULL;
00253 }
00254
00255 void
00256 UpdateEntry::Init(int derived_url)
00257 {
00258 _id = ink_atomic_increment(&global_id, 1);
00259 if (derived_url) {
00260 return;
00261 }
00262 ComputeScheduleTime();
00263
00264 int scheme_len;
00265 const char *scheme = _URLhandle.scheme_get(&scheme_len);
00266 if (scheme != URL_SCHEME_HTTP) {
00267
00268 _max_depth = 0;
00269 }
00270
00271 }
00272
00273 int
00274 UpdateEntry::ValidURL(char *s, char *e)
00275 {
00276
00277
00278 const char *url_start = s;
00279 char *url_end = e;
00280 int err;
00281
00282 _URLhandle.create(NULL);
00283 err = _URLhandle.parse(&url_start, url_end);
00284 if (err >= 0) {
00285 _url = ats_strdup(s);
00286 return 0;
00287 } else {
00288 _URLhandle.destroy();
00289 return 1;
00290 }
00291 return 0;
00292 }
00293
00294 int
00295 UpdateEntry::ValidHeaders(char *s)
00296 {
00297
00298
00299 enum
00300 {
00301 FIND_START_OF_HEADER_NAME = 1,
00302 SCAN_FOR_HEADER_NAME,
00303 SCAN_FOR_END_OF_HEADER_VALUE
00304 };
00305
00306 char *p = s;
00307 char *t;
00308 int bad_header = 0;
00309 int end_of_headers = 0;
00310 int scan_state = FIND_START_OF_HEADER_NAME;
00311
00312 while (*p) {
00313 switch (scan_state) {
00314 case FIND_START_OF_HEADER_NAME:
00315 {
00316 if (!ValidHeaderNameChar(*p)) {
00317 bad_header = 1;
00318 break;
00319 } else {
00320 scan_state = SCAN_FOR_HEADER_NAME;
00321 break;
00322 }
00323 }
00324 case SCAN_FOR_HEADER_NAME:
00325 {
00326 if (!ValidHeaderNameChar(*p)) {
00327 if (*p == ':') {
00328 scan_state = SCAN_FOR_END_OF_HEADER_VALUE;
00329 break;
00330 } else {
00331 bad_header = 1;
00332 break;
00333 }
00334 } else {
00335
00336 break;
00337 }
00338 }
00339 case SCAN_FOR_END_OF_HEADER_VALUE:
00340 {
00341 t = strchr(p, '\r');
00342 if (t) {
00343 if (*(t + 1) == '\n') {
00344 p = t + 1;
00345 ++_num_request_headers;
00346 scan_state = FIND_START_OF_HEADER_NAME;
00347 break;
00348 } else {
00349 bad_header = 1;
00350 break;
00351 }
00352 } else {
00353 t = strchr(p, 0);
00354 if (t) {
00355 ++_num_request_headers;
00356 end_of_headers = 1;
00357 } else {
00358 bad_header = 1;
00359 }
00360 break;
00361 }
00362 }
00363 }
00364
00365 if (bad_header) {
00366 if (_num_request_headers) {
00367 return 1;
00368 } else {
00369 if (p == s) {
00370 return 0;
00371 } else {
00372 return 1;
00373 }
00374 }
00375 } else {
00376 if (end_of_headers) {
00377 break;
00378 } else {
00379 ++p;
00380 }
00381 }
00382 }
00383
00384
00385
00386 _request_headers = ats_strdup(s);
00387 return 0;
00388 }
00389
00390 int
00391 UpdateEntry::BuildHttpRequest()
00392 {
00393
00394
00395
00396 char request[MAX_LINE_LENGTH];
00397 int request_size;
00398
00399 request_size = len_GET_METHOD + strlen(_url) +
00400 len_HTTP_VERSION + (_request_headers ? len_TERMINATOR + strlen(_request_headers) : 0) + len_REQUEST_TERMINATOR + 1;
00401 if (request_size > MAX_LINE_LENGTH) {
00402 return 1;
00403 }
00404 if (_request_headers) {
00405 snprintf(request, sizeof(request), "%s%s%s%s%s%s", GET_METHOD, _url,
00406 HTTP_VERSION, TERMINATOR, _request_headers, REQUEST_TERMINATOR);
00407 } else {
00408 snprintf(request, sizeof(request), "%s%s%s%s", GET_METHOD, _url, HTTP_VERSION, REQUEST_TERMINATOR);
00409 }
00410 _http_hdr = new HTTPHdr;
00411 http_parser_init(&_http_parser);
00412 _http_hdr->create(HTTP_TYPE_REQUEST);
00413 int err;
00414 const char *start = request;
00415 const char *end = start + request_size - 1;
00416
00417 while (start < end) {
00418 err = _http_hdr->parse_req(&_http_parser, &start, end, false);
00419 if (err != PARSE_CONT) {
00420 break;
00421 }
00422 end = start + strlen(start);
00423 }
00424 http_parser_clear(&_http_parser);
00425 return 0;
00426 }
00427
00428 int
00429 UpdateEntry::ValidHeaderNameChar(char c)
00430 {
00431 if ((c > 31) && (c < 127)) {
00432 if (ValidSeparatorChar(c)) {
00433 return 0;
00434 } else {
00435 return 1;
00436 }
00437 } else {
00438 return 0;
00439 }
00440 }
00441
00442 int
00443 UpdateEntry::ValidSeparatorChar(char c)
00444 {
00445 switch (c) {
00446 case '(':
00447 case ')':
00448 case '<':
00449 case '>':
00450 case '@':
00451 case ',':
00452 case ';':
00453 case ':':
00454 case '\\':
00455 case '"':
00456 case '/':
00457 case '[':
00458 case ']':
00459 case '?':
00460 case '=':
00461 case '{':
00462 case '}':
00463 case ' ':
00464 case '\t':
00465 return 1;
00466 default:
00467 return 0;
00468 }
00469 }
00470
00471 int
00472 UpdateEntry::ValidHour(char *s)
00473 {
00474
00475
00476 _offset_hour = atoi(s);
00477 if ((_offset_hour >= MIN_OFFSET_HOUR) && (_offset_hour <= MAX_OFFSET_HOUR)) {
00478 return 0;
00479 } else {
00480 return 1;
00481 }
00482 }
00483
00484 int
00485 UpdateEntry::ValidInterval(char *s)
00486 {
00487
00488
00489 _interval = atoi(s);
00490 if ((_interval >= MIN_INTERVAL) && (_interval <= MAX_INTERVAL)) {
00491 return 0;
00492 } else {
00493 return 1;
00494 }
00495 return 0;
00496 }
00497
00498 int
00499 UpdateEntry::ValidDepth(char *s)
00500 {
00501
00502
00503 _max_depth = atoi(s);
00504
00505 if ((_max_depth >= MIN_DEPTH) && (_max_depth <= MAX_DEPTH)) {
00506 return 0;
00507 } else {
00508 return 1;
00509 }
00510 return 0;
00511 }
00512
00513 void
00514 UpdateEntry::SetTerminalStatus(int term_url)
00515 {
00516 _terminal_url = term_url;
00517 }
00518
00519 int
00520 UpdateEntry::TerminalURL()
00521 {
00522 return _terminal_url;
00523 }
00524
00525
00526 void
00527 UpdateEntry::ComputeScheduleTime()
00528 {
00529 ink_hrtime ht;
00530 time_t cur_time;
00531 struct tm cur_tm;
00532
00533 if (_expired) {
00534 _expired = 0;
00535 } else {
00536 if (_start_time) {
00537 return;
00538 }
00539 }
00540
00541 ht = ink_get_based_hrtime();
00542 cur_time = ht / HRTIME_SECOND;
00543
00544 if (!_start_time) {
00545 time_t zero_hour;
00546
00547
00548
00549 ink_localtime_r(&cur_time, &cur_tm);
00550 cur_tm.tm_hour = _offset_hour;
00551 cur_tm.tm_min = 0;
00552 cur_tm.tm_sec = 0;
00553
00554 zero_hour = convert_tm(&cur_tm);
00555
00556 if (zero_hour > cur_time) zero_hour -= 24 * SECONDS_PER_HOUR;
00557 _start_time = cur_time + (_interval - ((cur_time - zero_hour) % _interval));
00558 } else {
00559
00560 _start_time += _interval;
00561 }
00562 }
00563
00564 int
00565 UpdateEntry::ScheduleNow(time_t cur_time)
00566 {
00567 if (cur_time >= _start_time) {
00568 _expired = 1;
00569 return 1;
00570 } else {
00571 return 0;
00572 }
00573 }
00574
00575
00576
00577
00578
00579 UpdateConfigList::UpdateConfigList():_entry_q_elements(0), _pending_q_elements(0), _hash_table(0)
00580 {
00581 }
00582
00583 UpdateConfigList::~UpdateConfigList()
00584 {
00585 if (_hash_table) {
00586 delete[]_hash_table;
00587 _hash_table = NULL;
00588 }
00589 }
00590
00591 void
00592 UpdateConfigList::Add(UpdateEntry * e)
00593 {
00594 _entry_q_elements++;
00595 _entry_q.enqueue(e);
00596 }
00597
00598 int
00599 UpdateConfigList::HashAdd(UpdateEntry * e)
00600 {
00601 uint64_t folded64 = e->_url_md5.fold();
00602 ink_assert(folded64);
00603 int32_t index = folded64 % HASH_TABLE_SIZE;
00604
00605 if (!_hash_table) {
00606
00607
00608 _hash_table = new UpdateEntry *[HASH_TABLE_SIZE];
00609 memset((char *) _hash_table, 0, (sizeof(UpdateEntry *) * HASH_TABLE_SIZE));
00610 }
00611
00612
00613 UpdateEntry *he = _hash_table[index];
00614 UpdateEntry **last_link = &_hash_table[index];
00615
00616 while (he) {
00617 if (e->_url_md5 == he->_url_md5) {
00618 return 1;
00619 } else {
00620 last_link = &he->_hash_link;
00621 he = he->_hash_link;
00622 }
00623 }
00624
00625
00626
00627 e->_hash_link = *last_link;
00628 *last_link = e;
00629
00630
00631
00632 Add(e);
00633
00634 return 0;
00635 }
00636
00637 UpdateEntry *
00638 UpdateConfigList::Remove()
00639 {
00640 UpdateEntry *e = _entry_q.dequeue();
00641 if (e) {
00642 _entry_q_elements--;
00643 }
00644 return e;
00645 }
00646
00647 void
00648 UpdateConfigList::AddPending(UpdateEntry * e)
00649 {
00650 _pending_q_elements++;
00651 _pending_q.enqueue(e);
00652 }
00653
00654 UpdateEntry *
00655 UpdateConfigList::RemovePending()
00656 {
00657 UpdateEntry *e = _pending_q.dequeue();
00658 if (e) {
00659 _pending_q_elements--;
00660 }
00661 return e;
00662 }
00663
00664
00665
00666
00667
00668
00669 UpdateManager::UpdateManager():_CM(0), _SCH(0)
00670 {
00671 }
00672
00673 UpdateManager::~UpdateManager()
00674 {
00675 }
00676
00677 int
00678 UpdateManager::start()
00679 {
00680
00681
00682 len_GET_METHOD = strlen(GET_METHOD);
00683 len_HTTP_VERSION = strlen(HTTP_VERSION);
00684 len_REQUEST_TERMINATOR = strlen(REQUEST_TERMINATOR);
00685 len_TERMINATOR = strlen(TERMINATOR);
00686 init_proto_schemes();
00687 init_supported_proto_schemes();
00688
00689 _CM = new UpdateConfigManager;
00690 _CM->init();
00691
00692 _SCH = new UpdateScheduler(_CM);
00693 _SCH->Init();
00694
00695 return 0;
00696 }
00697
00698 UpdateManager updateManager;
00699
00700 typedef int (UpdateConfigManager::*UpdateConfigManagerContHandler) (int, void *);
00701
00702
00703
00704
00705 UpdateConfigManager::UpdateConfigManager()
00706 :Continuation(new_ProxyMutex()), _periodic_event(0), _filename(0)
00707 {
00708 SET_HANDLER((UpdateConfigManagerContHandler)
00709 & UpdateConfigManager::ProcessUpdate);
00710 }
00711
00712 UpdateConfigManager::~UpdateConfigManager()
00713 {
00714 }
00715
00716 int
00717 UpdateConfigManager::init()
00718 {
00719 update_rsb = RecAllocateRawStatBlock((int) update_stat_count);
00720
00721 _CP_actual = new UpdateConfigParams;
00722
00723
00724
00725 UpdateEstablishStaticConfigInteger(_CP_actual->_enabled, "proxy.config.update.enabled");
00726
00727 UpdateEstablishStaticConfigInteger(_CP_actual->_immediate_update, "proxy.config.update.force");
00728
00729 UpdateEstablishStaticConfigInteger(_CP_actual->_retry_count, "proxy.config.update.retry_count");
00730
00731 UpdateEstablishStaticConfigInteger(_CP_actual->_retry_interval, "proxy.config.update.retry_interval");
00732
00733 UpdateEstablishStaticConfigInteger(_CP_actual->_concurrent_updates, "proxy.config.update.concurrent_updates");
00734
00735 UpdateEstablishStaticConfigInteger(_CP_actual->_max_update_state_machines,
00736 "proxy.config.update.max_update_state_machines");
00737
00738 UpdateEstablishStaticConfigInteger(_CP_actual->_memory_use_in_mb, "proxy.config.update.memory_use_mb");
00739
00740
00741
00742 RecRegisterRawStat(update_rsb, RECT_PROCESS,
00743 "proxy.process.update.successes",
00744 RECD_INT, RECP_NON_PERSISTENT, (int) update_successes_stat, RecRawStatSyncCount);
00745 UPDATE_CLEAR_DYN_STAT(update_successes_stat);
00746
00747 RecRegisterRawStat(update_rsb, RECT_PROCESS,
00748 "proxy.process.update.no_actions",
00749 RECD_INT, RECP_NON_PERSISTENT, (int) update_no_actions_stat, RecRawStatSyncCount);
00750 UPDATE_CLEAR_DYN_STAT(update_no_actions_stat);
00751
00752 RecRegisterRawStat(update_rsb, RECT_PROCESS,
00753 "proxy.process.update.fails",
00754 RECD_INT, RECP_NON_PERSISTENT, (int) update_fails_stat, RecRawStatSyncCount);
00755 UPDATE_CLEAR_DYN_STAT(update_fails_stat);
00756
00757 RecRegisterRawStat(update_rsb, RECT_PROCESS,
00758 "proxy.process.update.unknown_status",
00759 RECD_INT, RECP_NON_PERSISTENT, (int) update_unknown_status_stat, RecRawStatSyncCount);
00760 UPDATE_CLEAR_DYN_STAT(update_unknown_status_stat);
00761
00762 RecRegisterRawStat(update_rsb, RECT_PROCESS,
00763 "proxy.process.update.state_machines",
00764 RECD_INT, RECP_NON_PERSISTENT, (int) update_state_machines_stat, RecRawStatSyncCount);
00765 UPDATE_CLEAR_DYN_STAT(update_state_machines_stat);
00766
00767 Debug("update",
00768 "Update params: enable %" PRId64" force %" PRId64" rcnt %" PRId64" rint %" PRId64" updates %" PRId64" "
00769 "max_sm %" PRId64" mem %" PRId64"",
00770 _CP_actual->_enabled, _CP_actual->_immediate_update,
00771 _CP_actual->_retry_count, _CP_actual->_retry_interval,
00772 _CP_actual->_concurrent_updates, _CP_actual->_max_update_state_machines, _CP_actual->_memory_use_in_mb);
00773
00774
00775
00776 _CP = new UpdateConfigParams(*_CP_actual);
00777
00778
00779
00780 SetFileName((char *) "update.config");
00781 REC_RegisterConfigUpdateFunc("proxy.config.update.update_configuration", URL_list_update_callout, (void *) this);
00782
00783
00784
00785 handleEvent(EVENT_IMMEDIATE, (Event *) NULL);
00786
00787
00788
00789 _periodic_event = eventProcessor.schedule_every(this, HRTIME_SECONDS(10));
00790
00791 return 0;
00792 }
00793
00794 int
00795 UpdateConfigManager::GetConfigParams(Ptr<UpdateConfigParams> *P)
00796 {
00797 MUTEX_TRY_LOCK(lock, mutex, this_ethread());
00798 if (!lock) {
00799 return 0;
00800 } else {
00801 *P = _CP;
00802 return 1;
00803 }
00804 }
00805
00806 int
00807 UpdateConfigManager::GetConfigList(Ptr<UpdateConfigList> *L)
00808 {
00809 MUTEX_TRY_LOCK(lock, mutex, this_ethread());
00810 if (!lock) {
00811 return 0;
00812 } else {
00813 *L = _CL;
00814 return 1;
00815 }
00816 }
00817
00818 int
00819 UpdateConfigManager::URL_list_update_callout(const char *, RecDataT ,
00820 RecData data, void *cookie)
00821 {
00822 UpdateConfigManager *cm = (UpdateConfigManager *) cookie;
00823 cm->SetFileName((char *) data.rec_string);
00824
00825
00826
00827
00828
00829 eventProcessor.schedule_imm(cm, ET_CACHE);
00830
00831 return 0;
00832 }
00833
00834 int
00835 UpdateConfigManager::ProcessUpdate(int event, Event * e)
00836 {
00837 if (event == EVENT_IMMEDIATE) {
00838
00839
00840
00841
00842 UpdateConfigList *l = NULL;
00843
00844 l = BuildUpdateList();
00845 if (l) {
00846 _CL = l;
00847 }
00848 return EVENT_DONE;
00849 }
00850
00851 if (event == EVENT_INTERVAL) {
00852
00853
00854
00855
00856 UpdateConfigParams *p = new UpdateConfigParams(*_CP_actual);
00857
00858 if (!(*_CP == *p)) {
00859 _CP = p;
00860 Debug("update", "enable %" PRId64" force %" PRId64" rcnt %" PRId64" rint %" PRId64" updates %" PRId64" state machines %" PRId64" mem %" PRId64"",
00861 p->_enabled, p->_immediate_update, p->_retry_count,
00862 p->_retry_interval, p->_concurrent_updates, p->_max_update_state_machines, p->_memory_use_in_mb);
00863 } else {
00864 delete p;
00865 }
00866 return EVENT_DONE;
00867 }
00868
00869
00870 Debug("update", "ProcessUpdate: Unknown event %d %p", event, e);
00871 return EVENT_DONE;
00872 }
00873
00874 UpdateConfigList *
00875 UpdateConfigManager::BuildUpdateList()
00876 {
00877
00878 ats_scoped_str config_path;
00879
00880 if (_filename) {
00881 config_path = Layout::get()->relative_to(Layout::get()->sysconfdir, _filename);
00882 } else {
00883 return (UpdateConfigList *) NULL;
00884 }
00885
00886 int fd = open(config_path, O_RDONLY);
00887 if (fd < 0) {
00888 Warning("read update.config, open failed");
00889 SignalWarning(MGMT_SIGNAL_CONFIG_ERROR, "read update.config, open failed");
00890 return (UpdateConfigList *) NULL;
00891 }
00892 return ParseConfigFile(fd);
00893 }
00894
00895 int
00896 UpdateConfigManager::GetDataLine(int fd, int bufsize, char *buf, int field_delimiters, int delimiter)
00897 {
00898 char *line = buf;
00899 int linesize = bufsize;
00900 int bytes_read = 0;
00901 int rlen;
00902
00903 while ((rlen = ink_file_fd_readline(fd, linesize, line)) > 0) {
00904
00905
00906
00907
00908
00909
00910
00911 if (0 == bytes_read) {
00912
00913 if (*line == '#') return rlen;
00914 else if (1 == rlen) continue;
00915 }
00916 bytes_read += rlen;
00917
00918
00919
00920 char *p = buf;
00921 int delimiters_found = 0;
00922
00923 while (*p) {
00924 if (*p == delimiter) {
00925 delimiters_found++;
00926 }
00927 p++;
00928 }
00929 if (delimiters_found == field_delimiters) {
00930
00931 return bytes_read;
00932
00933 } else if ((delimiters_found == (field_delimiters - 1))
00934 && (*(p - 1) == '\n')) {
00935
00936
00937
00938 *(p - 1) = '\\';
00939 return bytes_read;
00940 }
00941
00942 line += rlen;
00943 linesize -= rlen;
00944 }
00945 return 0;
00946 }
00947
00948 UpdateConfigList *
00949 UpdateConfigManager::ParseConfigFile(int f)
00950 {
00951
00952
00953
00954
00955
00956 enum
00957 { F_URL, F_HEADERS, F_HOUR, F_INTERVAL, F_DEPTH, F_ITEMS };
00958 char *p_start[F_ITEMS];
00959 char *p_end[F_ITEMS];
00960
00961 char line[MAX_LINE_LENGTH];
00962 char *p;
00963
00964 int ln = 0;
00965 int i;
00966
00967 UpdateEntry *e = NULL;
00968 UpdateConfigList *ul = new UpdateConfigList;
00969
00970 while (GetDataLine(f, sizeof(line) - 1, line, F_ITEMS, '\\') > 0) {
00971 ++ln;
00972 if (*line == '#') {
00973 continue;
00974 } else {
00975 p = line;
00976 }
00977
00978
00979
00980 for (i = 0; i < F_ITEMS; ++i) {
00981 p_start[i] = p;
00982 p_end[i] = strchr(p, '\\');
00983 *p_end[i] = 0;
00984
00985 if (p_end[i]) {
00986 p = p_end[i] + 1;
00987 } else {
00988 Warning("read update.config, invalid syntax, line %d", ln);
00989 SignalWarning(MGMT_SIGNAL_CONFIG_ERROR, "read update.config, invalid syntax");
00990 break;
00991 }
00992 }
00993 if (i < F_ITEMS) {
00994
00995 goto abort_processing;
00996 }
00997
00998
00999 e = new UpdateEntry;
01000
01001
01002
01003
01004 if (e->ValidURL(p_start[F_URL], p_end[F_URL])) {
01005 Warning("read update.config, invalid URL field, line %d", ln);
01006 SignalWarning(MGMT_SIGNAL_CONFIG_ERROR, "read update.config, invalid URL field");
01007 goto abort_processing;
01008 }
01009
01010
01011
01012 if (e->ValidHeaders(p_start[F_HEADERS])) {
01013 Warning("read update.config, invalid headers field, line %d", ln);
01014 SignalWarning(MGMT_SIGNAL_CONFIG_ERROR, "read update.config, invalid headers field");
01015 goto abort_processing;
01016 }
01017
01018
01019
01020 if (e->BuildHttpRequest()) {
01021 Warning("read update.config, header processing error, line %d", ln);
01022 SignalWarning(MGMT_SIGNAL_CONFIG_ERROR, "read update.config, header processing error");
01023 goto abort_processing;
01024 }
01025
01026
01027
01028 if (e->ValidHour(p_start[F_HOUR])) {
01029 Warning("read update.config, invalid hour field, line %d", ln);
01030 SignalWarning(MGMT_SIGNAL_CONFIG_ERROR, "read update.config, invalid hour field");
01031 goto abort_processing;
01032 }
01033
01034
01035
01036 if (e->ValidInterval(p_start[F_INTERVAL])) {
01037 Warning("read update.config, invalid interval field, line %d", ln);
01038 SignalWarning(MGMT_SIGNAL_CONFIG_ERROR, "read update.config, invalid interval field");
01039 goto abort_processing;
01040 }
01041
01042
01043
01044 if (e->ValidDepth(p_start[F_DEPTH])) {
01045 Warning("read update.config, invalid depth field, line %d", ln);
01046 SignalWarning(MGMT_SIGNAL_CONFIG_ERROR, "read update.config, invalid depth field");
01047 goto abort_processing;
01048 }
01049
01050
01051 e->Init();
01052 Debug("update",
01053 "[%d] [%s] [%s] nhdrs %d hour %d interval %d depth %d",
01054 e->_id, e->_url, e->_request_headers, e->_num_request_headers, e->_offset_hour, e->_interval, e->_max_depth);
01055 ul->Add(e);
01056 e = NULL;
01057 }
01058
01059
01060
01061 close(f);
01062 return ul;
01063
01064 abort_processing:
01065 close(f);
01066 if (e) {
01067 delete e;
01068 }
01069 if (ul) {
01070 delete ul;
01071 }
01072 return (UpdateConfigList *) NULL;
01073 }
01074
01075
01076
01077
01078
01079 UpdateScheduler::UpdateScheduler(UpdateConfigManager * c)
01080 :Continuation(new_ProxyMutex()), _periodic_event(0),
01081 _recursive_update(0), _CM(c), _schedule_event_callbacks(0), _update_state_machines(0), _base_EN(0), _parent_US(0)
01082 {
01083 SET_HANDLER((UpdateSchedulerContHandler)
01084 & UpdateScheduler::ScheduleEvent);
01085 }
01086
01087 UpdateScheduler::~UpdateScheduler()
01088 {
01089 }
01090
01091 int
01092 UpdateScheduler::Init()
01093 {
01094 _recursive_update = 0;
01095 _periodic_event = eventProcessor.schedule_every(this, HRTIME_SECONDS(10));
01096 return 0;
01097 }
01098
01099 int
01100 UpdateScheduler::Init(UpdateScheduler * us, UpdateEntry * ue, Ptr<UpdateConfigParams> p)
01101 {
01102 ink_assert(ue->_indirect_list->Entries());
01103
01104 _recursive_update = 1;
01105 _CP = p;
01106 _CL = ue->_indirect_list;
01107 _base_EN = ue;
01108 _parent_US = us;
01109
01110
01111
01112 UpdateEntry *e;
01113 while ((e = _CL->Remove())) {
01114 _CL->AddPending(e);
01115 }
01116 _periodic_event = eventProcessor.schedule_every(this, HRTIME_SECONDS(10));
01117 return 0;
01118 }
01119
01120 int
01121 UpdateScheduler::ScheduleEvent(int event, void *e)
01122 {
01123 UpdateEntry *ue = NULL;
01124 int update_complete = 1;
01125
01126 if (event == EVENT_IMMEDIATE) {
01127
01128
01129
01130 ue = (UpdateEntry *) e;
01131
01132 switch (ue->_update_event_status) {
01133 case UPDATE_EVENT_SUCCESS:
01134 {
01135 Debug("update", "%s update complete, UPDATE_EVENT_SUCCESS id: %d", (_recursive_update ? "(R)" : ""), ue->_id);
01136 UPDATE_INCREMENT_DYN_STAT(update_successes_stat);
01137
01138 if ((ue->_max_depth > 0) && ue->_indirect_list) {
01139 if (ue->_indirect_list->Entries()) {
01140
01141
01142
01143
01144
01145
01146 Debug("update", "Starting UpdateScheduler for id: %d [%s]", ue->_id, ue->_url);
01147 UpdateScheduler *us = new UpdateScheduler();
01148 us->Init(this, ue, _CP);
01149 update_complete = 0;
01150
01151 } else {
01152 ue->_indirect_list = NULL;
01153 }
01154 }
01155 break;
01156 }
01157 case UPDATE_EVENT_SUCCESS_NOACTION:
01158 {
01159 Debug("update",
01160 "%s update complete, UPDATE_EVENT_SUCCESS_NOACTION id: %d", (_recursive_update ? "(R)" : ""), ue->_id);
01161 UPDATE_INCREMENT_DYN_STAT(update_no_actions_stat);
01162 break;
01163 }
01164 case UPDATE_EVENT_FAILED:
01165 {
01166 Debug("update", "%s update complete, UPDATE_EVENT_FAILED id: %d", (_recursive_update ? "(R)" : ""), ue->_id);
01167 UPDATE_INCREMENT_DYN_STAT(update_fails_stat);
01168 break;
01169 }
01170 default:
01171 {
01172 Debug("update",
01173 "%s update complete, unknown status %d, id: %d",
01174 (_recursive_update ? "(R)" : ""), ue->_update_event_status, ue->_id);
01175 UPDATE_INCREMENT_DYN_STAT(update_unknown_status_stat);
01176 break;
01177 }
01178 }
01179
01180 if (update_complete) {
01181 if (!_recursive_update) {
01182
01183
01184
01185
01186 ue->ComputeScheduleTime();
01187 _CL->Add(ue);
01188
01189 } else {
01190 delete ue;
01191 }
01192 --_update_state_machines;
01193 UPDATE_DECREMENT_DYN_STAT(update_state_machines_stat);
01194 }
01195
01196
01197
01198
01199
01200 if (Schedule() < 0) {
01201
01202 if (_update_state_machines == 0) {
01203
01204
01205
01206
01207 _CP = NULL;
01208 _CL = NULL;
01209
01210 if (_recursive_update) {
01211
01212
01213
01214
01215 _periodic_event->cancel();
01216 _base_EN->_indirect_list = NULL;
01217 _base_EN->_update_event_status = UPDATE_EVENT_SUCCESS;
01218
01219 SET_HANDLER((UpdateSchedulerContHandler)
01220 & UpdateScheduler::ChildExitEventHandler);
01221 handleEvent(EVENT_IMMEDIATE, 0);
01222 }
01223 }
01224 }
01225 return EVENT_DONE;
01226 }
01227
01228
01229
01230 if (event == EVENT_INTERVAL) {
01231 ++_schedule_event_callbacks;
01232 } else {
01233
01234 Debug("update", "UpdateScheduler::ScheduleEvent unknown event %d", event);
01235 return EVENT_DONE;
01236 }
01237
01238 if (!_CP && !_CL) {
01239
01240
01241 if (!_CM->GetConfigParams(&_CP)) {
01242 return EVENT_CONT;
01243 }
01244 if (!_CM->GetConfigList(&_CL)) {
01245 _CP = NULL;
01246 return EVENT_CONT;
01247 }
01248
01249
01250 if (!_CP || !_CL) {
01251 _CP = NULL;
01252 _CL = NULL;
01253 return EVENT_CONT;
01254 }
01255
01256
01257 if (!_CP->IsEnabled()) {
01258 _CP = NULL;
01259 _CL = NULL;
01260 return EVENT_CONT;
01261 }
01262
01263 } else {
01264
01265
01266
01267
01268
01269 Schedule();
01270 return EVENT_CONT;
01271 }
01272 ink_release_assert(!_update_state_machines);
01273
01274
01275
01276
01277
01278 ink_hrtime ht = ink_get_based_hrtime();
01279 time_t cur_time = ht / HRTIME_SECOND;
01280 Queue<UpdateEntry> no_action_q;
01281 int time_expired;
01282
01283 while ((ue = _CL->Remove())) {
01284 time_expired = ue->ScheduleNow(cur_time);
01285 if (time_expired || _CP->ImmediateUpdate()) {
01286 if (Schedule(ue) > 0) {
01287 Debug("update", "%s and started id: %d", time_expired ? "expired" : "force expire", ue->_id);
01288 } else {
01289 Debug("update", "%s with deferred start id: %d", time_expired ? "expired" : "force expire", ue->_id);
01290 }
01291
01292 } else {
01293 no_action_q.enqueue(ue);
01294 }
01295 }
01296
01297
01298
01299 while ((ue = no_action_q.dequeue())) {
01300 _CL->Add(ue);
01301 }
01302
01303 if (!_update_state_machines && !_CL->_pending_q.head) {
01304
01305
01306
01307 _CP = NULL;
01308 _CL = NULL;
01309 }
01310 return EVENT_DONE;
01311 }
01312
01313 int
01314 UpdateScheduler::ChildExitEventHandler(int event, Event * )
01315 {
01316 switch (event) {
01317 case EVENT_IMMEDIATE:
01318 case EVENT_INTERVAL:
01319 {
01320 MUTEX_TRY_LOCK(lock, _parent_US->mutex, this_ethread());
01321 if (lock) {
01322 Debug("update", "Child UpdateScheduler exit id: %d", _base_EN->_id);
01323 _parent_US->handleEvent(EVENT_IMMEDIATE, _base_EN);
01324 delete this;
01325
01326 } else {
01327
01328 eventProcessor.schedule_in(this, HRTIME_MSECONDS(10));
01329 }
01330 break;
01331 }
01332 default:
01333 {
01334 ink_release_assert(!"UpdateScheduler::ChildExitEventHandler invalid event");
01335 }
01336 }
01337
01338 return EVENT_DONE;
01339 }
01340
01341 int
01342 UpdateScheduler::Schedule(UpdateEntry * e)
01343 {
01344
01345
01346
01347
01348 UpdateSM *usm;
01349 UpdateEntry *ue = e;
01350 int allow_schedule;
01351 RecInt count, sum;
01352 int max_concurrent_updates;
01353
01354 UPDATE_READ_DYN_STAT(update_state_machines_stat, count, sum);
01355 if (_CP->ConcurrentUpdates() < _CP->MaxUpdateSM()) {
01356 max_concurrent_updates = _CP->ConcurrentUpdates();
01357 } else {
01358 max_concurrent_updates = _CP->MaxUpdateSM();
01359 }
01360 allow_schedule = (sum < max_concurrent_updates);
01361
01362 if (allow_schedule) {
01363 ue = ue ? ue : _CL->RemovePending();
01364 if (ue) {
01365 ++_update_state_machines;
01366 UPDATE_INCREMENT_DYN_STAT(update_state_machines_stat);
01367 usm = new UpdateSM(this, _CP, ue);
01368 usm->Start();
01369
01370 Debug("update", "%s %s start update id: %d [%s]",
01371 (_recursive_update ? "(R)" : ""), (e ? "directed" : "speculative"), ue->_id, ue->_url);
01372
01373 return 1;
01374 } else {
01375 return -1;
01376 }
01377
01378 } else {
01379 if (ue) {
01380 _CL->AddPending(ue);
01381 }
01382 return 0;
01383 }
01384 }
01385
01386
01387
01388
01389
01390 UpdateSM::UpdateSM(UpdateScheduler * us, Ptr<UpdateConfigParams> p, UpdateEntry * e)
01391 :Continuation(new_ProxyMutex()), _state(USM_INIT), _return_status(0), _retries(0)
01392 {
01393 SET_HANDLER((UpdateSMContHandler) & UpdateSM::HandleSMEvent);
01394 _US = us;
01395 _CP = p;
01396 _EN = e;
01397 }
01398
01399 UpdateSM::~UpdateSM()
01400 {
01401 _CP = NULL;
01402 }
01403
01404 void
01405 UpdateSM::Start()
01406 {
01407 eventProcessor.schedule_imm(this, ET_CACHE);
01408 }
01409
01410 int
01411 UpdateSM::HandleSMEvent(int event, Event * )
01412 {
01413 while (1) {
01414 switch (_state) {
01415 case USM_INIT:
01416 {
01417
01418
01419
01420
01421
01422
01423 if (_EN->_max_depth > 0) {
01424
01425 _state = USM_PROCESS_URL;
01426 break;
01427 }
01428
01429 INK_MD5 url_md5;
01430
01431 Cache::generate_key(&url_md5, &_EN->_URLhandle);
01432 ClusterMachine *m = cluster_machine_at_depth(cache_hash(url_md5));
01433 if (m) {
01434
01435 _state = USM_EXIT;
01436 _EN->_update_event_status = UPDATE_EVENT_SUCCESS_NOACTION;
01437 break;
01438 } else {
01439
01440 _state = USM_PROCESS_URL;
01441 break;
01442 }
01443 }
01444 case USM_PROCESS_URL:
01445 {
01446
01447
01448
01449 int n;
01450 int scheme_len;
01451 const char *scheme;
01452 _state = USM_PROCESS_URL_COMPLETION;
01453 scheme = _EN->_URLhandle.scheme_get(&scheme_len);
01454 for (n = 0; n < N_SCHEMES; ++n) {
01455 if (scheme == *scheme_dispatch_table[n].scheme) {
01456 _EN->_scheme_index = n;
01457 if ((*scheme_dispatch_table[n].func) (this)) {
01458 break;
01459 }
01460 return EVENT_CONT;
01461 }
01462 }
01463
01464
01465 _state = USM_EXIT;
01466 _EN->_update_event_status = UPDATE_EVENT_FAILED;
01467 break;
01468 }
01469 case USM_PROCESS_URL_COMPLETION:
01470 {
01471
01472
01473
01474 _state = USM_EXIT;
01475 _EN->_update_event_status = event;
01476 (*scheme_post_dispatch_table[_EN->_scheme_index].func) (this);
01477 break;
01478 }
01479 case USM_EXIT:
01480 {
01481
01482
01483
01484 if ((_return_status == UPDATE_EVENT_FAILED)
01485 && (_retries < _CP->RetryCount())) {
01486
01487
01488
01489 ++_retries;
01490 _state = USM_PROCESS_URL;
01491 eventProcessor.schedule_in(this, HRTIME_SECONDS(_CP->RetryInterval()), ET_CACHE);
01492 return EVENT_DONE;
01493
01494 } else {
01495 MUTEX_TRY_LOCK(lock, _US->mutex, this_ethread());
01496 if (lock) {
01497 _US->handleEvent(EVENT_IMMEDIATE, (void *) _EN);
01498 delete this;
01499 return EVENT_DONE;
01500
01501 } else {
01502
01503 eventProcessor.schedule_in(this, HRTIME_MSECONDS(10), ET_CACHE);
01504 return EVENT_CONT;
01505 }
01506 }
01507 }
01508 }
01509 }
01510
01511 return EVENT_CONT;
01512 }
01513
01514 struct dispatch_entry scheme_dispatch_table[UpdateSM::N_SCHEMES] = {
01515 {&URL_SCHEME_HTTP, UpdateSM::http_scheme},
01516 };
01517
01518 struct dispatch_entry scheme_post_dispatch_table[UpdateSM::N_SCHEMES] = {
01519 {&URL_SCHEME_HTTP, UpdateSM::http_scheme_postproc},
01520 };
01521
01522 int
01523 UpdateSM::http_scheme(UpdateSM * sm)
01524 {
01525 if (sm->_EN->_max_depth > 0) {
01526
01527
01528
01529 Debug("update", "Start recursive HTTP GET id: %d [%s]", sm->_EN->_id, sm->_EN->_url);
01530 sm->_EN->_indirect_list = new UpdateConfigList;
01531 RecursiveHttpGet *RHttpGet = new RecursiveHttpGet;
01532
01533 RHttpGet->Init(sm, sm->_EN->_url, sm->_EN->_request_headers,
01534 &sm->_EN->_URLhandle, sm->_EN->_http_hdr,
01535 sm->_EN->_max_depth, sm->_EN->_indirect_list, &update_allowable_html_tags[0]);
01536 } else {
01537
01538
01539
01540 Debug("update", "Start HTTP GET id: %d [%s]", sm->_EN->_id, sm->_EN->_url);
01541 HttpUpdateSM *current_reader;
01542
01543 current_reader = HttpUpdateSM::allocate();
01544 current_reader->init();
01545
01546 current_reader->start_scheduled_update(sm, sm->_EN->_http_hdr);
01547 }
01548 return 0;
01549 }
01550
01551 int
01552 UpdateSM::http_scheme_postproc(UpdateSM * sm)
01553 {
01554
01555
01556 switch (sm->_EN->_update_event_status) {
01557 case UPDATE_EVENT_SUCCESS:
01558 case UPDATE_EVENT_FAILED:
01559
01560 sm->_return_status = sm->_EN->_update_event_status;
01561 break;
01562
01563 case HTTP_SCH_UPDATE_EVENT_WRITTEN:
01564 case HTTP_SCH_UPDATE_EVENT_UPDATED:
01565 case HTTP_SCH_UPDATE_EVENT_DELETED:
01566 case HTTP_SCH_UPDATE_EVENT_NOT_CACHED:
01567 case HTTP_SCH_UPDATE_EVENT_NO_ACTION:
01568 sm->_EN->_update_event_status = UPDATE_EVENT_SUCCESS;
01569 sm->_return_status = UPDATE_EVENT_SUCCESS;
01570 break;
01571
01572 case HTTP_SCH_UPDATE_EVENT_ERROR:
01573 default:
01574 sm->_EN->_update_event_status = UPDATE_EVENT_FAILED;
01575 sm->_return_status = UPDATE_EVENT_FAILED;
01576 break;
01577 }
01578 return 0;
01579 }
01580
01581
01582
01583
01584
01585
01586 char
01587 HtmlParser::default_zero_char = '\0';
01588
01589 RecursiveHttpGet::RecursiveHttpGet()
01590 :Continuation(new_ProxyMutex()), _id(0), _caller_cont(0),
01591 _request_headers(0), _http_hdr(0), _recursion_depth(0), _OL(0), _group_link_head(0), _active_child_state_machines(0)
01592 {
01593 SET_HANDLER((RecursiveHttpGetContHandler)
01594 & RecursiveHttpGet::RecursiveHttpGetEvent);
01595 }
01596
01597 RecursiveHttpGet::~RecursiveHttpGet()
01598 {
01599 _CL = NULL;
01600 }
01601
01602 void
01603 RecursiveHttpGet::Init(Continuation * cont, char *url, char *request_headers,
01604 URL * url_data, HTTPHdr * http_hdr, int recursion_depth,
01605 Ptr<UpdateConfigList> L, struct html_tag *allowed_html_tags)
01606 {
01607
01608
01609
01610
01611 _id = ink_atomic_increment(&global_id, 1);
01612 _caller_cont = cont;
01613 _request_headers = request_headers;
01614 _url_data = url_data;
01615 _http_hdr = http_hdr;
01616 _recursion_depth = recursion_depth;
01617 _CL = L;
01618 _OL = ObjectReloadContAllocator.alloc();
01619 _OL->Init(this, url, strlen(url), _request_headers, (_request_headers ? strlen(_request_headers) : 0), 1, 1);
01620
01621 html_parser.Init(url, allowed_html_tags);
01622
01623 Debug("update", "Start recursive read rid: %d [%s]", _id, html_parser._url);
01624 }
01625
01626 int
01627 RecursiveHttpGet::RecursiveHttpGetEvent(int event, Event * d)
01628 {
01629 char *url, *url_end;
01630 int status;
01631 UpdateEntry *ue;
01632 IOBufferReader *r = (IOBufferReader *) d;
01633
01634 switch (event) {
01635 case NET_EVENT_OPEN_FAILED:
01636 {
01637 Debug("update", "RecursiveHttpGetEvent connect failed id: %d [%s]", _id, html_parser._url);
01638 break;
01639 }
01640 case VC_EVENT_ERROR:
01641 {
01642 Debug("update", "RecursiveHttpGetEvent connect event error id: %d [%s]", _id, html_parser._url);
01643 break;
01644 }
01645 case VC_EVENT_READ_READY:
01646 case VC_EVENT_READ_COMPLETE:
01647 case VC_EVENT_EOS:
01648 {
01649 while ((status = html_parser.ParseHtml(r, &url, &url_end))) {
01650
01651
01652 ue = new UpdateEntry;
01653 if (ue->ValidURL(url, url_end + 1 )) {
01654 delete ue;
01655 ue = NULL;
01656
01657 } else {
01658
01659
01660 ue->_request_headers = ats_strdup(_request_headers);
01661 ue->BuildHttpRequest();
01662 ue->Init(1);
01663
01664
01665 int ue_host_len;
01666 const char *ue_host = ue->_URLhandle.host_get(&ue_host_len);
01667 int url_host_len;
01668 const char *url_host = _url_data->host_get(&url_host_len);
01669
01670 if (ue_host == NULL || url_host == NULL || ptr_len_casecmp(ue_host, ue_host_len, url_host, url_host_len)) {
01671 delete ue;
01672 ue = NULL;
01673 continue;
01674 }
01675
01676
01677 ue->_URLhandle.hash_get(&ue->_url_md5);
01678
01679 if (_CL->HashAdd(ue)) {
01680
01681
01682 delete ue;
01683 ue = NULL;
01684
01685 } else {
01686
01687
01688
01689
01690 ue->SetTerminalStatus(((status < 0) ? 1 : 0));
01691 Debug("update", "Recursive find rid: %d id: %d %s\n [%s]",
01692 _id, ue->_id, (ue->TerminalURL()? "T " : ""), ue->_url);
01693
01694 if (_group_link_head) {
01695 ue->_group_link = _group_link_head;
01696 _group_link_head = ue;
01697 } else {
01698 _group_link_head = ue;
01699 ue->_group_link = NULL;
01700 }
01701 }
01702 }
01703 }
01704 ink_release_assert(r->read_avail() == 0);
01705
01706 if ((event == VC_EVENT_READ_COMPLETE)
01707 || (event == VC_EVENT_EOS)) {
01708 break;
01709
01710 } else {
01711 return EVENT_CONT;
01712 }
01713 }
01714 case UPDATE_EVENT_SUCCESS:
01715 case UPDATE_EVENT_FAILED:
01716 {
01717
01718
01719 ink_release_assert(_active_child_state_machines > 0);
01720 _active_child_state_machines--;
01721 break;
01722 }
01723 default:
01724 {
01725 ink_release_assert(!"RecursiveHttpGetEvent invalid event");
01726 return EVENT_DONE;
01727
01728 }
01729 }
01730
01731 if (_group_link_head) {
01732
01733
01734
01735
01736 while (_group_link_head) {
01737 ue = _group_link_head;
01738 _group_link_head = ue->_group_link;
01739
01740 if (!ue->TerminalURL()) {
01741 if (_recursion_depth <= 1) {
01742 continue;
01743 }
01744
01745 Debug("update", "(R) start non-terminal HTTP GET rid: %d id: %d [%s]", _id, ue->_id, ue->_url);
01746
01747 _active_child_state_machines++;
01748 RecursiveHttpGet *RHttpGet = new RecursiveHttpGet();
01749 RHttpGet->Init(this, ue->_url, _request_headers,
01750 _url_data, _http_hdr, (_recursion_depth - 1), _CL, &update_allowable_html_tags[0]);
01751 return EVENT_CONT;
01752
01753 }
01754 }
01755 }
01756
01757
01758
01759 SET_HANDLER((RecursiveHttpGetContHandler)
01760 & RecursiveHttpGet::ExitEventHandler);
01761 handleEvent(EVENT_IMMEDIATE, 0);
01762 return EVENT_DONE;
01763 }
01764
01765 int
01766 RecursiveHttpGet::ExitEventHandler(int event, Event * )
01767 {
01768 switch (event) {
01769 case EVENT_IMMEDIATE:
01770 case EVENT_INTERVAL:
01771 {
01772 MUTEX_TRY_LOCK(lock, _caller_cont->mutex, this_ethread());
01773 if (lock) {
01774 Debug("update", "Exiting recursive read rid: %d [%s]", _id, html_parser._url);
01775 _caller_cont->handleEvent(UPDATE_EVENT_SUCCESS, 0);
01776 delete this;
01777
01778 } else {
01779
01780 eventProcessor.schedule_in(this, HRTIME_MSECONDS(10));
01781 }
01782 break;
01783 }
01784 default:
01785 {
01786 ink_release_assert(!"RecursiveHttpGet::ExitEventHandler invalid event");
01787 }
01788 }
01789
01790 return EVENT_DONE;
01791 }
01792
01793 int
01794 HtmlParser::ParseHtml(IOBufferReader * r, char **url, char **url_end)
01795 {
01796 int status;
01797 while (1) {
01798 if ((status = ScanHtmlForURL(r, url, url_end))) {
01799 status = ConstructURL(url, url_end);
01800 if (status)
01801 return status;
01802 } else {
01803 return 0;
01804 }
01805 }
01806 }
01807
01808 int
01809 HtmlParser::ScanHtmlForURL(IOBufferReader * r, char **url, char **url_end)
01810 {
01811 unsigned char c;
01812 int n = 0;
01813
01814 while (1) {
01815 switch (_scan_state) {
01816 case SCAN_INIT:
01817 {
01818 _tag.clear();
01819
01820 _attr.clear();
01821 _attr_value.clear();
01822 _attr_value_hash_char_index = -1;
01823 _attr_value_quoted = 0;
01824 _attr_matched = false;
01825
01826 _scan_state = SCAN_START;
01827 n = -1;
01828 break;
01829 }
01830 case SCAN_START:
01831 {
01832 while ((n = r->read((char *) &c, 1))) {
01833 if (c == '<') {
01834 _scan_state = FIND_TAG_START;
01835 break;
01836 }
01837 }
01838 break;
01839 }
01840 case FIND_TAG_START:
01841 {
01842 while ((n = r->read((char *) &c, 1))) {
01843 if (!isspace(c)) {
01844 if (c == '>') {
01845
01846
01847
01848 _scan_state = SCAN_INIT;
01849 break;
01850
01851 } else {
01852 _tag(_tag.length()) = c;
01853 _scan_state = COPY_TAG;
01854 break;
01855 }
01856 }
01857 }
01858 break;
01859 }
01860 case COPY_TAG:
01861 {
01862 while ((n = r->read((char *) &c, 1))) {
01863 if (!isspace(c)) {
01864 if (c == '>') {
01865
01866
01867
01868 _scan_state = SCAN_INIT;
01869 break;
01870
01871 } else if (c == '=') {
01872
01873
01874
01875 _scan_state = SCAN_INIT;
01876 break;
01877
01878 } else {
01879 if (_tag.length() < MAX_TAG_NAME_LENGTH) {
01880 _tag(_tag.length()) = c;
01881
01882 } else {
01883
01884
01885
01886 _scan_state = SCAN_INIT;
01887 break;
01888 }
01889 }
01890
01891 } else {
01892 _tag(_tag.length()) = 0;
01893 if (strcmp(_tag, HTML_COMMENT_TAG) == 0) {
01894 _scan_state = IGNORE_COMMENT_START;
01895 } else {
01896 _scan_state = FIND_ATTR_START;
01897 }
01898 break;
01899 }
01900 }
01901 break;
01902 }
01903 case IGNORE_COMMENT_START:
01904 {
01905 _comment_end_ptr = (char *) HTML_COMMENT_END;
01906 _scan_state = IGNORE_COMMENT;
01907 break;
01908 }
01909 case IGNORE_COMMENT:
01910 {
01911 while ((n = r->read((char *) &c, 1))) {
01912 if (!isspace(c)) {
01913 if (c == *_comment_end_ptr) {
01914 _comment_end_ptr++;
01915 if (!*_comment_end_ptr) {
01916 _scan_state = SCAN_INIT;
01917 break;
01918 }
01919 } else {
01920 _comment_end_ptr = (char *) HTML_COMMENT_END;
01921 }
01922 }
01923 }
01924 break;
01925 }
01926 case FIND_ATTR_START:
01927 {
01928 while ((n = r->read((char *) &c, 1))) {
01929 if (!isspace(c)) {
01930 if (c == '>') {
01931
01932
01933
01934 _scan_state = SCAN_INIT;
01935 break;
01936
01937 } else if (c == '=') {
01938
01939
01940
01941 _scan_state = SCAN_INIT;
01942 break;
01943
01944 } else {
01945 _attr(_attr.length()) = c;
01946 _scan_state = COPY_ATTR;
01947 break;
01948 }
01949 }
01950 }
01951 break;
01952 }
01953 case COPY_ATTR:
01954 {
01955 while ((n = r->read((char *) &c, 1))) {
01956 if (!isspace(c)) {
01957 if (c == '>') {
01958
01959
01960
01961 _scan_state = SCAN_INIT;
01962 break;
01963
01964 } else if (c == '=') {
01965
01966
01967
01968 _attr(_attr.length()) = 0;
01969 _scan_state = FIND_ATTR_VALUE_START;
01970 break;
01971
01972 } else {
01973 if (_attr.length() < MAX_ATTR_NAME_LENGTH) {
01974 _attr(_attr.length()) = c;
01975
01976 } else {
01977
01978
01979
01980 _scan_state = SCAN_INIT;
01981 break;
01982 }
01983 }
01984
01985 } else {
01986 _attr(_attr.length()) = 0;
01987 _scan_state = FIND_ATTR_VALUE_DELIMITER;
01988 break;
01989 }
01990 }
01991 break;
01992 }
01993 case FIND_ATTR_VALUE_DELIMITER:
01994 {
01995 while ((n = r->read((char *) &c, 1))) {
01996 if (isspace(c) || (c == '=')) {
01997 if (c == '=') {
01998 _scan_state = FIND_ATTR_VALUE_START;
01999 break;
02000 }
02001 } else {
02002 _scan_state = SCAN_INIT;
02003 break;
02004 }
02005 }
02006 break;
02007 }
02008 case FIND_ATTR_VALUE_START:
02009 {
02010 while ((n = r->read((char *) &c, 1))) {
02011 if (!isspace(c)) {
02012 if (c == '>') {
02013
02014
02015
02016 _scan_state = SCAN_INIT;
02017 break;
02018
02019 } else if ((c == '\'') || (c == '\"')) {
02020 _attr_value_quoted = c;
02021 _scan_state = COPY_ATTR_VALUE;
02022 break;
02023
02024 } else {
02025 _attr_value_quoted = 0;
02026 _attr_value(_attr_value.length()) = c;
02027 _scan_state = COPY_ATTR_VALUE;
02028 break;
02029 }
02030 }
02031 }
02032 break;
02033 }
02034 case COPY_ATTR_VALUE:
02035 {
02036 while ((n = r->read((char *) &c, 1))) {
02037 if (_attr_value_quoted) {
02038 if (c == _attr_value_quoted) {
02039
02040
02041
02042 _attr_value(_attr_value.length()) = 0;
02043 _scan_state = VALIDATE_ENTRY;
02044 break;
02045
02046 } else if (c == '\n') {
02047 _scan_state = TERMINATE_COPY_ATTR_VALUE;
02048 break;
02049 } else {
02050 _attr_value(_attr_value.length()) = c;
02051 if (c == '#') {
02052 _attr_value_hash_char_index = _attr_value.length() - 1;
02053 }
02054 }
02055
02056 } else {
02057 if (isspace(c)) {
02058
02059
02060
02061 _attr_value(_attr_value.length()) = 0;
02062 _scan_state = VALIDATE_ENTRY;
02063 break;
02064
02065 } else if (c == '>') {
02066
02067
02068
02069 _attr_value(_attr_value.length()) = 0;
02070 _scan_state = VALIDATE_ENTRY_RESTART;
02071 break;
02072
02073 } else {
02074 _attr_value(_attr_value.length()) = c;
02075 if (c == '#') {
02076 _attr_value_hash_char_index = _attr_value.length() - 1;
02077 }
02078 }
02079 }
02080 }
02081 break;
02082 }
02083 case VALIDATE_ENTRY:
02084 case VALIDATE_ENTRY_RESTART:
02085 {
02086 if (_scan_state == VALIDATE_ENTRY) {
02087 _scan_state = RESUME_ATTR_VALUE_SCAN;
02088 } else {
02089 _scan_state = SCAN_INIT;
02090 }
02091 if (AllowTagAttrValue()) {
02092 if (ExtractURL(url, url_end)) {
02093 return 1;
02094 }
02095 }
02096 break;
02097 }
02098 case RESUME_ATTR_VALUE_SCAN:
02099 {
02100 _attr.clear();
02101 _attr_value.clear();
02102 _attr_value_hash_char_index = -1;
02103 _attr_value_quoted = 0;
02104
02105 _scan_state = FIND_ATTR_START;
02106 n = -2;
02107 break;
02108 }
02109 case TERMINATE_COPY_ATTR_VALUE:
02110 {
02111 while ((n = r->read((char *) &c, 1))) {
02112 if (c == _attr_value_quoted) {
02113 _scan_state = RESUME_ATTR_VALUE_SCAN;
02114 break;
02115 }
02116 }
02117 break;
02118 }
02119 default:
02120 {
02121 ink_release_assert(!"HtmlParser::ScanHtmlForURL bad state");
02122 }
02123 }
02124
02125 if (n == 0) {
02126 return 0;
02127 }
02128
02129 }
02130 }
02131
02132 int
02133 HtmlParser::AllowTagAttrValue()
02134 {
02135 struct html_tag *p_tag = allowable_html_tags;
02136 struct html_tag *p_attr = allowable_html_attrs;
02137
02138 if (!_tag || !_attr)
02139 return 0;
02140
02141 while (p_tag->tag && p_tag->attr) {
02142 if (!strcasecmp(_tag, p_tag->tag)
02143 && !strcasecmp(_attr, p_tag->attr)) {
02144 if (p_attr == NULL || p_attr->tag == NULL)
02145 return 1;
02146 else if (_attr_matched) {
02147 return 1;
02148 } else {
02149
02150 return 0;
02151 }
02152 } else {
02153 if (p_attr && p_attr->tag && p_attr->attr && _attr_value.length() > 0) {
02154 if (!strcasecmp(_attr, p_attr->tag)
02155 && !strcasecmp(_attr_value, p_attr->attr)) {
02156 _attr_matched = true;
02157 }
02158 }
02159 p_tag++;
02160 if (p_attr)
02161 p_attr++;
02162 }
02163 }
02164 return 0;
02165 }
02166
02167 int
02168 HtmlParser::ValidProtoScheme(char *p)
02169 {
02170 int n;
02171 for (n = 0; proto_schemes[n].tag; ++n) {
02172 if (!strncasecmp(p, proto_schemes[n].tag, proto_schemes[n].tag_len)) {
02173 return 1;
02174 }
02175 }
02176 return 0;
02177 }
02178
02179 int
02180 HtmlParser::ValidSupportedProtoScheme(char *p)
02181 {
02182 int n;
02183 for (n = 0; supported_proto_schemes[n].tag; ++n) {
02184 if (!strncasecmp(p, supported_proto_schemes[n].tag, supported_proto_schemes[n].tag_len)) {
02185 return 1;
02186 }
02187 }
02188 return 0;
02189 }
02190
02191 int
02192 HtmlParser::ExtractURL(char **url, char **url_end)
02193 {
02194 intptr_t n;
02195
02196
02197 if (_attr_value_hash_char_index >= 0) {
02198 if (!_attr_value_hash_char_index) {
02199 return 0;
02200
02201 } else {
02202
02203 _attr_value.set_length(_attr_value_hash_char_index + 1);
02204 _attr_value[_attr_value_hash_char_index] = 0;
02205 }
02206 }
02207
02208 if (!strcasecmp(_tag, "base") && !strcasecmp(_attr, "href")) {
02209 if (_html_doc_base) {
02210 _html_doc_base.clear();
02211 }
02212 for (n = 0; n < _attr_value.length(); ++n) {
02213 _html_doc_base(_html_doc_base.length()) = _attr_value[n];
02214 }
02215 _html_doc_base(_html_doc_base.length()) = 0;
02216 return 0;
02217
02218 } else if (!strcasecmp(_tag, "meta") && !strcasecmp(_attr, "content")) {
02219
02220
02221
02222
02223 if (_attr_value.length()) {
02224
02225 for (n = 0; n < _attr_value.length(); ++n) {
02226 if (!ParseRules::is_digit((unsigned char) _attr_value[n])) {
02227 break;
02228 }
02229 }
02230 if ((n < _attr_value.length()) && (((unsigned char) _attr_value[n]) == ';')) {
02231
02232 for (; n < _attr_value.length(); ++n) {
02233 if (!isspace((unsigned char) _attr_value[n])) {
02234 break;
02235 }
02236 }
02237 if ((n < _attr_value.length()) && (!strncasecmp(&_attr_value[n], "URL=", 4))) {
02238 n += 4;
02239 if ((n < _attr_value.length())
02240 && ((_attr_value.length() - n) > 1)) {
02241 *url = &_attr_value[n];
02242 *url_end = &_attr_value[_attr_value.length() - 2];
02243 return 1;
02244 }
02245 }
02246 }
02247 return 0;
02248
02249 } else {
02250 return 0;
02251 }
02252 }
02253
02254 if (_attr_value.length() > 1) {
02255 *url = &_attr_value[(intptr_t)0];
02256 *url_end = &_attr_value[_attr_value.length() - 2];
02257 return 1;
02258
02259 } else {
02260 return 0;
02261 }
02262 }
02263
02264 int
02265 HtmlParser::ConstructURL(char **url, char **url_end)
02266 {
02267 unsigned char *p_url = (unsigned char *) *url;
02268 unsigned char *p_url_end = (unsigned char *) *url_end;
02269
02270
02271
02272
02273 while (p_url < p_url_end) {
02274 if (isspace(*p_url)) {
02275 ++p_url;
02276 } else {
02277 break;
02278 }
02279 }
02280
02281
02282
02283
02284 int relative_URL = 0;
02285 int http_needed = 0;
02286 if (ValidProtoScheme((char *) p_url)) {
02287 if (!strncasecmp((char *) p_url, "http:", 5)
02288 && (strncasecmp((char *) p_url, "http://", 7) != 0)) {
02289
02290
02291
02292
02293
02294 p_url += strlen("http:");
02295 if (p_url > p_url_end) {
02296 return 0;
02297 }
02298 relative_URL = 1;
02299 }
02300 } else {
02301 relative_URL = 1;
02302
02303 if (strncasecmp((char *) p_url, "//", 2) == 0)
02304 http_needed = 1;
02305 }
02306
02307
02308
02309
02310 if (!relative_URL && !ValidSupportedProtoScheme((char *) p_url)) {
02311 return 0;
02312 }
02313
02314 if (relative_URL) {
02315
02316
02317
02318 DynArray<char>*base = 0;
02319 DynArray<char>*absolute_url = 0;
02320
02321 if (http_needed) {
02322 absolute_url = PrependString("http:", 5, (char *) p_url, (p_url_end - p_url + 2));
02323 } else if (_html_doc_base.length()) {
02324
02325
02326
02327 base = MakeURL(_url, _html_doc_base, _html_doc_base.length(), !ValidProtoScheme(_html_doc_base));
02328 absolute_url = MakeURL(*base, (char *) p_url, (p_url_end - p_url + 2), 1);
02329 } else {
02330 absolute_url = MakeURL(_url, (char *) p_url, (p_url_end - p_url + 2), 1);
02331 }
02332 _result.clear();
02333 _result = *absolute_url;
02334 absolute_url->detach();
02335
02336
02337 delete absolute_url;
02338 if (base)
02339 delete base;
02340
02341 *url = &_result[(intptr_t)0];
02342 *url_end = &_result[_result.length() - 3];
02343
02344
02345 } else {
02346 *url = (char *) p_url;
02347 *url_end = (char *) p_url_end;
02348 }
02349
02350
02351
02352
02353
02354
02355
02356 if (!strncasecmp((char *) (p_url_end - 4), ".html", 5)
02357 || !strncasecmp((char *) (p_url_end - 3), ".htm", 4)
02358 || !strncasecmp((char *) (p_url_end), "/", 1)) {
02359 return 1;
02360 } else {
02361 return -1;
02362 }
02363 }
02364
02365 DynArray<char>*
02366 HtmlParser::MakeURL(char *url, char *sub, int subsize, int relative_url)
02367 {
02368 int i, n;
02369 int skip_slashslash;
02370
02371 DynArray<char>*result = new DynArray<char>(&default_zero_char, 128);
02372
02373 if (relative_url) {
02374 if (*sub != '/') {
02375
02376 int url_len = strlen(url);
02377
02378
02379 for (i = url_len; i && url[i] != '/'; i--);
02380
02381 if (i && (url[i] == url[i - 1])) {
02382
02383
02384 for (n = 0; n < url_len; ++n) {
02385 (*result) (result->length()) = url[n];
02386 }
02387 (*result) (result->length()) = '/';
02388
02389 } else {
02390 for (n = 0; n < (i + 1); ++n) {
02391 (*result) (result->length()) = url[n];
02392 }
02393 }
02394
02395 for (n = 0; n < subsize; ++n) {
02396 (*result) (result->length()) = sub[n];
02397 }
02398 (*result) (result->length()) = '\0';
02399
02400 } else {
02401 i = 0;
02402 do {
02403
02404 for (; url[i] && url[i] != '/'; i++);
02405
02406 if (!url[i]) {
02407 break;
02408 }
02409
02410 skip_slashslash = ((url[i] == url[i + 1]) && (url[i + 1] == '/'));
02411
02412 if (skip_slashslash) {
02413 i += 2;
02414 }
02415 } while (skip_slashslash);
02416
02417 for (n = 0; n < (i - 1); ++n) {
02418 (*result) (result->length()) = url[n];
02419 }
02420
02421 if (url[n] != '/') {
02422 (*result) (result->length()) = url[n];
02423 }
02424
02425 for (n = 0; n < subsize; ++n) {
02426 (*result) (result->length()) = sub[n];
02427 }
02428 (*result) (result->length()) = '\0';
02429 }
02430
02431 } else {
02432 for (n = 0; n < subsize; ++n) {
02433 (*result) (result->length()) = sub[n];
02434 }
02435 (*result) (result->length()) = '\0';
02436 }
02437 return result;
02438 }
02439
02440 DynArray<char>*
02441 HtmlParser::PrependString(const char *pre, int presize, char *sub, int subsize)
02442 {
02443 int n;
02444
02445 DynArray<char>*result = new DynArray<char>(&default_zero_char, 128);
02446
02447 for (n = 0; n < presize; ++n) {
02448 (*result) (result->length()) = pre[n];
02449 }
02450 for (n = 0; n < subsize; ++n) {
02451 (*result) (result->length()) = sub[n];
02452 }
02453 (*result) (result->length()) = '\0';
02454
02455 return result;
02456 }
02457
02458
02459
02460
02461
02462 ClassAllocator<ObjectReloadCont> ObjectReloadContAllocator("ObjectReloadCont");
02463
02464 ObjectReloadCont::ObjectReloadCont():Continuation(0),
02465 _caller_cont(0), _request_id(0), _send_data(0),
02466 _receive_data(0), _start_event(0),
02467 _state(START), _cur_action(0), _netvc(0), _write_vio(0), _read_vio(0), _read_event_callback(0)
02468 {
02469 SET_HANDLER((ObjectReloadContHandler) & ObjectReloadCont::ObjectReloadEvent);
02470 }
02471
02472 ObjectReloadCont::~ObjectReloadCont()
02473 {
02474 }
02475
02476 void
02477 ObjectReloadCont::Init(Continuation * cont, char *url, int url_len,
02478 char *headers, int headers_len, int http_case, int read_event_callback)
02479 {
02480 int total_len;
02481
02482 mutex = new_ProxyMutex();
02483 _caller_cont = cont;
02484 _request_id = ink_atomic_increment(&global_id, 1);
02485 _read_event_callback = read_event_callback;
02486
02487
02488
02489
02490 if (http_case) {
02491 if (headers_len) {
02492 total_len = len_GET_METHOD + url_len + len_HTTP_VERSION + len_TERMINATOR + headers_len + len_REQUEST_TERMINATOR;
02493 } else {
02494 total_len = len_GET_METHOD + url_len + len_HTTP_VERSION + len_REQUEST_TERMINATOR;
02495 }
02496 _send_data = new_MIOBuffer(buffer_size_to_index(total_len + 1));
02497
02498 memcpy(_send_data->end(), GET_METHOD, len_GET_METHOD);
02499 memcpy(&(_send_data->end())[len_GET_METHOD], url, url_len);
02500 memcpy(&(_send_data->end())[len_GET_METHOD + url_len], HTTP_VERSION, len_HTTP_VERSION);
02501
02502 if (headers_len) {
02503 memcpy(&(_send_data->end())[len_GET_METHOD + url_len + len_HTTP_VERSION], TERMINATOR, len_TERMINATOR);
02504 memcpy(&(_send_data->end())[len_GET_METHOD + url_len + len_HTTP_VERSION + len_TERMINATOR], headers, headers_len);
02505 memcpy(&(_send_data->end())[len_GET_METHOD + url_len +
02506 len_HTTP_VERSION + len_TERMINATOR +
02507 headers_len], REQUEST_TERMINATOR, len_REQUEST_TERMINATOR);
02508
02509
02510 (_send_data->end())[len_GET_METHOD + url_len +
02511 len_HTTP_VERSION + len_TERMINATOR + headers_len + len_REQUEST_TERMINATOR] = 0;
02512 } else {
02513 memcpy(&(_send_data->end())[len_GET_METHOD + url_len +
02514 len_HTTP_VERSION], REQUEST_TERMINATOR, len_REQUEST_TERMINATOR);
02515
02516
02517 (_send_data->end())[len_GET_METHOD + url_len + len_HTTP_VERSION + len_REQUEST_TERMINATOR] = 0;
02518 }
02519 _send_data->fill(total_len);
02520
02521 } else {
02522
02523 ink_assert(false);
02524 }
02525 handleEvent(EVENT_IMMEDIATE, (void *) NULL);
02526 }
02527
02528 void
02529 ObjectReloadCont::free()
02530 {
02531 mutex = 0;
02532 if (_send_data) {
02533 free_MIOBuffer(_send_data);
02534 _send_data = 0;
02535 }
02536 if (_receive_data) {
02537 free_MIOBuffer(_receive_data);
02538 _receive_data = 0;
02539 }
02540 }
02541
02542 int
02543 ObjectReloadCont::ObjectReloadEvent(int event, void *d)
02544 {
02545 switch (_state) {
02546 case START:
02547 {
02548 IpEndpoint target;
02549
02550 Debug("update-reload", "Connect start id=%d", _request_id);
02551 _state = ObjectReloadCont::ATTEMPT_CONNECT;
02552 MUTEX_TRY_LOCK(lock, this->mutex, this_ethread());
02553 ink_release_assert(lock);
02554 target.setToLoopback(AF_INET);
02555 target.port() = htons(HttpProxyPort::findHttp(AF_INET)->m_port);
02556 _cur_action = netProcessor.connect_re(this, &target.sa);
02557 return EVENT_DONE;
02558 }
02559 case ATTEMPT_CONNECT:
02560 {
02561 if (event != NET_EVENT_OPEN) {
02562
02563 Debug("update-reload", "Connect fail id=%d", _request_id);
02564 CallBackUser(event, 0);
02565 free();
02566 ObjectReloadContAllocator.free(this);
02567 return EVENT_DONE;
02568 }
02569 _netvc = (class NetVConnection *) d;
02570
02571
02572 Debug("update-reload", "Write start id=%d [%s]", _request_id, _send_data->start());
02573 _state = ObjectReloadCont::WRITING_URL;
02574 IOBufferReader *r = _send_data->alloc_reader();
02575 _write_vio = _netvc->do_io_write(this, r->read_avail(), r);
02576 return EVENT_DONE;
02577 }
02578 case WRITING_URL:
02579 {
02580 ink_release_assert(_write_vio == (VIO *) d);
02581 if (event == VC_EVENT_WRITE_READY) {
02582 _write_vio->reenable();
02583 return EVENT_DONE;
02584 } else if (event == VC_EVENT_WRITE_COMPLETE) {
02585
02586 Debug("update-reload", "Read start id=%d", _request_id);
02587 _state = ObjectReloadCont::READING_DATA;
02588 _receive_data = new_MIOBuffer(max_iobuffer_size);
02589 _receive_data_reader = _receive_data->alloc_reader();
02590 _read_vio = _netvc->do_io_read(this, INT64_MAX, _receive_data);
02591 return EVENT_DONE;
02592 } else {
02593
02594 Debug("update-reload", "Write fail id=%d", _request_id);
02595 _netvc->do_io(VIO::CLOSE);
02596 CallBackUser(event, 0);
02597 free();
02598 ObjectReloadContAllocator.free(this);
02599 return EVENT_DONE;
02600 }
02601 }
02602 case READING_DATA:
02603 {
02604 ink_release_assert(_read_vio == (VIO *) d);
02605 switch (event) {
02606 case VC_EVENT_READ_READY:
02607 {
02608 if (_read_event_callback) {
02609 _caller_cont->handleEvent(event, _receive_data_reader);
02610
02611 } else {
02612 int64_t read_bytes = _receive_data_reader->read_avail();
02613 _receive_data_reader->consume(read_bytes);
02614 _read_vio->reenable();
02615 }
02616 return EVENT_CONT;
02617 }
02618 case VC_EVENT_READ_COMPLETE:
02619 case VC_EVENT_EOS:
02620 {
02621 if (_read_event_callback) {
02622 _caller_cont->handleEvent(event, _receive_data_reader);
02623 }
02624
02625 Debug("update-reload", "Fill success id=%d", _request_id);
02626 break;
02627 }
02628 default:
02629 {
02630 Debug("update-reload", "Fill read fail id=%d", _request_id);
02631 CallBackUser(event, 0);
02632 break;
02633 }
02634 }
02635
02636 _netvc->do_io(VIO::CLOSE);
02637 free();
02638 ObjectReloadContAllocator.free(this);
02639 return EVENT_DONE;
02640 }
02641 default:
02642 {
02643 ink_release_assert(!"ObjectReloadEvent invalid state");
02644 }
02645
02646 }
02647 return 0;
02648 }
02649
02650 int
02651 ObjectReloadCont::CallBackUser(int event, void *d)
02652 {
02653 _caller_cont->handleEvent(event, d);
02654 return 0;
02655 }
02656
02657