00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 
00023 
00024 #include "libts.h"
00025 
00026 #include "Main.h"
00027 #include "Update.h"
00028 #include "ProxyConfig.h"
00029 #include "StatSystem.h"
00030 #include "HttpUpdateSM.h"
00031 #include "HttpDebugNames.h"
00032 #include "URL.h"
00033 #include "HdrUtils.h"
00034 #include <records/I_RecHttp.h>
00035 #include "I_Layout.h"
00036 
// Raw statistics block for the update subsystem. It is allocated in
// UpdateConfigManager::init() and used by every UPDATE_*_DYN_STAT macro below.
RecRawStatBlock *update_rsb;

// Wrapper around REC_EstablishStaticConfigInteger. NOTE: the expansion already
// ends in ';' and the trailing line continuation swallows the following blank
// line, so that blank line must stay in place.
#define UpdateEstablishStaticConfigInteger(_ix,_n) \
  REC_EstablishStaticConfigInteger(_ix,_n); \

// Add +1 / -1 to raw stat `x` in update_rsb. Both macros reference
// `mutex->thread_holding`, so they only compile where a `mutex` is in scope.
// NOTE: the expansions carry their own trailing ';'.
#define UPDATE_INCREMENT_DYN_STAT(x) \
        RecIncrRawStat(update_rsb, mutex->thread_holding, (int) x, 1);
#define UPDATE_DECREMENT_DYN_STAT(x) \
        RecIncrRawStat(update_rsb, mutex->thread_holding, (int) x, -1);
// Read the count of stat `x` into C and its sum into S. This expands to two
// separate statements, so it is not safe as the body of an unbraced if/else.
#define UPDATE_READ_DYN_STAT(x, C, S) \
        RecGetRawStatCount(update_rsb, (int) x, &C); \
        RecGetRawStatSum(update_rsb, (int) x, &S);

// Reset both the sum and the count of stat `x` to zero.
// NOTE(review): the ';' after `while (0)` means `UPDATE_CLEAR_DYN_STAT(x);`
// expands to two statements; avoid using it as an unbraced if/else body.
#define UPDATE_CLEAR_DYN_STAT(x) \
do { \
        RecSetRawStatSum(update_rsb, x, 0); \
        RecSetRawStatCount(update_rsb, x, 0); \
} while (0);

// Local aliases for the records (REC_*) configuration API.
#define UPDATE_ConfigReadInteger         REC_ConfigReadInteger
#define UPDATE_ConfigReadString          REC_ConfigReadString
#define UPDATE_RegisterConfigUpdateFunc  REC_RegisterConfigUpdateFunc
00059 
00060 
00061 
00062 
00063 
// Fixed text fragments used by UpdateEntry::BuildHttpRequest() to assemble the
// request line and header block of the HTTP/1.0 GET request.
static const char *const GET_METHOD = "GET ";
static const char *const HTTP_VERSION = " HTTP/1.0";
static const char *const REQUEST_TERMINATOR = "\r\n\r\n";
static const char *const TERMINATOR = "\r\n";
// HTML comment delimiters (not referenced in the code visible in this part of
// the file).
static const char *const HTML_COMMENT_TAG = "!--";
static const char *const HTML_COMMENT_END = "-->";
// Size of the line buffer in ParseConfigFile() and of the request buffer in
// BuildHttpRequest().
static const int MAX_LINE_LENGTH = (32 * 1024);



// Cached strlen() of the fragments above. They stay 0 until
// UpdateManager::start() fills them in.
static int len_GET_METHOD = 0;
static int len_HTTP_VERSION = 0;
static int len_REQUEST_TERMINATOR = 0;
static int len_TERMINATOR = 0;
00078 
// Table of {HTML tag, attribute} pairs, terminated by a {NULL, NULL} sentinel.
// NOTE(review): presumably these are the tag/attribute combinations whose
// values are followed as links during recursive updates -- the consumer is not
// visible in this part of the file; confirm against the HTML scanning code.
struct html_tag update_allowable_html_tags[] = {
  {"a", "href"},
  {"img", "src"},
  {"img", "href"},
  {"body", "background"},
  {"frame", "src"},
  {"iframe", "src"},
  {"fig", "src"},
  {"overlay", "src"},
  {"applet", "code"},
  {"script", "src"},
  {"embed", "src"},
  {"bgsound", "src"},
  {"area", "href"},
  {"base", "href"},             
  {"meta", "content"},          
  {NULL, NULL}
};
00097 
// One entry of a URL-scheme table: the scheme prefix text (including the
// trailing ':') and its length.
struct schemes_descriptor
{
  const char *tag;              // scheme prefix, e.g. "http:"; NULL marks the end of a table
  int tag_len;                  // strlen(tag); filled in at startup by the init_*_schemes() functions
};
00103 
// URL scheme prefixes known to this file, terminated by a {NULL, 0} sentinel.
// Every tag_len starts at 0 and is set to strlen(tag) by init_proto_schemes().
struct schemes_descriptor proto_schemes[] = {
  {"cid:", 0},
  {"clsid:", 0},
  {"file:", 0},
  {"finger:", 0},
  {"ftp:", 0},
  {"gopher:", 0},
  {"hdl:", 0},
  {"http:", 0},
  {"https:", 0},
  {"ilu:", 0},
  {"ior:", 0},
  {"irc:", 0},
  {"java:", 0},
  {"javascript:", 0},
  {"lifn:", 0},
  {"mailto:", 0},
  {"mid:", 0},
  {"news:", 0},
  {"path:", 0},
  {"prospero:", 0},
  {"rlogin:", 0},
  {"service:", 0},
  {"shttp:", 0},
  {"snews:", 0},
  {"stanf:", 0},
  {"telnet:", 0},
  {"tn3270:", 0},
  {"wais:", 0},
  {"whois++:", 0},
  {NULL, 0}
};
00136 
00137 struct schemes_descriptor supported_proto_schemes[] = {
00138   {"http:",},
00139   {NULL, 0}
00140 };
00141 
// Source of UpdateEntry ids; UpdateEntry::Init() bumps it with
// ink_atomic_increment().
static int global_id = 1;
00143 
00144 void
00145 init_proto_schemes()
00146 {
00147   int n;
00148   for (n = 0; proto_schemes[n].tag; ++n) {
00149     proto_schemes[n].tag_len = strlen(proto_schemes[n].tag);
00150   }
00151 }
00152 
00153 void
00154 init_supported_proto_schemes()
00155 {
00156   int n;
00157   for (n = 0; supported_proto_schemes[n].tag; ++n) {
00158     supported_proto_schemes[n].tag_len = strlen(supported_proto_schemes[n].tag);
00159   }
00160 }
00161 
00162 
00163 
00164 
00165 
00166 
00167 UpdateConfigParams::UpdateConfigParams():
00168 _enabled(0), _immediate_update(0), _retry_count(0),
00169 _retry_interval(0), _concurrent_updates(0), _max_update_state_machines(0), _memory_use_in_mb(0)
00170 {
00171 }
00172 
00173 UpdateConfigParams::UpdateConfigParams(UpdateConfigParams & p)
00174 {
00175   _enabled = p._enabled;
00176   _immediate_update = p._immediate_update;
00177   _retry_count = p._retry_count;
00178   _retry_interval = p._retry_interval;
00179   _concurrent_updates = p._concurrent_updates;
00180   _max_update_state_machines = p._max_update_state_machines;
00181   _memory_use_in_mb = p._memory_use_in_mb;
00182 }
00183 
// Destructor: the body is empty, there is nothing to release here.
UpdateConfigParams::~UpdateConfigParams()
{
}
00187 
00188 UpdateConfigParams & UpdateConfigParams::operator=(UpdateConfigParams & p)
00189 {
00190   _enabled = p._enabled;
00191   _immediate_update = p._immediate_update;
00192   _retry_count = p._retry_count;
00193   _retry_interval = p._retry_interval;
00194   _concurrent_updates = p._concurrent_updates;
00195   _max_update_state_machines = p._max_update_state_machines;
00196   _memory_use_in_mb = p._memory_use_in_mb;
00197   return *this;
00198 }
00199 
00200 int
00201 UpdateConfigParams::operator==(UpdateConfigParams & p)
00202 {
00203   if (_enabled != p._enabled)
00204     return 0;
00205   if (_immediate_update != p._immediate_update)
00206     return 0;
00207   if (_retry_count != p._retry_count)
00208     return 0;
00209   if (_retry_interval != p._retry_interval)
00210     return 0;
00211   if (_concurrent_updates != p._concurrent_updates)
00212     return 0;
00213   if (_max_update_state_machines != p._max_update_state_machines)
00214     return 0;
00215   if (_memory_use_in_mb != p._memory_use_in_mb)
00216     return 0;
00217   return 1;
00218 }
00219 
00220 
00221 
00222 
00223 
00224 
00225 UpdateEntry::UpdateEntry():_group_link(0), _hash_link(0), _id(0), _url(0),
00226 _URLhandle(), _terminal_url(0),
00227 _request_headers(0), _num_request_headers(0),
00228 _http_hdr(0),
00229 _offset_hour(0), _interval(0), _max_depth(0), _start_time(0), _expired(0), _scheme_index(-1), _update_event_status(0)
00230 {
00231   http_parser_init(&_http_parser);
00232 }
00233 
00234 UpdateEntry::~UpdateEntry()
00235 {
00236   ats_free(_url);
00237   _url = NULL;
00238 
00239   if (_URLhandle.valid()) {
00240     _URLhandle.destroy();
00241   }
00242 
00243   ats_free(_request_headers);
00244   _request_headers = NULL;
00245 
00246   
00247   if (_http_hdr && _http_hdr->valid()) {
00248     _http_hdr->destroy();
00249     delete _http_hdr;
00250     _http_hdr = NULL;
00251   }
00252   _indirect_list = NULL;
00253 }
00254 
00255 void
00256 UpdateEntry::Init(int derived_url)
00257 {
00258   _id = ink_atomic_increment(&global_id, 1);
00259   if (derived_url) {
00260     return;
00261   }
00262   ComputeScheduleTime();
00263 
00264   int scheme_len;
00265   const char *scheme = _URLhandle.scheme_get(&scheme_len);
00266   if (scheme != URL_SCHEME_HTTP) {
00267     
00268     _max_depth = 0;
00269   }
00270 
00271 }
00272 
00273 int
00274 UpdateEntry::ValidURL(char *s, char *e)
00275 {
00276   
00277 
00278   const char *url_start = s;
00279   char *url_end = e;
00280   int err;
00281 
00282   _URLhandle.create(NULL);
00283   err = _URLhandle.parse(&url_start, url_end);
00284   if (err >= 0) {
00285     _url = ats_strdup(s);
00286     return 0;                   
00287   } else {
00288     _URLhandle.destroy();
00289     return 1;                   
00290   }
00291   return 0;
00292 }
00293 
// Scan the request-headers field `s`, counting headers in
// _num_request_headers, and store a copy of `s` in _request_headers.
//
// Headers are expected as "name:value" items separated by "\r\n"; the last
// item ends at the terminating NUL.
//
// Returns 0 when the field is accepted, 1 when it is rejected. The caller
// (ParseConfigFile) treats a non-zero return as "invalid headers field".
int
UpdateEntry::ValidHeaders(char *s)
{
  // Small state machine driven one character at a time.

  enum
  {
    FIND_START_OF_HEADER_NAME = 1,
    SCAN_FOR_HEADER_NAME,
    SCAN_FOR_END_OF_HEADER_VALUE
  };

  char *p = s;
  char *t;
  int bad_header = 0;
  int end_of_headers = 0;
  int scan_state = FIND_START_OF_HEADER_NAME;

  while (*p) {
    // Every path through every case ends in `break`, so no case falls through
    // into the next one.
    switch (scan_state) {
    case FIND_START_OF_HEADER_NAME:
      {
        // The first character of a header must be a valid name character.
        if (!ValidHeaderNameChar(*p)) {
          bad_header = 1;
          break;
        } else {
          scan_state = SCAN_FOR_HEADER_NAME;
          break;
        }
      }
    case SCAN_FOR_HEADER_NAME:
      {
        // Consume name characters; ':' ends the name, anything else invalid
        // is an error.
        if (!ValidHeaderNameChar(*p)) {
          if (*p == ':') {
            scan_state = SCAN_FOR_END_OF_HEADER_VALUE;
            break;
          } else {
            bad_header = 1;
            break;
          }
        } else {
          // Still inside the header name.
          break;
        }
      }
    case SCAN_FOR_END_OF_HEADER_VALUE:
      {
        // Jump straight to the end of the value: the next '\r' (which must be
        // followed by '\n') or, failing that, the end of the string.
        t = strchr(p, '\r');
        if (t) {
          if (*(t + 1) == '\n') {
            // Leave p on the '\n'; the ++p at the bottom of the loop then
            // moves to the first character of the next header.
            p = t + 1;
            ++_num_request_headers;
            scan_state = FIND_START_OF_HEADER_NAME;
            break;
          } else {
            bad_header = 1;
            break;
          }
        } else {
          // strchr(p, 0) returns the terminating NUL, so the else branch
          // below is not expected to be taken.
          t = strchr(p, 0);
          if (t) {
            ++_num_request_headers;
            end_of_headers = 1;
          } else {
            bad_header = 1;
          }
          break;
        }
      }
    }                           // end of switch

    if (bad_header) {
      if (_num_request_headers) {
        return 1;               // error after at least one complete header
      } else {
        if (p == s) {
          // Very first character is not a name character: accepted, and
          // _request_headers is left unset.
          return 0;             
        } else {
          return 1;             // error inside the first header
        }
      }
    } else {
      if (end_of_headers) {
        break;
      } else {
        ++p;
      }
    }
  }

  // NOTE(review): the loop can also end while still in SCAN_FOR_HEADER_NAME
  // (a name with no ':'); that input is accepted here with
  // _num_request_headers unchanged -- confirm this is intended.

  _request_headers = ats_strdup(s);
  return 0;                     // accepted
}
00389 
00390 int
00391 UpdateEntry::BuildHttpRequest()
00392 {
00393   
00394   
00395 
00396   char request[MAX_LINE_LENGTH];
00397   int request_size;
00398 
00399   request_size = len_GET_METHOD + strlen(_url) +
00400     len_HTTP_VERSION + (_request_headers ? len_TERMINATOR + strlen(_request_headers) : 0) + len_REQUEST_TERMINATOR + 1;
00401   if (request_size > MAX_LINE_LENGTH) {
00402     return 1;
00403   }
00404   if (_request_headers) {
00405     snprintf(request, sizeof(request), "%s%s%s%s%s%s", GET_METHOD, _url,
00406              HTTP_VERSION, TERMINATOR, _request_headers, REQUEST_TERMINATOR);
00407   } else {
00408     snprintf(request, sizeof(request), "%s%s%s%s", GET_METHOD, _url, HTTP_VERSION, REQUEST_TERMINATOR);
00409   }
00410   _http_hdr = new HTTPHdr;
00411   http_parser_init(&_http_parser);
00412   _http_hdr->create(HTTP_TYPE_REQUEST);
00413   int err;
00414   const char *start = request;
00415   const char *end = start + request_size - 1;
00416 
00417   while (start < end) {
00418     err = _http_hdr->parse_req(&_http_parser, &start, end, false);
00419     if (err != PARSE_CONT) {
00420       break;
00421     }
00422     end = start + strlen(start);
00423   }
00424   http_parser_clear(&_http_parser);
00425   return 0;
00426 }
00427 
00428 int
00429 UpdateEntry::ValidHeaderNameChar(char c)
00430 {
00431   if ((c > 31) && (c < 127)) {
00432     if (ValidSeparatorChar(c)) {
00433       return 0;                 
00434     } else {
00435       return 1;                 
00436     }
00437   } else {
00438     return 0;                   
00439   }
00440 }
00441 
00442 int
00443 UpdateEntry::ValidSeparatorChar(char c)
00444 {
00445   switch (c) {
00446   case '(':
00447   case ')':
00448   case '<':
00449   case '>':
00450   case '@':
00451   case ',':
00452   case ';':
00453   case ':':
00454   case '\\':
00455   case '"':
00456   case '/':
00457   case '[':
00458   case ']':
00459   case '?':
00460   case '=':
00461   case '{':
00462   case '}':
00463   case ' ':
00464   case '\t':
00465     return 1;                   
00466   default:
00467     return 0;
00468   }
00469 }
00470 
00471 int
00472 UpdateEntry::ValidHour(char *s)
00473 {
00474   
00475 
00476   _offset_hour = atoi(s);
00477   if ((_offset_hour >= MIN_OFFSET_HOUR) && (_offset_hour <= MAX_OFFSET_HOUR)) {
00478     return 0;                   
00479   } else {
00480     return 1;                   
00481   }
00482 }
00483 
00484 int
00485 UpdateEntry::ValidInterval(char *s)
00486 {
00487   
00488 
00489   _interval = atoi(s);
00490   if ((_interval >= MIN_INTERVAL) && (_interval <= MAX_INTERVAL)) {
00491     return 0;                   
00492   } else {
00493     return 1;                   
00494   }
00495   return 0;
00496 }
00497 
00498 int
00499 UpdateEntry::ValidDepth(char *s)
00500 {
00501   
00502 
00503   _max_depth = atoi(s);
00504 
00505   if ((_max_depth >= MIN_DEPTH) && (_max_depth <= MAX_DEPTH)) {
00506     return 0;                   
00507   } else {
00508     return 1;                   
00509   }
00510   return 0;
00511 }
00512 
00513 void
00514 UpdateEntry::SetTerminalStatus(int term_url)
00515 {
00516   _terminal_url = term_url;
00517 }
00518 
00519 int
00520 UpdateEntry::TerminalURL()
00521 {
00522   return _terminal_url;
00523 }
00524 
00525 
00526 void
00527 UpdateEntry::ComputeScheduleTime()
00528 {
00529   ink_hrtime ht;
00530   time_t cur_time;
00531   struct tm cur_tm;
00532 
00533   if (_expired) {
00534     _expired = 0;
00535   } else {
00536     if (_start_time) {
00537       return;
00538     }
00539   }
00540 
00541   ht = ink_get_based_hrtime();
00542   cur_time = ht / HRTIME_SECOND;
00543 
00544   if (!_start_time) {
00545     time_t zero_hour; 
00546 
00547     
00548     
00549     ink_localtime_r(&cur_time, &cur_tm);
00550     cur_tm.tm_hour = _offset_hour;
00551     cur_tm.tm_min = 0;
00552     cur_tm.tm_sec = 0;
00553     
00554     zero_hour = convert_tm(&cur_tm);
00555     
00556     if (zero_hour > cur_time) zero_hour -= 24 * SECONDS_PER_HOUR;
00557     _start_time = cur_time + (_interval - ((cur_time - zero_hour) % _interval));
00558   } else {
00559     
00560     _start_time += _interval;
00561   }
00562 }
00563 
00564 int
00565 UpdateEntry::ScheduleNow(time_t cur_time)
00566 {
00567   if (cur_time >= _start_time) {
00568     _expired = 1;
00569     return 1;
00570   } else {
00571     return 0;
00572   }
00573 }
00574 
00575 
00576 
00577 
00578 
00579 UpdateConfigList::UpdateConfigList():_entry_q_elements(0), _pending_q_elements(0), _hash_table(0)
00580 {
00581 }
00582 
00583 UpdateConfigList::~UpdateConfigList()
00584 {
00585   if (_hash_table) {
00586     delete[]_hash_table;
00587     _hash_table = NULL;
00588   }
00589 }
00590 
00591 void
00592 UpdateConfigList::Add(UpdateEntry * e)
00593 {
00594   _entry_q_elements++;
00595   _entry_q.enqueue(e);
00596 }
00597 
00598 int
00599 UpdateConfigList::HashAdd(UpdateEntry * e)
00600 {
00601   uint64_t folded64 = e->_url_md5.fold();
00602   ink_assert(folded64);
00603   int32_t index = folded64 % HASH_TABLE_SIZE;
00604 
00605   if (!_hash_table) {
00606     
00607 
00608     _hash_table = new UpdateEntry *[HASH_TABLE_SIZE];
00609     memset((char *) _hash_table, 0, (sizeof(UpdateEntry *) * HASH_TABLE_SIZE));
00610   }
00611   
00612 
00613   UpdateEntry *he = _hash_table[index];
00614   UpdateEntry **last_link = &_hash_table[index];
00615 
00616   while (he) {
00617     if (e->_url_md5 == he->_url_md5) {
00618       return 1;                 
00619     } else {
00620       last_link = &he->_hash_link;
00621       he = he->_hash_link;
00622     }
00623   }
00624 
00625   
00626 
00627   e->_hash_link = *last_link;
00628   *last_link = e;
00629 
00630   
00631 
00632   Add(e);
00633 
00634   return 0;                     
00635 }
00636 
00637 UpdateEntry *
00638 UpdateConfigList::Remove()
00639 {
00640   UpdateEntry *e = _entry_q.dequeue();
00641   if (e) {
00642     _entry_q_elements--;
00643   }
00644   return e;
00645 }
00646 
00647 void
00648 UpdateConfigList::AddPending(UpdateEntry * e)
00649 {
00650   _pending_q_elements++;
00651   _pending_q.enqueue(e);
00652 }
00653 
00654 UpdateEntry *
00655 UpdateConfigList::RemovePending()
00656 {
00657   UpdateEntry *e = _pending_q.dequeue();
00658   if (e) {
00659     _pending_q_elements--;
00660   }
00661   return e;
00662 }
00663 
00664 
00665 
00666 
00667 
00668 
00669 UpdateManager::UpdateManager():_CM(0), _SCH(0)
00670 {
00671 }
00672 
// Destructor: the body is empty; the objects created in start() are not
// released here.
UpdateManager::~UpdateManager()
{
}
00676 
00677 int
00678 UpdateManager::start()
00679 {
00680   
00681 
00682   len_GET_METHOD = strlen(GET_METHOD);
00683   len_HTTP_VERSION = strlen(HTTP_VERSION);
00684   len_REQUEST_TERMINATOR = strlen(REQUEST_TERMINATOR);
00685   len_TERMINATOR = strlen(TERMINATOR);
00686   init_proto_schemes();
00687   init_supported_proto_schemes();
00688 
00689   _CM = new UpdateConfigManager;
00690   _CM->init();
00691 
00692   _SCH = new UpdateScheduler(_CM);
00693   _SCH->Init();
00694 
00695   return 0;
00696 }
00697 
// The single file-scope UpdateManager instance.
UpdateManager updateManager;

// Member-function pointer type used when installing UpdateConfigManager
// handlers with SET_HANDLER.
typedef int (UpdateConfigManager::*UpdateConfigManagerContHandler) (int, void *);
00701 
00702 
00703 
00704 
00705 UpdateConfigManager::UpdateConfigManager()
00706 :Continuation(new_ProxyMutex()), _periodic_event(0), _filename(0)
00707 {
00708   SET_HANDLER((UpdateConfigManagerContHandler)
00709               & UpdateConfigManager::ProcessUpdate);
00710 }
00711 
// Destructor: the body is empty; it performs no cleanup.
UpdateConfigManager::~UpdateConfigManager()
{
}
00715 
00716 int
00717 UpdateConfigManager::init()
00718 {
00719   update_rsb = RecAllocateRawStatBlock((int) update_stat_count);
00720 
00721   _CP_actual = new UpdateConfigParams;
00722 
00723   
00724 
00725   UpdateEstablishStaticConfigInteger(_CP_actual->_enabled, "proxy.config.update.enabled");
00726 
00727   UpdateEstablishStaticConfigInteger(_CP_actual->_immediate_update, "proxy.config.update.force");
00728 
00729   UpdateEstablishStaticConfigInteger(_CP_actual->_retry_count, "proxy.config.update.retry_count");
00730 
00731   UpdateEstablishStaticConfigInteger(_CP_actual->_retry_interval, "proxy.config.update.retry_interval");
00732 
00733   UpdateEstablishStaticConfigInteger(_CP_actual->_concurrent_updates, "proxy.config.update.concurrent_updates");
00734 
00735   UpdateEstablishStaticConfigInteger(_CP_actual->_max_update_state_machines,
00736                                      "proxy.config.update.max_update_state_machines");
00737 
00738   UpdateEstablishStaticConfigInteger(_CP_actual->_memory_use_in_mb, "proxy.config.update.memory_use_mb");
00739 
00740   
00741 
00742   RecRegisterRawStat(update_rsb, RECT_PROCESS,
00743                      "proxy.process.update.successes",
00744                      RECD_INT, RECP_NON_PERSISTENT, (int) update_successes_stat, RecRawStatSyncCount);
00745   UPDATE_CLEAR_DYN_STAT(update_successes_stat);
00746 
00747   RecRegisterRawStat(update_rsb, RECT_PROCESS,
00748                      "proxy.process.update.no_actions",
00749                      RECD_INT, RECP_NON_PERSISTENT, (int) update_no_actions_stat, RecRawStatSyncCount);
00750   UPDATE_CLEAR_DYN_STAT(update_no_actions_stat);
00751 
00752   RecRegisterRawStat(update_rsb, RECT_PROCESS,
00753                      "proxy.process.update.fails",
00754                      RECD_INT, RECP_NON_PERSISTENT, (int) update_fails_stat, RecRawStatSyncCount);
00755   UPDATE_CLEAR_DYN_STAT(update_fails_stat);
00756 
00757   RecRegisterRawStat(update_rsb, RECT_PROCESS,
00758                      "proxy.process.update.unknown_status",
00759                      RECD_INT, RECP_NON_PERSISTENT, (int) update_unknown_status_stat, RecRawStatSyncCount);
00760   UPDATE_CLEAR_DYN_STAT(update_unknown_status_stat);
00761 
00762   RecRegisterRawStat(update_rsb, RECT_PROCESS,
00763                      "proxy.process.update.state_machines",
00764                      RECD_INT, RECP_NON_PERSISTENT, (int) update_state_machines_stat, RecRawStatSyncCount);
00765   UPDATE_CLEAR_DYN_STAT(update_state_machines_stat);
00766 
00767   Debug("update",
00768         "Update params: enable %" PRId64" force %" PRId64" rcnt %" PRId64" rint %" PRId64" updates %" PRId64" "
00769         "max_sm %" PRId64" mem %" PRId64"",
00770         _CP_actual->_enabled, _CP_actual->_immediate_update,
00771         _CP_actual->_retry_count, _CP_actual->_retry_interval,
00772         _CP_actual->_concurrent_updates, _CP_actual->_max_update_state_machines, _CP_actual->_memory_use_in_mb);
00773 
00774   
00775 
00776   _CP = new UpdateConfigParams(*_CP_actual);
00777 
00778   
00779 
00780   SetFileName((char *) "update.config");
00781   REC_RegisterConfigUpdateFunc("proxy.config.update.update_configuration", URL_list_update_callout, (void *) this);
00782 
00783   
00784 
00785   handleEvent(EVENT_IMMEDIATE, (Event *) NULL);
00786 
00787   
00788 
00789   _periodic_event = eventProcessor.schedule_every(this, HRTIME_SECONDS(10));
00790 
00791   return 0;
00792 }
00793 
00794 int
00795 UpdateConfigManager::GetConfigParams(Ptr<UpdateConfigParams> *P)
00796 {
00797   MUTEX_TRY_LOCK(lock, mutex, this_ethread());
00798   if (!lock) {
00799     return 0;                   
00800   } else {
00801     *P = _CP;
00802     return 1;                   
00803   }
00804 }
00805 
00806 int
00807 UpdateConfigManager::GetConfigList(Ptr<UpdateConfigList> *L)
00808 {
00809   MUTEX_TRY_LOCK(lock, mutex, this_ethread());
00810   if (!lock) {
00811     return 0;                   
00812   } else {
00813     *L = _CL;
00814     return 1;                   
00815   }
00816 }
00817 
00818 int
00819 UpdateConfigManager::URL_list_update_callout(const char *, RecDataT ,
00820                                              RecData data, void *cookie)
00821 {
00822   UpdateConfigManager *cm = (UpdateConfigManager *) cookie;
00823   cm->SetFileName((char *) data.rec_string);
00824 
00825 
00826   
00827   
00828 
00829   eventProcessor.schedule_imm(cm, ET_CACHE);
00830 
00831   return 0;
00832 }
00833 
00834 int
00835 UpdateConfigManager::ProcessUpdate(int event, Event * e)
00836 {
00837   if (event == EVENT_IMMEDIATE) {
00838 
00839     
00840 
00841 
00842     UpdateConfigList *l = NULL;
00843 
00844     l = BuildUpdateList();
00845     if (l) {
00846       _CL = l;
00847     }
00848     return EVENT_DONE;
00849   }
00850 
00851   if (event == EVENT_INTERVAL) {
00852 
00853     
00854 
00855 
00856     UpdateConfigParams *p = new UpdateConfigParams(*_CP_actual);
00857 
00858     if (!(*_CP == *p)) {
00859       _CP = p;
00860       Debug("update", "enable %" PRId64" force %" PRId64" rcnt %" PRId64" rint %" PRId64" updates %" PRId64" state machines %" PRId64" mem %" PRId64"",
00861             p->_enabled, p->_immediate_update, p->_retry_count,
00862             p->_retry_interval, p->_concurrent_updates, p->_max_update_state_machines, p->_memory_use_in_mb);
00863     } else {
00864       delete p;
00865     }
00866     return EVENT_DONE;
00867   }
00868   
00869 
00870   Debug("update", "ProcessUpdate: Unknown event %d %p", event, e);
00871   return EVENT_DONE;
00872 }
00873 
00874 UpdateConfigList *
00875 UpdateConfigManager::BuildUpdateList()
00876 {
00877   
00878   ats_scoped_str config_path;
00879 
00880   if (_filename) {
00881     config_path = Layout::get()->relative_to(Layout::get()->sysconfdir, _filename);
00882   } else {
00883     return (UpdateConfigList *) NULL;
00884   }
00885 
00886   int fd = open(config_path, O_RDONLY);
00887   if (fd < 0) {
00888     Warning("read update.config, open failed");
00889     SignalWarning(MGMT_SIGNAL_CONFIG_ERROR, "read update.config, open failed");
00890     return (UpdateConfigList *) NULL;
00891   }
00892   return ParseConfigFile(fd);
00893 }
00894 
00895 int
00896 UpdateConfigManager::GetDataLine(int fd, int bufsize, char *buf, int field_delimiters, int delimiter)
00897 {
00898   char *line = buf;
00899   int linesize = bufsize;
00900   int bytes_read = 0;
00901   int rlen;
00902 
00903   while ((rlen = ink_file_fd_readline(fd, linesize, line)) > 0) {
00904 
00905     
00906     
00907     
00908     
00909 
00910 
00911     if (0 == bytes_read) {
00912       
00913       if (*line == '#') return rlen;
00914       else if (1 == rlen) continue; 
00915     }
00916     bytes_read += rlen;
00917 
00918     
00919 
00920     char *p = buf;
00921     int delimiters_found = 0;
00922 
00923     while (*p) {
00924       if (*p == delimiter) {
00925         delimiters_found++;
00926       }
00927       p++;
00928     }
00929     if (delimiters_found == field_delimiters) {
00930       
00931       return bytes_read;
00932 
00933     } else if ((delimiters_found == (field_delimiters - 1))
00934                && (*(p - 1) == '\n')) {
00935       
00936       
00937 
00938       *(p - 1) = '\\';
00939       return bytes_read;
00940     }
00941     
00942     line += rlen;
00943     linesize -= rlen;
00944   }
00945   return 0;
00946 }
00947 
00948 UpdateConfigList *
00949 UpdateConfigManager::ParseConfigFile(int f)
00950 {
00951   
00952 
00953 
00954 
00955 
00956   enum
00957   { F_URL, F_HEADERS, F_HOUR, F_INTERVAL, F_DEPTH, F_ITEMS };
00958   char *p_start[F_ITEMS];
00959   char *p_end[F_ITEMS];
00960 
00961   char line[MAX_LINE_LENGTH];
00962   char *p;
00963 
00964   int ln = 0;
00965   int i;
00966 
00967   UpdateEntry *e = NULL;
00968   UpdateConfigList *ul = new UpdateConfigList;
00969 
00970   while (GetDataLine(f, sizeof(line) - 1, line, F_ITEMS, '\\') > 0) {
00971     ++ln;
00972     if (*line == '#') {
00973       continue;
00974     } else {
00975       p = line;
00976     }
00977 
00978     
00979 
00980     for (i = 0; i < F_ITEMS; ++i) {
00981       p_start[i] = p;
00982       p_end[i] = strchr(p, '\\');
00983       *p_end[i] = 0;            
00984 
00985       if (p_end[i]) {
00986         p = p_end[i] + 1;
00987       } else {
00988         Warning("read update.config, invalid syntax, line %d", ln);
00989         SignalWarning(MGMT_SIGNAL_CONFIG_ERROR, "read update.config, invalid syntax");
00990         break;
00991       }
00992     }
00993     if (i < F_ITEMS) {
00994       
00995       goto abort_processing;
00996     }
00997     
00998 
00999     e = new UpdateEntry;
01000 
01001 
01002     
01003 
01004     if (e->ValidURL(p_start[F_URL], p_end[F_URL])) {
01005       Warning("read update.config, invalid URL field, line %d", ln);
01006       SignalWarning(MGMT_SIGNAL_CONFIG_ERROR, "read update.config, invalid URL field");
01007       goto abort_processing;
01008     }
01009 
01010     
01011 
01012     if (e->ValidHeaders(p_start[F_HEADERS])) {
01013       Warning("read update.config, invalid headers field, line %d", ln);
01014       SignalWarning(MGMT_SIGNAL_CONFIG_ERROR, "read update.config, invalid headers field");
01015       goto abort_processing;
01016     }
01017 
01018     
01019 
01020     if (e->BuildHttpRequest()) {
01021       Warning("read update.config, header processing error, line %d", ln);
01022       SignalWarning(MGMT_SIGNAL_CONFIG_ERROR, "read update.config, header processing error");
01023       goto abort_processing;
01024     }
01025 
01026     
01027 
01028     if (e->ValidHour(p_start[F_HOUR])) {
01029       Warning("read update.config, invalid hour field, line %d", ln);
01030       SignalWarning(MGMT_SIGNAL_CONFIG_ERROR, "read update.config, invalid hour field");
01031       goto abort_processing;
01032     }
01033 
01034     
01035 
01036     if (e->ValidInterval(p_start[F_INTERVAL])) {
01037       Warning("read update.config, invalid interval field, line %d", ln);
01038       SignalWarning(MGMT_SIGNAL_CONFIG_ERROR, "read update.config, invalid interval field");
01039       goto abort_processing;
01040     }
01041 
01042     
01043 
01044     if (e->ValidDepth(p_start[F_DEPTH])) {
01045       Warning("read update.config, invalid depth field, line %d", ln);
01046       SignalWarning(MGMT_SIGNAL_CONFIG_ERROR, "read update.config, invalid depth field");
01047       goto abort_processing;
01048     }
01049     
01050 
01051     e->Init();
01052     Debug("update",
01053           "[%d] [%s] [%s] nhdrs %d hour %d interval %d depth %d",
01054           e->_id, e->_url, e->_request_headers, e->_num_request_headers, e->_offset_hour, e->_interval, e->_max_depth);
01055     ul->Add(e);
01056     e = NULL;
01057   }
01058 
01059   
01060 
01061   close(f);
01062   return ul;
01063 
01064 abort_processing:
01065   close(f);
01066   if (e) {
01067     delete e;
01068   }
01069   if (ul) {
01070     delete ul;
01071   }
01072   return (UpdateConfigList *) NULL;
01073 }
01074 
01075 
01076 
01077 
01078 
01079 UpdateScheduler::UpdateScheduler(UpdateConfigManager * c)
01080 :Continuation(new_ProxyMutex()), _periodic_event(0),
01081 _recursive_update(0), _CM(c), _schedule_event_callbacks(0), _update_state_machines(0), _base_EN(0), _parent_US(0)
01082 {
01083   SET_HANDLER((UpdateSchedulerContHandler)
01084               & UpdateScheduler::ScheduleEvent);
01085 }
01086 
// Destructor: the body is empty; it performs no cleanup.
UpdateScheduler::~UpdateScheduler()
{
}
01090 
01091 int
01092 UpdateScheduler::Init()
01093 {
01094   _recursive_update = 0;
01095   _periodic_event = eventProcessor.schedule_every(this, HRTIME_SECONDS(10));
01096   return 0;
01097 }
01098 
01099 int
01100 UpdateScheduler::Init(UpdateScheduler * us, UpdateEntry * ue, Ptr<UpdateConfigParams> p)
01101 {
01102   ink_assert(ue->_indirect_list->Entries());
01103 
01104   _recursive_update = 1;
01105   _CP = p;
01106   _CL = ue->_indirect_list;
01107   _base_EN = ue;
01108   _parent_US = us;
01109 
01110   
01111 
01112   UpdateEntry *e;
01113   while ((e = _CL->Remove())) {
01114     _CL->AddPending(e);
01115   }
01116   _periodic_event = eventProcessor.schedule_every(this, HRTIME_SECONDS(10));
01117   return 0;
01118 }
01119 
01120 int
01121 UpdateScheduler::ScheduleEvent(int event, void *e)
01122 {
01123   UpdateEntry *ue = NULL;
01124   int update_complete = 1;
01125 
01126   if (event == EVENT_IMMEDIATE) {
01127 
01128     
01129 
01130     ue = (UpdateEntry *) e;
01131 
01132     switch (ue->_update_event_status) {
01133     case UPDATE_EVENT_SUCCESS:
01134       {
01135         Debug("update", "%s update complete, UPDATE_EVENT_SUCCESS id: %d", (_recursive_update ? "(R)" : ""), ue->_id);
01136         UPDATE_INCREMENT_DYN_STAT(update_successes_stat);
01137 
01138         if ((ue->_max_depth > 0) && ue->_indirect_list) {
01139           if (ue->_indirect_list->Entries()) {
01140 
01141             
01142             
01143             
01144             
01145 
01146             Debug("update", "Starting UpdateScheduler for id: %d [%s]", ue->_id, ue->_url);
01147             UpdateScheduler *us = new UpdateScheduler();
01148             us->Init(this, ue, _CP);
01149             update_complete = 0;
01150 
01151           } else {
01152             ue->_indirect_list = NULL;
01153           }
01154         }
01155         break;
01156       }
01157     case UPDATE_EVENT_SUCCESS_NOACTION:
01158       {
01159         Debug("update",
01160               "%s update complete, UPDATE_EVENT_SUCCESS_NOACTION id: %d", (_recursive_update ? "(R)" : ""), ue->_id);
01161         UPDATE_INCREMENT_DYN_STAT(update_no_actions_stat);
01162         break;
01163       }
01164     case UPDATE_EVENT_FAILED:
01165       {
01166         Debug("update", "%s update complete, UPDATE_EVENT_FAILED id: %d", (_recursive_update ? "(R)" : ""), ue->_id);
01167         UPDATE_INCREMENT_DYN_STAT(update_fails_stat);
01168         break;
01169       }
01170     default:
01171       {
01172         Debug("update",
01173               "%s update complete, unknown status %d, id: %d",
01174               (_recursive_update ? "(R)" : ""), ue->_update_event_status, ue->_id);
01175         UPDATE_INCREMENT_DYN_STAT(update_unknown_status_stat);
01176         break;
01177       }
01178     }                           
01179 
01180     if (update_complete) {
01181       if (!_recursive_update) {
01182 
01183         
01184 
01185 
01186         ue->ComputeScheduleTime();
01187         _CL->Add(ue);           
01188 
01189       } else {
01190         delete ue;
01191       }
01192       --_update_state_machines;
01193       UPDATE_DECREMENT_DYN_STAT(update_state_machines_stat);
01194     }
01195 
01196     
01197     
01198 
01199 
01200     if (Schedule() < 0) {
01201       
01202       if (_update_state_machines == 0) {
01203 
01204         
01205 
01206 
01207         _CP = NULL;
01208         _CL = NULL;
01209 
01210         if (_recursive_update) {
01211           
01212           
01213           
01214           
01215           _periodic_event->cancel();
01216           _base_EN->_indirect_list = NULL;
01217           _base_EN->_update_event_status = UPDATE_EVENT_SUCCESS;
01218 
01219           SET_HANDLER((UpdateSchedulerContHandler)
01220                       & UpdateScheduler::ChildExitEventHandler);
01221           handleEvent(EVENT_IMMEDIATE, 0);
01222         }
01223       }
01224     }
01225     return EVENT_DONE;
01226   }
01227 
01228   
01229 
01230   if (event == EVENT_INTERVAL) {
01231     ++_schedule_event_callbacks;
01232   } else {
01233     
01234     Debug("update", "UpdateScheduler::ScheduleEvent unknown event %d", event);
01235     return EVENT_DONE;
01236   }
01237 
01238   if (!_CP && !_CL) {
01239     
01240 
01241     if (!_CM->GetConfigParams(&_CP)) {
01242       return EVENT_CONT;        
01243     }
01244     if (!_CM->GetConfigList(&_CL)) {
01245       _CP = NULL;
01246       return EVENT_CONT;        
01247     }
01248     
01249 
01250     if (!_CP || !_CL) {
01251       _CP = NULL;
01252       _CL = NULL;
01253       return EVENT_CONT;        
01254     }
01255     
01256 
01257     if (!_CP->IsEnabled()) {
01258       _CP = NULL;
01259       _CL = NULL;
01260       return EVENT_CONT;        
01261     }
01262 
01263   } else {
01264 
01265     
01266     
01267 
01268 
01269     Schedule();
01270     return EVENT_CONT;
01271   }
01272   ink_release_assert(!_update_state_machines);
01273 
01274 
01275   
01276 
01277 
01278   ink_hrtime ht = ink_get_based_hrtime();
01279   time_t cur_time = ht / HRTIME_SECOND;
01280   Queue<UpdateEntry> no_action_q;
01281   int time_expired;
01282 
01283   while ((ue = _CL->Remove())) {
01284     time_expired = ue->ScheduleNow(cur_time);
01285     if (time_expired || _CP->ImmediateUpdate()) {
01286       if (Schedule(ue) > 0) {
01287         Debug("update", "%s and started id: %d", time_expired ? "expired" : "force expire", ue->_id);
01288       } else {
01289         Debug("update", "%s with deferred start id: %d", time_expired ? "expired" : "force expire", ue->_id);
01290       }
01291 
01292     } else {
01293       no_action_q.enqueue(ue);
01294     }
01295   }
01296 
01297   
01298 
01299   while ((ue = no_action_q.dequeue())) {
01300     _CL->Add(ue);
01301   }
01302 
01303   if (!_update_state_machines && !_CL->_pending_q.head) {
01304     
01305     
01306 
01307     _CP = NULL;
01308     _CL = NULL;
01309   }
01310   return EVENT_DONE;
01311 }
01312 
01313 int
01314 UpdateScheduler::ChildExitEventHandler(int event, Event * )
01315 {
01316   switch (event) {
01317   case EVENT_IMMEDIATE:
01318   case EVENT_INTERVAL:
01319     {
01320       MUTEX_TRY_LOCK(lock, _parent_US->mutex, this_ethread());
01321       if (lock) {
01322         Debug("update", "Child UpdateScheduler exit id: %d", _base_EN->_id);
01323         _parent_US->handleEvent(EVENT_IMMEDIATE, _base_EN);
01324         delete this;
01325 
01326       } else {
01327         
01328         eventProcessor.schedule_in(this, HRTIME_MSECONDS(10));
01329       }
01330       break;
01331     }
01332   default:
01333     {
01334       ink_release_assert(!"UpdateScheduler::ChildExitEventHandler invalid event");
01335     }                           
01336   }                             
01337 
01338   return EVENT_DONE;
01339 }
01340 
01341 int
01342 UpdateScheduler::Schedule(UpdateEntry * e)
01343 {
01344   
01345   
01346   
01347 
01348   UpdateSM *usm;
01349   UpdateEntry *ue = e;
01350   int allow_schedule;
01351   RecInt count, sum;
01352   int max_concurrent_updates;
01353 
01354   UPDATE_READ_DYN_STAT(update_state_machines_stat, count, sum);
01355   if (_CP->ConcurrentUpdates() < _CP->MaxUpdateSM()) {
01356     max_concurrent_updates = _CP->ConcurrentUpdates();
01357   } else {
01358     max_concurrent_updates = _CP->MaxUpdateSM();
01359   }
01360   allow_schedule = (sum < max_concurrent_updates);
01361 
01362   if (allow_schedule) {
01363     ue = ue ? ue : _CL->RemovePending();
01364     if (ue) {
01365       ++_update_state_machines;
01366       UPDATE_INCREMENT_DYN_STAT(update_state_machines_stat);
01367       usm = new UpdateSM(this, _CP, ue);
01368       usm->Start();
01369 
01370       Debug("update", "%s %s start update id: %d [%s]",
01371             (_recursive_update ? "(R)" : ""), (e ? "directed" : "speculative"), ue->_id, ue->_url);
01372 
01373       return 1;                 
01374     } else {
01375       return -1;                
01376     }
01377 
01378   } else {
01379     if (ue) {
01380       _CL->AddPending(ue);
01381     }
01382     return 0;                   
01383   }
01384 }
01385 
01386 
01387 
01388 
01389 
01390 UpdateSM::UpdateSM(UpdateScheduler * us, Ptr<UpdateConfigParams> p, UpdateEntry * e)
01391 :Continuation(new_ProxyMutex()), _state(USM_INIT), _return_status(0), _retries(0)
01392 {
01393   SET_HANDLER((UpdateSMContHandler) & UpdateSM::HandleSMEvent);
01394   _US = us;
01395   _CP = p;
01396   _EN = e;
01397 }
01398 
01399 UpdateSM::~UpdateSM()
01400 {
01401   _CP = NULL;                   
01402 }
01403 
01404 void
01405 UpdateSM::Start()
01406 {
01407   eventProcessor.schedule_imm(this, ET_CACHE);
01408 }
01409 
01410 int
01411 UpdateSM::HandleSMEvent(int event, Event * )
01412 {
01413   while (1) {
01414     switch (_state) {
01415     case USM_INIT:
01416       {
01417 
01418         
01419         
01420         
01421         
01422 
01423         if (_EN->_max_depth > 0) {
01424           
01425           _state = USM_PROCESS_URL;
01426           break;
01427         }
01428 
01429         INK_MD5 url_md5;
01430 
01431         Cache::generate_key(&url_md5, &_EN->_URLhandle);
01432         ClusterMachine *m = cluster_machine_at_depth(cache_hash(url_md5));
01433         if (m) {
01434           
01435           _state = USM_EXIT;
01436           _EN->_update_event_status = UPDATE_EVENT_SUCCESS_NOACTION;
01437           break;
01438         } else {
01439           
01440           _state = USM_PROCESS_URL;
01441           break;
01442         }
01443       }
01444     case USM_PROCESS_URL:
01445       {
01446 
01447         
01448 
01449         int n;
01450         int scheme_len;
01451         const char *scheme;
01452         _state = USM_PROCESS_URL_COMPLETION;
01453         scheme = _EN->_URLhandle.scheme_get(&scheme_len);
01454         for (n = 0; n < N_SCHEMES; ++n) {
01455           if (scheme == *scheme_dispatch_table[n].scheme) {
01456             _EN->_scheme_index = n;
01457             if ((*scheme_dispatch_table[n].func) (this)) {
01458               break;            
01459             }
01460             return EVENT_CONT;
01461           }
01462         }
01463         
01464 
01465         _state = USM_EXIT;
01466         _EN->_update_event_status = UPDATE_EVENT_FAILED;
01467         break;
01468       }
01469     case USM_PROCESS_URL_COMPLETION:
01470       {
01471 
01472         
01473 
01474         _state = USM_EXIT;
01475         _EN->_update_event_status = event;
01476         (*scheme_post_dispatch_table[_EN->_scheme_index].func) (this);
01477         break;
01478       }
01479     case USM_EXIT:
01480       {
01481 
01482         
01483 
01484         if ((_return_status == UPDATE_EVENT_FAILED)
01485             && (_retries < _CP->RetryCount())) {
01486 
01487           
01488 
01489           ++_retries;
01490           _state = USM_PROCESS_URL;
01491           eventProcessor.schedule_in(this, HRTIME_SECONDS(_CP->RetryInterval()), ET_CACHE);
01492           return EVENT_DONE;
01493 
01494         } else {
01495           MUTEX_TRY_LOCK(lock, _US->mutex, this_ethread());
01496           if (lock) {
01497             _US->handleEvent(EVENT_IMMEDIATE, (void *) _EN);
01498             delete this;
01499             return EVENT_DONE;
01500 
01501           } else {
01502             
01503             eventProcessor.schedule_in(this, HRTIME_MSECONDS(10), ET_CACHE);
01504             return EVENT_CONT;
01505           }
01506         }
01507       }
01508     }                           
01509   }                             
01510 
01511   return EVENT_CONT;
01512 }
01513 
01514 struct dispatch_entry scheme_dispatch_table[UpdateSM::N_SCHEMES] = {
01515   {&URL_SCHEME_HTTP, UpdateSM::http_scheme},
01516 };
01517 
01518 struct dispatch_entry scheme_post_dispatch_table[UpdateSM::N_SCHEMES] = {
01519   {&URL_SCHEME_HTTP, UpdateSM::http_scheme_postproc},
01520 };
01521 
01522 int
01523 UpdateSM::http_scheme(UpdateSM * sm)
01524 {
01525   if (sm->_EN->_max_depth > 0) {
01526 
01527     
01528 
01529     Debug("update", "Start recursive HTTP GET id: %d [%s]", sm->_EN->_id, sm->_EN->_url);
01530     sm->_EN->_indirect_list = new UpdateConfigList;
01531     RecursiveHttpGet *RHttpGet = new RecursiveHttpGet;
01532 
01533     RHttpGet->Init(sm, sm->_EN->_url, sm->_EN->_request_headers,
01534                    &sm->_EN->_URLhandle, sm->_EN->_http_hdr,
01535                    sm->_EN->_max_depth, sm->_EN->_indirect_list, &update_allowable_html_tags[0]);
01536   } else {
01537 
01538     
01539 
01540     Debug("update", "Start HTTP GET id: %d [%s]", sm->_EN->_id, sm->_EN->_url);
01541     HttpUpdateSM *current_reader;
01542 
01543     current_reader = HttpUpdateSM::allocate();
01544     current_reader->init();
01545     
01546     current_reader->start_scheduled_update(sm, sm->_EN->_http_hdr);
01547   }
01548   return 0;
01549 }
01550 
01551 int
01552 UpdateSM::http_scheme_postproc(UpdateSM * sm)
01553 {
01554   
01555 
01556   switch (sm->_EN->_update_event_status) {
01557   case UPDATE_EVENT_SUCCESS:
01558   case UPDATE_EVENT_FAILED:
01559     
01560     sm->_return_status = sm->_EN->_update_event_status;
01561     break;
01562 
01563   case HTTP_SCH_UPDATE_EVENT_WRITTEN:
01564   case HTTP_SCH_UPDATE_EVENT_UPDATED:
01565   case HTTP_SCH_UPDATE_EVENT_DELETED:
01566   case HTTP_SCH_UPDATE_EVENT_NOT_CACHED:
01567   case HTTP_SCH_UPDATE_EVENT_NO_ACTION:
01568     sm->_EN->_update_event_status = UPDATE_EVENT_SUCCESS;
01569     sm->_return_status = UPDATE_EVENT_SUCCESS;
01570     break;
01571 
01572   case HTTP_SCH_UPDATE_EVENT_ERROR:
01573   default:
01574     sm->_EN->_update_event_status = UPDATE_EVENT_FAILED;
01575     sm->_return_status = UPDATE_EVENT_FAILED;
01576     break;
01577   }
01578   return 0;
01579 }
01580 
01581 
01582 
01583 
01584 
01585 
01586 char
01587   HtmlParser::default_zero_char = '\0';
01588 
01589 RecursiveHttpGet::RecursiveHttpGet()
01590 :Continuation(new_ProxyMutex()), _id(0), _caller_cont(0),
01591 _request_headers(0), _http_hdr(0), _recursion_depth(0), _OL(0), _group_link_head(0), _active_child_state_machines(0)
01592 {
01593   SET_HANDLER((RecursiveHttpGetContHandler)
01594               & RecursiveHttpGet::RecursiveHttpGetEvent);
01595 }
01596 
01597 RecursiveHttpGet::~RecursiveHttpGet()
01598 {
01599   _CL = NULL;
01600 }
01601 
01602 void
01603 RecursiveHttpGet::Init(Continuation * cont, char *url, char *request_headers,
01604                        URL * url_data, HTTPHdr * http_hdr, int recursion_depth,
01605                        Ptr<UpdateConfigList> L, struct html_tag *allowed_html_tags)
01606 {
01607 
01608   
01609   
01610 
01611   _id = ink_atomic_increment(&global_id, 1);
01612   _caller_cont = cont;
01613   _request_headers = request_headers;
01614   _url_data = url_data;
01615   _http_hdr = http_hdr;
01616   _recursion_depth = recursion_depth;
01617   _CL = L;
01618   _OL = ObjectReloadContAllocator.alloc();
01619   _OL->Init(this, url, strlen(url), _request_headers, (_request_headers ? strlen(_request_headers) : 0), 1, 1);
01620 
01621   html_parser.Init(url, allowed_html_tags);
01622 
01623   Debug("update", "Start recursive read rid: %d [%s]", _id, html_parser._url);
01624 }
01625 
01626 int
01627 RecursiveHttpGet::RecursiveHttpGetEvent(int event, Event * d)
01628 {
01629   char *url, *url_end;
01630   int status;
01631   UpdateEntry *ue;
01632   IOBufferReader *r = (IOBufferReader *) d;
01633 
01634   switch (event) {
01635   case NET_EVENT_OPEN_FAILED:
01636     {
01637       Debug("update", "RecursiveHttpGetEvent connect failed id: %d [%s]", _id, html_parser._url);
01638       break;
01639     }
01640   case VC_EVENT_ERROR:
01641     {
01642       Debug("update", "RecursiveHttpGetEvent connect event error id: %d [%s]", _id, html_parser._url);
01643       break;
01644     }
01645   case VC_EVENT_READ_READY:
01646   case VC_EVENT_READ_COMPLETE:
01647   case VC_EVENT_EOS:
01648     {
01649       while ((status = html_parser.ParseHtml(r, &url, &url_end))) {
01650         
01651 
01652         ue = new UpdateEntry;
01653         if (ue->ValidURL(url, url_end + 1  )) {
01654           delete ue;
01655           ue = NULL;
01656 
01657         } else {
01658           
01659 
01660           ue->_request_headers = ats_strdup(_request_headers);
01661           ue->BuildHttpRequest();
01662           ue->Init(1);          
01663 
01664           
01665           int ue_host_len;
01666           const char *ue_host = ue->_URLhandle.host_get(&ue_host_len);
01667           int url_host_len;
01668           const char *url_host = _url_data->host_get(&url_host_len);
01669 
01670           if (ue_host == NULL || url_host == NULL || ptr_len_casecmp(ue_host, ue_host_len, url_host, url_host_len)) {
01671             delete ue;
01672             ue = NULL;
01673             continue;
01674           }
01675           
01676           
01677           ue->_URLhandle.hash_get(&ue->_url_md5);
01678 
01679           if (_CL->HashAdd(ue)) {
01680             
01681 
01682             delete ue;
01683             ue = NULL;
01684 
01685           } else {
01686             
01687             
01688             
01689 
01690             ue->SetTerminalStatus(((status < 0) ? 1 : 0));
01691             Debug("update", "Recursive find rid: %d id: %d %s\n [%s]",
01692                   _id, ue->_id, (ue->TerminalURL()? "T " : ""), ue->_url);
01693 
01694             if (_group_link_head) {
01695               ue->_group_link = _group_link_head;
01696               _group_link_head = ue;
01697             } else {
01698               _group_link_head = ue;
01699               ue->_group_link = NULL;
01700             }
01701           }
01702         }
01703       }
01704       ink_release_assert(r->read_avail() == 0);
01705 
01706       if ((event == VC_EVENT_READ_COMPLETE)
01707           || (event == VC_EVENT_EOS)) {
01708         break;
01709 
01710       } else {
01711         return EVENT_CONT;
01712       }
01713     }
01714   case UPDATE_EVENT_SUCCESS:
01715   case UPDATE_EVENT_FAILED:
01716     {
01717       
01718 
01719       ink_release_assert(_active_child_state_machines > 0);
01720       _active_child_state_machines--;
01721       break;
01722     }
01723   default:
01724     {
01725       ink_release_assert(!"RecursiveHttpGetEvent invalid event");
01726       return EVENT_DONE;
01727 
01728     }                           
01729   }                             
01730 
01731   if (_group_link_head) {
01732     
01733     
01734     
01735 
01736     while (_group_link_head) {
01737       ue = _group_link_head;
01738       _group_link_head = ue->_group_link;
01739 
01740       if (!ue->TerminalURL()) {
01741         if (_recursion_depth <= 1) {
01742           continue;
01743         }
01744 
01745         Debug("update", "(R) start non-terminal HTTP GET rid: %d id: %d [%s]", _id, ue->_id, ue->_url);
01746 
01747         _active_child_state_machines++;
01748         RecursiveHttpGet *RHttpGet = new RecursiveHttpGet();
01749         RHttpGet->Init(this, ue->_url, _request_headers,
01750                        _url_data, _http_hdr, (_recursion_depth - 1), _CL, &update_allowable_html_tags[0]);
01751         return EVENT_CONT;
01752 
01753       }
01754     }
01755   }
01756   
01757   
01758 
01759   SET_HANDLER((RecursiveHttpGetContHandler)
01760               & RecursiveHttpGet::ExitEventHandler);
01761   handleEvent(EVENT_IMMEDIATE, 0);
01762   return EVENT_DONE;
01763 }
01764 
01765 int
01766 RecursiveHttpGet::ExitEventHandler(int event, Event * )
01767 {
01768   switch (event) {
01769   case EVENT_IMMEDIATE:
01770   case EVENT_INTERVAL:
01771     {
01772       MUTEX_TRY_LOCK(lock, _caller_cont->mutex, this_ethread());
01773       if (lock) {
01774         Debug("update", "Exiting recursive read rid: %d [%s]", _id, html_parser._url);
01775         _caller_cont->handleEvent(UPDATE_EVENT_SUCCESS, 0);
01776         delete this;
01777 
01778       } else {
01779         
01780         eventProcessor.schedule_in(this, HRTIME_MSECONDS(10));
01781       }
01782       break;
01783     }
01784   default:
01785     {
01786       ink_release_assert(!"RecursiveHttpGet::ExitEventHandler invalid event");
01787     }                           
01788   }                             
01789 
01790   return EVENT_DONE;
01791 }
01792 
01793 int
01794 HtmlParser::ParseHtml(IOBufferReader * r, char **url, char **url_end)
01795 {
01796   int status;
01797   while (1) {
01798     if ((status = ScanHtmlForURL(r, url, url_end))) {
01799       status = ConstructURL(url, url_end);
01800       if (status)
01801         return status;
01802     } else {
01803       return 0;                 
01804     }
01805   }
01806 }
01807 
01808 int
01809 HtmlParser::ScanHtmlForURL(IOBufferReader * r, char **url, char **url_end)
01810 {
01811   unsigned char c;
01812   int n = 0;
01813 
01814   while (1) {
01815     switch (_scan_state) {
01816     case SCAN_INIT:
01817       {
01818         _tag.clear();
01819 
01820         _attr.clear();
01821         _attr_value.clear();
01822         _attr_value_hash_char_index = -1;
01823         _attr_value_quoted = 0;
01824         _attr_matched = false;
01825 
01826         _scan_state = SCAN_START;
01827         n = -1;
01828         break;
01829       }
01830     case SCAN_START:
01831       {
01832         while ((n = r->read((char *) &c, 1))) {
01833           if (c == '<') {
01834             _scan_state = FIND_TAG_START;
01835             break;
01836           }
01837         }
01838         break;
01839       }
01840     case FIND_TAG_START:
01841       {
01842         while ((n = r->read((char *) &c, 1))) {
01843           if (!isspace(c)) {
01844             if (c == '>') {
01845 
01846               
01847 
01848               _scan_state = SCAN_INIT;
01849               break;
01850 
01851             } else {
01852               _tag(_tag.length()) = c;
01853               _scan_state = COPY_TAG;
01854               break;
01855             }
01856           }
01857         }
01858         break;
01859       }
01860     case COPY_TAG:
01861       {
01862         while ((n = r->read((char *) &c, 1))) {
01863           if (!isspace(c)) {
01864             if (c == '>') {
01865 
01866               
01867 
01868               _scan_state = SCAN_INIT;
01869               break;
01870 
01871             } else if (c == '=') {
01872 
01873               
01874 
01875               _scan_state = SCAN_INIT;
01876               break;
01877 
01878             } else {
01879               if (_tag.length() < MAX_TAG_NAME_LENGTH) {
01880                 _tag(_tag.length()) = c;
01881 
01882               } else {
01883 
01884                 
01885 
01886                 _scan_state = SCAN_INIT;
01887                 break;
01888               }
01889             }
01890 
01891           } else {
01892             _tag(_tag.length()) = 0;
01893             if (strcmp(_tag, HTML_COMMENT_TAG) == 0) {
01894               _scan_state = IGNORE_COMMENT_START;
01895             } else {
01896               _scan_state = FIND_ATTR_START;
01897             }
01898             break;
01899           }
01900         }
01901         break;
01902       }
01903     case IGNORE_COMMENT_START:
01904       {
01905         _comment_end_ptr = (char *) HTML_COMMENT_END;
01906         _scan_state = IGNORE_COMMENT;
01907         break;
01908       }
01909     case IGNORE_COMMENT:
01910       {
01911         while ((n = r->read((char *) &c, 1))) {
01912           if (!isspace(c)) {
01913             if (c == *_comment_end_ptr) {
01914               _comment_end_ptr++;
01915               if (!*_comment_end_ptr) {
01916                 _scan_state = SCAN_INIT;
01917                 break;
01918               }
01919             } else {
01920               _comment_end_ptr = (char *) HTML_COMMENT_END;
01921             }
01922           }
01923         }
01924         break;
01925       }
01926     case FIND_ATTR_START:
01927       {
01928         while ((n = r->read((char *) &c, 1))) {
01929           if (!isspace(c)) {
01930             if (c == '>') {
01931 
01932               
01933 
01934               _scan_state = SCAN_INIT;
01935               break;
01936 
01937             } else if (c == '=') {
01938 
01939               
01940 
01941               _scan_state = SCAN_INIT;
01942               break;
01943 
01944             } else {
01945               _attr(_attr.length()) = c;
01946               _scan_state = COPY_ATTR;
01947               break;
01948             }
01949           }
01950         }
01951         break;
01952       }
01953     case COPY_ATTR:
01954       {
01955         while ((n = r->read((char *) &c, 1))) {
01956           if (!isspace(c)) {
01957             if (c == '>') {
01958 
01959               
01960 
01961               _scan_state = SCAN_INIT;
01962               break;
01963 
01964             } else if (c == '=') {
01965 
01966               
01967 
01968               _attr(_attr.length()) = 0;
01969               _scan_state = FIND_ATTR_VALUE_START;
01970               break;
01971 
01972             } else {
01973               if (_attr.length() < MAX_ATTR_NAME_LENGTH) {
01974                 _attr(_attr.length()) = c;
01975 
01976               } else {
01977 
01978                 
01979 
01980                 _scan_state = SCAN_INIT;
01981                 break;
01982               }
01983             }
01984 
01985           } else {
01986             _attr(_attr.length()) = 0;
01987             _scan_state = FIND_ATTR_VALUE_DELIMITER;
01988             break;
01989           }
01990         }
01991         break;
01992       }
01993     case FIND_ATTR_VALUE_DELIMITER:
01994       {
01995         while ((n = r->read((char *) &c, 1))) {
01996           if (isspace(c) || (c == '=')) {
01997             if (c == '=') {
01998               _scan_state = FIND_ATTR_VALUE_START;
01999               break;
02000             }
02001           } else {
02002             _scan_state = SCAN_INIT;
02003             break;
02004           }
02005         }
02006         break;
02007       }
02008     case FIND_ATTR_VALUE_START:
02009       {
02010         while ((n = r->read((char *) &c, 1))) {
02011           if (!isspace(c)) {
02012             if (c == '>') {
02013 
02014               
02015 
02016               _scan_state = SCAN_INIT;
02017               break;
02018 
02019             } else if ((c == '\'') || (c == '\"')) {
02020               _attr_value_quoted = c;
02021               _scan_state = COPY_ATTR_VALUE;
02022               break;
02023 
02024             } else {
02025               _attr_value_quoted = 0;
02026               _attr_value(_attr_value.length()) = c;
02027               _scan_state = COPY_ATTR_VALUE;
02028               break;
02029             }
02030           }
02031         }
02032         break;
02033       }
02034     case COPY_ATTR_VALUE:
02035       {
02036         while ((n = r->read((char *) &c, 1))) {
02037           if (_attr_value_quoted) {
02038             if (c == _attr_value_quoted) {
02039 
02040               
02041 
02042               _attr_value(_attr_value.length()) = 0;
02043               _scan_state = VALIDATE_ENTRY;
02044               break;
02045 
02046             } else if (c == '\n') {
02047               _scan_state = TERMINATE_COPY_ATTR_VALUE;
02048               break;
02049             } else {
02050               _attr_value(_attr_value.length()) = c;
02051               if (c == '#') {
02052                 _attr_value_hash_char_index = _attr_value.length() - 1;
02053               }
02054             }
02055 
02056           } else {
02057             if (isspace(c)) {
02058 
02059               
02060 
02061               _attr_value(_attr_value.length()) = 0;
02062               _scan_state = VALIDATE_ENTRY;
02063               break;
02064 
02065             } else if (c == '>') {
02066 
02067               
02068 
02069               _attr_value(_attr_value.length()) = 0;
02070               _scan_state = VALIDATE_ENTRY_RESTART;
02071               break;
02072 
02073             } else {
02074               _attr_value(_attr_value.length()) = c;
02075               if (c == '#') {
02076                 _attr_value_hash_char_index = _attr_value.length() - 1;
02077               }
02078             }
02079           }
02080         }
02081         break;
02082       }
02083     case VALIDATE_ENTRY:
02084     case VALIDATE_ENTRY_RESTART:
02085       {
02086         if (_scan_state == VALIDATE_ENTRY) {
02087           _scan_state = RESUME_ATTR_VALUE_SCAN;
02088         } else {
02089           _scan_state = SCAN_INIT;
02090         }
02091         if (AllowTagAttrValue()) {
02092           if (ExtractURL(url, url_end)) {
02093             return 1;           
02094           }
02095         }
02096         break;                  
02097       }
02098     case RESUME_ATTR_VALUE_SCAN:
02099       {
02100         _attr.clear();
02101         _attr_value.clear();
02102         _attr_value_hash_char_index = -1;
02103         _attr_value_quoted = 0;
02104 
02105         _scan_state = FIND_ATTR_START;
02106         n = -2;
02107         break;
02108       }
02109     case TERMINATE_COPY_ATTR_VALUE:
02110       {
02111         while ((n = r->read((char *) &c, 1))) {
02112           if (c == _attr_value_quoted) {
02113             _scan_state = RESUME_ATTR_VALUE_SCAN;
02114             break;
02115           }
02116         }
02117         break;
02118       }
02119     default:
02120       {
02121         ink_release_assert(!"HtmlParser::ScanHtmlForURL bad state");
02122       }
02123     }                           
02124 
02125     if (n == 0) {
02126       return 0;                 
02127     }
02128 
02129   }                             
02130 }
02131 
02132 int
02133 HtmlParser::AllowTagAttrValue()
02134 {
02135   struct html_tag *p_tag = allowable_html_tags;
02136   struct html_tag *p_attr = allowable_html_attrs;
02137 
02138   if (!_tag || !_attr)
02139     return 0;
02140 
02141   while (p_tag->tag && p_tag->attr) {
02142     if (!strcasecmp(_tag, p_tag->tag)
02143         && !strcasecmp(_attr, p_tag->attr)) {
02144       if (p_attr == NULL || p_attr->tag == NULL)
02145         return 1;
02146       else if (_attr_matched) {
02147         return 1;
02148       } else {
02149         
02150         return 0;
02151       }
02152     } else {
02153       if (p_attr && p_attr->tag && p_attr->attr && _attr_value.length() > 0) {
02154         if (!strcasecmp(_attr, p_attr->tag)
02155             && !strcasecmp(_attr_value, p_attr->attr)) {
02156           _attr_matched = true;
02157         }
02158       }
02159       p_tag++;
02160       if (p_attr)
02161         p_attr++;
02162     }
02163   }
02164   return 0;
02165 }
02166 
02167 int
02168 HtmlParser::ValidProtoScheme(char *p)
02169 {
02170   int n;
02171   for (n = 0; proto_schemes[n].tag; ++n) {
02172     if (!strncasecmp(p, proto_schemes[n].tag, proto_schemes[n].tag_len)) {
02173       return 1;
02174     }
02175   }
02176   return 0;
02177 }
02178 
02179 int
02180 HtmlParser::ValidSupportedProtoScheme(char *p)
02181 {
02182   int n;
02183   for (n = 0; supported_proto_schemes[n].tag; ++n) {
02184     if (!strncasecmp(p, supported_proto_schemes[n].tag, supported_proto_schemes[n].tag_len)) {
02185       return 1;
02186     }
02187   }
02188   return 0;
02189 }
02190 
02191 int
02192 HtmlParser::ExtractURL(char **url, char **url_end)
02193 {
02194   intptr_t n;
02195 
02196   
02197   if (_attr_value_hash_char_index >= 0) {
02198     if (!_attr_value_hash_char_index) {
02199       return 0;                 
02200 
02201     } else {
02202       
02203       _attr_value.set_length(_attr_value_hash_char_index + 1);
02204       _attr_value[_attr_value_hash_char_index] = 0;
02205     }
02206   }
02207 
02208   if (!strcasecmp(_tag, "base") && !strcasecmp(_attr, "href")) {
02209     if (_html_doc_base) {
02210       _html_doc_base.clear();
02211     }
02212     for (n = 0; n < _attr_value.length(); ++n) {
02213       _html_doc_base(_html_doc_base.length()) = _attr_value[n];
02214     }
02215     _html_doc_base(_html_doc_base.length()) = 0;
02216     return 0;                   
02217 
02218   } else if (!strcasecmp(_tag, "meta") && !strcasecmp(_attr, "content")) {
02219 
02220     
02221     
02222 
02223     if (_attr_value.length()) {
02224       
02225       for (n = 0; n < _attr_value.length(); ++n) {
02226         if (!ParseRules::is_digit((unsigned char) _attr_value[n])) {
02227           break;
02228         }
02229       }
02230       if ((n < _attr_value.length()) && (((unsigned char) _attr_value[n]) == ';')) {
02231 
02232         for (; n < _attr_value.length(); ++n) {
02233           if (!isspace((unsigned char) _attr_value[n])) {
02234             break;
02235           }
02236         }
02237         if ((n < _attr_value.length()) && (!strncasecmp(&_attr_value[n], "URL=", 4))) {
02238           n += 4;
02239           if ((n < _attr_value.length())
02240               && ((_attr_value.length() - n) > 1)) {
02241             *url = &_attr_value[n];
02242             *url_end = &_attr_value[_attr_value.length() - 2];
02243             return 1;
02244           }
02245         }
02246       }
02247       return 0;                 
02248 
02249     } else {
02250       return 0;                 
02251     }
02252   }
02253 
02254   if (_attr_value.length() > 1) {
02255     *url = &_attr_value[(intptr_t)0];
02256     *url_end = &_attr_value[_attr_value.length() - 2];
02257     return 1;
02258 
02259   } else {
02260     return 0;                   
02261   }
02262 }
02263 
02264 int
02265 HtmlParser::ConstructURL(char **url, char **url_end)
02266 {
02267   unsigned char *p_url = (unsigned char *) *url;
02268   unsigned char *p_url_end = (unsigned char *) *url_end;
02269 
02270 
02271   
02272 
02273   while (p_url < p_url_end) {
02274     if (isspace(*p_url)) {
02275       ++p_url;
02276     } else {
02277       break;
02278     }
02279   }
02280 
02281 
02282   
02283 
02284   int relative_URL = 0;
02285   int http_needed = 0;
02286   if (ValidProtoScheme((char *) p_url)) {
02287     if (!strncasecmp((char *) p_url, "http:", 5)
02288         && (strncasecmp((char *) p_url, "http://", 7) != 0)) {
02289 
02290 
02291       
02292       
02293 
02294       p_url += strlen("http:");
02295       if (p_url > p_url_end) {
02296         return 0;               
02297       }
02298       relative_URL = 1;
02299     }
02300   } else {
02301     relative_URL = 1;
02302     
02303     if (strncasecmp((char *) p_url, "//", 2) == 0)
02304       http_needed = 1;
02305   }
02306 
02307 
02308   
02309 
02310   if (!relative_URL && !ValidSupportedProtoScheme((char *) p_url)) {
02311     return 0;                   
02312   }
02313 
02314   if (relative_URL) {
02315 
02316     
02317 
02318     DynArray<char>*base = 0;
02319     DynArray<char>*absolute_url = 0;
02320 
02321     if (http_needed) {
02322       absolute_url = PrependString("http:", 5, (char *) p_url, (p_url_end - p_url + 2));
02323     } else if (_html_doc_base.length()) {
02324 
02325       
02326 
02327       base = MakeURL(_url, _html_doc_base, _html_doc_base.length(), !ValidProtoScheme(_html_doc_base));
02328       absolute_url = MakeURL(*base, (char *) p_url, (p_url_end - p_url + 2), 1);
02329     } else {
02330       absolute_url = MakeURL(_url, (char *) p_url, (p_url_end - p_url + 2), 1);
02331     }
02332     _result.clear();
02333     _result = *absolute_url;
02334     absolute_url->detach();
02335 
02336     
02337     delete absolute_url;
02338     if (base)
02339       delete base;
02340 
02341     *url = &_result[(intptr_t)0];
02342     *url_end = &_result[_result.length() - 3];  
02343     
02344     
02345   } else {
02346     *url = (char *) p_url;
02347     *url_end = (char *) p_url_end;
02348   }
02349 
02350 
02351   
02352   
02353   
02354   
02355 
02356   if (!strncasecmp((char *) (p_url_end - 4), ".html", 5)
02357       || !strncasecmp((char *) (p_url_end - 3), ".htm", 4)
02358       || !strncasecmp((char *) (p_url_end), "/", 1)) {
02359     return 1;                   
02360   } else {
02361     return -1;                  
02362   }
02363 }
02364 
02365 DynArray<char>*
02366 HtmlParser::MakeURL(char *url, char *sub, int subsize, int relative_url)
02367 {
02368   int i, n;
02369   int skip_slashslash;
02370 
02371   DynArray<char>*result = new DynArray<char>(&default_zero_char, 128);
02372 
02373   if (relative_url) {
02374     if (*sub != '/') {
02375 
02376       int url_len = strlen(url);
02377 
02378       
02379       for (i = url_len; i && url[i] != '/'; i--);
02380 
02381       if (i && (url[i] == url[i - 1])) {
02382         
02383 
02384         for (n = 0; n < url_len; ++n) {
02385           (*result) (result->length()) = url[n];
02386         }
02387         (*result) (result->length()) = '/';
02388 
02389       } else {
02390         for (n = 0; n < (i + 1); ++n) {
02391           (*result) (result->length()) = url[n];
02392         }
02393       }
02394 
02395       for (n = 0; n < subsize; ++n) {
02396         (*result) (result->length()) = sub[n];
02397       }
02398       (*result) (result->length()) = '\0';
02399 
02400     } else {
02401       i = 0;
02402       do {
02403         
02404         for (; url[i] && url[i] != '/'; i++);
02405 
02406         if (!url[i]) {
02407           break;
02408         }
02409         
02410         skip_slashslash = ((url[i] == url[i + 1]) && (url[i + 1] == '/'));
02411 
02412         if (skip_slashslash) {
02413           i += 2;
02414         }
02415       } while (skip_slashslash);
02416 
02417       for (n = 0; n < (i - 1); ++n) {
02418         (*result) (result->length()) = url[n];
02419       }
02420 
02421       if (url[n] != '/') {
02422         (*result) (result->length()) = url[n];
02423       }
02424 
02425       for (n = 0; n < subsize; ++n) {
02426         (*result) (result->length()) = sub[n];
02427       }
02428       (*result) (result->length()) = '\0';
02429     }
02430 
02431   } else {
02432     for (n = 0; n < subsize; ++n) {
02433       (*result) (result->length()) = sub[n];
02434     }
02435     (*result) (result->length()) = '\0';
02436   }
02437   return result;
02438 }
02439 
02440 DynArray<char>*
02441 HtmlParser::PrependString(const char *pre, int presize, char *sub, int subsize)
02442 {
02443   int n;
02444 
02445   DynArray<char>*result = new DynArray<char>(&default_zero_char, 128);
02446 
02447   for (n = 0; n < presize; ++n) {
02448     (*result) (result->length()) = pre[n];
02449   }
02450   for (n = 0; n < subsize; ++n) {
02451     (*result) (result->length()) = sub[n];
02452   }
02453   (*result) (result->length()) = '\0';
02454 
02455   return result;
02456 }
02457 
02458 
02459 
02460 
02461 
// Allocator for ObjectReloadCont objects. ObjectReloadCont::ObjectReloadEvent()
// hands a finished continuation back through ObjectReloadContAllocator.free(this).
ClassAllocator<ObjectReloadCont> ObjectReloadContAllocator("ObjectReloadCont");
02463 
02464 ObjectReloadCont::ObjectReloadCont():Continuation(0),
02465 _caller_cont(0), _request_id(0), _send_data(0),
02466 _receive_data(0), _start_event(0),
02467 _state(START), _cur_action(0), _netvc(0), _write_vio(0), _read_vio(0), _read_event_callback(0)
02468 {
02469   SET_HANDLER((ObjectReloadContHandler) & ObjectReloadCont::ObjectReloadEvent);
02470 }
02471 
// Empty destructor: the send/receive buffers and the mutex are released by
// free(), which ObjectReloadEvent() calls before returning the object to
// ObjectReloadContAllocator.
ObjectReloadCont::~ObjectReloadCont()
{
}
02475 
02476 void
02477 ObjectReloadCont::Init(Continuation * cont, char *url, int url_len,
02478                        char *headers, int headers_len, int http_case, int read_event_callback)
02479 {
02480   int total_len;
02481 
02482   mutex = new_ProxyMutex();
02483   _caller_cont = cont;
02484   _request_id = ink_atomic_increment(&global_id, 1);
02485   _read_event_callback = read_event_callback;
02486 
02487   
02488   
02489 
02490   if (http_case) {
02491     if (headers_len) {
02492       total_len = len_GET_METHOD + url_len + len_HTTP_VERSION + len_TERMINATOR + headers_len + len_REQUEST_TERMINATOR;
02493     } else {
02494       total_len = len_GET_METHOD + url_len + len_HTTP_VERSION + len_REQUEST_TERMINATOR;
02495     }
02496     _send_data = new_MIOBuffer(buffer_size_to_index(total_len + 1));    
02497 
02498     memcpy(_send_data->end(), GET_METHOD, len_GET_METHOD);
02499     memcpy(&(_send_data->end())[len_GET_METHOD], url, url_len);
02500     memcpy(&(_send_data->end())[len_GET_METHOD + url_len], HTTP_VERSION, len_HTTP_VERSION);
02501 
02502     if (headers_len) {
02503       memcpy(&(_send_data->end())[len_GET_METHOD + url_len + len_HTTP_VERSION], TERMINATOR, len_TERMINATOR);
02504       memcpy(&(_send_data->end())[len_GET_METHOD + url_len + len_HTTP_VERSION + len_TERMINATOR], headers, headers_len);
02505       memcpy(&(_send_data->end())[len_GET_METHOD + url_len +
02506                                   len_HTTP_VERSION + len_TERMINATOR +
02507                                   headers_len], REQUEST_TERMINATOR, len_REQUEST_TERMINATOR);
02508 
02509       
02510       (_send_data->end())[len_GET_METHOD + url_len +
02511                           len_HTTP_VERSION + len_TERMINATOR + headers_len + len_REQUEST_TERMINATOR] = 0;
02512     } else {
02513       memcpy(&(_send_data->end())[len_GET_METHOD + url_len +
02514                                   len_HTTP_VERSION], REQUEST_TERMINATOR, len_REQUEST_TERMINATOR);
02515 
02516       
02517       (_send_data->end())[len_GET_METHOD + url_len + len_HTTP_VERSION + len_REQUEST_TERMINATOR] = 0;
02518     }
02519     _send_data->fill(total_len);
02520 
02521   } else {
02522     
02523     ink_assert(false);
02524   }
02525   handleEvent(EVENT_IMMEDIATE, (void *) NULL);
02526 }
02527 
02528 void
02529 ObjectReloadCont::free()
02530 {
02531   mutex = 0;
02532   if (_send_data) {
02533     free_MIOBuffer(_send_data);
02534     _send_data = 0;
02535   }
02536   if (_receive_data) {
02537     free_MIOBuffer(_receive_data);
02538     _receive_data = 0;
02539   }
02540 }
02541 
02542 int
02543 ObjectReloadCont::ObjectReloadEvent(int event, void *d)
02544 {
02545   switch (_state) {
02546   case START:
02547     {
02548       IpEndpoint target;
02549       
02550       Debug("update-reload", "Connect start id=%d", _request_id);
02551       _state = ObjectReloadCont::ATTEMPT_CONNECT;
02552       MUTEX_TRY_LOCK(lock, this->mutex, this_ethread());
02553       ink_release_assert(lock);
02554       target.setToLoopback(AF_INET);
02555       target.port() = htons(HttpProxyPort::findHttp(AF_INET)->m_port);
02556       _cur_action = netProcessor.connect_re(this, &target.sa);
02557       return EVENT_DONE;
02558     }
02559   case ATTEMPT_CONNECT:
02560     {
02561       if (event != NET_EVENT_OPEN) {
02562         
02563         Debug("update-reload", "Connect fail id=%d", _request_id);
02564         CallBackUser(event, 0);
02565         free();
02566         ObjectReloadContAllocator.free(this);
02567         return EVENT_DONE;
02568       }
02569       _netvc = (class NetVConnection *) d;
02570 
02571       
02572       Debug("update-reload", "Write start id=%d [%s]", _request_id, _send_data->start());
02573       _state = ObjectReloadCont::WRITING_URL;
02574       IOBufferReader *r = _send_data->alloc_reader();
02575       _write_vio = _netvc->do_io_write(this, r->read_avail(), r);
02576       return EVENT_DONE;
02577     }
02578   case WRITING_URL:
02579     {
02580       ink_release_assert(_write_vio == (VIO *) d);
02581       if (event == VC_EVENT_WRITE_READY) {
02582         _write_vio->reenable();
02583         return EVENT_DONE;
02584       } else if (event == VC_EVENT_WRITE_COMPLETE) {
02585         
02586         Debug("update-reload", "Read start id=%d", _request_id);
02587         _state = ObjectReloadCont::READING_DATA;
02588         _receive_data = new_MIOBuffer(max_iobuffer_size);
02589         _receive_data_reader = _receive_data->alloc_reader();
02590         _read_vio = _netvc->do_io_read(this, INT64_MAX, _receive_data);
02591         return EVENT_DONE;
02592       } else {
02593         
02594         Debug("update-reload", "Write fail id=%d", _request_id);
02595         _netvc->do_io(VIO::CLOSE);
02596         CallBackUser(event, 0);
02597         free();
02598         ObjectReloadContAllocator.free(this);
02599         return EVENT_DONE;
02600       }
02601     }
02602   case READING_DATA:
02603     {
02604       ink_release_assert(_read_vio == (VIO *) d);
02605       switch (event) {
02606       case VC_EVENT_READ_READY:
02607         {
02608           if (_read_event_callback) {
02609             _caller_cont->handleEvent(event, _receive_data_reader);
02610 
02611           } else {
02612             int64_t read_bytes = _receive_data_reader->read_avail();
02613             _receive_data_reader->consume(read_bytes);
02614             _read_vio->reenable();
02615           }
02616           return EVENT_CONT;
02617         }
02618       case VC_EVENT_READ_COMPLETE:
02619       case VC_EVENT_EOS:
02620         {
02621           if (_read_event_callback) {
02622             _caller_cont->handleEvent(event, _receive_data_reader);
02623           }
02624           
02625           Debug("update-reload", "Fill success id=%d", _request_id);
02626           break;
02627         }
02628       default:
02629         {
02630           Debug("update-reload", "Fill read fail id=%d", _request_id);
02631           CallBackUser(event, 0);
02632           break;
02633         }
02634       }                         
02635 
02636       _netvc->do_io(VIO::CLOSE);
02637       free();
02638       ObjectReloadContAllocator.free(this);
02639       return EVENT_DONE;
02640     }
02641   default:
02642     {
02643       ink_release_assert(!"ObjectReloadEvent invalid state");
02644     }
02645 
02646   }                             
02647   return 0;
02648 }
02649 
02650 int
02651 ObjectReloadCont::CallBackUser(int event, void *d)
02652 {
02653   _caller_cont->handleEvent(event, d);
02654   return 0;
02655 }
02656 
02657