• Main Page
  • Related Pages
  • Namespaces
  • Data Structures
  • Files
  • File List
  • Globals

Update.cc

Go to the documentation of this file.
00001 /** @file
00002 
00003   A brief file description
00004 
00005   @section license License
00006 
00007   Licensed to the Apache Software Foundation (ASF) under one
00008   or more contributor license agreements.  See the NOTICE file
00009   distributed with this work for additional information
00010   regarding copyright ownership.  The ASF licenses this file
00011   to you under the Apache License, Version 2.0 (the
00012   "License"); you may not use this file except in compliance
00013   with the License.  You may obtain a copy of the License at
00014 
00015       http://www.apache.org/licenses/LICENSE-2.0
00016 
00017   Unless required by applicable law or agreed to in writing, software
00018   distributed under the License is distributed on an "AS IS" BASIS,
00019   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00020   See the License for the specific language governing permissions and
00021   limitations under the License.
00022  */
00023 
00024 #include "libts.h"
00025 
00026 #include "Main.h"
00027 #include "Update.h"
00028 #include "ProxyConfig.h"
00029 #include "StatSystem.h"
00030 #include "HttpUpdateSM.h"
00031 #include "HttpDebugNames.h"
00032 #include "URL.h"
00033 #include "HdrUtils.h"
00034 #include <records/I_RecHttp.h>
00035 #include "I_Layout.h"
00036 
00037 RecRawStatBlock *update_rsb;
00038 
// Thin alias over the records API for binding an integer configuration
// variable.  The expansion carries its own trailing ';'.
#define UpdateEstablishStaticConfigInteger(_ix,_n) \
  REC_EstablishStaticConfigInteger(_ix,_n);

// Raw-stat helpers operating on the Update stat block (update_rsb).
// The increment/decrement forms reference 'mutex' from the scope in which
// they are expanded, and both carry their own trailing ';'.
#define UPDATE_INCREMENT_DYN_STAT(x) \
        RecIncrRawStat(update_rsb, mutex->thread_holding, (int) x, 1);
#define UPDATE_DECREMENT_DYN_STAT(x) \
        RecIncrRawStat(update_rsb, mutex->thread_holding, (int) x, -1);
// Reads the count of stat 'x' into C and its sum into S (two statements).
#define UPDATE_READ_DYN_STAT(x, C, S) \
        RecGetRawStatCount(update_rsb, (int) x, &C); \
        RecGetRawStatSum(update_rsb, (int) x, &S);

// Resets both the sum and the count of stat 'x' to zero.
// No ';' after "while (0)": the caller supplies it, so the macro behaves
// as a single statement (e.g. inside an unbraced if/else).
#define UPDATE_CLEAR_DYN_STAT(x) \
do { \
        RecSetRawStatSum(update_rsb, x, 0); \
        RecSetRawStatCount(update_rsb, x, 0); \
} while (0)

// Aliases for the records configuration API.
#define UPDATE_ConfigReadInteger         REC_ConfigReadInteger
#define UPDATE_ConfigReadString          REC_ConfigReadString
#define UPDATE_RegisterConfigUpdateFunc  REC_RegisterConfigUpdateFunc
00059 
00060 
00061 
// Fundamental constants
//
// The request-line fragments are declared as arrays (rather than pointers)
// so that their lengths can be taken with sizeof at compile time.

static const char GET_METHOD[] = "GET ";
static const char HTTP_VERSION[] = " HTTP/1.0";
static const char REQUEST_TERMINATOR[] = "\r\n\r\n";
static const char TERMINATOR[] = "\r\n";
static const char *const HTML_COMMENT_TAG = "!--";
static const char *const HTML_COMMENT_END = "-->";
static const int MAX_LINE_LENGTH = (32 * 1024);

// Lengths of the fragments above, excluding the terminating NUL.
// They are constant-initialized, so they are already correct before
// UpdateManager::start() runs; start() re-assigns the same values.

static int len_GET_METHOD = (int) (sizeof(GET_METHOD) - 1);
static int len_HTTP_VERSION = (int) (sizeof(HTTP_VERSION) - 1);
static int len_REQUEST_TERMINATOR = (int) (sizeof(REQUEST_TERMINATOR) - 1);
static int len_TERMINATOR = (int) (sizeof(TERMINATOR) - 1);
00078 
00079 struct html_tag update_allowable_html_tags[] = {
00080   {"a", "href"},
00081   {"img", "src"},
00082   {"img", "href"},
00083   {"body", "background"},
00084   {"frame", "src"},
00085   {"iframe", "src"},
00086   {"fig", "src"},
00087   {"overlay", "src"},
00088   {"applet", "code"},
00089   {"script", "src"},
00090   {"embed", "src"},
00091   {"bgsound", "src"},
00092   {"area", "href"},
00093   {"base", "href"},             // special handling
00094   {"meta", "content"},          // special handling
00095   {NULL, NULL}
00096 };
00097 
// A URL scheme prefix (including the trailing ':') together with its length.
struct schemes_descriptor
{
  const char *tag;              // scheme prefix, e.g. "http:"; NULL ends a table
  int tag_len;                  // strlen(tag)
};

// Builds a table entry whose tag_len is computed at compile time, so the
// tables are usable even before init_proto_schemes() /
// init_supported_proto_schemes() have been called (those functions simply
// recompute the same lengths).
#define UPDATE_SCHEME_ENTRY(s) { s, (int) (sizeof(s) - 1) }

// Known URL schemes; terminated by a {NULL, 0} sentinel.
struct schemes_descriptor proto_schemes[] = {
  UPDATE_SCHEME_ENTRY("cid:"),
  UPDATE_SCHEME_ENTRY("clsid:"),
  UPDATE_SCHEME_ENTRY("file:"),
  UPDATE_SCHEME_ENTRY("finger:"),
  UPDATE_SCHEME_ENTRY("ftp:"),
  UPDATE_SCHEME_ENTRY("gopher:"),
  UPDATE_SCHEME_ENTRY("hdl:"),
  UPDATE_SCHEME_ENTRY("http:"),
  UPDATE_SCHEME_ENTRY("https:"),
  UPDATE_SCHEME_ENTRY("ilu:"),
  UPDATE_SCHEME_ENTRY("ior:"),
  UPDATE_SCHEME_ENTRY("irc:"),
  UPDATE_SCHEME_ENTRY("java:"),
  UPDATE_SCHEME_ENTRY("javascript:"),
  UPDATE_SCHEME_ENTRY("lifn:"),
  UPDATE_SCHEME_ENTRY("mailto:"),
  UPDATE_SCHEME_ENTRY("mid:"),
  UPDATE_SCHEME_ENTRY("news:"),
  UPDATE_SCHEME_ENTRY("path:"),
  UPDATE_SCHEME_ENTRY("prospero:"),
  UPDATE_SCHEME_ENTRY("rlogin:"),
  UPDATE_SCHEME_ENTRY("service:"),
  UPDATE_SCHEME_ENTRY("shttp:"),
  UPDATE_SCHEME_ENTRY("snews:"),
  UPDATE_SCHEME_ENTRY("stanf:"),
  UPDATE_SCHEME_ENTRY("telnet:"),
  UPDATE_SCHEME_ENTRY("tn3270:"),
  UPDATE_SCHEME_ENTRY("wais:"),
  UPDATE_SCHEME_ENTRY("whois++:"),
  {NULL, 0}
};

// Schemes listed as supported; terminated by a {NULL, 0} sentinel.
struct schemes_descriptor supported_proto_schemes[] = {
  UPDATE_SCHEME_ENTRY("http:"),
  {NULL, 0}
};

#undef UPDATE_SCHEME_ENTRY
00141 
00142 static int global_id = 1;
00143 
00144 void
00145 init_proto_schemes()
00146 {
00147   int n;
00148   for (n = 0; proto_schemes[n].tag; ++n) {
00149     proto_schemes[n].tag_len = strlen(proto_schemes[n].tag);
00150   }
00151 }
00152 
00153 void
00154 init_supported_proto_schemes()
00155 {
00156   int n;
00157   for (n = 0; supported_proto_schemes[n].tag; ++n) {
00158     supported_proto_schemes[n].tag_len = strlen(supported_proto_schemes[n].tag);
00159   }
00160 }
00161 
00162 ///////////////////////////////////////////////////////////////////////////////
00163 // Class UpdateConfigParams
00164 //      Global subsystem configuration parameters
00165 ///////////////////////////////////////////////////////////////////////////////
00166 
00167 UpdateConfigParams::UpdateConfigParams():
00168 _enabled(0), _immediate_update(0), _retry_count(0),
00169 _retry_interval(0), _concurrent_updates(0), _max_update_state_machines(0), _memory_use_in_mb(0)
00170 {
00171 }
00172 
00173 UpdateConfigParams::UpdateConfigParams(UpdateConfigParams & p)
00174 {
00175   _enabled = p._enabled;
00176   _immediate_update = p._immediate_update;
00177   _retry_count = p._retry_count;
00178   _retry_interval = p._retry_interval;
00179   _concurrent_updates = p._concurrent_updates;
00180   _max_update_state_machines = p._max_update_state_machines;
00181   _memory_use_in_mb = p._memory_use_in_mb;
00182 }
00183 
00184 UpdateConfigParams::~UpdateConfigParams()
00185 {
00186 }
00187 
00188 UpdateConfigParams & UpdateConfigParams::operator=(UpdateConfigParams & p)
00189 {
00190   _enabled = p._enabled;
00191   _immediate_update = p._immediate_update;
00192   _retry_count = p._retry_count;
00193   _retry_interval = p._retry_interval;
00194   _concurrent_updates = p._concurrent_updates;
00195   _max_update_state_machines = p._max_update_state_machines;
00196   _memory_use_in_mb = p._memory_use_in_mb;
00197   return *this;
00198 }
00199 
00200 int
00201 UpdateConfigParams::operator==(UpdateConfigParams & p)
00202 {
00203   if (_enabled != p._enabled)
00204     return 0;
00205   if (_immediate_update != p._immediate_update)
00206     return 0;
00207   if (_retry_count != p._retry_count)
00208     return 0;
00209   if (_retry_interval != p._retry_interval)
00210     return 0;
00211   if (_concurrent_updates != p._concurrent_updates)
00212     return 0;
00213   if (_max_update_state_machines != p._max_update_state_machines)
00214     return 0;
00215   if (_memory_use_in_mb != p._memory_use_in_mb)
00216     return 0;
00217   return 1;
00218 }
00219 
00220 /////////////////////////////////////////////////////////////////////////////
00221 // Class UpdateEntry
00222 //      Per update object descriptor
00223 /////////////////////////////////////////////////////////////////////////////
00224 
00225 UpdateEntry::UpdateEntry():_group_link(0), _hash_link(0), _id(0), _url(0),
00226 _URLhandle(), _terminal_url(0),
00227 _request_headers(0), _num_request_headers(0),
00228 _http_hdr(0),
00229 _offset_hour(0), _interval(0), _max_depth(0), _start_time(0), _expired(0), _scheme_index(-1), _update_event_status(0)
00230 {
00231   http_parser_init(&_http_parser);
00232 }
00233 
00234 UpdateEntry::~UpdateEntry()
00235 {
00236   ats_free(_url);
00237   _url = NULL;
00238 
00239   if (_URLhandle.valid()) {
00240     _URLhandle.destroy();
00241   }
00242 
00243   ats_free(_request_headers);
00244   _request_headers = NULL;
00245 
00246   // INKqa12891: _http_hdr can be NULL
00247   if (_http_hdr && _http_hdr->valid()) {
00248     _http_hdr->destroy();
00249     delete _http_hdr;
00250     _http_hdr = NULL;
00251   }
00252   _indirect_list = NULL;
00253 }
00254 
00255 void
00256 UpdateEntry::Init(int derived_url)
00257 {
00258   _id = ink_atomic_increment(&global_id, 1);
00259   if (derived_url) {
00260     return;
00261   }
00262   ComputeScheduleTime();
00263 
00264   int scheme_len;
00265   const char *scheme = _URLhandle.scheme_get(&scheme_len);
00266   if (scheme != URL_SCHEME_HTTP) {
00267     // Depth is only valid for scheme "http"
00268     _max_depth = 0;
00269   }
00270 
00271 }
00272 
00273 int
00274 UpdateEntry::ValidURL(char *s, char *e)
00275 {
00276   // Note: string 's' is null terminated.
00277 
00278   const char *url_start = s;
00279   char *url_end = e;
00280   int err;
00281 
00282   _URLhandle.create(NULL);
00283   err = _URLhandle.parse(&url_start, url_end);
00284   if (err >= 0) {
00285     _url = ats_strdup(s);
00286     return 0;                   // Valid URL
00287   } else {
00288     _URLhandle.destroy();
00289     return 1;                   // Invalid URL
00290   }
00291   return 0;
00292 }
00293 
00294 int
00295 UpdateEntry::ValidHeaders(char *s)
00296 {
00297   // Note: string 's' is null terminated.
00298 
00299   enum
00300   {
00301     FIND_START_OF_HEADER_NAME = 1,
00302     SCAN_FOR_HEADER_NAME,
00303     SCAN_FOR_END_OF_HEADER_VALUE
00304   };
00305 
00306   char *p = s;
00307   char *t;
00308   int bad_header = 0;
00309   int end_of_headers = 0;
00310   int scan_state = FIND_START_OF_HEADER_NAME;
00311 
00312   while (*p) {
00313     switch (scan_state) {
00314     case FIND_START_OF_HEADER_NAME:
00315       {
00316         if (!ValidHeaderNameChar(*p)) {
00317           bad_header = 1;
00318           break;
00319         } else {
00320           scan_state = SCAN_FOR_HEADER_NAME;
00321           break;
00322         }
00323       }
00324     case SCAN_FOR_HEADER_NAME:
00325       {
00326         if (!ValidHeaderNameChar(*p)) {
00327           if (*p == ':') {
00328             scan_state = SCAN_FOR_END_OF_HEADER_VALUE;
00329             break;
00330           } else {
00331             bad_header = 1;
00332             break;
00333           }
00334         } else {
00335           // Get next char
00336           break;
00337         }
00338       }
00339     case SCAN_FOR_END_OF_HEADER_VALUE:
00340       {
00341         t = strchr(p, '\r');
00342         if (t) {
00343           if (*(t + 1) == '\n') {
00344             p = t + 1;
00345             ++_num_request_headers;
00346             scan_state = FIND_START_OF_HEADER_NAME;
00347             break;
00348           } else {
00349             bad_header = 1;
00350             break;
00351           }
00352         } else {
00353           t = strchr(p, 0);
00354           if (t) {
00355             ++_num_request_headers;
00356             end_of_headers = 1;
00357           } else {
00358             bad_header = 1;
00359           }
00360           break;
00361         }
00362       }
00363     }                           // End of switch
00364 
00365     if (bad_header) {
00366       if (_num_request_headers) {
00367         return 1;               // Fail; Bad header with > 1 valid headers
00368       } else {
00369         if (p == s) {
00370           return 0;             // OK; user specified no headers
00371         } else {
00372           return 1;             // Fail; first header is invalid
00373         }
00374       }
00375     } else {
00376       if (end_of_headers) {
00377         break;
00378       } else {
00379         ++p;
00380       }
00381     }
00382   }
00383 
00384   // At least 1 valid header exists
00385 
00386   _request_headers = ats_strdup(s);
00387   return 0;                     // OK; > 1 valid headers
00388 }
00389 
00390 int
00391 UpdateEntry::BuildHttpRequest()
00392 {
00393   // Given the HTTP request and associated headers,
00394   // transform the data into a HTTPHdr object.
00395 
00396   char request[MAX_LINE_LENGTH];
00397   int request_size;
00398 
00399   request_size = len_GET_METHOD + strlen(_url) +
00400     len_HTTP_VERSION + (_request_headers ? len_TERMINATOR + strlen(_request_headers) : 0) + len_REQUEST_TERMINATOR + 1;
00401   if (request_size > MAX_LINE_LENGTH) {
00402     return 1;
00403   }
00404   if (_request_headers) {
00405     snprintf(request, sizeof(request), "%s%s%s%s%s%s", GET_METHOD, _url,
00406              HTTP_VERSION, TERMINATOR, _request_headers, REQUEST_TERMINATOR);
00407   } else {
00408     snprintf(request, sizeof(request), "%s%s%s%s", GET_METHOD, _url, HTTP_VERSION, REQUEST_TERMINATOR);
00409   }
00410   _http_hdr = new HTTPHdr;
00411   http_parser_init(&_http_parser);
00412   _http_hdr->create(HTTP_TYPE_REQUEST);
00413   int err;
00414   const char *start = request;
00415   const char *end = start + request_size - 1;
00416 
00417   while (start < end) {
00418     err = _http_hdr->parse_req(&_http_parser, &start, end, false);
00419     if (err != PARSE_CONT) {
00420       break;
00421     }
00422     end = start + strlen(start);
00423   }
00424   http_parser_clear(&_http_parser);
00425   return 0;
00426 }
00427 
00428 int
00429 UpdateEntry::ValidHeaderNameChar(char c)
00430 {
00431   if ((c > 31) && (c < 127)) {
00432     if (ValidSeparatorChar(c)) {
00433       return 0;                 // Invalid
00434     } else {
00435       return 1;                 // Valid
00436     }
00437   } else {
00438     return 0;                   // Invalid
00439   }
00440 }
00441 
00442 int
00443 UpdateEntry::ValidSeparatorChar(char c)
00444 {
00445   switch (c) {
00446   case '(':
00447   case ')':
00448   case '<':
00449   case '>':
00450   case '@':
00451   case ',':
00452   case ';':
00453   case ':':
00454   case '\\':
00455   case '"':
00456   case '/':
00457   case '[':
00458   case ']':
00459   case '?':
00460   case '=':
00461   case '{':
00462   case '}':
00463   case ' ':
00464   case '\t':
00465     return 1;                   // Valid separator char
00466   default:
00467     return 0;
00468   }
00469 }
00470 
00471 int
00472 UpdateEntry::ValidHour(char *s)
00473 {
00474   // Note: string 's' is null terminated.
00475 
00476   _offset_hour = atoi(s);
00477   if ((_offset_hour >= MIN_OFFSET_HOUR) && (_offset_hour <= MAX_OFFSET_HOUR)) {
00478     return 0;                   // Valid data
00479   } else {
00480     return 1;                   // Invalid data
00481   }
00482 }
00483 
00484 int
00485 UpdateEntry::ValidInterval(char *s)
00486 {
00487   // Note: string 's' is null terminated.
00488 
00489   _interval = atoi(s);
00490   if ((_interval >= MIN_INTERVAL) && (_interval <= MAX_INTERVAL)) {
00491     return 0;                   // Valid data
00492   } else {
00493     return 1;                   // Invalid data
00494   }
00495   return 0;
00496 }
00497 
00498 int
00499 UpdateEntry::ValidDepth(char *s)
00500 {
00501   // Note: string 's' is null terminated.
00502 
00503   _max_depth = atoi(s);
00504 
00505   if ((_max_depth >= MIN_DEPTH) && (_max_depth <= MAX_DEPTH)) {
00506     return 0;                   // Valid data
00507   } else {
00508     return 1;                   // Invalid data
00509   }
00510   return 0;
00511 }
00512 
00513 void
00514 UpdateEntry::SetTerminalStatus(int term_url)
00515 {
00516   _terminal_url = term_url;
00517 }
00518 
00519 int
00520 UpdateEntry::TerminalURL()
00521 {
00522   return _terminal_url;
00523 }
00524 
00525 
00526 void
00527 UpdateEntry::ComputeScheduleTime()
00528 {
00529   ink_hrtime ht;
00530   time_t cur_time;
00531   struct tm cur_tm;
00532 
00533   if (_expired) {
00534     _expired = 0;
00535   } else {
00536     if (_start_time) {
00537       return;
00538     }
00539   }
00540 
00541   ht = ink_get_based_hrtime();
00542   cur_time = ht / HRTIME_SECOND;
00543 
00544   if (!_start_time) {
00545     time_t zero_hour; // absolute time of offset hour.
00546 
00547     // Get the current time in a TM struct so we can
00548     // zero out the minute and second.
00549     ink_localtime_r(&cur_time, &cur_tm);
00550     cur_tm.tm_hour = _offset_hour;
00551     cur_tm.tm_min = 0;
00552     cur_tm.tm_sec = 0;
00553     // Now we can find out when the offset hour is today.
00554     zero_hour = convert_tm(&cur_tm);
00555     // If it's in the future, back up a day and use that as the base.
00556     if (zero_hour > cur_time) zero_hour -= 24 * SECONDS_PER_HOUR;
00557     _start_time = cur_time + (_interval - ((cur_time - zero_hour) % _interval));
00558   } else {
00559     // Compute next start time
00560     _start_time += _interval;
00561   }
00562 }
00563 
00564 int
00565 UpdateEntry::ScheduleNow(time_t cur_time)
00566 {
00567   if (cur_time >= _start_time) {
00568     _expired = 1;
00569     return 1;
00570   } else {
00571     return 0;
00572   }
00573 }
00574 
00575 /////////////////////////////////////////////////////////////////////////////
00576 // Class UpdateConfigList
00577 //      Container for UpdateEntry objects
00578 /////////////////////////////////////////////////////////////////////////////
00579 UpdateConfigList::UpdateConfigList():_entry_q_elements(0), _pending_q_elements(0), _hash_table(0)
00580 {
00581 }
00582 
00583 UpdateConfigList::~UpdateConfigList()
00584 {
00585   if (_hash_table) {
00586     delete[]_hash_table;
00587     _hash_table = NULL;
00588   }
00589 }
00590 
00591 void
00592 UpdateConfigList::Add(UpdateEntry * e)
00593 {
00594   _entry_q_elements++;
00595   _entry_q.enqueue(e);
00596 }
00597 
00598 int
00599 UpdateConfigList::HashAdd(UpdateEntry * e)
00600 {
00601   uint64_t folded64 = e->_url_md5.fold();
00602   ink_assert(folded64);
00603   int32_t index = folded64 % HASH_TABLE_SIZE;
00604 
00605   if (!_hash_table) {
00606     // One time initialization
00607 
00608     _hash_table = new UpdateEntry *[HASH_TABLE_SIZE];
00609     memset((char *) _hash_table, 0, (sizeof(UpdateEntry *) * HASH_TABLE_SIZE));
00610   }
00611   // Add to hash table only if unique
00612 
00613   UpdateEntry *he = _hash_table[index];
00614   UpdateEntry **last_link = &_hash_table[index];
00615 
00616   while (he) {
00617     if (e->_url_md5 == he->_url_md5) {
00618       return 1;                 // duplicate detected
00619     } else {
00620       last_link = &he->_hash_link;
00621       he = he->_hash_link;
00622     }
00623   }
00624 
00625   // Entry is unique, add to hash list
00626 
00627   e->_hash_link = *last_link;
00628   *last_link = e;
00629 
00630   // Add to entry queue
00631 
00632   Add(e);
00633 
00634   return 0;                     // Entry added
00635 }
00636 
00637 UpdateEntry *
00638 UpdateConfigList::Remove()
00639 {
00640   UpdateEntry *e = _entry_q.dequeue();
00641   if (e) {
00642     _entry_q_elements--;
00643   }
00644   return e;
00645 }
00646 
00647 void
00648 UpdateConfigList::AddPending(UpdateEntry * e)
00649 {
00650   _pending_q_elements++;
00651   _pending_q.enqueue(e);
00652 }
00653 
00654 UpdateEntry *
00655 UpdateConfigList::RemovePending()
00656 {
00657   UpdateEntry *e = _pending_q.dequeue();
00658   if (e) {
00659     _pending_q_elements--;
00660   }
00661   return e;
00662 }
00663 
00664 /////////////////////////////////////////////////////////////////////////////
00665 // Class UpdateManager
00666 //      External interface to Update subsystem
00667 /////////////////////////////////////////////////////////////////////////////
00668 
00669 UpdateManager::UpdateManager():_CM(0), _SCH(0)
00670 {
00671 }
00672 
00673 UpdateManager::~UpdateManager()
00674 {
00675 }
00676 
00677 int
00678 UpdateManager::start()
00679 {
00680   // Initialize fundamental constants
00681 
00682   len_GET_METHOD = strlen(GET_METHOD);
00683   len_HTTP_VERSION = strlen(HTTP_VERSION);
00684   len_REQUEST_TERMINATOR = strlen(REQUEST_TERMINATOR);
00685   len_TERMINATOR = strlen(TERMINATOR);
00686   init_proto_schemes();
00687   init_supported_proto_schemes();
00688 
00689   _CM = new UpdateConfigManager;
00690   _CM->init();
00691 
00692   _SCH = new UpdateScheduler(_CM);
00693   _SCH->Init();
00694 
00695   return 0;
00696 }
00697 
00698 UpdateManager updateManager;
00699 
00700 typedef int (UpdateConfigManager::*UpdateConfigManagerContHandler) (int, void *);
00701 /////////////////////////////////////////////////////////////////////////////
00702 // Class UpdateConfigManager
00703 //      Handle Update subsystem global configuration and URL list updates
00704 /////////////////////////////////////////////////////////////////////////////
00705 UpdateConfigManager::UpdateConfigManager()
00706 :Continuation(new_ProxyMutex()), _periodic_event(0), _filename(0)
00707 {
00708   SET_HANDLER((UpdateConfigManagerContHandler)
00709               & UpdateConfigManager::ProcessUpdate);
00710 }
00711 
00712 UpdateConfigManager::~UpdateConfigManager()
00713 {
00714 }
00715 
00716 int
00717 UpdateConfigManager::init()
00718 {
00719   update_rsb = RecAllocateRawStatBlock((int) update_stat_count);
00720 
00721   _CP_actual = new UpdateConfigParams;
00722 
00723   // Setup update handlers for each global configuration parameter
00724 
00725   UpdateEstablishStaticConfigInteger(_CP_actual->_enabled, "proxy.config.update.enabled");
00726 
00727   UpdateEstablishStaticConfigInteger(_CP_actual->_immediate_update, "proxy.config.update.force");
00728 
00729   UpdateEstablishStaticConfigInteger(_CP_actual->_retry_count, "proxy.config.update.retry_count");
00730 
00731   UpdateEstablishStaticConfigInteger(_CP_actual->_retry_interval, "proxy.config.update.retry_interval");
00732 
00733   UpdateEstablishStaticConfigInteger(_CP_actual->_concurrent_updates, "proxy.config.update.concurrent_updates");
00734 
00735   UpdateEstablishStaticConfigInteger(_CP_actual->_max_update_state_machines,
00736                                      "proxy.config.update.max_update_state_machines");
00737 
00738   UpdateEstablishStaticConfigInteger(_CP_actual->_memory_use_in_mb, "proxy.config.update.memory_use_mb");
00739 
00740   // Register Scheduled Update stats
00741 
00742   RecRegisterRawStat(update_rsb, RECT_PROCESS,
00743                      "proxy.process.update.successes",
00744                      RECD_INT, RECP_NON_PERSISTENT, (int) update_successes_stat, RecRawStatSyncCount);
00745   UPDATE_CLEAR_DYN_STAT(update_successes_stat);
00746 
00747   RecRegisterRawStat(update_rsb, RECT_PROCESS,
00748                      "proxy.process.update.no_actions",
00749                      RECD_INT, RECP_NON_PERSISTENT, (int) update_no_actions_stat, RecRawStatSyncCount);
00750   UPDATE_CLEAR_DYN_STAT(update_no_actions_stat);
00751 
00752   RecRegisterRawStat(update_rsb, RECT_PROCESS,
00753                      "proxy.process.update.fails",
00754                      RECD_INT, RECP_NON_PERSISTENT, (int) update_fails_stat, RecRawStatSyncCount);
00755   UPDATE_CLEAR_DYN_STAT(update_fails_stat);
00756 
00757   RecRegisterRawStat(update_rsb, RECT_PROCESS,
00758                      "proxy.process.update.unknown_status",
00759                      RECD_INT, RECP_NON_PERSISTENT, (int) update_unknown_status_stat, RecRawStatSyncCount);
00760   UPDATE_CLEAR_DYN_STAT(update_unknown_status_stat);
00761 
00762   RecRegisterRawStat(update_rsb, RECT_PROCESS,
00763                      "proxy.process.update.state_machines",
00764                      RECD_INT, RECP_NON_PERSISTENT, (int) update_state_machines_stat, RecRawStatSyncCount);
00765   UPDATE_CLEAR_DYN_STAT(update_state_machines_stat);
00766 
00767   Debug("update",
00768         "Update params: enable %" PRId64" force %" PRId64" rcnt %" PRId64" rint %" PRId64" updates %" PRId64" "
00769         "max_sm %" PRId64" mem %" PRId64"",
00770         _CP_actual->_enabled, _CP_actual->_immediate_update,
00771         _CP_actual->_retry_count, _CP_actual->_retry_interval,
00772         _CP_actual->_concurrent_updates, _CP_actual->_max_update_state_machines, _CP_actual->_memory_use_in_mb);
00773 
00774   // Make working and actual global config copies equal
00775 
00776   _CP = new UpdateConfigParams(*_CP_actual);
00777 
00778   // Setup "update.config" update handler
00779 
00780   SetFileName((char *) "update.config");
00781   REC_RegisterConfigUpdateFunc("proxy.config.update.update_configuration", URL_list_update_callout, (void *) this);
00782 
00783   // Simulate configuration update to sync working and current databases
00784 
00785   handleEvent(EVENT_IMMEDIATE, (Event *) NULL);
00786 
00787   // Setup periodic to detect global config updates
00788 
00789   _periodic_event = eventProcessor.schedule_every(this, HRTIME_SECONDS(10));
00790 
00791   return 0;
00792 }
00793 
00794 int
00795 UpdateConfigManager::GetConfigParams(Ptr<UpdateConfigParams> *P)
00796 {
00797   MUTEX_TRY_LOCK(lock, mutex, this_ethread());
00798   if (!lock) {
00799     return 0;                   // Try again later
00800   } else {
00801     *P = _CP;
00802     return 1;                   // Success
00803   }
00804 }
00805 
00806 int
00807 UpdateConfigManager::GetConfigList(Ptr<UpdateConfigList> *L)
00808 {
00809   MUTEX_TRY_LOCK(lock, mutex, this_ethread());
00810   if (!lock) {
00811     return 0;                   // Try again later
00812   } else {
00813     *L = _CL;
00814     return 1;                   // Success
00815   }
00816 }
00817 
00818 int
00819 UpdateConfigManager::URL_list_update_callout(const char */* name ATS_UNUSED */, RecDataT /* data_type ATS_UNUSED */,
00820                                              RecData data, void *cookie)
00821 {
00822   UpdateConfigManager *cm = (UpdateConfigManager *) cookie;
00823   cm->SetFileName((char *) data.rec_string);
00824 
00825 
00826   // URL update may block in file i/o.
00827   // Reschedule on ET_CACHE thread.
00828 
00829   eventProcessor.schedule_imm(cm, ET_CACHE);
00830 
00831   return 0;
00832 }
00833 
00834 int
00835 UpdateConfigManager::ProcessUpdate(int event, Event * e)
00836 {
00837   if (event == EVENT_IMMEDIATE) {
00838     ////////////////////////////////////////////////////////////////////
00839     // EVENT_IMMEDIATE -- URL list update
00840     ////////////////////////////////////////////////////////////////////
00841 
00842     UpdateConfigList *l = NULL;
00843 
00844     l = BuildUpdateList();
00845     if (l) {
00846       _CL = l;
00847     }
00848     return EVENT_DONE;
00849   }
00850 
00851   if (event == EVENT_INTERVAL) {
00852     ////////////////////////////////////////////////////////////////////
00853     // EVENT_INTERVAL  -- Global configuration update check
00854     ////////////////////////////////////////////////////////////////////
00855 
00856     UpdateConfigParams *p = new UpdateConfigParams(*_CP_actual);
00857 
00858     if (!(*_CP == *p)) {
00859       _CP = p;
00860       Debug("update", "enable %" PRId64" force %" PRId64" rcnt %" PRId64" rint %" PRId64" updates %" PRId64" state machines %" PRId64" mem %" PRId64"",
00861             p->_enabled, p->_immediate_update, p->_retry_count,
00862             p->_retry_interval, p->_concurrent_updates, p->_max_update_state_machines, p->_memory_use_in_mb);
00863     } else {
00864       delete p;
00865     }
00866     return EVENT_DONE;
00867   }
00868   // Unknown event, ignore it.
00869 
00870   Debug("update", "ProcessUpdate: Unknown event %d %p", event, e);
00871   return EVENT_DONE;
00872 }
00873 
00874 UpdateConfigList *
00875 UpdateConfigManager::BuildUpdateList()
00876 {
00877   // Build pathname to "update.config" and open file
00878   ats_scoped_str config_path;
00879 
00880   if (_filename) {
00881     config_path = Layout::get()->relative_to(Layout::get()->sysconfdir, _filename);
00882   } else {
00883     return (UpdateConfigList *) NULL;
00884   }
00885 
00886   int fd = open(config_path, O_RDONLY);
00887   if (fd < 0) {
00888     Warning("read update.config, open failed");
00889     SignalWarning(MGMT_SIGNAL_CONFIG_ERROR, "read update.config, open failed");
00890     return (UpdateConfigList *) NULL;
00891   }
00892   return ParseConfigFile(fd);
00893 }
00894 
00895 int
00896 UpdateConfigManager::GetDataLine(int fd, int bufsize, char *buf, int field_delimiters, int delimiter)
00897 {
00898   char *line = buf;
00899   int linesize = bufsize;
00900   int bytes_read = 0;
00901   int rlen;
00902 
00903   while ((rlen = ink_file_fd_readline(fd, linesize, line)) > 0) {
00904     ////////////////////////////////////////////////////////////////////
00905     // Notes:
00906     //      1) ink_file_fd_readline() null terminates returned buffer
00907     //      2) Input processing guarantees that the item delimiter '\'
00908     //         does not exist in any data field.
00909     ////////////////////////////////////////////////////////////////////
00910 
00911     if (0 == bytes_read) {
00912       // A comment line, just return.
00913       if (*line == '#') return rlen;
00914       else if (1 == rlen) continue; // leading blank line, ignore.
00915     }
00916     bytes_read += rlen;
00917 
00918     // Determine if we have a complete line.
00919 
00920     char *p = buf;
00921     int delimiters_found = 0;
00922 
00923     while (*p) {
00924       if (*p == delimiter) {
00925         delimiters_found++;
00926       }
00927       p++;
00928     }
00929     if (delimiters_found == field_delimiters) {
00930       // We have a complete line.
00931       return bytes_read;
00932 
00933     } else if ((delimiters_found == (field_delimiters - 1))
00934                && (*(p - 1) == '\n')) {
00935       // End of line not delimited.
00936       // Fix it and consider it a complete line.
00937 
00938       *(p - 1) = '\\';
00939       return bytes_read;
00940     }
00941     // Resume read
00942     line += rlen;
00943     linesize -= rlen;
00944   }
00945   return 0;
00946 }
00947 
00948 UpdateConfigList *
00949 UpdateConfigManager::ParseConfigFile(int f)
00950 {
00951   /*
00952      "update.config" line syntax:
00953      <URL><Request Headers><Offset Hour><Interval><Recursion depth>\
00954    */
00955 
00956   enum
00957   { F_URL, F_HEADERS, F_HOUR, F_INTERVAL, F_DEPTH, F_ITEMS };
00958   char *p_start[F_ITEMS];
00959   char *p_end[F_ITEMS];
00960 
00961   char line[MAX_LINE_LENGTH];
00962   char *p;
00963 
00964   int ln = 0;
00965   int i;
00966 
00967   UpdateEntry *e = NULL;
00968   UpdateConfigList *ul = new UpdateConfigList;
00969 
00970   while (GetDataLine(f, sizeof(line) - 1, line, F_ITEMS, '\\') > 0) {
00971     ++ln;
00972     if (*line == '#') {
00973       continue;
00974     } else {
00975       p = line;
00976     }
00977 
00978     // Extract fields
00979 
00980     for (i = 0; i < F_ITEMS; ++i) {
00981       p_start[i] = p;
00982       p_end[i] = strchr(p, '\\');
00983       *p_end[i] = 0;            // Null terminate string
00984 
00985       if (p_end[i]) {
00986         p = p_end[i] + 1;
00987       } else {
00988         Warning("read update.config, invalid syntax, line %d", ln);
00989         SignalWarning(MGMT_SIGNAL_CONFIG_ERROR, "read update.config, invalid syntax");
00990         break;
00991       }
00992     }
00993     if (i < F_ITEMS) {
00994       // Syntax error
00995       goto abort_processing;
00996     }
00997     // Validate data fields
00998 
00999     e = new UpdateEntry;
01000 
01001     ////////////////////////////////////
01002     // Validate URL
01003     ////////////////////////////////////
01004     if (e->ValidURL(p_start[F_URL], p_end[F_URL])) {
01005       Warning("read update.config, invalid URL field, line %d", ln);
01006       SignalWarning(MGMT_SIGNAL_CONFIG_ERROR, "read update.config, invalid URL field");
01007       goto abort_processing;
01008     }
01009     ////////////////////////////////////
01010     // Validate headers
01011     ////////////////////////////////////
01012     if (e->ValidHeaders(p_start[F_HEADERS])) {
01013       Warning("read update.config, invalid headers field, line %d", ln);
01014       SignalWarning(MGMT_SIGNAL_CONFIG_ERROR, "read update.config, invalid headers field");
01015       goto abort_processing;
01016     }
01017     /////////////////////////////////////////////////////////////
01018     // Convert request (URL+Headers) into HTTPHdr format.
01019     /////////////////////////////////////////////////////////////
01020     if (e->BuildHttpRequest()) {
01021       Warning("read update.config, header processing error, line %d", ln);
01022       SignalWarning(MGMT_SIGNAL_CONFIG_ERROR, "read update.config, header processing error");
01023       goto abort_processing;
01024     }
01025     ////////////////////////////////////
01026     // Validate hour
01027     ////////////////////////////////////
01028     if (e->ValidHour(p_start[F_HOUR])) {
01029       Warning("read update.config, invalid hour field, line %d", ln);
01030       SignalWarning(MGMT_SIGNAL_CONFIG_ERROR, "read update.config, invalid hour field");
01031       goto abort_processing;
01032     }
01033     ////////////////////////////////////
01034     // Validate interval
01035     ////////////////////////////////////
01036     if (e->ValidInterval(p_start[F_INTERVAL])) {
01037       Warning("read update.config, invalid interval field, line %d", ln);
01038       SignalWarning(MGMT_SIGNAL_CONFIG_ERROR, "read update.config, invalid interval field");
01039       goto abort_processing;
01040     }
01041     ////////////////////////////////////
01042     // Validate recursion depth
01043     ////////////////////////////////////
01044     if (e->ValidDepth(p_start[F_DEPTH])) {
01045       Warning("read update.config, invalid depth field, line %d", ln);
01046       SignalWarning(MGMT_SIGNAL_CONFIG_ERROR, "read update.config, invalid depth field");
01047       goto abort_processing;
01048     }
01049     // Valid entry, add to list
01050 
01051     e->Init();
01052     Debug("update",
01053           "[%d] [%s] [%s] nhdrs %d hour %d interval %d depth %d",
01054           e->_id, e->_url, e->_request_headers, e->_num_request_headers, e->_offset_hour, e->_interval, e->_max_depth);
01055     ul->Add(e);
01056     e = NULL;
01057   }
01058 
01059   // All file entries are valid.
01060 
01061   close(f);
01062   return ul;
01063 
01064 abort_processing:
01065   close(f);
01066   if (e) {
01067     delete e;
01068   }
01069   if (ul) {
01070     delete ul;
01071   }
01072   return (UpdateConfigList *) NULL;
01073 }
01074 
01075 /////////////////////////////////////////////////////////////////////////////
01076 // Class UpdateScheduler
01077 //      Handle scheduling of UpdateEntry objects
01078 /////////////////////////////////////////////////////////////////////////////
01079 UpdateScheduler::UpdateScheduler(UpdateConfigManager * c)
01080 :Continuation(new_ProxyMutex()), _periodic_event(0),
01081 _recursive_update(0), _CM(c), _schedule_event_callbacks(0), _update_state_machines(0), _base_EN(0), _parent_US(0)
01082 {
01083   SET_HANDLER((UpdateSchedulerContHandler)
01084               & UpdateScheduler::ScheduleEvent);
01085 }
01086 
01087 UpdateScheduler::~UpdateScheduler()
01088 {
01089 }
01090 
01091 int
01092 UpdateScheduler::Init()
01093 {
01094   _recursive_update = 0;
01095   _periodic_event = eventProcessor.schedule_every(this, HRTIME_SECONDS(10));
01096   return 0;
01097 }
01098 
01099 int
01100 UpdateScheduler::Init(UpdateScheduler * us, UpdateEntry * ue, Ptr<UpdateConfigParams> p)
01101 {
01102   ink_assert(ue->_indirect_list->Entries());
01103 
01104   _recursive_update = 1;
01105   _CP = p;
01106   _CL = ue->_indirect_list;
01107   _base_EN = ue;
01108   _parent_US = us;
01109 
01110   // Schedule entries for update by moving entries to pending queue.
01111 
01112   UpdateEntry *e;
01113   while ((e = _CL->Remove())) {
01114     _CL->AddPending(e);
01115   }
01116   _periodic_event = eventProcessor.schedule_every(this, HRTIME_SECONDS(10));
01117   return 0;
01118 }
01119 
/**
 * Main event handler of the scheduler (installed by the constructor).
 *
 * EVENT_IMMEDIATE - completion callback. @a e is the UpdateEntry that
 *   finished; its _update_event_status selects which statistic is bumped.
 *   A successful entry with _max_depth > 0 and a non-empty _indirect_list
 *   gets a child UpdateScheduler. Otherwise the entry is rescheduled and
 *   put back on the list (non-recursive scheduler) or deleted (recursive
 *   scheduler). Afterwards Schedule() is called to start another pending
 *   update; when nothing is left and no state machines are active, the
 *   config references are dropped and, for a recursive scheduler, control
 *   passes to ChildExitEventHandler.
 *
 * EVENT_INTERVAL - periodic tick. If no config params/list are held, they
 *   are fetched from _CM and every list entry that is due (or all entries
 *   when ImmediateUpdate() is set) is handed to Schedule(ue). If they are
 *   already held, only Schedule() is called.
 *
 * Any other event is logged and ignored.
 *
 * @param event event code, see above.
 * @param e     for EVENT_IMMEDIATE the completed UpdateEntry; not used for
 *              the other events.
 * @return EVENT_DONE or EVENT_CONT as coded on each path below.
 */
01120 int
01121 UpdateScheduler::ScheduleEvent(int event, void *e)
01122 {
01123   UpdateEntry *ue = NULL;
01124   int update_complete = 1;
01125 
01126   if (event == EVENT_IMMEDIATE) {
01127     //////////////////////////////////////////////////////////////////////
01128     // Callback on update completion from Update State Machine
01129     //////////////////////////////////////////////////////////////////////
01130     ue = (UpdateEntry *) e;
01131 
01132     switch (ue->_update_event_status) {
01133     case UPDATE_EVENT_SUCCESS:
01134       {
01135         Debug("update", "%s update complete, UPDATE_EVENT_SUCCESS id: %d", (_recursive_update ? "(R)" : ""), ue->_id);
01136         UPDATE_INCREMENT_DYN_STAT(update_successes_stat);
01137 
01138         if ((ue->_max_depth > 0) && ue->_indirect_list) {
01139           if (ue->_indirect_list->Entries()) {
01140             //////////////////////////////////////////////////////////
01141             // Recursive update case.
01142             // At this point, we have a list of URLs which was
01143             // recursively derived from the base URL.
01144             // Instantiate UpdateScheduler to process this URL list.
01145             //////////////////////////////////////////////////////////
01146             Debug("update", "Starting UpdateScheduler for id: %d [%s]", ue->_id, ue->_url);
01147             UpdateScheduler *us = new UpdateScheduler();
01148             us->Init(this, ue, _CP);
                  // Skip the completion bookkeeping below: the child scheduler calls
                  // back with this same entry (see ChildExitEventHandler) when done.
01149             update_complete = 0;
01150 
01151           } else {
01152             ue->_indirect_list = NULL;
01153           }
01154         }
01155         break;
01156       }
01157     case UPDATE_EVENT_SUCCESS_NOACTION:
01158       {
01159         Debug("update",
01160               "%s update complete, UPDATE_EVENT_SUCCESS_NOACTION id: %d", (_recursive_update ? "(R)" : ""), ue->_id);
01161         UPDATE_INCREMENT_DYN_STAT(update_no_actions_stat);
01162         break;
01163       }
01164     case UPDATE_EVENT_FAILED:
01165       {
01166         Debug("update", "%s update complete, UPDATE_EVENT_FAILED id: %d", (_recursive_update ? "(R)" : ""), ue->_id);
01167         UPDATE_INCREMENT_DYN_STAT(update_fails_stat);
01168         break;
01169       }
01170     default:
01171       {
01172         Debug("update",
01173               "%s update complete, unknown status %d, id: %d",
01174               (_recursive_update ? "(R)" : ""), ue->_update_event_status, ue->_id);
01175         UPDATE_INCREMENT_DYN_STAT(update_unknown_status_stat);
01176         break;
01177       }
01178     }                           // End of switch
01179 
01180     if (update_complete) {
01181       if (!_recursive_update) {
01182         /////////////////////////////////////////////////////////
01183         // Recompute expire time and place entry back on list
01184         /////////////////////////////////////////////////////////
01185 
01186         ue->ComputeScheduleTime();
01187         _CL->Add(ue);           // Place back on list
01188 
01189       } else {
01190         delete ue;
01191       }
01192       --_update_state_machines;
01193       UPDATE_DECREMENT_DYN_STAT(update_state_machines_stat);
01194     }
01195     ////////////////////////////////////////////////////////////////
01196     // Start another update SM if scheduling is allowed
01197     // and an entry exists on the pending list.
01198     ////////////////////////////////////////////////////////////////
01199 
01200     if (Schedule() < 0) {
01201       // Scheduling allowed, but nothing to schedule
01202       if (_update_state_machines == 0) {
01203         //////////////////////////////////////////////////////////////
01204         // No more active updates, deallocate config/entry structures
01205         //////////////////////////////////////////////////////////////
01206 
01207         _CP = NULL;
01208         _CL = NULL;
01209 
01210         if (_recursive_update) {
01211           //
01212           // Recursive list update is now complete.
01213           // Callback parent UpdateScheduler.
01214           //
01215           _periodic_event->cancel();
01216           _base_EN->_indirect_list = NULL;
01217           _base_EN->_update_event_status = UPDATE_EVENT_SUCCESS;
01218 
01219           SET_HANDLER((UpdateSchedulerContHandler)
01220                       & UpdateScheduler::ChildExitEventHandler);
                // NOTE(review): with the handler switched above, this presumably
                // dispatches to ChildExitEventHandler, which can 'delete this';
                // no member may be touched after this call.
01221           handleEvent(EVENT_IMMEDIATE, 0);
01222         }
01223       }
01224     }
01225     return EVENT_DONE;
01226   }
01227   //////////////////////////////////////
01228   // Periodic event callback
01229   //////////////////////////////////////
01230   if (event == EVENT_INTERVAL) {
01231     ++_schedule_event_callbacks;
01232   } else {
01233     // Unknown event, ignore it.
01234     Debug("update", "UpdateScheduler::ScheduleEvent unknown event %d", event);
01235     return EVENT_DONE;
01236   }
01237 
01238   if (!_CP && !_CL) {
01239     // No updates pending, attempt to schedule any expired updates
01240 
01241     if (!_CM->GetConfigParams(&_CP)) {
01242       return EVENT_CONT;        // Missed lock, try at next event
01243     }
01244     if (!_CM->GetConfigList(&_CL)) {
01245       _CP = NULL;
01246       return EVENT_CONT;        // Missed lock, try at next event
01247     }
01248     // Cannot do anything unless we have valid params and list
01249 
01250     if (!_CP || !_CL) {
01251       _CP = NULL;
01252       _CL = NULL;
01253       return EVENT_CONT;        // try at next event
01254     }
01255     // Determine if the subsystem is enabled
01256 
01257     if (!_CP->IsEnabled()) {
01258       _CP = NULL;
01259       _CL = NULL;
01260       return EVENT_CONT;        // try at next event
01261     }
01262 
01263   } else {
01264     ///////////////////////////////////////////////////////////////////
01265     // Updates pending from last schedule event, attempt to restart
01266     // additional update SM(s).
01267     ///////////////////////////////////////////////////////////////////
01268 
01269     Schedule();
01270     return EVENT_CONT;
01271   }
        // Config was just acquired on this tick, so no state machine started by
        // this scheduler may still be outstanding.
01272   ink_release_assert(!_update_state_machines);
01273 
01274   ///////////////////////////////////////////////////////
01275   // Scan entry list and schedule expired updates
01276   ///////////////////////////////////////////////////////
01277 
01278   ink_hrtime ht = ink_get_based_hrtime();
01279   time_t cur_time = ht / HRTIME_SECOND;
01280   Queue<UpdateEntry> no_action_q;
01281   int time_expired;
01282 
01283   while ((ue = _CL->Remove())) {
01284     time_expired = ue->ScheduleNow(cur_time);
01285     if (time_expired || _CP->ImmediateUpdate()) {
            // Schedule(ue) either starts the update (> 0) or parks the entry on
            // the pending queue when the concurrency limit is reached.
01286       if (Schedule(ue) > 0) {
01287         Debug("update", "%s and started id: %d", time_expired ? "expired" : "force expire", ue->_id);
01288       } else {
01289         Debug("update", "%s with deferred start id: %d", time_expired ? "expired" : "force expire", ue->_id);
01290       }
01291 
01292     } else {
01293       no_action_q.enqueue(ue);
01294     }
01295   }
01296 
01297   // Place no_action_q elements back on list
01298 
01299   while ((ue = no_action_q.dequeue())) {
01300     _CL->Add(ue);
01301   }
01302 
01303   if (!_update_state_machines && !_CL->_pending_q.head) {
01304     // Nothing active or pending.
01305     // Drop references to config/param structures.
01306 
01307     _CP = NULL;
01308     _CL = NULL;
01309   }
01310   return EVENT_DONE;
01311 }
01312 
01313 int
01314 UpdateScheduler::ChildExitEventHandler(int event, Event * /* e ATS_UNUSED */)
01315 {
01316   switch (event) {
01317   case EVENT_IMMEDIATE:
01318   case EVENT_INTERVAL:
01319     {
01320       MUTEX_TRY_LOCK(lock, _parent_US->mutex, this_ethread());
01321       if (lock) {
01322         Debug("update", "Child UpdateScheduler exit id: %d", _base_EN->_id);
01323         _parent_US->handleEvent(EVENT_IMMEDIATE, _base_EN);
01324         delete this;
01325 
01326       } else {
01327         // Lock miss, try again later.
01328         eventProcessor.schedule_in(this, HRTIME_MSECONDS(10));
01329       }
01330       break;
01331     }
01332   default:
01333     {
01334       ink_release_assert(!"UpdateScheduler::ChildExitEventHandler invalid event");
01335     }                           // End of case
01336   }                             // End of switch
01337 
01338   return EVENT_DONE;
01339 }
01340 
01341 int
01342 UpdateScheduler::Schedule(UpdateEntry * e)
01343 {
01344   // Return > 0,  UpdateEntry scheduled
01345   // Return == 0, Scheduling not allowed
01346   // Return < 0,  Scheduling allowed, but nothing to schedule
01347 
01348   UpdateSM *usm;
01349   UpdateEntry *ue = e;
01350   int allow_schedule;
01351   RecInt count, sum;
01352   int max_concurrent_updates;
01353 
01354   UPDATE_READ_DYN_STAT(update_state_machines_stat, count, sum);
01355   if (_CP->ConcurrentUpdates() < _CP->MaxUpdateSM()) {
01356     max_concurrent_updates = _CP->ConcurrentUpdates();
01357   } else {
01358     max_concurrent_updates = _CP->MaxUpdateSM();
01359   }
01360   allow_schedule = (sum < max_concurrent_updates);
01361 
01362   if (allow_schedule) {
01363     ue = ue ? ue : _CL->RemovePending();
01364     if (ue) {
01365       ++_update_state_machines;
01366       UPDATE_INCREMENT_DYN_STAT(update_state_machines_stat);
01367       usm = new UpdateSM(this, _CP, ue);
01368       usm->Start();
01369 
01370       Debug("update", "%s %s start update id: %d [%s]",
01371             (_recursive_update ? "(R)" : ""), (e ? "directed" : "speculative"), ue->_id, ue->_url);
01372 
01373       return 1;                 // UpdateEntry scheduled
01374     } else {
01375       return -1;                // Scheduling allowed but nothing to schedule
01376     }
01377 
01378   } else {
01379     if (ue) {
01380       _CL->AddPending(ue);
01381     }
01382     return 0;                   // Scheduling not allowed
01383   }
01384 }
01385 
01386 /////////////////////////////////////////////////////////////////////////////
01387 // Class UpdateSM
01388 //      State machine which handles object update action
01389 /////////////////////////////////////////////////////////////////////////////
01390 UpdateSM::UpdateSM(UpdateScheduler * us, Ptr<UpdateConfigParams> p, UpdateEntry * e)
01391 :Continuation(new_ProxyMutex()), _state(USM_INIT), _return_status(0), _retries(0)
01392 {
01393   SET_HANDLER((UpdateSMContHandler) & UpdateSM::HandleSMEvent);
01394   _US = us;
01395   _CP = p;
01396   _EN = e;
01397 }
01398 
01399 UpdateSM::~UpdateSM()
01400 {
01401   _CP = NULL;                   // drop reference
01402 }
01403 
01404 void
01405 UpdateSM::Start()
01406 {
01407   eventProcessor.schedule_imm(this, ET_CACHE);
01408 }
01409 
/**
 * Event handler driving the per-entry update state machine.
 *
 * The function loops over _state until a state returns:
 *   USM_INIT                   - entries with _max_depth > 0 go straight to
 *                                USM_PROCESS_URL; otherwise the URL is hashed
 *                                and only processed when no remote cluster
 *                                machine is returned for it.
 *   USM_PROCESS_URL            - look up the URL scheme in
 *                                scheme_dispatch_table and start the
 *                                matching handler, then wait for its event.
 *   USM_PROCESS_URL_COMPLETION - record the completion @a event as status
 *                                and run the scheme's post-processing.
 *   USM_EXIT                   - retry failed updates while _retries is
 *                                below RetryCount(); otherwise notify the
 *                                scheduler under its mutex and delete this
 *                                object.
 *
 * @param event completion event code; only consumed in
 *              USM_PROCESS_URL_COMPLETION.
 * @return EVENT_CONT or EVENT_DONE as coded on each path below.
 */
01410 int
01411 UpdateSM::HandleSMEvent(int event, Event * /* e ATS_UNUSED */)
01412 {
01413   while (1) {
01414     switch (_state) {
01415     case USM_INIT:
01416       {
01417         ////////////////////////////////////////////////////////////////////
01418         // Cluster considerations.
01419         // For non-recursive URL(s), only process it if the cluster
01420         // hash returns this node.  Recursive URL(s) are processed by
01421         // all nodes in the cluster.
01422         ////////////////////////////////////////////////////////////////////
01423         if (_EN->_max_depth > 0) {
01424           // Recursive URL(s) are processed by all nodes.
01425           _state = USM_PROCESS_URL;
01426           break;
01427         }
01428 
01429         INK_MD5 url_md5;
01430 
01431         Cache::generate_key(&url_md5, &_EN->_URLhandle);
01432         ClusterMachine *m = cluster_machine_at_depth(cache_hash(url_md5));
01433         if (m) {
01434           // URL hashed to remote node, do nothing.
01435           _state = USM_EXIT;
01436           _EN->_update_event_status = UPDATE_EVENT_SUCCESS_NOACTION;
01437           break;
01438         } else {
01439           // URL hashed to local node, start processing.
01440           _state = USM_PROCESS_URL;
01441           break;
01442         }
01443       }
01444     case USM_PROCESS_URL:
01445       {
01446         ///////////////////////////////////
01447         // Dispatch to target handler
01448         ///////////////////////////////////
01449         int n;
01450         int scheme_len;
01451         const char *scheme;
              // State is advanced before dispatching so that the completion event
              // lands in USM_PROCESS_URL_COMPLETION.
01452         _state = USM_PROCESS_URL_COMPLETION;
01453         scheme = _EN->_URLhandle.scheme_get(&scheme_len);
01454         for (n = 0; n < N_SCHEMES; ++n) {
                // NOTE(review): this compares pointers, not string contents -
                // presumably scheme_get() returns a shared well-known string
                // for known schemes; confirm.
01455           if (scheme == *scheme_dispatch_table[n].scheme) {
01456             _EN->_scheme_index = n;
01457             if ((*scheme_dispatch_table[n].func) (this)) {
01458               break;            // Error in initiation
01459             }
01460             return EVENT_CONT;
01461           }
01462         }
01463         // Error in initiation or bad scheme.
01464 
01465         _state = USM_EXIT;
01466         _EN->_update_event_status = UPDATE_EVENT_FAILED;
01467         break;
01468       }
01469     case USM_PROCESS_URL_COMPLETION:
01470       {
01471         ///////////////////////////////////
01472         // Await URL update completion
01473         ///////////////////////////////////
01474         _state = USM_EXIT;
01475         _EN->_update_event_status = event;
01476         (*scheme_post_dispatch_table[_EN->_scheme_index].func) (this);
01477         break;
01478       }
01479     case USM_EXIT:
01480       {
01481         /////////////////////////////////////////////
01482         // Operation complete
01483         /////////////////////////////////////////////
              // _return_status is only written by the scheme post-processing, so
              // the initiation failures flagged in USM_PROCESS_URL are not retried.
01484         if ((_return_status == UPDATE_EVENT_FAILED)
01485             && (_retries < _CP->RetryCount())) {
01486 
01487           // Retry operation
01488 
01489           ++_retries;
01490           _state = USM_PROCESS_URL;
01491           eventProcessor.schedule_in(this, HRTIME_SECONDS(_CP->RetryInterval()), ET_CACHE);
01492           return EVENT_DONE;
01493 
01494         } else {
01495           MUTEX_TRY_LOCK(lock, _US->mutex, this_ethread());
01496           if (lock) {
01497             _US->handleEvent(EVENT_IMMEDIATE, (void *) _EN);
01498             delete this;
01499             return EVENT_DONE;
01500 
01501           } else {
01502             // Missed lock, try again later
01503             eventProcessor.schedule_in(this, HRTIME_MSECONDS(10), ET_CACHE);
01504             return EVENT_CONT;
01505           }
01506         }
01507       }
01508     }                           // End of switch
01509   }                             // End of while
01510 
01511   return EVENT_CONT;
01512 }
01513 
01514 struct dispatch_entry scheme_dispatch_table[UpdateSM::N_SCHEMES] = {
01515   {&URL_SCHEME_HTTP, UpdateSM::http_scheme},
01516 };
01517 
01518 struct dispatch_entry scheme_post_dispatch_table[UpdateSM::N_SCHEMES] = {
01519   {&URL_SCHEME_HTTP, UpdateSM::http_scheme_postproc},
01520 };
01521 
01522 int
01523 UpdateSM::http_scheme(UpdateSM * sm)
01524 {
01525   if (sm->_EN->_max_depth > 0) {
01526     ////////////////////////////////////
01527     // Recursive Update
01528     ////////////////////////////////////
01529     Debug("update", "Start recursive HTTP GET id: %d [%s]", sm->_EN->_id, sm->_EN->_url);
01530     sm->_EN->_indirect_list = new UpdateConfigList;
01531     RecursiveHttpGet *RHttpGet = new RecursiveHttpGet;
01532 
01533     RHttpGet->Init(sm, sm->_EN->_url, sm->_EN->_request_headers,
01534                    &sm->_EN->_URLhandle, sm->_EN->_http_hdr,
01535                    sm->_EN->_max_depth, sm->_EN->_indirect_list, &update_allowable_html_tags[0]);
01536   } else {
01537     ////////////////////////////////////
01538     // One URL update
01539     ////////////////////////////////////
01540     Debug("update", "Start HTTP GET id: %d [%s]", sm->_EN->_id, sm->_EN->_url);
01541     HttpUpdateSM *current_reader;
01542 
01543     current_reader = HttpUpdateSM::allocate();
01544     current_reader->init();
01545     // TODO: Do anything with the returned Action* ?
01546     current_reader->start_scheduled_update(sm, sm->_EN->_http_hdr);
01547   }
01548   return 0;
01549 }
01550 
01551 int
01552 UpdateSM::http_scheme_postproc(UpdateSM * sm)
01553 {
01554   // Map HttpUpdateSM return event code to internal status code
01555 
01556   switch (sm->_EN->_update_event_status) {
01557   case UPDATE_EVENT_SUCCESS:
01558   case UPDATE_EVENT_FAILED:
01559     // Returned only by RecursiveHttpGet
01560     sm->_return_status = sm->_EN->_update_event_status;
01561     break;
01562 
01563   case HTTP_SCH_UPDATE_EVENT_WRITTEN:
01564   case HTTP_SCH_UPDATE_EVENT_UPDATED:
01565   case HTTP_SCH_UPDATE_EVENT_DELETED:
01566   case HTTP_SCH_UPDATE_EVENT_NOT_CACHED:
01567   case HTTP_SCH_UPDATE_EVENT_NO_ACTION:
01568     sm->_EN->_update_event_status = UPDATE_EVENT_SUCCESS;
01569     sm->_return_status = UPDATE_EVENT_SUCCESS;
01570     break;
01571 
01572   case HTTP_SCH_UPDATE_EVENT_ERROR:
01573   default:
01574     sm->_EN->_update_event_status = UPDATE_EVENT_FAILED;
01575     sm->_return_status = UPDATE_EVENT_FAILED;
01576     break;
01577   }
01578   return 0;
01579 }
01580 
01581 /////////////////////////////////////////////////////////////////////////////
01582 // Class RecursiveHttpGet
01583 //      Generate URL list by recursively traversing non-terminal URL(s)
01584 //      up to the specified depth.
01585 /////////////////////////////////////////////////////////////////////////////
01586 char
01587   HtmlParser::default_zero_char = '\0';
01588 
01589 RecursiveHttpGet::RecursiveHttpGet()
01590 :Continuation(new_ProxyMutex()), _id(0), _caller_cont(0),
01591 _request_headers(0), _http_hdr(0), _recursion_depth(0), _OL(0), _group_link_head(0), _active_child_state_machines(0)
01592 {
01593   SET_HANDLER((RecursiveHttpGetContHandler)
01594               & RecursiveHttpGet::RecursiveHttpGetEvent);
01595 }
01596 
01597 RecursiveHttpGet::~RecursiveHttpGet()
01598 {
01599   _CL = NULL;
01600 }
01601 
01602 void
01603 RecursiveHttpGet::Init(Continuation * cont, char *url, char *request_headers,
01604                        URL * url_data, HTTPHdr * http_hdr, int recursion_depth,
01605                        Ptr<UpdateConfigList> L, struct html_tag *allowed_html_tags)
01606 {
01607   /////////////////////////////////////////////////////////////////////////
01608   // Note: URL and request header data pointers are assumed to be
01609   //       valid during the life of this class.
01610   /////////////////////////////////////////////////////////////////////////
01611   _id = ink_atomic_increment(&global_id, 1);
01612   _caller_cont = cont;
01613   _request_headers = request_headers;
01614   _url_data = url_data;
01615   _http_hdr = http_hdr;
01616   _recursion_depth = recursion_depth;
01617   _CL = L;
01618   _OL = ObjectReloadContAllocator.alloc();
01619   _OL->Init(this, url, strlen(url), _request_headers, (_request_headers ? strlen(_request_headers) : 0), 1, 1);
01620 
01621   html_parser.Init(url, allowed_html_tags);
01622 
01623   Debug("update", "Start recursive read rid: %d [%s]", _id, html_parser._url);
01624 }
01625 
/**
 * Event handler for the recursive fetcher.
 *
 * NET_EVENT_OPEN_FAILED, VC_EVENT_ERROR
 *     - logged, then processing continues with whatever URLs have been
 *       collected so far.
 * VC_EVENT_READ_READY, VC_EVENT_READ_COMPLETE, VC_EVENT_EOS
 *     - @a d is used as an IOBufferReader; the HTML parser extracts URLs
 *       from it. Each URL that is valid, on the same host as the base URL
 *       and not yet in the hash of _CL is added to _CL and pushed on the
 *       _group_link_head chain. READ_READY returns to wait for more data.
 * UPDATE_EVENT_SUCCESS, UPDATE_EVENT_FAILED
 *     - a child RecursiveHttpGet finished.
 *
 * After the switch, the chain of found URLs is walked; for the first
 * non-terminal URL (when _recursion_depth > 1) a child RecursiveHttpGet is
 * started and the handler returns. When the chain is exhausted the handler
 * switches to ExitEventHandler to notify the caller.
 *
 * @param event event code, see above.
 * @param d     for the read events, the IOBufferReader with the data.
 * @return EVENT_CONT while work is outstanding, otherwise EVENT_DONE.
 */
01626 int
01627 RecursiveHttpGet::RecursiveHttpGetEvent(int event, Event * d)
01628 {
01629   char *url, *url_end;
01630   int status;
01631   UpdateEntry *ue;
01632   IOBufferReader *r = (IOBufferReader *) d;
01633 
01634   switch (event) {
01635   case NET_EVENT_OPEN_FAILED:
01636     {
01637       Debug("update", "RecursiveHttpGetEvent connect failed id: %d [%s]", _id, html_parser._url);
01638       break;
01639     }
01640   case VC_EVENT_ERROR:
01641     {
01642       Debug("update", "RecursiveHttpGetEvent connect event error id: %d [%s]", _id, html_parser._url);
01643       break;
01644     }
01645   case VC_EVENT_READ_READY:
01646   case VC_EVENT_READ_COMPLETE:
01647   case VC_EVENT_EOS:
01648     {
01649       while ((status = html_parser.ParseHtml(r, &url, &url_end))) {
01650         // Validate given URL.
01651 
01652         ue = new UpdateEntry;
01653         if (ue->ValidURL(url, url_end + 1 /* Point to null */ )) {
01654           delete ue;
01655           ue = NULL;
01656 
01657         } else {
01658           // Complete remaining UpdateEntry initializations
01659 
01660           ue->_request_headers = ats_strdup(_request_headers);
                // NOTE(review): the result of BuildHttpRequest() is ignored here,
                // while the update.config parser in this file treats a non-zero
                // result as an error - confirm this is intended.
01661           ue->BuildHttpRequest();
01662           ue->Init(1);          // Derived URL
01663 
01664           // Discard remote URL(s)
01665           int ue_host_len;
01666           const char *ue_host = ue->_URLhandle.host_get(&ue_host_len);
01667           int url_host_len;
01668           const char *url_host = _url_data->host_get(&url_host_len);
01669 
01670           if (ue_host == NULL || url_host == NULL || ptr_len_casecmp(ue_host, ue_host_len, url_host, url_host_len)) {
01671             delete ue;
01672             ue = NULL;
01673             continue;
01674           }
01675           // I think we're generating the cache key just to get a hash of the URL.
01676           // Used to use Cache::generate_key that no longer works with vary_on_user_agent
01677           ue->_URLhandle.hash_get(&ue->_url_md5);
01678 
01679           if (_CL->HashAdd(ue)) {
01680             // Entry already exists
01681 
01682             delete ue;
01683             ue = NULL;
01684 
01685           } else {
01686             // Entry is unique and has been added to hash table.
01687             // Set terminal URL status and add to current
01688             // recursion level list.
01689 
01690             ue->SetTerminalStatus(((status < 0) ? 1 : 0));
01691             Debug("update", "Recursive find rid: %d id: %d %s\n [%s]",
01692                   _id, ue->_id, (ue->TerminalURL()? "T " : ""), ue->_url);
01693 
                  // Both branches push ue onto the front of the singly linked
                  // chain headed by _group_link_head.
01694             if (_group_link_head) {
01695               ue->_group_link = _group_link_head;
01696               _group_link_head = ue;
01697             } else {
01698               _group_link_head = ue;
01699               ue->_group_link = NULL;
01700             }
01701           }
01702         }
01703       }
01704       ink_release_assert(r->read_avail() == 0);
01705 
01706       if ((event == VC_EVENT_READ_COMPLETE)
01707           || (event == VC_EVENT_EOS)) {
01708         break;
01709 
01710       } else {
01711         return EVENT_CONT;
01712       }
01713     }
01714   case UPDATE_EVENT_SUCCESS:
01715   case UPDATE_EVENT_FAILED:
01716     {
01717       // Child state machine completed.
01718 
01719       ink_release_assert(_active_child_state_machines > 0);
01720       _active_child_state_machines--;
01721       break;
01722     }
01723   default:
01724     {
01725       ink_release_assert(!"RecursiveHttpGetEvent invalid event");
01726       return EVENT_DONE;
01727 
01728     }                           // End of case
01729   }                             // End of switch
01730 
01731   if (_group_link_head) {
01732     // At this point, we have a list of valid terminal
01733     // and non-terminal URL(s).
01734     // Sequentially initiate the read on the non-terminal URL(s).
01735 
01736     while (_group_link_head) {
01737       ue = _group_link_head;
01738       _group_link_head = ue->_group_link;
01739 
01740       if (!ue->TerminalURL()) {
01741         if (_recursion_depth <= 1) {
01742           continue;
01743         }
01744 
01745         Debug("update", "(R) start non-terminal HTTP GET rid: %d id: %d [%s]", _id, ue->_id, ue->_url);
01746 
01747         _active_child_state_machines++;
01748         RecursiveHttpGet *RHttpGet = new RecursiveHttpGet();
01749         RHttpGet->Init(this, ue->_url, _request_headers,
01750                        _url_data, _http_hdr, (_recursion_depth - 1), _CL, &update_allowable_html_tags[0]);
              // Only one child runs at a time; the rest of the chain is resumed
              // when the child reports UPDATE_EVENT_SUCCESS/FAILED.
01751         return EVENT_CONT;
01752 
01753       }
01754     }
01755   }
01756   // All child state machines have completed, tell our parent
01757   // and delete ourself.
01758 
01759   SET_HANDLER((RecursiveHttpGetContHandler)
01760               & RecursiveHttpGet::ExitEventHandler);
01761   handleEvent(EVENT_IMMEDIATE, 0);
01762   return EVENT_DONE;
01763 }
01764 
01765 int
01766 RecursiveHttpGet::ExitEventHandler(int event, Event * /* e ATS_UNUSED */)
01767 {
01768   switch (event) {
01769   case EVENT_IMMEDIATE:
01770   case EVENT_INTERVAL:
01771     {
01772       MUTEX_TRY_LOCK(lock, _caller_cont->mutex, this_ethread());
01773       if (lock) {
01774         Debug("update", "Exiting recursive read rid: %d [%s]", _id, html_parser._url);
01775         _caller_cont->handleEvent(UPDATE_EVENT_SUCCESS, 0);
01776         delete this;
01777 
01778       } else {
01779         // Lock miss, try again later.
01780         eventProcessor.schedule_in(this, HRTIME_MSECONDS(10));
01781       }
01782       break;
01783     }
01784   default:
01785     {
01786       ink_release_assert(!"RecursiveHttpGet::ExitEventHandler invalid event");
01787     }                           // End of case
01788   }                             // End of switch
01789 
01790   return EVENT_DONE;
01791 }
01792 
01793 int
01794 HtmlParser::ParseHtml(IOBufferReader * r, char **url, char **url_end)
01795 {
01796   int status;
01797   while (1) {
01798     if ((status = ScanHtmlForURL(r, url, url_end))) {
01799       status = ConstructURL(url, url_end);
01800       if (status)
01801         return status;
01802     } else {
01803       return 0;                 // No more bytes
01804     }
01805   }
01806 }
01807 
01808 int
01809 HtmlParser::ScanHtmlForURL(IOBufferReader * r, char **url, char **url_end)
01810 {
01811   unsigned char c;
01812   int n = 0;
01813 
01814   while (1) {
01815     switch (_scan_state) {
01816     case SCAN_INIT:
01817       {
01818         _tag.clear();
01819 
01820         _attr.clear();
01821         _attr_value.clear();
01822         _attr_value_hash_char_index = -1;
01823         _attr_value_quoted = 0;
01824         _attr_matched = false;
01825 
01826         _scan_state = SCAN_START;
01827         n = -1;
01828         break;
01829       }
01830     case SCAN_START:
01831       {
01832         while ((n = r->read((char *) &c, 1))) {
01833           if (c == '<') {
01834             _scan_state = FIND_TAG_START;
01835             break;
01836           }
01837         }
01838         break;
01839       }
01840     case FIND_TAG_START:
01841       {
01842         while ((n = r->read((char *) &c, 1))) {
01843           if (!isspace(c)) {
01844             if (c == '>') {
01845               ////////////////////////////////////////////////////
01846               // '< >' with >= 0 embedded spaces, ignore it.
01847               ////////////////////////////////////////////////////
01848               _scan_state = SCAN_INIT;
01849               break;
01850 
01851             } else {
01852               _tag(_tag.length()) = c;
01853               _scan_state = COPY_TAG;
01854               break;
01855             }
01856           }
01857         }
01858         break;
01859       }
01860     case COPY_TAG:
01861       {
01862         while ((n = r->read((char *) &c, 1))) {
01863           if (!isspace(c)) {
01864             if (c == '>') {
01865               /////////////////////////////
01866               // <tag>, ignore it
01867               /////////////////////////////
01868               _scan_state = SCAN_INIT;
01869               break;
01870 
01871             } else if (c == '=') {
01872               ///////////////////////////////
01873               // <tag=something>, ignore it
01874               ///////////////////////////////
01875               _scan_state = SCAN_INIT;
01876               break;
01877 
01878             } else {
01879               if (_tag.length() < MAX_TAG_NAME_LENGTH) {
01880                 _tag(_tag.length()) = c;
01881 
01882               } else {
01883                 ///////////////////////////////////
01884                 // Tag name to long, ignore it
01885                 ///////////////////////////////////
01886                 _scan_state = SCAN_INIT;
01887                 break;
01888               }
01889             }
01890 
01891           } else {
01892             _tag(_tag.length()) = 0;
01893             if (strcmp(_tag, HTML_COMMENT_TAG) == 0) {
01894               _scan_state = IGNORE_COMMENT_START;
01895             } else {
01896               _scan_state = FIND_ATTR_START;
01897             }
01898             break;
01899           }
01900         }
01901         break;
01902       }
01903     case IGNORE_COMMENT_START:
01904       {
01905         _comment_end_ptr = (char *) HTML_COMMENT_END;
01906         _scan_state = IGNORE_COMMENT;
01907         break;
01908       }
01909     case IGNORE_COMMENT:
01910       {
01911         while ((n = r->read((char *) &c, 1))) {
01912           if (!isspace(c)) {
01913             if (c == *_comment_end_ptr) {
01914               _comment_end_ptr++;
01915               if (!*_comment_end_ptr) {
01916                 _scan_state = SCAN_INIT;
01917                 break;
01918               }
01919             } else {
01920               _comment_end_ptr = (char *) HTML_COMMENT_END;
01921             }
01922           }
01923         }
01924         break;
01925       }
01926     case FIND_ATTR_START:
01927       {
01928         while ((n = r->read((char *) &c, 1))) {
01929           if (!isspace(c)) {
01930             if (c == '>') {
01931               ////////////////////////////////////////////////
01932               // <tag > with >=1 embedded spaces, ignore it
01933               ////////////////////////////////////////////////
01934               _scan_state = SCAN_INIT;
01935               break;
01936 
01937             } else if (c == '=') {
01938               //////////////////////////////////////////////////////////
01939               // <tag =something> with >=1 embedded spaces, ignore it
01940               //////////////////////////////////////////////////////////
01941               _scan_state = SCAN_INIT;
01942               break;
01943 
01944             } else {
01945               _attr(_attr.length()) = c;
01946               _scan_state = COPY_ATTR;
01947               break;
01948             }
01949           }
01950         }
01951         break;
01952       }
01953     case COPY_ATTR:
01954       {
01955         while ((n = r->read((char *) &c, 1))) {
01956           if (!isspace(c)) {
01957             if (c == '>') {
01958               /////////////////////////////
01959               // <tag attr>, ignore it
01960               /////////////////////////////
01961               _scan_state = SCAN_INIT;
01962               break;
01963 
01964             } else if (c == '=') {
01965               ///////////////////////////////
01966               // <tag attr=something>
01967               ///////////////////////////////
01968               _attr(_attr.length()) = 0;
01969               _scan_state = FIND_ATTR_VALUE_START;
01970               break;
01971 
01972             } else {
01973               if (_attr.length() < MAX_ATTR_NAME_LENGTH) {
01974                 _attr(_attr.length()) = c;
01975 
01976               } else {
01977                 ///////////////////////////////////
01978                 // Attr name too long, ignore it
01979                 ///////////////////////////////////
01980                 _scan_state = SCAN_INIT;
01981                 break;
01982               }
01983             }
01984 
01985           } else {
01986             _attr(_attr.length()) = 0;
01987             _scan_state = FIND_ATTR_VALUE_DELIMITER;
01988             break;
01989           }
01990         }
01991         break;
01992       }
01993     case FIND_ATTR_VALUE_DELIMITER:
01994       {
01995         while ((n = r->read((char *) &c, 1))) {
01996           if (isspace(c) || (c == '=')) {
01997             if (c == '=') {
01998               _scan_state = FIND_ATTR_VALUE_START;
01999               break;
02000             }
02001           } else {
02002             _scan_state = SCAN_INIT;
02003             break;
02004           }
02005         }
02006         break;
02007       }
02008     case FIND_ATTR_VALUE_START:
02009       {
02010         while ((n = r->read((char *) &c, 1))) {
02011           if (!isspace(c)) {
02012             if (c == '>') {
02013               /////////////////////////////
02014               // <tag attr= >, ignore
02015               /////////////////////////////
02016               _scan_state = SCAN_INIT;
02017               break;
02018 
02019             } else if ((c == '\'') || (c == '\"')) {
02020               _attr_value_quoted = c;
02021               _scan_state = COPY_ATTR_VALUE;
02022               break;
02023 
02024             } else {
02025               _attr_value_quoted = 0;
02026               _attr_value(_attr_value.length()) = c;
02027               _scan_state = COPY_ATTR_VALUE;
02028               break;
02029             }
02030           }
02031         }
02032         break;
02033       }
02034     case COPY_ATTR_VALUE:
02035       {
02036         while ((n = r->read((char *) &c, 1))) {
02037           if (_attr_value_quoted) {
02038             if (c == _attr_value_quoted) {
02039               ///////////////////////////////////////////
02040               // We have a complete <tag attr='value'
02041               ///////////////////////////////////////////
02042               _attr_value(_attr_value.length()) = 0;
02043               _scan_state = VALIDATE_ENTRY;
02044               break;
02045 
02046             } else if (c == '\n') {
02047               _scan_state = TERMINATE_COPY_ATTR_VALUE;
02048               break;
02049             } else {
02050               _attr_value(_attr_value.length()) = c;
02051               if (c == '#') {
02052                 _attr_value_hash_char_index = _attr_value.length() - 1;
02053               }
02054             }
02055 
02056           } else {
02057             if (isspace(c)) {
02058               ///////////////////////////////////////////
02059               // We have a complete <tag attr=value
02060               ///////////////////////////////////////////
02061               _attr_value(_attr_value.length()) = 0;
02062               _scan_state = VALIDATE_ENTRY;
02063               break;
02064 
02065             } else if (c == '>') {
02066               /////////////////////////////////////////
02067               // We have a complete <tag attr=value>
02068               /////////////////////////////////////////
02069               _attr_value(_attr_value.length()) = 0;
02070               _scan_state = VALIDATE_ENTRY_RESTART;
02071               break;
02072 
02073             } else {
02074               _attr_value(_attr_value.length()) = c;
02075               if (c == '#') {
02076                 _attr_value_hash_char_index = _attr_value.length() - 1;
02077               }
02078             }
02079           }
02080         }
02081         break;
02082       }
02083     case VALIDATE_ENTRY:
02084     case VALIDATE_ENTRY_RESTART:
02085       {
02086         if (_scan_state == VALIDATE_ENTRY) {
02087           _scan_state = RESUME_ATTR_VALUE_SCAN;
02088         } else {
02089           _scan_state = SCAN_INIT;
02090         }
02091         if (AllowTagAttrValue()) {
02092           if (ExtractURL(url, url_end)) {
02093             return 1;           // valid URL
02094           }
02095         }
02096         break;                  // resume scan
02097       }
02098     case RESUME_ATTR_VALUE_SCAN:
02099       {
02100         _attr.clear();
02101         _attr_value.clear();
02102         _attr_value_hash_char_index = -1;
02103         _attr_value_quoted = 0;
02104 
02105         _scan_state = FIND_ATTR_START;
02106         n = -2;
02107         break;
02108       }
02109     case TERMINATE_COPY_ATTR_VALUE:
02110       {
02111         while ((n = r->read((char *) &c, 1))) {
02112           if (c == _attr_value_quoted) {
02113             _scan_state = RESUME_ATTR_VALUE_SCAN;
02114             break;
02115           }
02116         }
02117         break;
02118       }
02119     default:
02120       {
02121         ink_release_assert(!"HtmlParser::ScanHtmlForURL bad state");
02122       }
02123     }                           // end of switch
02124 
02125     if (n == 0) {
02126       return 0;                 // No more data
02127     }
02128 
02129   }                             // end of while
02130 }
02131 
02132 int
02133 HtmlParser::AllowTagAttrValue()
02134 {
02135   struct html_tag *p_tag = allowable_html_tags;
02136   struct html_tag *p_attr = allowable_html_attrs;
02137 
02138   if (!_tag || !_attr)
02139     return 0;
02140 
02141   while (p_tag->tag && p_tag->attr) {
02142     if (!strcasecmp(_tag, p_tag->tag)
02143         && !strcasecmp(_attr, p_tag->attr)) {
02144       if (p_attr == NULL || p_attr->tag == NULL)
02145         return 1;
02146       else if (_attr_matched) {
02147         return 1;
02148       } else {
02149         // attributes don't match
02150         return 0;
02151       }
02152     } else {
02153       if (p_attr && p_attr->tag && p_attr->attr && _attr_value.length() > 0) {
02154         if (!strcasecmp(_attr, p_attr->tag)
02155             && !strcasecmp(_attr_value, p_attr->attr)) {
02156           _attr_matched = true;
02157         }
02158       }
02159       p_tag++;
02160       if (p_attr)
02161         p_attr++;
02162     }
02163   }
02164   return 0;
02165 }
02166 
02167 int
02168 HtmlParser::ValidProtoScheme(char *p)
02169 {
02170   int n;
02171   for (n = 0; proto_schemes[n].tag; ++n) {
02172     if (!strncasecmp(p, proto_schemes[n].tag, proto_schemes[n].tag_len)) {
02173       return 1;
02174     }
02175   }
02176   return 0;
02177 }
02178 
02179 int
02180 HtmlParser::ValidSupportedProtoScheme(char *p)
02181 {
02182   int n;
02183   for (n = 0; supported_proto_schemes[n].tag; ++n) {
02184     if (!strncasecmp(p, supported_proto_schemes[n].tag, supported_proto_schemes[n].tag_len)) {
02185       return 1;
02186     }
02187   }
02188   return 0;
02189 }
02190 
02191 int
02192 HtmlParser::ExtractURL(char **url, char **url_end)
02193 {
02194   intptr_t n;
02195 
02196   // '#' considerations
02197   if (_attr_value_hash_char_index >= 0) {
02198     if (!_attr_value_hash_char_index) {
02199       return 0;                 // No URL
02200 
02201     } else {
02202       // '#' terminates _attr_value
02203       _attr_value.set_length(_attr_value_hash_char_index + 1);
02204       _attr_value[_attr_value_hash_char_index] = 0;
02205     }
02206   }
02207 
02208   if (!strcasecmp(_tag, "base") && !strcasecmp(_attr, "href")) {
02209     if (_html_doc_base) {
02210       _html_doc_base.clear();
02211     }
02212     for (n = 0; n < _attr_value.length(); ++n) {
02213       _html_doc_base(_html_doc_base.length()) = _attr_value[n];
02214     }
02215     _html_doc_base(_html_doc_base.length()) = 0;
02216     return 0;                   // No URL
02217 
02218   } else if (!strcasecmp(_tag, "meta") && !strcasecmp(_attr, "content")) {
02219     /////////////////////////////////////////////////////////////////
02220     // General form:
02221     //      <META HTTP-EQUIV=Refresh CONTENT="0; URL=index.html">
02222     /////////////////////////////////////////////////////////////////
02223     if (_attr_value.length()) {
02224       // Locate start of URL
02225       for (n = 0; n < _attr_value.length(); ++n) {
02226         if (!ParseRules::is_digit((unsigned char) _attr_value[n])) {
02227           break;
02228         }
02229       }
02230       if ((n < _attr_value.length()) && (((unsigned char) _attr_value[n]) == ';')) {
02231 
02232         for (; n < _attr_value.length(); ++n) {
02233           if (!isspace((unsigned char) _attr_value[n])) {
02234             break;
02235           }
02236         }
02237         if ((n < _attr_value.length()) && (!strncasecmp(&_attr_value[n], "URL=", 4))) {
02238           n += 4;
02239           if ((n < _attr_value.length())
02240               && ((_attr_value.length() - n) > 1)) {
02241             *url = &_attr_value[n];
02242             *url_end = &_attr_value[_attr_value.length() - 2];
02243             return 1;
02244           }
02245         }
02246       }
02247       return 0;                 // No URL
02248 
02249     } else {
02250       return 0;                 // No URL
02251     }
02252   }
02253 
02254   if (_attr_value.length() > 1) {
02255     *url = &_attr_value[(intptr_t)0];
02256     *url_end = &_attr_value[_attr_value.length() - 2];
02257     return 1;
02258 
02259   } else {
02260     return 0;                   // No URL
02261   }
02262 }
02263 
02264 int
02265 HtmlParser::ConstructURL(char **url, char **url_end)
02266 {
02267   unsigned char *p_url = (unsigned char *) *url;
02268   unsigned char *p_url_end = (unsigned char *) *url_end;
02269 
02270   /////////////////////////////////////////////////////////////////////
02271   // Handle the <a href="[spaces]URI"> case by skipping over spaces
02272   /////////////////////////////////////////////////////////////////////
02273   while (p_url < p_url_end) {
02274     if (isspace(*p_url)) {
02275       ++p_url;
02276     } else {
02277       break;
02278     }
02279   }
02280 
02281   ////////////////////////////////////////////////////
02282   // Determine if we have a relative or absolute URI
02283   ////////////////////////////////////////////////////
02284   int relative_URL = 0;
02285   int http_needed = 0;
02286   if (ValidProtoScheme((char *) p_url)) {
02287     if (!strncasecmp((char *) p_url, "http:", 5)
02288         && (strncasecmp((char *) p_url, "http://", 7) != 0)) {
02289 
02290       //////////////////////////////////////////////////////////
02291       // Bad relative URI references of the form http:URL.
02292       // Skip over the "http:" part.
02293       //////////////////////////////////////////////////////////
02294       p_url += strlen("http:");
02295       if (p_url > p_url_end) {
02296         return 0;               // Invalid URL
02297       }
02298       relative_URL = 1;
02299     }
02300   } else {
02301     relative_URL = 1;
02302     // problem found with www.slashdot.com
02303     if (strncasecmp((char *) p_url, "//", 2) == 0)
02304       http_needed = 1;
02305   }
02306 
02307   //////////////////////////////////////////////
02308   // Only handle supported protocol schemes
02309   //////////////////////////////////////////////
02310   if (!relative_URL && !ValidSupportedProtoScheme((char *) p_url)) {
02311     return 0;                   // Invalid URL
02312   }
02313 
02314   if (relative_URL) {
02315     ////////////////////////////////////
02316     // Compute document base path
02317     ////////////////////////////////////
02318     DynArray<char>*base = 0;
02319     DynArray<char>*absolute_url = 0;
02320 
02321     if (http_needed) {
02322       absolute_url = PrependString("http:", 5, (char *) p_url, (p_url_end - p_url + 2));
02323     } else if (_html_doc_base.length()) {
02324       ///////////////////////////////////////////////////////////////
02325       // Document base specified via <base href="...">
02326       ///////////////////////////////////////////////////////////////
02327       base = MakeURL(_url, _html_doc_base, _html_doc_base.length(), !ValidProtoScheme(_html_doc_base));
02328       absolute_url = MakeURL(*base, (char *) p_url, (p_url_end - p_url + 2), 1);
02329     } else {
02330       absolute_url = MakeURL(_url, (char *) p_url, (p_url_end - p_url + 2), 1);
02331     }
02332     _result.clear();
02333     _result = *absolute_url;
02334     absolute_url->detach();
02335 
02336     // fix INKqa07208; need to reclaim memory
02337     delete absolute_url;
02338     if (base)
02339       delete base;
02340 
02341     *url = &_result[(intptr_t)0];
02342     *url_end = &_result[_result.length() - 3];  // -1 (real len)
02343     // -1 (skip null)
02344     // -1 (zero base)
02345   } else {
02346     *url = (char *) p_url;
02347     *url_end = (char *) p_url_end;
02348   }
02349 
02350   //////////////////////////////////////////////////////////////////
02351   // Determine if we have a terminal or non-terminal URL.
02352   // URL ending with '/', .htm or .html is considered non-terminal.
02353   //    Return < 0 ==> Terminal URL
02354   //    Return > 0 ==> Non terminal URL
02355   //////////////////////////////////////////////////////////////////
02356   if (!strncasecmp((char *) (p_url_end - 4), ".html", 5)
02357       || !strncasecmp((char *) (p_url_end - 3), ".htm", 4)
02358       || !strncasecmp((char *) (p_url_end), "/", 1)) {
02359     return 1;                   // Non-terminal URL
02360   } else {
02361     return -1;                  // Terminal URL
02362   }
02363 }
02364 
02365 DynArray<char>*
02366 HtmlParser::MakeURL(char *url, char *sub, int subsize, int relative_url)
02367 {
02368   int i, n;
02369   int skip_slashslash;
02370 
02371   DynArray<char>*result = new DynArray<char>(&default_zero_char, 128);
02372 
02373   if (relative_url) {
02374     if (*sub != '/') {
02375 
02376       int url_len = strlen(url);
02377 
02378       // Locate last '/' in url
02379       for (i = url_len; i && url[i] != '/'; i--);
02380 
02381       if (i && (url[i] == url[i - 1])) {
02382         // http://hostname case with no terminating '/'
02383 
02384         for (n = 0; n < url_len; ++n) {
02385           (*result) (result->length()) = url[n];
02386         }
02387         (*result) (result->length()) = '/';
02388 
02389       } else {
02390         for (n = 0; n < (i + 1); ++n) {
02391           (*result) (result->length()) = url[n];
02392         }
02393       }
02394 
02395       for (n = 0; n < subsize; ++n) {
02396         (*result) (result->length()) = sub[n];
02397       }
02398       (*result) (result->length()) = '\0';
02399 
02400     } else {
02401       i = 0;
02402       do {
02403         // Locate leading '/'
02404         for (; url[i] && url[i] != '/'; i++);
02405 
02406         if (!url[i]) {
02407           break;
02408         }
02409         // Skip over '<scheme>://'
02410         skip_slashslash = ((url[i] == url[i + 1]) && (url[i + 1] == '/'));
02411 
02412         if (skip_slashslash) {
02413           i += 2;
02414         }
02415       } while (skip_slashslash);
02416 
02417       for (n = 0; n < (i - 1); ++n) {
02418         (*result) (result->length()) = url[n];
02419       }
02420 
02421       if (url[n] != '/') {
02422         (*result) (result->length()) = url[n];
02423       }
02424 
02425       for (n = 0; n < subsize; ++n) {
02426         (*result) (result->length()) = sub[n];
02427       }
02428       (*result) (result->length()) = '\0';
02429     }
02430 
02431   } else {
02432     for (n = 0; n < subsize; ++n) {
02433       (*result) (result->length()) = sub[n];
02434     }
02435     (*result) (result->length()) = '\0';
02436   }
02437   return result;
02438 }
02439 
02440 DynArray<char>*
02441 HtmlParser::PrependString(const char *pre, int presize, char *sub, int subsize)
02442 {
02443   int n;
02444 
02445   DynArray<char>*result = new DynArray<char>(&default_zero_char, 128);
02446 
02447   for (n = 0; n < presize; ++n) {
02448     (*result) (result->length()) = pre[n];
02449   }
02450   for (n = 0; n < subsize; ++n) {
02451     (*result) (result->length()) = sub[n];
02452   }
02453   (*result) (result->length()) = '\0';
02454 
02455   return result;
02456 }
02457 
02458 ///////////////////////////////////////////////////////////////////
02459 // Class ObjectReloadCont
02460 //      Background load URL into local cache
02461 ///////////////////////////////////////////////////////////////////
// Allocator for ObjectReloadCont objects.  ObjectReloadEvent() hands a
// finished continuation back to it via ObjectReloadContAllocator.free(this).
02462 ClassAllocator<ObjectReloadCont> ObjectReloadContAllocator("ObjectReloadCont");
02463 
02464 ObjectReloadCont::ObjectReloadCont():Continuation(0),
02465 _caller_cont(0), _request_id(0), _send_data(0),
02466 _receive_data(0), _start_event(0),
02467 _state(START), _cur_action(0), _netvc(0), _write_vio(0), _read_vio(0), _read_event_callback(0)
02468 {
02469   SET_HANDLER((ObjectReloadContHandler) & ObjectReloadCont::ObjectReloadEvent);
02470 }
02471 
// Intentionally empty: the send/receive buffers and the mutex reference are
// released by free(), which ObjectReloadEvent() calls before returning the
// object to ObjectReloadContAllocator.
02472 ObjectReloadCont::~ObjectReloadCont()
02473 {
02474 }
02475 
02476 void
02477 ObjectReloadCont::Init(Continuation * cont, char *url, int url_len,
02478                        char *headers, int headers_len, int http_case, int read_event_callback)
02479 {
02480   int total_len;
02481 
02482   mutex = new_ProxyMutex();
02483   _caller_cont = cont;
02484   _request_id = ink_atomic_increment(&global_id, 1);
02485   _read_event_callback = read_event_callback;
02486 
02487   // Setup send data buffer by prepending the HTTP GET method to the
02488   // given NULL terminated URL and terminating with HTTP version
02489 
02490   if (http_case) {
02491     if (headers_len) {
02492       total_len = len_GET_METHOD + url_len + len_HTTP_VERSION + len_TERMINATOR + headers_len + len_REQUEST_TERMINATOR;
02493     } else {
02494       total_len = len_GET_METHOD + url_len + len_HTTP_VERSION + len_REQUEST_TERMINATOR;
02495     }
02496     _send_data = new_MIOBuffer(buffer_size_to_index(total_len + 1));    // allow for NULL
02497 
02498     memcpy(_send_data->end(), GET_METHOD, len_GET_METHOD);
02499     memcpy(&(_send_data->end())[len_GET_METHOD], url, url_len);
02500     memcpy(&(_send_data->end())[len_GET_METHOD + url_len], HTTP_VERSION, len_HTTP_VERSION);
02501 
02502     if (headers_len) {
02503       memcpy(&(_send_data->end())[len_GET_METHOD + url_len + len_HTTP_VERSION], TERMINATOR, len_TERMINATOR);
02504       memcpy(&(_send_data->end())[len_GET_METHOD + url_len + len_HTTP_VERSION + len_TERMINATOR], headers, headers_len);
02505       memcpy(&(_send_data->end())[len_GET_METHOD + url_len +
02506                                   len_HTTP_VERSION + len_TERMINATOR +
02507                                   headers_len], REQUEST_TERMINATOR, len_REQUEST_TERMINATOR);
02508 
02509       // Add NULL for Debug URL output
02510       (_send_data->end())[len_GET_METHOD + url_len +
02511                           len_HTTP_VERSION + len_TERMINATOR + headers_len + len_REQUEST_TERMINATOR] = 0;
02512     } else {
02513       memcpy(&(_send_data->end())[len_GET_METHOD + url_len +
02514                                   len_HTTP_VERSION], REQUEST_TERMINATOR, len_REQUEST_TERMINATOR);
02515 
02516       // Add NULL for Debug URL output
02517       (_send_data->end())[len_GET_METHOD + url_len + len_HTTP_VERSION + len_REQUEST_TERMINATOR] = 0;
02518     }
02519     _send_data->fill(total_len);
02520 
02521   } else {
02522     // Unhandled case... TODO: Do we need to actually handle this?
02523     ink_assert(false);
02524   }
02525   handleEvent(EVENT_IMMEDIATE, (void *) NULL);
02526 }
02527 
02528 void
02529 ObjectReloadCont::free()
02530 {
02531   mutex = 0;
02532   if (_send_data) {
02533     free_MIOBuffer(_send_data);
02534     _send_data = 0;
02535   }
02536   if (_receive_data) {
02537     free_MIOBuffer(_receive_data);
02538     _receive_data = 0;
02539   }
02540 }
02541 
02542 int
02543 ObjectReloadCont::ObjectReloadEvent(int event, void *d)
02544 {
02545   switch (_state) {
02546   case START:
02547     {
02548       IpEndpoint target;
02549       // Schedule connect to localhost:<proxy port>
02550       Debug("update-reload", "Connect start id=%d", _request_id);
02551       _state = ObjectReloadCont::ATTEMPT_CONNECT;
02552       MUTEX_TRY_LOCK(lock, this->mutex, this_ethread());
02553       ink_release_assert(lock);
02554       target.setToLoopback(AF_INET);
02555       target.port() = htons(HttpProxyPort::findHttp(AF_INET)->m_port);
02556       _cur_action = netProcessor.connect_re(this, &target.sa);
02557       return EVENT_DONE;
02558     }
02559   case ATTEMPT_CONNECT:
02560     {
02561       if (event != NET_EVENT_OPEN) {
02562         // Connect error, terminate processing
02563         Debug("update-reload", "Connect fail id=%d", _request_id);
02564         CallBackUser(event, 0);
02565         free();
02566         ObjectReloadContAllocator.free(this);
02567         return EVENT_DONE;
02568       }
02569       _netvc = (class NetVConnection *) d;
02570 
02571       // Start URL write
02572       Debug("update-reload", "Write start id=%d [%s]", _request_id, _send_data->start());
02573       _state = ObjectReloadCont::WRITING_URL;
02574       IOBufferReader *r = _send_data->alloc_reader();
02575       _write_vio = _netvc->do_io_write(this, r->read_avail(), r);
02576       return EVENT_DONE;
02577     }
02578   case WRITING_URL:
02579     {
02580       ink_release_assert(_write_vio == (VIO *) d);
02581       if (event == VC_EVENT_WRITE_READY) {
02582         _write_vio->reenable();
02583         return EVENT_DONE;
02584       } else if (event == VC_EVENT_WRITE_COMPLETE) {
02585         // Write successful, start read
02586         Debug("update-reload", "Read start id=%d", _request_id);
02587         _state = ObjectReloadCont::READING_DATA;
02588         _receive_data = new_MIOBuffer(max_iobuffer_size);
02589         _receive_data_reader = _receive_data->alloc_reader();
02590         _read_vio = _netvc->do_io_read(this, INT64_MAX, _receive_data);
02591         return EVENT_DONE;
02592       } else {
02593         // Write error, terminate processing
02594         Debug("update-reload", "Write fail id=%d", _request_id);
02595         _netvc->do_io(VIO::CLOSE);
02596         CallBackUser(event, 0);
02597         free();
02598         ObjectReloadContAllocator.free(this);
02599         return EVENT_DONE;
02600       }
02601     }
02602   case READING_DATA:
02603     {
02604       ink_release_assert(_read_vio == (VIO *) d);
02605       switch (event) {
02606       case VC_EVENT_READ_READY:
02607         {
02608           if (_read_event_callback) {
02609             _caller_cont->handleEvent(event, _receive_data_reader);
02610 
02611           } else {
02612             int64_t read_bytes = _receive_data_reader->read_avail();
02613             _receive_data_reader->consume(read_bytes);
02614             _read_vio->reenable();
02615           }
02616           return EVENT_CONT;
02617         }
02618       case VC_EVENT_READ_COMPLETE:
02619       case VC_EVENT_EOS:
02620         {
02621           if (_read_event_callback) {
02622             _caller_cont->handleEvent(event, _receive_data_reader);
02623           }
02624           // Object injected into local cache
02625           Debug("update-reload", "Fill success id=%d", _request_id);
02626           break;
02627         }
02628       default:
02629         {
02630           Debug("update-reload", "Fill read fail id=%d", _request_id);
02631           CallBackUser(event, 0);
02632           break;
02633         }
02634       }                         // End of switch
02635 
02636       _netvc->do_io(VIO::CLOSE);
02637       free();
02638       ObjectReloadContAllocator.free(this);
02639       return EVENT_DONE;
02640     }
02641   default:
02642     {
02643       ink_release_assert(!"ObjectReloadEvent invalid state");
02644     }
02645 
02646   }                             // End of switch
02647   return 0;
02648 }
02649 
02650 int
02651 ObjectReloadCont::CallBackUser(int event, void *d)
02652 {
02653   _caller_cont->handleEvent(event, d);
02654   return 0;
02655 }
02656 
02657 // End of Update.cc

Generated by  doxygen 1.7.1