00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032 #include "P_EventSystem.h"
00033 #include "URL.h"
00034 #include "HTTP.h"
00035
00036 #ifndef _Update_h_
00037 #define _Update_h_
00038
00039
00040
00041
// Event codes used by the update subsystem.  They are numbered
// consecutively upward from UPDATE_EVENT_EVENTS_START, which is not
// defined in this header.
#define UPDATE_EVENT_SUCCESS          (UPDATE_EVENT_EVENTS_START + 0)
#define UPDATE_EVENT_SUCCESS_NOACTION (UPDATE_EVENT_EVENTS_START + 1)
#define UPDATE_EVENT_FAILED           (UPDATE_EVENT_EVENTS_START + 2)

// Largest event code in the update range; must name the last code above.
#define MAX_UPDATE_EVENT UPDATE_EVENT_FAILED

// Evaluates to 1 when _e lies within
// [UPDATE_EVENT_EVENTS_START, MAX_UPDATE_EVENT] and to 0 otherwise.
// NOTE: _e may be evaluated twice, so do not pass an expression with
// side effects.
#define IS_UPDATE_EVENT(_e)                                               \
  (((((_e) >= UPDATE_EVENT_EVENTS_START)) && (((_e) <= MAX_UPDATE_EVENT))) \
     ? 1                                                                  \
     : 0)
00051
00052 extern RecRawStatBlock *update_rsb;
00053
// Indices of the individual update statistics.  update_stat_count is
// deliberately left implicit so that it always equals the number of
// enumerators that precede it.
enum
{
  update_successes_stat = 0,
  update_no_actions_stat,
  update_fails_stat,
  update_unknown_status_stat,
  update_state_machines_stat,

  update_stat_count // number of statistics; must remain last
};
00064
00065
00066
00067
00068 class UpdateConfigParams:public RefCountObj
00069 {
00070 public:
00071 UpdateConfigParams();
00072 UpdateConfigParams(UpdateConfigParams &);
00073 ~UpdateConfigParams();
00074 UpdateConfigParams & operator=(UpdateConfigParams &);
00075 int operator==(UpdateConfigParams &);
00076
00077 int IsEnabled()
00078 {
00079 return _enabled;
00080 }
00081 int ImmediateUpdate()
00082 {
00083 return _immediate_update;
00084 }
00085 int RetryCount()
00086 {
00087 return _retry_count;
00088 }
00089 int RetryInterval()
00090 {
00091 return _retry_interval;
00092 }
00093 int ConcurrentUpdates()
00094 {
00095 return _concurrent_updates;
00096 }
00097 int MaxUpdateSM()
00098 {
00099 return _max_update_state_machines;
00100 }
00101 int MaxMemoryUsageMB()
00102 {
00103 return _memory_use_in_mb;
00104 }
00105
00106 public:
00107 RecInt _enabled;
00108 RecInt _immediate_update;
00109 RecInt _retry_count;
00110 RecInt _retry_interval;
00111 RecInt _concurrent_updates;
00112 RecInt _max_update_state_machines;
00113 RecInt _memory_use_in_mb;
00114 };
00115
00116
00117
00118
00119 class UpdateConfigList;
00120
00121 class UpdateEntry
00122 {
00123 public:
00124 UpdateEntry();
00125 ~UpdateEntry();
00126
00127 enum
00128 {
00129 MIN_OFFSET_HOUR = 0, MAX_OFFSET_HOUR = 23,
00130 MIN_INTERVAL = 0, MAX_INTERVAL = 86400,
00131 MIN_DEPTH = 0, MAX_DEPTH = 128,
00132 SECONDS_PER_HOUR = 3600,
00133 SECONDS_PER_MIN = 60
00134 };
00135
00136 void Init(int derived_url = 0);
00137 int ValidURL(char *, char *);
00138 int ValidHeaders(char *);
00139 int BuildHttpRequest();
00140 int ValidHeaderNameChar(char);
00141 int ValidSeparatorChar(char);
00142 int ValidHour(char *);
00143 int ValidInterval(char *);
00144 int ValidDepth(char *);
00145 int TerminalURL();
00146 void SetTerminalStatus(int);
00147
00148 void ComputeScheduleTime();
00149 int ScheduleNow(time_t);
00150
00151 public:
00152 LINK(UpdateEntry, link);
00153 UpdateEntry *_group_link;
00154 UpdateEntry *_hash_link;
00155
00156 public:
00157
00158
00159
00160 int _id;
00161 char *_url;
00162 URL _URLhandle;
00163 INK_MD5 _url_md5;
00164 int _terminal_url;
00165
00166
00167
00168
00169 char *_request_headers;
00170 int _num_request_headers;
00171 HTTPHdr *_http_hdr;
00172 HTTPParser _http_parser;
00173
00174
00175
00176
00177 int _offset_hour;
00178 int _interval;
00179 int _max_depth;
00180
00181
00182
00183
00184 time_t _start_time;
00185 int _expired;
00186
00187 int _scheme_index;
00188 int _update_event_status;
00189
00190 Ptr<UpdateConfigList> _indirect_list;
00191 };
00192
00193
00194
00195
00196 class UpdateConfigList:public RefCountObj
00197 {
00198 public:
00199 UpdateConfigList();
00200 ~UpdateConfigList();
00201 void Add(UpdateEntry *);
00202 int HashAdd(UpdateEntry *);
00203 UpdateEntry *Remove();
00204 void AddPending(UpdateEntry *);
00205 UpdateEntry *RemovePending();
00206 int Entries()
00207 {
00208 return _entry_q_elements;
00209 }
00210 int PendingEntries()
00211 {
00212 return _pending_q_elements;
00213 }
00214
00215 public:
00216 enum
00217 {
00218 HASH_TABLE_SIZE = 4096
00219 };
00220 int _entry_q_elements;
00221 Queue<UpdateEntry> _entry_q;
00222 int _pending_q_elements;
00223 Queue<UpdateEntry> _pending_q;
00224 UpdateEntry **_hash_table;
00225 };
00226
00227
00228
00229
class UpdateConfigManager;
class UpdateScheduler;

// Top-level object of the update subsystem.  It holds a pointer to the
// configuration manager and a pointer to the scheduler; both classes are
// only forward-declared at this point.  Member functions are defined
// out of line.
class UpdateManager
{
public:
  UpdateManager();
  ~UpdateManager();

  int start();

private:
  UpdateConfigManager *_CM; // configuration manager
  UpdateScheduler *_SCH;    // scheduler
};

// The UpdateManager instance, defined elsewhere.
extern UpdateManager updateManager;
00246
00247
00248
00249
00250
00251 typedef int (UpdateConfigManager::*UpdateConfigManagerContHandler) (int, void *);
00252
00253 class UpdateConfigManager:public Continuation
00254 {
00255 public:
00256 UpdateConfigManager();
00257 ~UpdateConfigManager();
00258 int init();
00259 int GetConfigParams(Ptr<UpdateConfigParams> *);
00260 int GetConfigList(Ptr<UpdateConfigList> *);
00261
00262 static int URL_list_update_callout(const char *name, RecDataT data_type, RecData data, void *cookie);
00263
00264 void SetFileName(char *f)
00265 {
00266 _filename = f;
00267 }
00268 char *GetFileName()
00269 {
00270 return _filename;
00271 }
00272
00273 int ProcessUpdate(int event, Event * e);
00274 UpdateConfigList *BuildUpdateList();
00275 UpdateConfigList *ParseConfigFile(int);
00276 int GetDataLine(int, int, char *, int, int);
00277
00278 private:
00279 Event * _periodic_event;
00280 char *_filename;
00281 Ptr<UpdateConfigParams> _CP;
00282 Ptr<UpdateConfigParams> _CP_actual;
00283 Ptr<UpdateConfigList> _CL;
00284 };
00285
00286
00287
00288
00289 typedef int (UpdateScheduler::*UpdateSchedulerContHandler) (int, void *);
00290
00291 class UpdateScheduler:public Continuation
00292 {
00293 public:
00294 UpdateScheduler(UpdateConfigManager * cm = NULL);
00295 ~UpdateScheduler();
00296 int Init();
00297 int Init(UpdateScheduler *, UpdateEntry *, Ptr<UpdateConfigParams>);
00298
00299 int ScheduleEvent(int, void *);
00300 int Schedule(UpdateEntry * e = NULL);
00301 int ChildExitEventHandler(int, Event *);
00302
00303 private:
00304 Event * _periodic_event;
00305 int _recursive_update;
00306 UpdateConfigManager *_CM;
00307 Ptr<UpdateConfigParams> _CP;
00308 Ptr<UpdateConfigList> _CL;
00309 int _schedule_event_callbacks;
00310 int _update_state_machines;
00311
00312 UpdateEntry *_base_EN;
00313
00314 UpdateScheduler *_parent_US;
00315 };
00316
00317
00318
00319
00320 class UpdateSM;
00321 typedef int (UpdateSM::*UpdateSMContHandler) (int, void *);
00322
00323 class UpdateSM:public Continuation
00324 {
00325 public:
00326 enum state_t
00327 {
00328 USM_INIT = 1,
00329 USM_PROCESS_URL,
00330 USM_PROCESS_URL_COMPLETION,
00331 USM_EXIT
00332 };
00333
00334 enum
00335 {
00336 N_SCHEMES = 1
00337 };
00338
00339 static int http_scheme(UpdateSM *);
00340 static int http_scheme_postproc(UpdateSM *);
00341
00342 UpdateSM(UpdateScheduler *, Ptr<UpdateConfigParams>, UpdateEntry *);
00343 ~UpdateSM();
00344 void Start();
00345 int HandleSMEvent(int, Event *);
00346
00347 public:
00348 UpdateEntry * _EN;
00349
00350 private:
00351 UpdateScheduler * _US;
00352 Ptr<UpdateConfigParams> _CP;
00353 state_t _state;
00354 int _return_status;
00355 int _retries;
00356 };
00357
00358 struct dispatch_entry
00359 {
00360 const char **scheme;
00361 int (*func) (UpdateSM *);
00362 };
00363
00364 extern struct dispatch_entry scheme_dispatch_table[UpdateSM::N_SCHEMES];
00365 extern struct dispatch_entry scheme_post_dispatch_table[UpdateSM::N_SCHEMES];
00366
// An HTML tag name paired with an attribute name.  Plain aggregate, so
// it can be brace-initialised as { tag, attr }.
struct html_tag
{
  const char *tag;  // tag name
  const char *attr; // attribute name
};
00372
00373
00374
00375
00376
00377 class ObjectReloadCont;
00378 class RecursiveHttpGet;
00379
00380 typedef int (RecursiveHttpGet::*RecursiveHttpGetContHandler) (int, Event *);
00381
00382 class HtmlParser
00383 {
00384
00385 public:
00386 static char default_zero_char;
00387
00388 enum scan_state_t
00389 {
00390 SCAN_INIT = 1,
00391 SCAN_START,
00392 FIND_TAG_START,
00393 COPY_TAG,
00394 IGNORE_COMMENT_START,
00395 IGNORE_COMMENT,
00396 FIND_ATTR_START,
00397 COPY_ATTR,
00398 FIND_ATTR_VALUE_DELIMITER,
00399 FIND_ATTR_VALUE_START,
00400 COPY_ATTR_VALUE,
00401 VALIDATE_ENTRY,
00402 VALIDATE_ENTRY_RESTART,
00403 RESUME_ATTR_VALUE_SCAN,
00404 TERMINATE_COPY_ATTR_VALUE
00405 };
00406
00407 enum
00408 {
00409 MAX_TAG_NAME_LENGTH = 1024,
00410 MAX_ATTR_NAME_LENGTH = 1024
00411 };
00412
00413 HtmlParser()
00414 : _attr_matched(false), _url(0), _comment_end_ptr(0), _scan_state(SCAN_INIT),
00415 _tag(&default_zero_char, 32), _attr(&default_zero_char, 32),
00416 _attr_value(&default_zero_char, 32),
00417 _attr_value_hash_char_index(-1), _attr_value_quoted(0),
00418 _html_doc_base(&default_zero_char, 128),
00419 _result(&default_zero_char, 128), allowable_html_tags(0), allowable_html_attrs(0)
00420 { }
00421
00422 ~HtmlParser()
00423 { }
00424
00425 void Init(char *url, struct html_tag *allowed_html_tags, struct html_tag *allowed_html_attrs = NULL) {
00426 _url = url;
00427 allowable_html_tags = allowed_html_tags;
00428 allowable_html_attrs = allowed_html_attrs;
00429 _attr_matched = false;
00430 }
00431
00432 int ParseHtml(IOBufferReader *, char **, char **);
00433 int ScanHtmlForURL(IOBufferReader *, char **, char **);
00434 int AllowTagAttrValue();
00435 int ValidProtoScheme(char *);
00436 int ValidSupportedProtoScheme(char *);
00437 int ExtractURL(char **, char **);
00438 int ConstructURL(char **, char **);
00439 DynArray<char>*MakeURL(char *, char *, int, int);
00440 DynArray<char>*PrependString(const char *, int, char *, int);
00441 bool _attr_matched;
00442
00443 char *_url;
00444 char *_comment_end_ptr;
00445 scan_state_t _scan_state;
00446 DynArray<char>_tag;
00447 DynArray<char>_attr;
00448 DynArray<char>_attr_value;
00449 intptr_t _attr_value_hash_char_index;
00450 unsigned char _attr_value_quoted;
00451 DynArray<char>_html_doc_base;
00452 DynArray<char>_result;
00453
00454 struct html_tag *allowable_html_tags;
00455 struct html_tag *allowable_html_attrs;
00456 };
00457
00458 class RecursiveHttpGet:public Continuation
00459 {
00460 public:
00461 RecursiveHttpGet();
00462 ~RecursiveHttpGet();
00463 void Init(Continuation *, char *, char *, URL *, HTTPHdr *, int,
00464 Ptr<UpdateConfigList>, struct html_tag *allowed_html_tags);
00465 int RecursiveHttpGetEvent(int, Event *);
00466
00467 int ExitEventHandler(int, Event *);
00468
00469 public:
00470 int _id;
00471 Continuation *_caller_cont;
00472 char *_request_headers;
00473 URL *_url_data;
00474 HTTPHdr *_http_hdr;
00475 int _recursion_depth;
00476 Ptr<UpdateConfigList> _CL;
00477 ObjectReloadCont *_OL;
00478 UpdateEntry *_group_link_head;
00479 int _active_child_state_machines;
00480
00481 HtmlParser html_parser;
00482 };
00483
00484
00485
00486
00487 class ObjectReloadCont;
00488 typedef int (ObjectReloadCont::*ObjectReloadContHandler) (int, void *);
00489
00490 class ObjectReloadCont:public Continuation
00491 {
00492 public:
00493 ObjectReloadCont();
00494 ~ObjectReloadCont();
00495 void Init(Continuation *, char *, int, char *, int, int, int);
00496 void free();
00497 int ObjectReloadEvent(int, void *);
00498 int CallBackUser(int, void *);
00499
00500 enum state_t
00501 {
00502 START = 1,
00503 ATTEMPT_CONNECT,
00504 WRITING_URL,
00505 READING_DATA
00506 };
00507
00508 Continuation *_caller_cont;
00509 int _request_id;
00510 MIOBuffer *_send_data;
00511 MIOBuffer *_receive_data;
00512 IOBufferReader *_receive_data_reader;
00513 Event *_start_event;
00514 state_t _state;
00515 Action *_cur_action;
00516 class NetVConnection *_netvc;
00517 VIO *_write_vio;
00518 VIO *_read_vio;
00519 int _read_event_callback;
00520
00521 };
00522
00523 extern ClassAllocator<ObjectReloadCont> ObjectReloadContAllocator;
00524
00525 #endif // _Update_h_
00526
00527