• Main Page
  • Related Pages
  • Namespaces
  • Data Structures
  • Files
  • File List
  • Globals

Update.h

Go to the documentation of this file.
00001 /** @file
00002 
00003   Declarations for the Update subsystem: event codes, statistics ids, configuration, scheduling and URL reload classes.
00004 
00005   @section license License
00006 
00007   Licensed to the Apache Software Foundation (ASF) under one
00008   or more contributor license agreements.  See the NOTICE file
00009   distributed with this work for additional information
00010   regarding copyright ownership.  The ASF licenses this file
00011   to you under the Apache License, Version 2.0 (the
00012   "License"); you may not use this file except in compliance
00013   with the License.  You may obtain a copy of the License at
00014 
00015       http://www.apache.org/licenses/LICENSE-2.0
00016 
00017   Unless required by applicable law or agreed to in writing, software
00018   distributed under the License is distributed on an "AS IS" BASIS,
00019   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00020   See the License for the specific language governing permissions and
00021   limitations under the License.
00022  */
00023 
00024 
00025 /****************************************************************************
00026 
00027   Update.h
00028 
00029 
00030 ****************************************************************************/
00031 
00032 #include "P_EventSystem.h"
00033 #include "URL.h"
00034 #include "HTTP.h"
00035 
00036 #ifndef _Update_h_
00037 #define _Update_h_
00038 
00039 /////////////////////////////////////////////////////////
00040 // Update subsystem specific events (consecutive offsets from UPDATE_EVENT_EVENTS_START, which is not defined in this header)
00041 /////////////////////////////////////////////////////////
00042 #define UPDATE_EVENT_SUCCESS            (UPDATE_EVENT_EVENTS_START+0)
00043 #define UPDATE_EVENT_SUCCESS_NOACTION   (UPDATE_EVENT_EVENTS_START+1)
00044 #define UPDATE_EVENT_FAILED             (UPDATE_EVENT_EVENTS_START+2)
00045 // Upper bound used by IS_UPDATE_EVENT; must name the last event defined above.
00046 #define MAX_UPDATE_EVENT                UPDATE_EVENT_FAILED
00047 // Yields 1 when _e lies in [UPDATE_EVENT_EVENTS_START, MAX_UPDATE_EVENT], otherwise 0.  _e is expanded twice.
00048 #define IS_UPDATE_EVENT(_e) \
00049         ( ( ((_e) >= UPDATE_EVENT_EVENTS_START) && \
00050             ((_e) <= MAX_UPDATE_EVENT) ) ? 1 : 0 )
00051 
00052 extern RecRawStatBlock *update_rsb;  // declaration only; the definition is in another file
00053 // Statistic ids, numbered from 0.  Presumably used as indices into update_rsb -- TODO confirm.
00054 enum
00055 {
00056   update_successes_stat,
00057   update_no_actions_stat,
00058   update_fails_stat,
00059   update_unknown_status_stat,
00060   update_state_machines_stat,
00061
00062   update_stat_count  // equals the number of ids above (5); keep it last
00063 };
00064 
00065 //////////////////////////////////////////////////////////////////////
00066 // UpdateConfigParams -- Global subsystem configuration parameters
00067 //////////////////////////////////////////////////////////////////////
00068 class UpdateConfigParams:public RefCountObj  // derives from RefCountObj; other classes in this header hold it via Ptr<UpdateConfigParams>
00069 {
00070 public:
00071   UpdateConfigParams();
00072   UpdateConfigParams(UpdateConfigParams &);  // copy constructor; note the non-const reference parameter
00073   ~UpdateConfigParams();
00074   UpdateConfigParams & operator=(UpdateConfigParams &);  // assignment; also takes a non-const reference
00075   int operator==(UpdateConfigParams &);  // comparison result is returned as int
00076   // Accessors: each returns the matching RecInt field below, converted to int.  NOTE(review): confirm RecInt values always fit in int.
00077   int IsEnabled()
00078   {
00079     return _enabled;
00080   }
00081   int ImmediateUpdate()
00082   {
00083     return _immediate_update;
00084   }
00085   int RetryCount()
00086   {
00087     return _retry_count;
00088   }
00089   int RetryInterval()
00090   {
00091     return _retry_interval;
00092   }
00093   int ConcurrentUpdates()
00094   {
00095     return _concurrent_updates;
00096   }
00097   int MaxUpdateSM()
00098   {
00099     return _max_update_state_machines;
00100   }
00101   int MaxMemoryUsageMB()
00102   {
00103     return _memory_use_in_mb;
00104   }
00105   // Backing fields.  They are public, so callers can also access them without the accessors.
00106 public:
00107   RecInt _enabled;
00108   RecInt _immediate_update;
00109   RecInt _retry_count;
00110   RecInt _retry_interval;  // units are not stated in this header
00111   RecInt _concurrent_updates;
00112   RecInt _max_update_state_machines;
00113   RecInt _memory_use_in_mb;  // megabytes, going by the field and accessor names
00114 };
00115 
00116 ///////////////////////////////////////////////////
00117 // UpdateEntry -- Per update object descriptor
00118 ///////////////////////////////////////////////////
00119 class UpdateConfigList;  // forward declaration; UpdateEntry holds a Ptr<UpdateConfigList> (_indirect_list)
00120
00121 class UpdateEntry
00122 {
00123 public:
00124   UpdateEntry();
00125   ~UpdateEntry();
00126   // Range limits and time conversion constants.  MAX_INTERVAL equals 24 * SECONDS_PER_HOUR.
00127   enum
00128   {
00129     MIN_OFFSET_HOUR = 0, MAX_OFFSET_HOUR = 23,
00130     MIN_INTERVAL = 0, MAX_INTERVAL = 86400,
00131     MIN_DEPTH = 0, MAX_DEPTH = 128,
00132     SECONDS_PER_HOUR = 3600,
00133     SECONDS_PER_MIN = 60
00134   };
00135   // All member functions are defined outside this header; the int return conventions are not visible here.
00136   void Init(int derived_url = 0);
00137   int ValidURL(char *, char *);
00138   int ValidHeaders(char *);
00139   int BuildHttpRequest();
00140   int ValidHeaderNameChar(char);
00141   int ValidSeparatorChar(char);
00142   int ValidHour(char *);  // presumably checked against MIN/MAX_OFFSET_HOUR -- TODO confirm
00143   int ValidInterval(char *);  // presumably checked against MIN/MAX_INTERVAL -- TODO confirm
00144   int ValidDepth(char *);  // presumably checked against MIN/MAX_DEPTH -- TODO confirm
00145   int TerminalURL();
00146   void SetTerminalStatus(int);
00147
00148   void ComputeScheduleTime();
00149   int ScheduleNow(time_t);
00150   // Linkage fields.  LINK() is a macro defined elsewhere; UpdateConfigList stores entries in Queue<UpdateEntry>.
00151 public:
00152   LINK(UpdateEntry, link);
00153   UpdateEntry *_group_link;
00154   UpdateEntry *_hash_link;  // presumably chains entries within UpdateConfigList::_hash_table -- TODO confirm
00155
00156 public:
00157   //////////////////////
00158   // URL data
00159   //////////////////////
00160   int _id;
00161   char *_url;  // ownership of the string is not visible in this header
00162   URL _URLhandle;
00163   INK_MD5 _url_md5;
00164   int _terminal_url;
00165
00166   ////////////////////////////
00167   // Request header data
00168   ////////////////////////////
00169   char *_request_headers;
00170   int _num_request_headers;
00171   HTTPHdr *_http_hdr;
00172   HTTPParser _http_parser;
00173
00174   ///////////////////////////////
00175   // Configuration data
00176   ///////////////////////////////
00177   int _offset_hour;
00178   int _interval;
00179   int _max_depth;
00180
00181   //////////////////////////////////
00182   // State data
00183   //////////////////////////////////
00184   time_t _start_time;
00185   int _expired;
00186
00187   int _scheme_index;  // presumably an index into the scheme dispatch tables declared later in this file -- TODO confirm
00188   int _update_event_status;
00189
00190     Ptr<UpdateConfigList> _indirect_list;
00191 };
00192 
00193 /////////////////////////////////////////////////////////////
00194 // UpdateConfigList -- Container for UpdateEntry objects
00195 /////////////////////////////////////////////////////////////
00196 class UpdateConfigList:public RefCountObj  // derives from RefCountObj; referenced through Ptr<UpdateConfigList> in this header
00197 {
00198 public:
00199   UpdateConfigList();
00200   ~UpdateConfigList();
00201   void Add(UpdateEntry *);
00202   int HashAdd(UpdateEntry *);  // return convention is defined by the implementation, not visible here
00203   UpdateEntry *Remove();
00204   void AddPending(UpdateEntry *);
00205   UpdateEntry *RemovePending();
00206   int Entries()  // returns the _entry_q_elements counter
00207   {
00208     return _entry_q_elements;
00209   }
00210   int PendingEntries()  // returns the _pending_q_elements counter
00211   {
00212     return _pending_q_elements;
00213   }
00214   // Data members are public.  The two counters are stored separately from their queues.
00215 public:
00216   enum
00217   {
00218     HASH_TABLE_SIZE = 4096
00219   };
00220   int _entry_q_elements;
00221   Queue<UpdateEntry> _entry_q;
00222   int _pending_q_elements;
00223   Queue<UpdateEntry> _pending_q;
00224   UpdateEntry **_hash_table;  // presumably HASH_TABLE_SIZE buckets -- TODO confirm against the constructor
00225 };
00226 
00227 ////////////////////////////////////////////////////////////////
00228 // UpdateManager -- External interface to Update subsystem
00229 ////////////////////////////////////////////////////////////////
00230 class UpdateConfigManager;
00231 class UpdateScheduler;
00232 // The forward declarations above are sufficient here: UpdateManager only stores pointers to those types.
00233 class UpdateManager
00234 {
00235 public:
00236   UpdateManager();
00237   ~UpdateManager();
00238   int start();  // defined elsewhere; the meaning of the return value is not visible in this header
00239
00240 private:
00241     UpdateConfigManager * _CM;
00242   UpdateScheduler *_SCH;
00243 };
00244
00245 extern UpdateManager updateManager;  // global instance declared here; its definition is in another file
00246 
00247 //////////////////////////////////////////////////////////////////////////
00248 // UpdateConfigManager -- Handle Update subsystem global configuration
00249 //                        and URL list updates
00250 //////////////////////////////////////////////////////////////////////////
00251 typedef int (UpdateConfigManager::*UpdateConfigManagerContHandler) (int, void *);  // pointer-to-member type for (int, void *) handlers
00252
00253 class UpdateConfigManager:public Continuation
00254 {
00255 public:
00256   UpdateConfigManager();
00257   ~UpdateConfigManager();
00258   int init();
00259   int GetConfigParams(Ptr<UpdateConfigParams> *);  // takes a pointer to a Ptr; presumably an out-parameter -- TODO confirm
00260   int GetConfigList(Ptr<UpdateConfigList> *);  // takes a pointer to a Ptr; presumably an out-parameter -- TODO confirm
00261   // Static, so it can be passed where a plain function pointer is required.
00262   static int URL_list_update_callout(const char *name, RecDataT data_type, RecData data, void *cookie);
00263
00264   void SetFileName(char *f)  // stores the pointer itself; the string is not copied
00265   {
00266     _filename = f;
00267   }
00268   char *GetFileName()  // returns the pointer stored by SetFileName()
00269   {
00270     return _filename;
00271   }
00272
00273   int ProcessUpdate(int event, Event * e);
00274   UpdateConfigList *BuildUpdateList();  // returns a raw pointer; ownership is not visible in this header
00275   UpdateConfigList *ParseConfigFile(int);
00276   int GetDataLine(int, int, char *, int, int);  // unnamed parameters; see the implementation for their meaning
00277
00278 private:
00279   Event * _periodic_event;
00280   char *_filename;
00281   Ptr<UpdateConfigParams> _CP;
00282   Ptr<UpdateConfigParams> _CP_actual;
00283   Ptr<UpdateConfigList> _CL;
00284 };
00285 
00286 ////////////////////////////////////////////////////////////////////////
00287 // UpdateScheduler -- Handle scheduling of UpdateEntry objects
00288 ////////////////////////////////////////////////////////////////////////
00289 typedef int (UpdateScheduler::*UpdateSchedulerContHandler) (int, void *);  // pointer-to-member type for (int, void *) handlers
00290
00291 class UpdateScheduler:public Continuation
00292 {
00293 public:
00294   UpdateScheduler(UpdateConfigManager * cm = NULL);  // also the default constructor; not explicit, so UpdateConfigManager* converts implicitly
00295   ~UpdateScheduler();
00296   int Init();
00297   int Init(UpdateScheduler *, UpdateEntry *, Ptr<UpdateConfigParams>);  // overload; presumably fills _parent_US and _base_EN -- TODO confirm
00298
00299   int ScheduleEvent(int, void *);
00300   int Schedule(UpdateEntry * e = NULL);
00301   int ChildExitEventHandler(int, Event *);
00302
00303 private:
00304     Event * _periodic_event;
00305   int _recursive_update;
00306   UpdateConfigManager *_CM;
00307     Ptr<UpdateConfigParams> _CP;
00308     Ptr<UpdateConfigList> _CL;
00309   int _schedule_event_callbacks;
00310   int _update_state_machines;
00311
00312   UpdateEntry *_base_EN;        // Entry from which recursive
00313   //   list was derived
00314   UpdateScheduler *_parent_US;  // Parent which created us
00315 };
00316 
00317 /////////////////////////////////////////////////////////////////
00318 // UpdateSM -- State machine which handles object update action
00319 /////////////////////////////////////////////////////////////////
00320 class UpdateSM;
00321 typedef int (UpdateSM::*UpdateSMContHandler) (int, void *);  // pointer-to-member type for (int, void *) handlers
00322
00323 class UpdateSM:public Continuation
00324 {
00325 public:
00326   enum state_t  // values start at 1, so 0 is not a named state
00327   {
00328     USM_INIT = 1,
00329     USM_PROCESS_URL,
00330     USM_PROCESS_URL_COMPLETION,
00331     USM_EXIT
00332   };
00333
00334   enum
00335   {
00336     N_SCHEMES = 1  // array bound of scheme_dispatch_table and scheme_post_dispatch_table declared later in this file
00337   };
00338   // Static functions matching the int (*)(UpdateSM *) signature used by dispatch_entry::func.
00339   static int http_scheme(UpdateSM *);
00340   static int http_scheme_postproc(UpdateSM *);
00341
00342   UpdateSM(UpdateScheduler *, Ptr<UpdateConfigParams>, UpdateEntry *);
00343   ~UpdateSM();
00344   void Start();
00345   int HandleSMEvent(int, Event *);
00346
00347 public:
00348   UpdateEntry * _EN;  // public, unlike the remaining members below
00349
00350 private:
00351   UpdateScheduler * _US;
00352   Ptr<UpdateConfigParams> _CP;
00353   state_t _state;
00354   int _return_status;
00355   int _retries;
00356 };
00357 
00358 struct dispatch_entry  // pairs a scheme name with the function invoked for it
00359 {
00360   const char **scheme;  // pointer to a string pointer, not the string itself
00361   int (*func) (UpdateSM *);
00362 };
00363 // Both tables hold UpdateSM::N_SCHEMES entries and are defined in another file.
00364 extern struct dispatch_entry scheme_dispatch_table[UpdateSM::N_SCHEMES];
00365 extern struct dispatch_entry scheme_post_dispatch_table[UpdateSM::N_SCHEMES];
00366
00367 struct html_tag  // tag/attribute name pair; HtmlParser::Init() accepts tables of these
00368 {
00369   const char *tag;
00370   const char *attr;
00371 };
00372 
00373 /////////////////////////////////////////////////////////////////////////////
00374 // RecursiveHttpGet -- Generate URL list by recursively traversing
00375 //                     non-terminal URL(s) up to the specified depth.
00376 /////////////////////////////////////////////////////////////////////////////
00377 class ObjectReloadCont;  // forward declaration; RecursiveHttpGet stores an ObjectReloadCont pointer
00378 class RecursiveHttpGet;  // forward declaration needed by the typedef below
00379
00380 typedef int (RecursiveHttpGet::*RecursiveHttpGetContHandler) (int, Event *);  // pointer-to-member type for (int, Event *) handlers
00381 
00382 class HtmlParser
00383 {
00384   // Parse Html routines
00385 public:
00386   static char default_zero_char;  // its address is passed to every DynArray<char> member in the constructor
00387
00388   enum scan_state_t  // scanner states; values start at 1
00389   {
00390     SCAN_INIT = 1,
00391     SCAN_START,
00392     FIND_TAG_START,
00393     COPY_TAG,
00394     IGNORE_COMMENT_START,
00395     IGNORE_COMMENT,
00396     FIND_ATTR_START,
00397     COPY_ATTR,
00398     FIND_ATTR_VALUE_DELIMITER,
00399     FIND_ATTR_VALUE_START,
00400     COPY_ATTR_VALUE,
00401     VALIDATE_ENTRY,
00402     VALIDATE_ENTRY_RESTART,
00403     RESUME_ATTR_VALUE_SCAN,
00404     TERMINATE_COPY_ATTR_VALUE
00405   };
00406
00407   enum
00408   {
00409     MAX_TAG_NAME_LENGTH = 1024,
00410     MAX_ATTR_NAME_LENGTH = 1024
00411   };
00412   // Initializers are listed in member declaration order.  The DynArray arguments are presumably (default element, initial size) -- TODO confirm.
00413     HtmlParser()
00414   : _attr_matched(false), _url(0), _comment_end_ptr(0), _scan_state(SCAN_INIT),
00415     _tag(&default_zero_char, 32), _attr(&default_zero_char, 32),
00416     _attr_value(&default_zero_char, 32),
00417     _attr_value_hash_char_index(-1), _attr_value_quoted(0),
00418     _html_doc_base(&default_zero_char, 128),
00419     _result(&default_zero_char, 128), allowable_html_tags(0), allowable_html_attrs(0)
00420   { }
00421
00422    ~HtmlParser()
00423   { }
00424   // Stores the url pointer and the two tables and clears _attr_matched.  It does not reset _scan_state or the DynArray buffers.
00425   void Init(char *url, struct html_tag *allowed_html_tags, struct html_tag *allowed_html_attrs = NULL) {
00426     _url = url;
00427     allowable_html_tags = allowed_html_tags;
00428     allowable_html_attrs = allowed_html_attrs;
00429     _attr_matched = false;
00430   }
00431   // The functions below are defined outside this header; their return conventions are not visible here.
00432   int ParseHtml(IOBufferReader *, char **, char **);
00433   int ScanHtmlForURL(IOBufferReader *, char **, char **);
00434   int AllowTagAttrValue();
00435   int ValidProtoScheme(char *);
00436   int ValidSupportedProtoScheme(char *);
00437   int ExtractURL(char **, char **);
00438   int ConstructURL(char **, char **);
00439   DynArray<char>*MakeURL(char *, char *, int, int);
00440   DynArray<char>*PrependString(const char *, int, char *, int);
00441   bool _attr_matched;
00442   // Everything below is still public: the class has no private or protected section.
00443   char *_url;  // pointer stored by Init(); the string is not copied
00444   char *_comment_end_ptr;
00445   scan_state_t _scan_state;
00446   DynArray<char>_tag;
00447   DynArray<char>_attr;
00448   DynArray<char>_attr_value;
00449   intptr_t _attr_value_hash_char_index;      // '#' char loc; the constructor sets it to -1
00450   unsigned char _attr_value_quoted;
00451   DynArray<char>_html_doc_base;
00452   DynArray<char>_result;
00453
00454   struct html_tag *allowable_html_tags;  // set by Init(); null after construction
00455   struct html_tag *allowable_html_attrs;  // set by Init(), where it defaults to NULL
00456 };
00457 
00458 class RecursiveHttpGet:public Continuation
00459 {
00460 public:
00461   RecursiveHttpGet();
00462   ~RecursiveHttpGet();
00463   void Init(Continuation *, char *, char *, URL *, HTTPHdr *, int,
00464             Ptr<UpdateConfigList>, struct html_tag *allowed_html_tags);  // most parameters are unnamed; see the implementation for their meaning
00465   int RecursiveHttpGetEvent(int, Event *);
00466
00467   int ExitEventHandler(int, Event *);
00468   // All data members are public.
00469 public:
00470   int _id;
00471   Continuation *_caller_cont;
00472   char *_request_headers;
00473   URL *_url_data;
00474   HTTPHdr *_http_hdr;
00475   int _recursion_depth;
00476     Ptr<UpdateConfigList> _CL;
00477   ObjectReloadCont *_OL;  // only forward-declared at this point in the header
00478   UpdateEntry *_group_link_head;  // presumably the head of a chain linked via UpdateEntry::_group_link -- TODO confirm
00479   int _active_child_state_machines;
00480
00481   HtmlParser html_parser;  // embedded by value
00482 };
00483 
00484 /////////////////////////////////////////////////////////////////////////
00485 // ObjectReloadCont -- Read given URL object via the local proxy port
00486 /////////////////////////////////////////////////////////////////////////
00487 class ObjectReloadCont;
00488 typedef int (ObjectReloadCont::*ObjectReloadContHandler) (int, void *);  // pointer-to-member type for (int, void *) handlers
00489
00490 class ObjectReloadCont:public Continuation
00491 {
00492 public:
00493   ObjectReloadCont();
00494   ~ObjectReloadCont();
00495   void Init(Continuation *, char *, int, char *, int, int, int);  // unnamed parameters; see the implementation for their meaning
00496   void free();  // presumably returns the object to ObjectReloadContAllocator -- TODO confirm
00497   int ObjectReloadEvent(int, void *);
00498   int CallBackUser(int, void *);
00499
00500   enum state_t  // values start at 1
00501   {
00502     START = 1,
00503     ATTEMPT_CONNECT,
00504     WRITING_URL,
00505     READING_DATA
00506   };
00507   // All data members are public.
00508   Continuation *_caller_cont;
00509   int _request_id;
00510   MIOBuffer *_send_data;
00511   MIOBuffer *_receive_data;
00512   IOBufferReader *_receive_data_reader;
00513   Event *_start_event;
00514   state_t _state;
00515   Action *_cur_action;
00516   class NetVConnection *_netvc;  // the elaborated type specifier lets this compile without a prior NetVConnection declaration
00517   VIO *_write_vio;
00518   VIO *_read_vio;
00519   int _read_event_callback;
00520
00521 };
00522
00523 extern ClassAllocator<ObjectReloadCont> ObjectReloadContAllocator;  // declaration only; the definition is in another file
00524 
00525 #endif // _Update_h_
00526 
00527 // End of Update.h

Generated by  doxygen 1.7.1