Public Types | Public Member Functions | Data Fields | Static Public Attributes

HtmlParser Class Reference

#include <Update.h>

Collaboration diagram for HtmlParser:
Collaboration graph
[legend]

Public Types

enum  scan_state_t {
  SCAN_INIT = 1, SCAN_START, FIND_TAG_START, COPY_TAG,
  IGNORE_COMMENT_START, IGNORE_COMMENT, FIND_ATTR_START, COPY_ATTR,
  FIND_ATTR_VALUE_DELIMITER, FIND_ATTR_VALUE_START, COPY_ATTR_VALUE, VALIDATE_ENTRY,
  VALIDATE_ENTRY_RESTART, RESUME_ATTR_VALUE_SCAN, TERMINATE_COPY_ATTR_VALUE
}
enum  { MAX_TAG_NAME_LENGTH = 1024, MAX_ATTR_NAME_LENGTH = 1024 }

Public Member Functions

 HtmlParser ()
 ~HtmlParser ()
void Init (char *url, struct html_tag *allowed_html_tags, struct html_tag *allowed_html_attrs=NULL)
int ParseHtml (IOBufferReader *, char **, char **)
int ScanHtmlForURL (IOBufferReader *, char **, char **)
int AllowTagAttrValue ()
int ValidProtoScheme (char *)
int ValidSupportedProtoScheme (char *)
int ExtractURL (char **, char **)
int ConstructURL (char **, char **)
DynArray< char > * MakeURL (char *, char *, int, int)
DynArray< char > * PrependString (const char *, int, char *, int)

Data Fields

bool _attr_matched
char * _url
char * _comment_end_ptr
scan_state_t _scan_state
DynArray< char > _tag
DynArray< char > _attr
DynArray< char > _attr_value
intptr_t _attr_value_hash_char_index
unsigned char _attr_value_quoted
DynArray< char > _html_doc_base
DynArray< char > _result
struct html_tag * allowable_html_tags
struct html_tag * allowable_html_attrs

Static Public Attributes

static char default_zero_char = '\0'

Detailed Description

Definition at line 382 of file Update.h.


Member Enumeration Documentation

anonymous enum
Enumerator:
MAX_TAG_NAME_LENGTH 
MAX_ATTR_NAME_LENGTH 

Definition at line 407 of file Update.h.

enum HtmlParser::scan_state_t
Enumerator:
SCAN_INIT 
SCAN_START 
FIND_TAG_START 
COPY_TAG 
IGNORE_COMMENT_START 
IGNORE_COMMENT 
FIND_ATTR_START 
COPY_ATTR 
FIND_ATTR_VALUE_DELIMITER 
FIND_ATTR_VALUE_START 
COPY_ATTR_VALUE 
VALIDATE_ENTRY 
VALIDATE_ENTRY_RESTART 
RESUME_ATTR_VALUE_SCAN 
TERMINATE_COPY_ATTR_VALUE 

Definition at line 388 of file Update.h.


Constructor & Destructor Documentation

HtmlParser::HtmlParser (  )  [inline]

Definition at line 413 of file Update.h.

HtmlParser::~HtmlParser (  )  [inline]

Definition at line 422 of file Update.h.


Member Function Documentation

int HtmlParser::AllowTagAttrValue (  ) 
int HtmlParser::ConstructURL ( char **  url,
char **  url_end 
)
int HtmlParser::ExtractURL ( char **  url,
char **  url_end 
)
void HtmlParser::Init ( char *  url,
struct html_tag *  allowed_html_tags,
struct html_tag *  allowed_html_attrs = NULL 
) [inline]
DynArray< char > * HtmlParser::MakeURL ( char *  url,
char *  sub,
int  subsize,
int  relative_url 
)

Definition at line 2366 of file Update.cc.

References default_zero_char, and DynArray< T >::length().

Referenced by ConstructURL().

int HtmlParser::ParseHtml ( IOBufferReader *  r,
char **  url,
char **  url_end 
)
DynArray< char > * HtmlParser::PrependString ( const char *  pre,
int  presize,
char *  sub,
int  subsize 
)

Definition at line 2441 of file Update.cc.

References default_zero_char, and DynArray< T >::length().

Referenced by ConstructURL().

int HtmlParser::ScanHtmlForURL ( IOBufferReader *  r,
char **  url,
char **  url_end 
)
int HtmlParser::ValidProtoScheme ( char *  p  ) 

Definition at line 2168 of file Update.cc.

References html_tag::tag, and schemes_descriptor::tag.

Referenced by ConstructURL().

int HtmlParser::ValidSupportedProtoScheme ( char *  p  ) 

Definition at line 2180 of file Update.cc.

References html_tag::tag, and schemes_descriptor::tag.

Referenced by ConstructURL().


Field Documentation

DynArray< char > HtmlParser::_attr

Definition at line 447 of file Update.h.

Referenced by AllowTagAttrValue(), ExtractURL(), and ScanHtmlForURL().

bool HtmlParser::_attr_matched

Definition at line 441 of file Update.h.

Referenced by AllowTagAttrValue(), Init(), and ScanHtmlForURL().

DynArray< char > HtmlParser::_attr_value

Definition at line 448 of file Update.h.

Referenced by AllowTagAttrValue(), ExtractURL(), and ScanHtmlForURL().

intptr_t HtmlParser::_attr_value_hash_char_index

Definition at line 449 of file Update.h.

Referenced by ExtractURL(), and ScanHtmlForURL().

unsigned char HtmlParser::_attr_value_quoted

Definition at line 450 of file Update.h.

Referenced by ScanHtmlForURL().

char * HtmlParser::_comment_end_ptr

Definition at line 444 of file Update.h.

Referenced by ScanHtmlForURL().

DynArray< char > HtmlParser::_html_doc_base

Definition at line 451 of file Update.h.

Referenced by ConstructURL(), and ExtractURL().

DynArray< char > HtmlParser::_result

Definition at line 452 of file Update.h.

Referenced by ConstructURL().

scan_state_t HtmlParser::_scan_state

Definition at line 445 of file Update.h.

Referenced by ScanHtmlForURL().

DynArray< char > HtmlParser::_tag

Definition at line 446 of file Update.h.

Referenced by AllowTagAttrValue(), ExtractURL(), and ScanHtmlForURL().

struct html_tag * HtmlParser::allowable_html_attrs

Definition at line 455 of file Update.h.

Referenced by AllowTagAttrValue(), and Init().

struct html_tag * HtmlParser::allowable_html_tags

Definition at line 454 of file Update.h.

Referenced by AllowTagAttrValue(), and Init().

char HtmlParser::default_zero_char = '\0' [static]

Definition at line 386 of file Update.h.

Referenced by MakeURL(), and PrependString().


The documentation for this class was generated from the following files: