• Main Page
  • Related Pages
  • Namespaces
  • Data Structures
  • Files
  • File List
  • Globals

HdrToken.cc

Go to the documentation of this file.
00001 /** @file
00002 
00003   A brief file description
00004 
00005   @section license License
00006 
00007   Licensed to the Apache Software Foundation (ASF) under one
00008   or more contributor license agreements.  See the NOTICE file
00009   distributed with this work for additional information
00010   regarding copyright ownership.  The ASF licenses this file
00011   to you under the Apache License, Version 2.0 (the
00012   "License"); you may not use this file except in compliance
00013   with the License.  You may obtain a copy of the License at
00014 
00015       http://www.apache.org/licenses/LICENSE-2.0
00016 
00017   Unless required by applicable law or agreed to in writing, software
00018   distributed under the License is distributed on an "AS IS" BASIS,
00019   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00020   See the License for the specific language governing permissions and
00021   limitations under the License.
00022  */
00023 
00024 #include "libts.h"
00025 #include <stdio.h>
00026 #include "Allocator.h"
00027 #include "Compatability.h"
00028 #include "HTTP.h"
00029 #include "HdrToken.h"
00030 #include "MIME.h"
00031 #include "Regex.h"
00032 #include "URL.h"
00033 
/*
 You SHOULD also add new entries to _hdrtoken_commonly_tokenized_strs, using the same ordering.
 ** important, ordering matters **

 You want a regexp like 'Accept' to come after "greedier" choices so that it doesn't match
 'Accept-Ranges' earlier than it should. The regexps are anchored (^Accept), but I don't see a
 way with the current system to match the word ONLY without making _hdrtoken_strs a real PCRE,
 and that would break the hashing:
 hdrtoken_hash("^Accept$") != hdrtoken_hash("Accept")

 So, the current hack is to have "Accept" follow "Accept-.*". Lame, I know.

  /ericb
*/
00047 
00048 static const char *_hdrtoken_strs[] = {
00049   // MIME Field names
00050   "Accept-Charset",
00051   "Accept-Encoding",
00052   "Accept-Language",
00053   "Accept-Ranges",
00054   "Accept",
00055   "Age",
00056   "Allow",
00057   "Approved",                   // NNTP
00058   "Authorization",
00059   "Bytes",                      // NNTP
00060   "Cache-Control",
00061   "Client-ip",
00062   "Connection",
00063   "Content-Base",
00064   "Content-Encoding",
00065   "Content-Language",
00066   "Content-Length",
00067   "Content-Location",
00068   "Content-MD5",
00069   "Content-Range",
00070   "Content-Type",
00071   "Control",                    // NNTP
00072   "Cookie",
00073   "Date",
00074   "Distribution",               // NNTP
00075   "Etag",
00076   "Expect",
00077   "Expires",
00078   "Followup-To",                // NNTP
00079   "From",
00080   "Host",
00081   "If-Match",
00082   "If-Modified-Since",
00083   "If-None-Match",
00084   "If-Range",
00085   "If-Unmodified-Since",
00086   "Keep-Alive",
00087   "Keywords",                   // NNTP
00088   "Last-Modified",
00089   "Lines",                      // NNTP
00090   "Location",
00091   "Max-Forwards",
00092   "Message-ID",                 // NNTP
00093   "MIME-Version",
00094   "Newsgroups",                 // NNTP
00095   "Organization",               // NNTP
00096   "Path",                       // NNTP
00097   "Pragma",
00098   "Proxy-Authenticate",
00099   "Proxy-Authorization",
00100   "Proxy-Connection",
00101   "Public",
00102   "Range",
00103   "References",                 // NNTP
00104   "Referer",
00105   "Reply-To",                   // NNTP
00106   "Retry-After",
00107   "Sender",                     // NNTP
00108   "Server",
00109   "Set-Cookie",
00110   "Subject",                    // NNTP
00111   "Summary",                    // NNTP
00112   "Transfer-Encoding",
00113   "Upgrade",
00114   "User-Agent",
00115   "Vary",
00116   "Via",
00117   "Warning",
00118   "Www-Authenticate",
00119   "Xref",                       // NNTP
00120   "@DataInfo",                  // Internal Hack
00121   
00122   // Accept-Encoding
00123   "compress",
00124   "deflate",
00125   "gzip",
00126   "identity",
00127   
00128   // Cache-Control flags
00129   "max-age",
00130   "max-stale",
00131   "min-fresh",
00132   "must-revalidate",
00133   "no-cache",
00134   "no-store",
00135   "no-transform",
00136   "only-if-cached",
00137   "private",
00138   "proxy-revalidate",
00139   "s-maxage",
00140   "need-revalidate-once",
00141   
00142   // HTTP miscellaneous
00143   "none",
00144   "chunked",
00145   "close",
00146   
00147   // WS
00148   "websocket",
00149   "Sec-WebSocket-Key",
00150   "Sec-WebSocket-Version",
00151 
00152   // URL schemes
00153   "file",
00154   "ftp",
00155   "gopher",
00156   "https",
00157   "http",
00158   "mailto",
00159   "news",
00160   "nntp",
00161   "prospero",
00162   "telnet",
00163   "tunnel",
00164   "wais",
00165   "pnm",
00166   "rtspu",
00167   "rtsp",
00168   "mmsu",
00169   "mmst",
00170   "mms",
00171   "wss",
00172   "ws",
00173   
00174   // HTTP methods
00175   "CONNECT",
00176   "DELETE",
00177   "GET",
00178   "POST",
00179   "HEAD",
00180   "ICP_QUERY",
00181   "OPTIONS",
00182   "PURGE",
00183   "PUT",
00184   "TRACE",
00185   "PUSH",
00186   
00187   // Header extensions
00188   "X-ID",
00189   "X-Forwarded-For",
00190   "TE",
00191   "Strict-Transport-Security",
00192   "100-continue"
00193 };
00194 
00195 static HdrTokenTypeBinding _hdrtoken_strs_type_initializers[] = {
00196   {"file", HDRTOKEN_TYPE_SCHEME},
00197   {"ftp", HDRTOKEN_TYPE_SCHEME},
00198   {"gopher", HDRTOKEN_TYPE_SCHEME},
00199   {"http", HDRTOKEN_TYPE_SCHEME},
00200   {"https", HDRTOKEN_TYPE_SCHEME},
00201   {"mailto", HDRTOKEN_TYPE_SCHEME},
00202   {"news", HDRTOKEN_TYPE_SCHEME},
00203   {"nntp", HDRTOKEN_TYPE_SCHEME},
00204   {"prospero", HDRTOKEN_TYPE_SCHEME},
00205   {"telnet", HDRTOKEN_TYPE_SCHEME},
00206   {"tunnel", HDRTOKEN_TYPE_SCHEME},
00207   {"wais", HDRTOKEN_TYPE_SCHEME},
00208   {"pnm", HDRTOKEN_TYPE_SCHEME},
00209   {"rtsp", HDRTOKEN_TYPE_SCHEME},
00210   {"rtspu", HDRTOKEN_TYPE_SCHEME},
00211   {"mms", HDRTOKEN_TYPE_SCHEME},
00212   {"mmsu", HDRTOKEN_TYPE_SCHEME},
00213   {"mmst", HDRTOKEN_TYPE_SCHEME},
00214   {"wss", HDRTOKEN_TYPE_SCHEME},
00215   {"ws", HDRTOKEN_TYPE_SCHEME},
00216 
00217   {"CONNECT", HDRTOKEN_TYPE_METHOD},
00218   {"DELETE", HDRTOKEN_TYPE_METHOD},
00219   {"GET", HDRTOKEN_TYPE_METHOD},
00220   {"HEAD", HDRTOKEN_TYPE_METHOD},
00221   {"ICP_QUERY", HDRTOKEN_TYPE_METHOD},
00222   {"OPTIONS", HDRTOKEN_TYPE_METHOD},
00223   {"POST", HDRTOKEN_TYPE_METHOD},
00224   {"PURGE", HDRTOKEN_TYPE_METHOD},
00225   {"PUT", HDRTOKEN_TYPE_METHOD},
00226   {"TRACE", HDRTOKEN_TYPE_METHOD},
00227   {"PUSH", HDRTOKEN_TYPE_METHOD},
00228 
00229   {"max-age", HDRTOKEN_TYPE_CACHE_CONTROL},
00230   {"max-stale", HDRTOKEN_TYPE_CACHE_CONTROL},
00231   {"min-fresh", HDRTOKEN_TYPE_CACHE_CONTROL},
00232   {"must-revalidate", HDRTOKEN_TYPE_CACHE_CONTROL},
00233   {"no-cache", HDRTOKEN_TYPE_CACHE_CONTROL},
00234   {"no-store", HDRTOKEN_TYPE_CACHE_CONTROL},
00235   {"no-transform", HDRTOKEN_TYPE_CACHE_CONTROL},
00236   {"only-if-cached", HDRTOKEN_TYPE_CACHE_CONTROL},
00237   {"private", HDRTOKEN_TYPE_CACHE_CONTROL},
00238   {"proxy-revalidate", HDRTOKEN_TYPE_CACHE_CONTROL},
00239   {"public", HDRTOKEN_TYPE_CACHE_CONTROL},
00240   {"s-maxage", HDRTOKEN_TYPE_CACHE_CONTROL},
00241   {"need-revalidate-once", HDRTOKEN_TYPE_CACHE_CONTROL},
00242 
00243   {(char *) NULL, (HdrTokenType) 0}
00244 };
00245 
00246 
00247 static HdrTokenFieldInfo _hdrtoken_strs_field_initializers[] = {
00248   {"Accept", MIME_SLOTID_ACCEPT, MIME_PRESENCE_ACCEPT, (HTIF_COMMAS | HTIF_MULTVALS)},
00249   {"Accept-Charset", MIME_SLOTID_ACCEPT_CHARSET, MIME_PRESENCE_ACCEPT_CHARSET, (HTIF_COMMAS | HTIF_MULTVALS)},
00250   {"Accept-Encoding", MIME_SLOTID_ACCEPT_ENCODING, MIME_PRESENCE_ACCEPT_ENCODING, (HTIF_COMMAS | HTIF_MULTVALS)},
00251   {"Accept-Language", MIME_SLOTID_ACCEPT_LANGUAGE, MIME_PRESENCE_ACCEPT_LANGUAGE, (HTIF_COMMAS | HTIF_MULTVALS)},
00252   {"Accept-Ranges", MIME_SLOTID_NONE, MIME_PRESENCE_ACCEPT_RANGES, (HTIF_COMMAS | HTIF_MULTVALS)},
00253   {"Age", MIME_SLOTID_AGE, MIME_PRESENCE_AGE, HTIF_NONE},
00254   {"Allow", MIME_SLOTID_NONE, MIME_PRESENCE_ALLOW, (HTIF_COMMAS | HTIF_MULTVALS)},
00255   {"Approved", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
00256   {"Authorization", MIME_SLOTID_AUTHORIZATION, MIME_PRESENCE_AUTHORIZATION, HTIF_NONE},
00257   {"Bytes", MIME_SLOTID_NONE, MIME_PRESENCE_BYTES, HTIF_NONE},
00258   {"Cache-Control", MIME_SLOTID_CACHE_CONTROL, MIME_PRESENCE_CACHE_CONTROL, (HTIF_COMMAS | HTIF_MULTVALS)},
00259   {"Client-ip", MIME_SLOTID_CLIENT_IP, MIME_PRESENCE_CLIENT_IP, HTIF_NONE},
00260   {"Connection", MIME_SLOTID_CONNECTION, MIME_PRESENCE_CONNECTION, (HTIF_COMMAS | HTIF_MULTVALS | HTIF_HOPBYHOP)},
00261   {"Content-Base", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
00262   {"Content-Encoding", MIME_SLOTID_CONTENT_ENCODING, MIME_PRESENCE_CONTENT_ENCODING, (HTIF_COMMAS | HTIF_MULTVALS)},
00263   {"Content-Language", MIME_SLOTID_CONTENT_LANGUAGE, MIME_PRESENCE_CONTENT_LANGUAGE, (HTIF_COMMAS | HTIF_MULTVALS)},
00264   {"Content-Length", MIME_SLOTID_CONTENT_LENGTH, MIME_PRESENCE_CONTENT_LENGTH, HTIF_NONE},
00265   {"Content-Location", MIME_SLOTID_NONE, MIME_PRESENCE_CONTENT_LOCATION, HTIF_NONE},
00266   {"Content-MD5", MIME_SLOTID_NONE, MIME_PRESENCE_CONTENT_MD5, HTIF_NONE},
00267   {"Content-Range", MIME_SLOTID_NONE, MIME_PRESENCE_CONTENT_RANGE, HTIF_NONE},
00268   {"Content-Type", MIME_SLOTID_CONTENT_TYPE, MIME_PRESENCE_CONTENT_TYPE, HTIF_NONE},
00269   {"Control", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
00270   {"Cookie", MIME_SLOTID_COOKIE, MIME_PRESENCE_COOKIE, (HTIF_MULTVALS)},
00271   {"Date", MIME_SLOTID_DATE, MIME_PRESENCE_DATE, HTIF_NONE},
00272   {"Distribution", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
00273   {"Etag", MIME_SLOTID_NONE, MIME_PRESENCE_ETAG, HTIF_NONE},
00274   {"Expires", MIME_SLOTID_EXPIRES, MIME_PRESENCE_EXPIRES, HTIF_NONE},
00275   {"Followup-To", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
00276   {"From", MIME_SLOTID_NONE, MIME_PRESENCE_FROM, HTIF_NONE},
00277   {"Host", MIME_SLOTID_NONE, MIME_PRESENCE_HOST, HTIF_NONE},
00278   {"If-Match", MIME_SLOTID_IF_MATCH, MIME_PRESENCE_IF_MATCH, (HTIF_COMMAS | HTIF_MULTVALS)},
00279   {"If-Modified-Since", MIME_SLOTID_IF_MODIFIED_SINCE, MIME_PRESENCE_IF_MODIFIED_SINCE, HTIF_NONE},
00280   {"If-None-Match", MIME_SLOTID_IF_NONE_MATCH, MIME_PRESENCE_IF_NONE_MATCH, (HTIF_COMMAS | HTIF_MULTVALS)},
00281   {"If-Range", MIME_SLOTID_IF_RANGE, MIME_PRESENCE_IF_RANGE, HTIF_NONE},
00282   {"If-Unmodified-Since", MIME_SLOTID_IF_UNMODIFIED_SINCE, MIME_PRESENCE_IF_UNMODIFIED_SINCE, HTIF_NONE},
00283   {"Keep-Alive", MIME_SLOTID_NONE, MIME_PRESENCE_KEEP_ALIVE, (HTIF_HOPBYHOP)},
00284   {"Keywords", MIME_SLOTID_NONE, MIME_PRESENCE_KEYWORDS, HTIF_NONE},
00285   {"Last-Modified", MIME_SLOTID_LAST_MODIFIED, MIME_PRESENCE_LAST_MODIFIED, HTIF_NONE},
00286   {"Lines", MIME_SLOTID_NONE, MIME_PRESENCE_LINES, HTIF_NONE},
00287   {"Location", MIME_SLOTID_NONE, MIME_PRESENCE_LOCATION, (HTIF_MULTVALS)},
00288   {"Max-Forwards", MIME_SLOTID_NONE, MIME_PRESENCE_MAX_FORWARDS, HTIF_NONE},
00289   {"Message-ID", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
00290   {"Newsgroups", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
00291   {"Organization", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
00292   {"Path", MIME_SLOTID_NONE, MIME_PRESENCE_PATH, HTIF_NONE},
00293   {"Pragma", MIME_SLOTID_PRAGMA, MIME_PRESENCE_PRAGMA, (HTIF_COMMAS | HTIF_MULTVALS)},
00294   {"Proxy-Authenticate", MIME_SLOTID_NONE, MIME_PRESENCE_PROXY_AUTHENTICATE, (HTIF_HOPBYHOP | HTIF_PROXYAUTH)},
00295   {"Proxy-Authorization", MIME_SLOTID_NONE, MIME_PRESENCE_PROXY_AUTHORIZATION, (HTIF_HOPBYHOP | HTIF_PROXYAUTH)},
00296   {"Proxy-Connection", MIME_SLOTID_PROXY_CONNECTION, MIME_PRESENCE_PROXY_CONNECTION,
00297    (HTIF_COMMAS | HTIF_MULTVALS | HTIF_HOPBYHOP)},
00298   {"Public", MIME_SLOTID_NONE, MIME_PRESENCE_PUBLIC, (HTIF_COMMAS | HTIF_MULTVALS)},
00299   {"Range", MIME_SLOTID_RANGE, MIME_PRESENCE_RANGE, (HTIF_COMMAS | HTIF_MULTVALS)},
00300   {"References", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
00301   {"Referer", MIME_SLOTID_NONE, MIME_PRESENCE_REFERER, HTIF_NONE},
00302   {"Reply-To", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
00303   {"Retry-After", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
00304   {"Sender", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
00305   {"Server", MIME_SLOTID_NONE, MIME_PRESENCE_SERVER, HTIF_NONE},
00306   {"Set-Cookie", MIME_SLOTID_SET_COOKIE, MIME_PRESENCE_SET_COOKIE, (HTIF_MULTVALS)},
00307   {"Strict-Transport-Security", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, (HTIF_MULTVALS)},
00308   {"Subject", MIME_SLOTID_NONE, MIME_PRESENCE_SUBJECT, HTIF_NONE},
00309   {"Summary", MIME_SLOTID_NONE, MIME_PRESENCE_SUMMARY, HTIF_NONE},
00310   {"TE", MIME_SLOTID_TE, MIME_PRESENCE_TE, (HTIF_COMMAS | HTIF_MULTVALS | HTIF_HOPBYHOP)},
00311   {"Transfer-Encoding", MIME_SLOTID_TRANSFER_ENCODING, MIME_PRESENCE_TRANSFER_ENCODING, (HTIF_COMMAS | HTIF_MULTVALS)},
00312   {"Upgrade", MIME_SLOTID_NONE, MIME_PRESENCE_UPGRADE, (HTIF_COMMAS | HTIF_MULTVALS | HTIF_HOPBYHOP)},
00313   {"User-Agent", MIME_SLOTID_USER_AGENT, MIME_PRESENCE_USER_AGENT, HTIF_NONE},
00314   {"Vary", MIME_SLOTID_VARY, MIME_PRESENCE_VARY, (HTIF_COMMAS | HTIF_MULTVALS)},
00315   {"Via", MIME_SLOTID_VIA, MIME_PRESENCE_VIA, (HTIF_COMMAS | HTIF_MULTVALS)},
00316   {"Warning", MIME_SLOTID_NONE, MIME_PRESENCE_WARNING, (HTIF_COMMAS | HTIF_MULTVALS)},
00317   {"Www-Authenticate", MIME_SLOTID_WWW_AUTHENTICATE, MIME_PRESENCE_WWW_AUTHENTICATE, HTIF_NONE},
00318   {"Xref", MIME_SLOTID_NONE, MIME_PRESENCE_XREF, HTIF_NONE},
00319   {"@DataInfo", MIME_SLOTID_NONE, MIME_PRESENCE_INT_DATA_INFO, HTIF_NONE},
00320   {"X-ID", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, (HTIF_COMMAS | HTIF_MULTVALS | HTIF_HOPBYHOP)},
00321   {"X-Forwarded-For", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, (HTIF_COMMAS | HTIF_MULTVALS)},
00322   {"Sec-WebSocket-Key", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
00323   {"Sec-WebSocket-Version", MIME_SLOTID_NONE, MIME_PRESENCE_NONE, HTIF_NONE},
00324   {NULL, 0, 0, 0}
00325 };
00326 
00327 const char *_hdrtoken_strs_heap_f = NULL;       // storage first byte
00328 const char *_hdrtoken_strs_heap_l = NULL;       // storage last byte
00329 
00330 int hdrtoken_num_wks = SIZEOF(_hdrtoken_strs);  // # of well-known strings
00331 
00332 const char *hdrtoken_strs[SIZEOF(_hdrtoken_strs)];      // wks_idx -> heap ptr
00333 int hdrtoken_str_lengths[SIZEOF(_hdrtoken_strs)];       // wks_idx -> length
00334 HdrTokenType hdrtoken_str_token_types[SIZEOF(_hdrtoken_strs)];  // wks_idx -> token type
00335 int32_t hdrtoken_str_slotids[SIZEOF(_hdrtoken_strs)];     // wks_idx -> slot id
00336 uint64_t hdrtoken_str_masks[SIZEOF(_hdrtoken_strs)];      // wks_idx -> presence mask
00337 uint32_t hdrtoken_str_flags[SIZEOF(_hdrtoken_strs)];      // wks_idx -> flags
00338 
00339 DFA *hdrtoken_strs_dfa = NULL;
00340 
00341 /***********************************************************************
00342  *                                                                     *
00343  *                        H A S H    T A B L E                         *
00344  *                                                                     *
00345  ***********************************************************************/
00346 
00347 #define HDRTOKEN_HASH_TABLE_SIZE        65536
00348 #define HDRTOKEN_HASH_TABLE_MASK        HDRTOKEN_HASH_TABLE_SIZE-1
00349 
00350 struct HdrTokenHashBucket
00351 {
00352   const char *wks;
00353   uint32_t hash;
00354 };
00355 
00356 HdrTokenHashBucket hdrtoken_hash_table[HDRTOKEN_HASH_TABLE_SIZE];
00357 
00358 /**
00359   basic FNV hash
00360 **/
00361 #define TINY_MASK(x) (((u_int32_t)1<<(x))-1)
00362 
00363 inline uint32_t
00364 hash_to_slot(uint32_t hash)
00365 {
00366   return ((hash>>15) ^ hash) & TINY_MASK(15);
00367 }
00368 
00369 inline uint32_t
00370 hdrtoken_hash(const unsigned char *string, unsigned int length)
00371 {
00372   static const uint32_t InitialFNV = 2166136261U;
00373   static const int32_t FNVMultiple = 16777619;
00374 
00375   uint32_t hash = InitialFNV;
00376 
00377   for (size_t i = 0; i < length; i++)  {
00378       hash = hash ^ (toupper(string[i])); 
00379       hash = hash * FNVMultiple;          
00380   }
00381 
00382   return hash;
00383 }
00384 
00385 /*-------------------------------------------------------------------------
00386   -------------------------------------------------------------------------*/
00387 
00388 static const char *_hdrtoken_commonly_tokenized_strs[] = {
00389   // MIME Field names
00390   "Accept-Charset",
00391   "Accept-Encoding",
00392   "Accept-Language",
00393   "Accept-Ranges",
00394   "Accept",
00395   "Age",
00396   "Allow",
00397   "Approved",                   // NNTP
00398   "Authorization",
00399   "Bytes",                      // NNTP
00400   "Cache-Control",
00401   "Client-ip",
00402   "Connection",
00403   "Content-Base",
00404   "Content-Encoding",
00405   "Content-Language",
00406   "Content-Length",
00407   "Content-Location",
00408   "Content-MD5",
00409   "Content-Range",
00410   "Content-Type",
00411   "Control",                    // NNTP
00412   "Cookie",
00413   "Date",
00414   "Distribution",               // NNTP
00415   "Etag",
00416   "Expect",
00417   "Expires",
00418   "Followup-To",                // NNTP
00419   "From",
00420   "Host",
00421   "If-Match",
00422   "If-Modified-Since",
00423   "If-None-Match",
00424   "If-Range",
00425   "If-Unmodified-Since",
00426   "Keep-Alive",
00427   "Keywords",                   // NNTP
00428   "Last-Modified",
00429   "Lines",                      // NNTP
00430   "Location",
00431   "Max-Forwards",
00432   "Message-ID",                 // NNTP
00433   "MIME-Version",
00434   "Newsgroups",                 // NNTP
00435   "Organization",               // NNTP
00436   "Path",                       // NNTP
00437   "Pragma",
00438   "Proxy-Authenticate",
00439   "Proxy-Authorization",
00440   "Proxy-Connection",
00441   "Public",
00442   "Range",
00443   "References",                 // NNTP
00444   "Referer",
00445   "Reply-To",                   // NNTP
00446   "Retry-After",
00447   "Sender",                     // NNTP
00448   "Server",
00449   "Set-Cookie",
00450   "Subject",                    // NNTP
00451   "Summary",                    // NNTP
00452   "Transfer-Encoding",
00453   "Upgrade",
00454   "User-Agent",
00455   "Vary",
00456   "Via",
00457   "Warning",
00458   "Www-Authenticate",
00459   "Xref",                       // NNTP
00460   "@DataInfo",                  // Internal Hack
00461   
00462   // Accept-Encoding
00463   "compress",
00464   "deflate",
00465   "gzip",
00466   "identity",
00467   
00468   // Cache-Control flags
00469   "max-age",
00470   "max-stale",
00471   "min-fresh",
00472   "must-revalidate",
00473   "no-cache",
00474   "no-store",
00475   "no-transform",
00476   "only-if-cached",
00477   "private",
00478   "proxy-revalidate",
00479   "s-maxage",
00480   "need-revalidate-once",
00481   
00482   // HTTP miscellaneous
00483   "none",
00484   "chunked",
00485   "close",
00486   
00487   // WS
00488   "websocket",
00489   "Sec-WebSocket-Key",
00490   "Sec-WebSocket-Version",
00491 
00492   // URL schemes
00493   "file",
00494   "ftp",
00495   "gopher",
00496   "https",
00497   "http",
00498   "mailto",
00499   "news",
00500   "nntp",
00501   "prospero",
00502   "telnet",
00503   "tunnel",
00504   "wais",
00505   "pnm",
00506   "rtspu",
00507   "rtsp",
00508   "mmsu",
00509   "mmst",
00510   "mms",
00511   "wss",
00512   "ws",
00513   
00514   // HTTP methods
00515   "CONNECT",
00516   "DELETE",
00517   "GET",
00518   "POST",
00519   "HEAD",
00520   "ICP_QUERY",
00521   "OPTIONS",
00522   "PURGE",
00523   "PUT",
00524   "TRACE",
00525   "PUSH",
00526   
00527   // Header extensions
00528   "X-ID",
00529   "X-Forwarded-For",
00530   "TE",
00531   "Strict-Transport-Security",
00532   "100-continue"
00533 };
00534 
00535 /*-------------------------------------------------------------------------
00536   -------------------------------------------------------------------------*/
00537 
00538 void
00539 hdrtoken_hash_init()
00540 {
00541   uint32_t i;
00542   int num_collisions;
00543 
00544   memset(hdrtoken_hash_table, 0, sizeof(hdrtoken_hash_table));
00545   num_collisions = 0;
00546 
00547   for (i = 0; i < (int) SIZEOF(_hdrtoken_commonly_tokenized_strs); i++) {
00548     // convert the common string to the well-known token
00549     unsigned const char *wks;
00550     int wks_idx = hdrtoken_tokenize_dfa(_hdrtoken_commonly_tokenized_strs[i],
00551                                         (int) strlen(_hdrtoken_commonly_tokenized_strs[i]),
00552                                         (const char **) &wks);
00553     ink_release_assert(wks_idx >= 0);
00554 
00555     uint32_t hash = hdrtoken_hash(wks, hdrtoken_str_lengths[wks_idx]);
00556     uint32_t slot = hash_to_slot(hash);
00557 
00558     if (hdrtoken_hash_table[slot].wks) {
00559       printf("ERROR: hdrtoken_hash_table[%u] collision: '%s' replacing '%s'\n",
00560              slot, (const char*)wks, hdrtoken_hash_table[slot].wks);
00561       ++num_collisions;
00562     }
00563     hdrtoken_hash_table[slot].wks = (const char *)wks;
00564     hdrtoken_hash_table[slot].hash = hash;
00565   }
00566 
00567   if (num_collisions > 0)
00568     abort();
00569 }
00570 
00571 
00572 /***********************************************************************
00573  *                                                                     *
00574  *                 M A I N    H D R T O K E N    C O D E               *
00575  *                                                                     *
00576  ***********************************************************************/
00577 
00578 /**
00579   @return returns 0 for n=0, unit*n for n <= unit
00580 */
00581 
00582 static inline unsigned int
00583 snap_up_to_multiple(unsigned int n, unsigned int unit)
00584 {
00585   return ((n + (unit - 1)) / unit) * unit;
00586 }
00587 
00588 /**
00589 */
00590 void
00591 hdrtoken_init()
00592 {
00593   static int inited = 0;
00594 
00595   int i;
00596 
00597   if (!inited) {
00598     inited = 1;
00599 
00600     hdrtoken_strs_dfa = new DFA;
00601     hdrtoken_strs_dfa->compile(_hdrtoken_strs, SIZEOF(_hdrtoken_strs), (REFlags) (RE_CASE_INSENSITIVE));
00602 
00603     // all the tokenized hdrtoken strings are placed in a special heap,
00604     // and each string is prepended with a HdrTokenHeapPrefix ---
00605     // this makes it easy to tell that a string is a tokenized
00606     // string (because its address is within the heap), and
00607     // makes it easy to find the length, index, flags, mask, and
00608     // other info from the prefix.
00609 
00610     int heap_size = 0;
00611     for (i = 0; i < (int) SIZEOF(_hdrtoken_strs); i++) {
00612       hdrtoken_str_lengths[i] = (int) strlen(_hdrtoken_strs[i]);
00613       int sstr_len = snap_up_to_multiple(hdrtoken_str_lengths[i] + 1, sizeof(HdrTokenHeapPrefix));
00614       int packed_prefix_str_len = sizeof(HdrTokenHeapPrefix) + sstr_len;
00615       heap_size += packed_prefix_str_len;
00616     }
00617 
00618     _hdrtoken_strs_heap_f = (const char *)ats_malloc(heap_size);
00619     _hdrtoken_strs_heap_l = _hdrtoken_strs_heap_f + heap_size - 1;
00620 
00621     char *heap_ptr = (char *) _hdrtoken_strs_heap_f;
00622 
00623     for (i = 0; i < (int) SIZEOF(_hdrtoken_strs); i++) {
00624       HdrTokenHeapPrefix prefix;
00625 
00626       memset(&prefix, 0, sizeof(HdrTokenHeapPrefix));
00627 
00628       prefix.wks_idx = i;
00629       prefix.wks_length = hdrtoken_str_lengths[i];
00630       prefix.wks_token_type = HDRTOKEN_TYPE_OTHER;      // default, can override later
00631       prefix.wks_info.name = NULL;      // default, can override later
00632       prefix.wks_info.slotid = MIME_SLOTID_NONE;        // default, can override later
00633       prefix.wks_info.mask = TOK_64_CONST(0);   // default, can override later
00634       prefix.wks_info.flags = MIME_FLAGS_MULTVALS;      // default, can override later
00635 
00636       int sstr_len = snap_up_to_multiple(hdrtoken_str_lengths[i] + 1, sizeof(HdrTokenHeapPrefix));
00637 
00638       *(HdrTokenHeapPrefix *) heap_ptr = prefix;        // set string prefix
00639       heap_ptr += sizeof(HdrTokenHeapPrefix);   // advance heap ptr past index
00640       hdrtoken_strs[i] = heap_ptr;      // record string pointer
00641       // coverity[secure_coding]
00642       ink_strlcpy((char *) hdrtoken_strs[i], _hdrtoken_strs[i], heap_size - sizeof(HdrTokenHeapPrefix));     // copy string into heap
00643       heap_ptr += sstr_len;     // advance heap ptr past string
00644       heap_size -= sstr_len;
00645     }
00646 
00647     // Set the token types for certain tokens
00648     for (i = 0; _hdrtoken_strs_type_initializers[i].name != NULL; i++) {
00649       int wks_idx;
00650       HdrTokenHeapPrefix *prefix;
00651 
00652       wks_idx = hdrtoken_tokenize_dfa(_hdrtoken_strs_type_initializers[i].name,
00653                                       (int) strlen(_hdrtoken_strs_type_initializers[i].name));
00654 
00655       ink_assert((wks_idx >= 0) && (wks_idx < (int) SIZEOF(hdrtoken_strs)));
00656       // coverity[negative_returns]
00657       prefix = hdrtoken_index_to_prefix(wks_idx);
00658       prefix->wks_token_type = _hdrtoken_strs_type_initializers[i].type;
00659     }
00660 
00661     // Set special data for field names
00662     for (i = 0; _hdrtoken_strs_field_initializers[i].name != NULL; i++) {
00663       int wks_idx;
00664       HdrTokenHeapPrefix *prefix;
00665 
00666       wks_idx = hdrtoken_tokenize_dfa(_hdrtoken_strs_field_initializers[i].name,
00667                                       (int) strlen(_hdrtoken_strs_field_initializers[i].name));
00668 
00669       ink_assert((wks_idx >= 0) && (wks_idx < (int) SIZEOF(hdrtoken_strs)));
00670       prefix = hdrtoken_index_to_prefix(wks_idx);
00671       prefix->wks_info.slotid = _hdrtoken_strs_field_initializers[i].slotid;
00672       prefix->wks_info.flags = _hdrtoken_strs_field_initializers[i].flags;
00673       prefix->wks_info.mask = _hdrtoken_strs_field_initializers[i].mask;
00674     }
00675 
00676     for (i = 0; i < (int) SIZEOF(_hdrtoken_strs); i++) {
00677       HdrTokenHeapPrefix *prefix = hdrtoken_index_to_prefix(i);
00678       prefix->wks_info.name = hdrtoken_strs[i];
00679       hdrtoken_str_token_types[i] = prefix->wks_token_type;     // parallel array for speed
00680       hdrtoken_str_slotids[i] = prefix->wks_info.slotid;        // parallel array for speed
00681       hdrtoken_str_masks[i] = prefix->wks_info.mask;    // parallel array for speed
00682       hdrtoken_str_flags[i] = prefix->wks_info.flags;   // parallel array for speed
00683     }
00684 
00685     hdrtoken_hash_init();
00686   }
00687 }
00688 
00689 /*-------------------------------------------------------------------------
00690   -------------------------------------------------------------------------*/
00691 
00692 int
00693 hdrtoken_tokenize_dfa(const char *string, int string_len, const char **wks_string_out)
00694 {
00695   int wks_idx;
00696   
00697   wks_idx = hdrtoken_strs_dfa->match(string, string_len);
00698 
00699   if (wks_idx < 0)
00700     wks_idx = -1;
00701   if (wks_string_out) {
00702     if (wks_idx >= 0)
00703       *wks_string_out = hdrtoken_index_to_wks(wks_idx);
00704     else
00705       *wks_string_out = NULL;
00706   }
00707   //printf("hdrtoken_tokenize_dfa(%d,*s) - return %d\n",string_len,string,wks_idx);
00708 
00709   return wks_idx;
00710 }
00711 
00712 /*-------------------------------------------------------------------------
00713   -------------------------------------------------------------------------*/
00714 
00715 int
00716 hdrtoken_tokenize(const char *string, int string_len, const char **wks_string_out)
00717 {
00718   int wks_idx;
00719   HdrTokenHashBucket *bucket;
00720 
00721   ink_assert(string != NULL);
00722 
00723   if (hdrtoken_is_wks(string)) {
00724     wks_idx = hdrtoken_wks_to_index(string);
00725     if (wks_string_out)
00726       *wks_string_out = string;
00727     return wks_idx;
00728   }
00729 
00730   uint32_t hash = hdrtoken_hash((const unsigned char *) string, (unsigned int) string_len);
00731   uint32_t slot = hash_to_slot(hash);
00732 
00733   bucket = &(hdrtoken_hash_table[slot]);
00734   if ((bucket->wks != NULL) &&
00735       (bucket->hash == hash) &&
00736       (hdrtoken_wks_to_length(bucket->wks) == string_len)) {
00737     wks_idx = hdrtoken_wks_to_index(bucket->wks);
00738     if (wks_string_out)
00739       *wks_string_out = bucket->wks;
00740     return wks_idx;
00741   }
00742 
00743   Debug("hdr_token", "Did not find a WKS for '%.*s'", string_len, string);
00744   return -1;
00745 }
00746 
00747 /*-------------------------------------------------------------------------
00748   -------------------------------------------------------------------------*/
00749 
00750 const char *
00751 hdrtoken_string_to_wks(const char *string)
00752 {
00753   const char *wks = NULL;
00754   hdrtoken_tokenize(string, (int) strlen(string), &wks);
00755   return wks;
00756 }
00757 
00758 /*-------------------------------------------------------------------------
00759   -------------------------------------------------------------------------*/
00760 
00761 const char *
00762 hdrtoken_string_to_wks(const char *string, int length)
00763 {
00764   const char *wks = NULL;
00765   hdrtoken_tokenize(string, length, &wks);
00766   return wks;
00767 }

Generated by  doxygen 1.7.1