00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 
00023 
00024 
00025 
00026 
00027 
00028 
00029 #include "ink_config.h"
00030 
00031 #include <stdio.h>
00032 #include <string.h>
00033 #include <stdlib.h>
00034 
00035 #include "INK_MD5.h"
00036 
00037 #include "Error.h"
00038 #include "SimpleTokenizer.h"
00039 
00040 #include "LogUtils.h"
00041 #include "LogFile.h"
00042 #include "LogField.h"
00043 #include "LogFilter.h"
00044 #include "LogFormat.h"
00045 #include "LogHost.h"
00046 #include "LogBuffer.h"
00047 #include "LogObject.h"
00048 #include "LogConfig.h"
00049 #include "Log.h"
00050 
00051 
00052 
00053 bool LogFormat::m_tagging_on = false;
00054 
00055 
00056 
00057 
00058 
00059 bool
00060 LogFormat::setup(const char *name, const char *format_str, unsigned interval_sec)
00061 {
00062   if (name == NULL) {
00063     Note("missing log format name");
00064     return false;
00065   }
00066 
00067   if (format_str) {
00068     const char *tag = " %<phn>";
00069     const size_t m_format_str_size = strlen(format_str) + (m_tagging_on ? strlen(tag) : 0) + 1;
00070     m_format_str = (char *)ats_malloc(m_format_str_size);
00071     ink_strlcpy(m_format_str, format_str, m_format_str_size);
00072     if (m_tagging_on) {
00073       Note("Log tagging enabled, adding %%<phn> field at the end of " "format %s", name);
00074       ink_strlcat(m_format_str, tag, m_format_str_size);
00075     };
00076 
00077     char *printf_str = NULL;
00078     char *fieldlist_str = NULL;
00079     int nfields = parse_format_string(m_format_str, &printf_str,
00080                                       &fieldlist_str);
00081     if (nfields > (m_tagging_on ? 1 : 0)) {
00082       init_variables(name, fieldlist_str, printf_str, interval_sec);
00083     } else {
00084       Note("Format %s encountered an error parsing the symbol string "
00085            "\"%s\", symbol string contains no fields", ((name) ? name : "no-name"), format_str);
00086       m_valid = false;
00087     }
00088 
00089     ats_free(fieldlist_str);
00090     ats_free(printf_str);
00091 
00092     
00093     return true;
00094   }
00095 
00096   
00097   m_valid = true;
00098   return true;
00099 }
00100 
00101 
00102 
00103 
00104 
00105 int32_t LogFormat::id_from_name(const char *name)
00106 {
00107   int32_t id = 0;
00108   if (name) {
00109     CryptoHash hash;
00110     MD5Context().hash_immediate(hash, name, static_cast<int>(strlen(name)));
00111 #if defined(linux)
00112     
00113 
00114 
00115 
00116 
00117     id = (int32_t) hash.fold() & 0x7fffffff;
00118 #else
00119     id = (int32_t) hash.fold();
00120 #endif
00121   }
00122   return id;
00123 }
00124 
00125 
00126 
00127 
00128 
00129 void
00130 LogFormat::init_variables(const char *name, const char *fieldlist_str, const char *printf_str, unsigned interval_sec)
00131 {
00132   m_field_count = parse_symbol_string(fieldlist_str, &m_field_list, &m_aggregate);
00133 
00134   if (m_field_count == 0) {
00135     m_valid = false;
00136   } else if (m_aggregate && !interval_sec) {
00137     Note("Format for aggregate operators but no interval " "was specified");
00138     m_valid = false;
00139   } else {
00140     if (m_aggregate) {
00141       m_agg_marshal_space = (char *)ats_malloc(m_field_count * INK_MIN_ALIGN);
00142     }
00143 
00144     if (m_name_str) {
00145       ats_free(m_name_str);
00146       m_name_str = NULL;
00147       m_name_id = 0;
00148     }
00149     if (name) {
00150       m_name_str = ats_strdup(name);
00151       m_name_id = id_from_name(m_name_str);
00152     }
00153 
00154     if (m_fieldlist_str) {
00155       ats_free(m_fieldlist_str);
00156       m_fieldlist_str = NULL;
00157       m_fieldlist_id = 0;
00158     }
00159     if (fieldlist_str) {
00160       m_fieldlist_str = ats_strdup(fieldlist_str);
00161       m_fieldlist_id = id_from_name(m_fieldlist_str);
00162     }
00163 
00164     m_printf_str = ats_strdup(printf_str);
00165     m_interval_sec = interval_sec;
00166     m_interval_next = LogUtils::timestamp();
00167 
00168     m_valid = true;
00169   }
00170 }
00171 
00172 
00173 
00174 
00175 
00176 
00177 
00178 
00179 
00180 
00181 LogFormat::LogFormat(const char *name, const char *format_str, unsigned interval_sec)
00182   : m_interval_sec(0),
00183     m_interval_next(0),
00184     m_agg_marshal_space(NULL),
00185     m_valid(false),
00186     m_name_str(NULL),
00187     m_name_id(0),
00188     m_fieldlist_str(NULL),
00189     m_fieldlist_id(0),
00190     m_field_count(0),
00191     m_printf_str(NULL),
00192     m_aggregate(false),
00193     m_format_str(NULL)
00194 {
00195   setup(name, format_str, interval_sec);
00196 
00197   
00198   
00199   m_format_type = format_str ? LOG_FORMAT_CUSTOM : LOG_FORMAT_TEXT;
00200 }
00201 
00202 
00203 
00204 
00205 
00206 
00207 
00208 
00209 
00210 LogFormat::LogFormat(const char *name, const char *fieldlist_str, const char *printf_str, unsigned interval_sec)
00211   : m_interval_sec(0),
00212     m_interval_next(0),
00213     m_agg_marshal_space(NULL),
00214     m_valid(false),
00215     m_name_str(NULL),
00216     m_name_id(0),
00217     m_fieldlist_str(NULL),
00218     m_fieldlist_id(0),
00219     m_field_count(0),
00220     m_printf_str(NULL),
00221     m_aggregate(false),
00222     m_format_str(NULL)
00223 {
00224   init_variables(name, fieldlist_str, printf_str, interval_sec);
00225   m_format_type = LOG_FORMAT_CUSTOM;
00226 }
00227 
00228 
00229 
00230 
00231 
00232 
00233 
00234 LogFormat::LogFormat(const LogFormat & rhs)
00235   : m_interval_sec(0),
00236     m_interval_next(0),
00237     m_agg_marshal_space(NULL),
00238     m_valid(rhs.m_valid),
00239     m_name_str(NULL),
00240     m_name_id(0),
00241     m_fieldlist_str(NULL),
00242     m_fieldlist_id(0),
00243     m_field_count(0),
00244     m_printf_str(NULL),
00245     m_aggregate(false),
00246     m_format_str(NULL),
00247     m_format_type(rhs.m_format_type)
00248 {
00249   if (m_valid) {
00250     if (m_format_type == LOG_FORMAT_TEXT) {
00251       m_name_str = ats_strdup(rhs.m_name_str);
00252     } else {
00253       m_format_str = rhs.m_format_str ? ats_strdup(rhs.m_format_str) : 0;
00254       init_variables(rhs.m_name_str, rhs.m_fieldlist_str, rhs.m_printf_str, rhs.m_interval_sec);
00255     }
00256   }
00257 }
00258 
00259 
00260 
00261 
00262 
00263 LogFormat::~LogFormat()
00264 {
00265   ats_free(m_name_str);
00266   ats_free(m_fieldlist_str);
00267   ats_free(m_printf_str);
00268   ats_free(m_agg_marshal_space);
00269   ats_free(m_format_str);
00270   m_valid = false;
00271 }
00272 
00273 
00274 
00275 
00276 
00277 
00278 
00279 
00280 
00281 
00282 
00283 
00284 
00285 LogFormat *
00286 LogFormat::format_from_specification(char *spec, char **file_name, char **file_header, LogFileFormat * file_type)
00287 {
00288   LogFormat *format;
00289   char *token;
00290   int format_id;
00291   char *format_name, *format_str;
00292 
00293   ink_assert(file_name != NULL);
00294   ink_assert(file_header != NULL);
00295   ink_assert(file_type != NULL);
00296 
00297   SimpleTokenizer tok(spec, ':');
00298 
00299   
00300   
00301   
00302   
00303   
00304   
00305   
00306   
00307   token = tok.getNext();
00308   if (token == NULL) {
00309     Debug("log-format", "token expected");
00310     return NULL;
00311   }
00312   if (strcasecmp(token, "format") == 0) {
00313     Debug("log-format", "this is a format");
00314   } else {
00315     Debug("log-format", "should be 'format'");
00316     return NULL;
00317   }
00318 
00319   
00320   
00321   
00322   
00323   token = tok.getNext();
00324   if (token == NULL) {
00325     Debug("log-format", "token expected");
00326     return NULL;
00327   }
00328   if (!strcasecmp(token, "disabled")) {
00329     Debug("log-format", "format not enabled, skipping ...");
00330     return NULL;
00331   } else if (!strcasecmp(token, "enabled")) {
00332     Debug("log-format", "enabled format");
00333   } else {
00334     Debug("log-format", "should be 'enabled' or 'disabled', not %s", token);
00335     return NULL;
00336   }
00337 
00338   
00339   
00340   
00341   token = tok.getNext();
00342   if (token == NULL) {
00343     Debug("log-format", "token expected");
00344     return NULL;
00345   }
00346   format_id = atoi(token);
00347   
00348 
00349   
00350   
00351   
00352   token = tok.getNext();
00353   if (token == NULL) {
00354     Debug("log-format", "token expected");
00355     return NULL;
00356   }
00357   format_name = token;
00358 
00359   
00360   
00361   
00362   token = tok.getNext();
00363   if (token == NULL) {
00364     Debug("log-format", "token expected");
00365     return NULL;
00366   }
00367   format_str = token;
00368 
00369   
00370   
00371   
00372   token = tok.getNext();
00373   if (token == NULL) {
00374     Debug("log-format", "token expected");
00375     return NULL;
00376   }
00377   *file_name = ats_strdup(token);
00378 
00379   
00380   
00381   
00382   token = tok.getNext();
00383   if (token == NULL) {
00384     Debug("log-format", "token expected");
00385     return NULL;
00386   }
00387   if (!strcasecmp(token, "ASCII")) {
00388     *file_type = LOG_FILE_ASCII;
00389   } else if (!strcasecmp(token, "BINARY")) {
00390     *file_type = LOG_FILE_BINARY;
00391   } else {
00392     Debug("log-format", "%s is not a valid file format (ASCII or BINARY)", token);
00393     return NULL;
00394   }
00395 
00396   
00397   
00398   
00399   token = tok.getRest();
00400   if (token == NULL) {
00401     Debug("log-format", "token expected");
00402     return NULL;
00403   }
00404   
00405   
00406   
00407   *file_header = strcmp(token, "none") == 0 ? NULL : ats_strdup(token);
00408 
00409   Debug("log-format", "custom:%d:%s:%s:%s:%d:%s", format_id, format_name, format_str, *file_name, *file_type, token);
00410 
00411   format = new LogFormat(format_name, format_str);
00412   ink_assert(format != NULL);
00413   if (!format->valid()) {
00414     delete format;
00415     return NULL;
00416   }
00417 
00418   return format;
00419 }
00420 
00421 
00422 
00423 
00424 
00425 
00426 
00427 
00428 
00429 int
00430 LogFormat::parse_symbol_string(const char *symbol_string, LogFieldList *field_list, bool *contains_aggregates)
00431 {
00432   char *sym_str;
00433   int field_count = 0;
00434   LogField *f;
00435   char *symbol, *name, *sym;
00436   LogField::Container container;
00437   LogField::Aggregate aggregate;
00438 
00439   if (symbol_string == NULL)
00440     return 0;
00441   ink_assert(field_list != NULL);
00442   ink_assert(contains_aggregates != NULL);
00443 
00444   *contains_aggregates = false; 
00445 
00446   
00447   
00448   
00449   sym_str = ats_strdup(symbol_string);
00450   symbol = strtok(sym_str, ",");
00451 
00452   while (symbol != NULL) {
00453     
00454     
00455     
00456     char *begin_paren = strchr(symbol, '(');
00457     if (begin_paren) {
00458       char *end_paren = strchr(symbol, ')');
00459       if (end_paren) {
00460         Debug("log-agg", "Aggregate symbol: %s", symbol);
00461         *begin_paren = '\0';
00462         *end_paren = '\0';
00463         name = begin_paren + 1;
00464         sym = symbol;
00465         Debug("log-agg", "Aggregate = %s, field = %s", sym, name);
00466         aggregate = LogField::valid_aggregate_name(sym);
00467         if (aggregate == LogField::NO_AGGREGATE) {
00468           Note("Invalid aggregate specification: %s", sym);
00469         } else {
00470           if (aggregate == LogField::eCOUNT && strcmp(name, "*") == 0) {
00471             f = Log::global_field_list.find_by_symbol("psql");
00472           } else {
00473             f = Log::global_field_list.find_by_symbol(name);
00474           }
00475           if (!f) {
00476             Note("Invalid field symbol %s used in aggregate " "operation", name);
00477           } else if (f->type() != LogField::sINT) {
00478             Note("Only single integer field types may be aggregated");
00479           } else {
00480             LogField *new_f = new LogField(*f);
00481             new_f->set_aggregate_op(aggregate);
00482             field_list->add(new_f, false);
00483             field_count++;
00484             *contains_aggregates = true;
00485             Debug("log-agg", "Aggregate field %s(%s) added", sym, name);
00486           }
00487         }
00488       } else {
00489         Note("Invalid aggregate field specification: no trailing " "')' in %s", symbol);
00490       }
00491     }
00492     
00493     
00494     
00495     else if (*symbol == '{') {
00496       Debug("log-format", "Container symbol: %s", symbol);
00497       f = NULL;
00498       char *name_end = strchr(symbol, '}');
00499       if (name_end != NULL) {
00500         name = symbol + 1;
00501         *name_end = 0;          
00502         sym = name_end + 1;     
00503         LogSlice slice(sym);
00504         Debug("log-format", "Name = %s, symbol = %s", name, sym);
00505         container = LogField::valid_container_name(sym);
00506         if (container == LogField::NO_CONTAINER) {
00507           Note("Invalid container specification: %s", sym);
00508         } else {
00509           f = new LogField(name, container);
00510           ink_assert(f != NULL);
00511           if (slice.m_enable) {
00512             f->m_slice = slice;
00513             Debug("log-slice", "symbol = %s, [%d:%d]", sym,
00514                   f->m_slice.m_start, f->m_slice.m_end);
00515           }
00516           field_list->add(f, false);
00517           field_count++;
00518           Debug("log-format", "Container field {%s}%s added", name, sym);
00519         }
00520       } else {
00521         Note("Invalid container field specification: no trailing " "'}' in %s", symbol);
00522       }
00523     }
00524     
00525     
00526     
00527     else {
00528       LogSlice slice(symbol);
00529       Debug("log-format", "Regular field symbol: %s", symbol);
00530       f = Log::global_field_list.find_by_symbol(symbol);
00531       if (f != NULL) {
00532         LogField *cpy = new LogField(*f);
00533         if (slice.m_enable) {
00534           cpy->m_slice = slice;
00535           Debug("log-slice", "symbol = %s, [%d:%d]", symbol,
00536                 cpy->m_slice.m_start, cpy->m_slice.m_end);
00537         }
00538         field_list->add(cpy, false);
00539         field_count++;
00540         Debug("log-format", "Regular field %s added", symbol);
00541       } else {
00542         Note("The log format symbol %s was not found in the " "list of known symbols.", symbol);
00543       }
00544     }
00545 
00546     
00547     
00548     
00549     symbol = strtok(NULL, ",");
00550   }
00551 
00552   ats_free(sym_str);
00553   return field_count;
00554 }
00555 
00556 
00557 
00558 
00559 
00560 
00561 
00562 
00563 
00564 
00565 
00566 
00567 
00568 
00569 
00570 
00571 
00572 
00573 
00574 int
00575 LogFormat::parse_escape_string(const char *str, int len)
00576 {
00577   int sum, start = 0;
00578   unsigned char a, b, c;
00579 
00580   if (str[start] != '\\' || len < 2)
00581     return -1;
00582 
00583   if (str[start + 1] == '\\')
00584     return '\\';
00585 
00586   if (len < 4)
00587     return -1;
00588 
00589   a = (unsigned char)str[start + 1];
00590   b = (unsigned char)str[start + 2];
00591   c = (unsigned char)str[start + 3];
00592 
00593   if (isdigit(a) && isdigit(b) && isdigit(b)) {
00594 
00595     sum = (a - '0')*64 + (b - '0')*8 + (c - '0');
00596 
00597     if (sum == 0 || sum >= 255) {
00598       Warning("Octal escape sequence out of range: \\%c%c%c, treat it as normal string\n", a, b, c);
00599       return -1;
00600     } else
00601       return sum;
00602 
00603   } else if (tolower(a) == 'x' && isxdigit(b) && isxdigit(c)) {
00604     int i, j;
00605     if (isdigit(b))
00606       i = b - '0';
00607     else
00608       i = toupper(b) - 'A' + 10;
00609 
00610     if (isdigit(c))
00611       j = c - '0';
00612     else
00613       j = toupper(c) - 'A' + 10;
00614 
00615     sum = i*16 + j;
00616 
00617     if (sum == 0 || sum >= 255) {
00618       Warning("Hex escape sequence out of range: \\%c%c%c, treat it as normal string\n", a, b, c);
00619       return -1;
00620     } else
00621       return sum;
00622   }
00623 
00624   return -1;
00625 }
00626 
00627 
00628 
00629 
00630 
00631 
00632 
00633 
00634 
00635 
00636 
00637 
00638 
00639 
00640 
00641 int
00642 LogFormat::parse_format_string(const char *format_str, char **printf_str, char **fields_str)
00643 {
00644   ink_assert(printf_str != NULL);
00645   ink_assert(fields_str != NULL);
00646 
00647   if (format_str == NULL) {
00648     *printf_str = *fields_str = NULL;
00649     return 0;
00650   }
00651   
00652   
00653   
00654   
00655   
00656   
00657   unsigned len = (unsigned)::strlen(format_str);
00658   *printf_str = (char *)ats_malloc(len + 1);
00659   *fields_str = (char *)ats_malloc(len + 1);
00660 
00661   unsigned printf_pos = 0;
00662   unsigned fields_pos = 0;
00663   unsigned field_count = 0;
00664   unsigned field_len;
00665   unsigned start, stop;
00666   int escape_char;
00667 
00668   for (start = 0; start < len; start++) {
00669     
00670     
00671     
00672     if ((format_str[start] == '%') && (start + 1 < len) && (format_str[start + 1] == '<')) {
00673       
00674       
00675       
00676       
00677       if (fields_pos > 0) {
00678         (*fields_str)[fields_pos++] = ',';
00679       }
00680       for (stop = start + 2; stop < len; stop++) {
00681         if (format_str[stop] == '>') {
00682           break;
00683         }
00684       }
00685       if (format_str[stop] == '>') {
00686         
00687         
00688         
00689         
00690         
00691         field_len = stop - start - 2;
00692         memcpy(&(*fields_str)[fields_pos], &format_str[start + 2], field_len);
00693         fields_pos += field_len;
00694         (*printf_str)[printf_pos++] = LOG_FIELD_MARKER;
00695         ++field_count;
00696         start = stop;
00697       } else {
00698         
00699         
00700         
00701         
00702         escape_char = parse_escape_string(&format_str[start], (len - start));
00703 
00704         if (escape_char == '\\') {
00705           start += 1;
00706           (*printf_str)[printf_pos++] = (char)escape_char;
00707         } else if (escape_char >= 0) {
00708           start += 3;
00709           (*printf_str)[printf_pos++] = (char)escape_char;
00710         } else {
00711           memcpy(&(*printf_str)[printf_pos], &format_str[start], stop - start + 1);
00712           printf_pos += stop - start + 1;
00713         }
00714       }
00715     } else {
00716       
00717       
00718       
00719       
00720       escape_char = parse_escape_string(&format_str[start], (len - start));
00721 
00722       if (escape_char == '\\') {
00723         start += 1;
00724         (*printf_str)[printf_pos++] = (char)escape_char;
00725       } else if (escape_char >= 0) {
00726         start += 3;
00727         (*printf_str)[printf_pos++] = (char)escape_char;
00728       } else {
00729         (*printf_str)[printf_pos++] = format_str[start];
00730       }
00731     }
00732   }
00733 
00734   
00735   
00736   
00737   
00738   (*fields_str)[fields_pos] = '\0';
00739   (*printf_str)[printf_pos] = '\0';
00740 
00741   Debug("log-format", "LogFormat::parse_format_string: field_count=%d, \"%s\", \"%s\"", field_count, *fields_str,
00742         *printf_str);
00743   return field_count;
00744 }
00745 
00746 
00747 
00748 
00749 
00750 
00751 
00752 void
00753 LogFormat::display(FILE * fd)
00754 {
00755   static const char *types[] = {
00756     "SQUID_LOG",
00757     "COMMON_LOG",
00758     "EXTENDED_LOG",
00759     "EXTENDED2_LOG",
00760     "LOG_FORMAT_CUSTOM",
00761     "LOG_FORMAT_TEXT"
00762   };
00763 
00764   fprintf(fd, "--------------------------------------------------------\n");
00765   fprintf(fd, "Format : %s (%s) (%p), %u fields.\n", m_name_str, types[m_format_type], this, m_field_count);
00766   if (m_fieldlist_str) {
00767     fprintf(fd, "Symbols: %s\n", m_fieldlist_str);
00768     fprintf(fd, "Fields :\n");
00769     m_field_list.display(fd);
00770   } else {
00771     fprintf(fd, "Fields : None\n");
00772   }
00773   fprintf(fd, "--------------------------------------------------------\n");
00774 }
00775 
00776 void
00777 LogFormat::displayAsXML(FILE * fd)
00778 {
00779   if (valid()) {
00780     fprintf(fd,
00781             "<LogFormat>\n"
00782             "  <Name     = \"%s\"/>\n"
00783             "  <Format   = \"%s\"/>\n"
00784             "  <Interval = \"%ld\"/>\n" "</LogFormat>\n", m_name_str, m_format_str, m_interval_sec);
00785   } else {
00786     fprintf(fd, "INVALID FORMAT\n");
00787   }
00788 }
00789 
00790 
00791 
00792 
00793 
00794 LogFormatList::LogFormatList()
00795 {
00796 }
00797 
00798 LogFormatList::~LogFormatList()
00799 {
00800   clear();
00801 }
00802 
00803 void
00804 LogFormatList::clear()
00805 {
00806   LogFormat *f;
00807   while ((f = m_format_list.dequeue())) {
00808     delete f;
00809   }
00810 }
00811 
00812 void
00813 LogFormatList::add(LogFormat * format, bool copy)
00814 {
00815   ink_assert(format != NULL);
00816 
00817   if (copy) {
00818     m_format_list.enqueue(new LogFormat(*format));
00819   } else {
00820     m_format_list.enqueue(format);
00821   }
00822 }
00823 
00824 LogFormat *
00825 LogFormatList::find_by_name(const char *name) const
00826 {
00827   for (LogFormat * f = first(); f; f = next(f)) {
00828     if (!strcmp(f->name(), name)) {
00829       return f;
00830     }
00831   }
00832   return NULL;
00833 }
00834 
00835 unsigned
00836 LogFormatList::count()
00837 {
00838   unsigned cnt = 0;
00839   for (LogFormat * f = first(); f; f = next(f)) {
00840     cnt++;
00841   }
00842   return cnt;
00843 }
00844 
00845 void
00846 LogFormatList::display(FILE * fd)
00847 {
00848   for (LogFormat * f = first(); f; f = next(f)) {
00849     f->display(fd);
00850   }
00851 }