00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 
00021 
00022 
00023 
00024 
00025 
00026 
00027 
00028 
00029 
00030 #include "ink_config.h"
00031 #include <assert.h>
00032 #include <memory.h>
00033 #include <stdlib.h>
00034 #include <unistd.h>
00035 #include <sys/types.h>
00036 #include <sys/mman.h>
00037 #include "ink_thread.h"
00038 #include "ink_atomic.h"
00039 #include "ink_queue.h"
00040 #include "ink_memory.h"
00041 #include "ink_error.h"
00042 #include "ink_assert.h"
00043 #include "ink_stack_trace.h"
00044 #include "ink_queue_ext.h"
00045 
00046 #if TS_USE_RECLAIMABLE_FREELIST
00047 
00048 #define CEIL(x,y)   (((x) + (y) - 1L) / (y))
00049 #define ROUND(x,l)  (((x) + ((l) - 1L)) & ~((l) - 1L))
00050 #define ITEM_MAGIC 0xFF
00051 
00052 #define MAX_NUM_FREELIST  1024
00053 
00054 
00055 
00056 
00057 float cfg_reclaim_factor = 0.3;
00058 int64_t cfg_max_overage = 10;
00059 int64_t cfg_enable_reclaim = 0;
00060 
00061 
00062 
00063 
00064 
00065 
00066 int64_t cfg_debug_filter;
00067 
00068 static uint32_t nr_freelist;
00069 static uint64_t total_mem_in_byte;
00070 static __thread InkThreadCache *ThreadCaches[MAX_NUM_FREELIST];
00071 
00072 #define MAX_CHUNK_BYTE_SIZE (ats_pagesize() << 8)
00073 
00074 
00075 
00076 
00077 #define show_info(tag, f, pCache) \
00078   __show_info(stdout, __FILE__, __LINE__, tag, f, pCache)
00079 #define error_info(tag, f, pCache) \
00080   __show_info(stderr, __FILE__, __LINE__, tag, f, pCache)
00081 
static inline void
__show_info(FILE *fp, const char *file, int line,
            const char *tag, InkFreeList *f, InkThreadCache *pCache)
{
  /* Emit one diagnostic line for a thread cache: total freelist memory in
   * MB, the cache counters (t=total, f=free, m=min, avg=average EMA,
   * M=outstanding mallocs) and the freelist geometry (csbase=requested
   * chunk size, csize=actual items per chunk, tsize=item bytes,
   * cbsize=chunk bytes).  Invoked via the show_info/error_info macros,
   * which supply __FILE__/__LINE__. */
  fprintf(fp, "[%lx:%02u][%s:%05d][%s] %6.2fM t:%-8uf:%-4u m:%-4u avg:%-6.1f"
          " M:%-4u csbase:%-4u csize:%-4u tsize:%-6u cbsize:%u\n",
         (long)ink_thread_self(), f->thread_cache_idx, file, line, tag,
         ((double)total_mem_in_byte/1024/1024),
         pCache->nr_total,
         pCache->nr_free,
         pCache->nr_min,
         pCache->nr_average,
         pCache->nr_malloc,
         f->chunk_size_base,
         f->chunk_size,
         f->type_size,
         f->chunk_byte_size);
}
00101 
static inline void
memory_alignment_init(InkFreeList *f, uint32_t type_size, uint32_t chunk_size,
                      uint32_t alignment)
{
  /* Compute the chunk geometry for freelist f: how many items fit in one
   * mmap'ed chunk (f->chunk_size), the chunk's byte size rounded to whole
   * pages (f->chunk_byte_size), and the power-of-two alignment that lets
   * get_chunk_info_addr() recover a chunk base by masking an item address
   * (f->chunk_addr_mask).  The caller's requested chunk_size is kept in
   * f->chunk_size_base as a target, not a guarantee. */
  uint32_t chunk_byte_size, user_alignment, user_type_size;

  f->chunk_size_base = chunk_size;
  user_alignment = alignment;
  user_type_size = type_size;
  chunk_size = 1;

#ifdef DEBUG
  /* Debug builds reserve one extra byte per item in the layout math —
   * presumably room for the per-item magic bytes checked by
   * set/clear_chunk_item_magic (NOTE(review): confirm against the
   * InkChunkInfo::item_magic definition). */
  type_size += sizeof(unsigned char);
#endif

  /* Start from a single page holding one item plus the InkChunkInfo
   * header; only try to pack chunk_size_base items if even one item
   * fits under the MAX_CHUNK_BYTE_SIZE cap. */
  alignment = ats_pagesize();
  chunk_byte_size = ROUND(type_size + sizeof(InkChunkInfo), ats_pagesize());
  if (chunk_byte_size <= MAX_CHUNK_BYTE_SIZE) {
    /* Ideal size: chunk_size_base items plus the header, page-rounded. */
    chunk_byte_size = ROUND(type_size * f->chunk_size_base
                            + sizeof(InkChunkInfo), ats_pagesize());

    if (chunk_byte_size > MAX_CHUNK_BYTE_SIZE) {
      /* Too big: fit as many items as the cap allows, then re-round. */
      chunk_size = (MAX_CHUNK_BYTE_SIZE - sizeof(InkChunkInfo)) / type_size;
      chunk_byte_size = ROUND(type_size * chunk_size + sizeof(InkChunkInfo),
                              ats_pagesize());
    } else
      chunk_size = (chunk_byte_size - sizeof(InkChunkInfo)) / type_size;

    if (chunk_size > 1) {
      /* Grow alignment to the next power of two >= chunk_byte_size so
       * masking any interior address with ~(alignment-1) yields the
       * chunk base. */
      while (alignment < chunk_byte_size)
        alignment <<= 1;
    }
  }

  /* Honor a caller-requested alignment stricter than what we derived. */
  if (user_alignment > alignment) {
    alignment = ats_pagesize();
    while (alignment < user_alignment)
      alignment <<= 1;
  }
  ink_release_assert(alignment <= MAX_CHUNK_BYTE_SIZE);

  /* Note: f->type_size stores the caller's size, without the DEBUG pad. */
  f->alignment = alignment;
  f->type_size = user_type_size;
  f->chunk_size = chunk_size;
  f->chunk_addr_mask = ~((uintptr_t)(alignment - 1));
  f->chunk_byte_size = chunk_byte_size;

  return;
}
00165 
00166 
00167 
00168 
00169 
00170 
00171 
/*
 * mmap() a size-byte anonymous region whose start address is a multiple
 * of alignment.  size must be page-aligned and alignment a power of two
 * (guaranteed by memory_alignment_init).  Fatal on mapping failure.
 */
static void*
mmap_align(size_t size, size_t alignment) {
  uintptr_t ptr;
  size_t adjust, extra = 0;

  ink_assert(size % ats_pagesize() == 0);

  /* Over-allocate by (alignment - pagesize) so an aligned start address
   * is guaranteed to fall inside the mapping; the surplus is trimmed
   * below. */
  if (alignment > ats_pagesize()) {
    extra = alignment - ats_pagesize();
  }
  void* result = mmap(NULL, size + extra,
                      PROT_READ|PROT_WRITE,
                      MAP_PRIVATE|MAP_ANON,
                      -1, 0);
  if (result == MAP_FAILED) {
    ink_stack_trace_dump();
    const char *err_str = "Out of memory, or the process's maximum number of "
                          "mappings would have been exceeded(if so, you can "
                          "enlarge 'vm.max_map_count' by sysctl in linux).";
    ink_fatal(1, "Failed to mmap %zu bytes, %s", size,
              (errno == ENOMEM) ? err_str : strerror(errno));
  }

  /* adjust = distance from the mapping start to the first aligned
   * address within it (0 if already aligned). */
  adjust = 0;
  ptr = (uintptr_t)result;
  if ((ptr & (alignment - 1)) != 0) {
    adjust = alignment - (ptr & (alignment - 1));
  }

  /* Return the unused head and tail of the over-allocation to the
   * kernel, keeping exactly [ptr+adjust, ptr+adjust+size). */
  if (adjust > 0) {
    munmap((void*)ptr, adjust);
  }
  if (adjust < extra) {
    munmap((void*)(ptr + adjust + size), extra - adjust);
  }

  ptr += adjust;
  ink_assert((ptr & (alignment -1)) == 0);
  return (void*)ptr;
}
00215 
00216 #ifdef DEBUG
static inline uint32_t
get_chunk_item_magic_idx(InkFreeList *f, void *item, InkChunkInfo **ppChunk,
                          bool do_check = false)
{
  /* DEBUG helper: return item's slot index within its chunk and, if
   * *ppChunk is NULL, locate the chunk's InkChunkInfo header for the
   * caller.  With do_check, abort on out-of-range or misaligned item
   * addresses (catches frees of corrupted/foreign pointers). */
  uint32_t idx;
  uintptr_t chunk_addr;

  /* Multi-item chunks are alignment-masked to their base; a single-item
   * chunk begins at the item itself. */
  if (f->chunk_size > 1)
    chunk_addr = (uintptr_t)item & f->chunk_addr_mask;
  else
    chunk_addr = (uintptr_t)item;

  /* The InkChunkInfo header sits immediately after the items. */
  if (*ppChunk == NULL)
    *ppChunk = (InkChunkInfo *)(chunk_addr + f->type_size * f->chunk_size);

  idx = ((uintptr_t)item - chunk_addr) / f->type_size;

  if (do_check && (idx >= f->chunk_size
                   || ((uintptr_t)item - chunk_addr) % f->type_size)) {
    ink_stack_trace_dump();
    ink_fatal(1, "Invalid address:%p, chunk_addr:%p, type_size:%d, chunk_size:%u, idx:%u",
              item, (void *)chunk_addr, f->type_size, f->chunk_size, idx);
  }

  return idx;
}
00243 
00244 static inline void
00245 set_chunk_item_magic(InkFreeList *f, InkChunkInfo *pChunk, void *item)
00246 {
00247   uint32_t idx;
00248 
00249   idx = get_chunk_item_magic_idx(f, item, &pChunk);
00250 
00251   ink_release_assert(pChunk->item_magic[idx] == 0);
00252 
00253   pChunk->item_magic[idx] = ITEM_MAGIC;
00254 }
00255 
00256 static inline void
00257 clear_chunk_item_magic(InkFreeList *f, InkChunkInfo *pChunk, void *item)
00258 {
00259   uint32_t idx;
00260 
00261   idx = get_chunk_item_magic_idx(f, item, &pChunk, true);
00262 
00263   ink_release_assert(pChunk->item_magic[idx] == ITEM_MAGIC);
00264 
00265   pChunk->item_magic[idx] = 0;
00266 }
00267 #else
00268 #define set_chunk_item_magic(a, b, c)
00269 #define clear_chunk_item_magic(a, b, c)
00270 #endif
00271 
00272 static inline InkChunkInfo *
00273 get_chunk_info_addr(InkFreeList *f, void *item)
00274 {
00275   uintptr_t chunk_addr;
00276 
00277   if (f->chunk_size > 1)
00278     chunk_addr = (uintptr_t)item & f->chunk_addr_mask;
00279   else
00280     chunk_addr = (uintptr_t)item;
00281 
00282   return (InkChunkInfo *)(chunk_addr + f->type_size * f->chunk_size);
00283 }
00284 
static inline InkChunkInfo *
ink_chunk_create(InkFreeList *f, InkThreadCache *pCache)
{
  /* Map one aligned chunk for pCache, initialize its InkChunkInfo
   * header (placed directly after the chunk_size items), thread all
   * items onto the chunk's inner free list, and register the chunk on
   * the cache's free-chunk list.  Updates the global allocation
   * counters atomically. */
  uint32_t i;
  uint32_t type_size, chunk_size;
  void *chunk_addr, *curr, *next;
  InkChunkInfo *pChunk;

  chunk_addr = mmap_align(f->chunk_byte_size, f->alignment);
  pChunk = (InkChunkInfo *)((char *)chunk_addr + f->type_size * f->chunk_size);

  type_size = f->type_size;
  chunk_size = f->chunk_size;

  pChunk->tid = ink_thread_self();
  pChunk->head = chunk_addr;
  pChunk->type_size = type_size;
  pChunk->chunk_size = chunk_size;
  pChunk->length = f->chunk_byte_size;
  pChunk->allocated = 0;
  pChunk->pThreadCache = pCache;
  pChunk->link = Link<InkChunkInfo>();

#ifdef DEBUG
  /* The memset is skipped on Linux — presumably because fresh anonymous
   * mappings are zero-filled there, leaving item_magic already cleared
   * (NOTE(review): MAP_ANON zero-fill is POSIX-wide; confirm why other
   * platforms clear explicitly). */
#if !defined(linux)
  memset(pChunk->item_magic, 0, chunk_size * sizeof(unsigned char));
#endif
#endif

  /* Link every item into a singly linked inner free list: each free
   * item's first word points at the next item. */
  curr = pChunk->head;
  pChunk->inner_free_list = curr;
  for (i = 1; i < chunk_size; i++) {
    next = (void *)((char *)curr + type_size);
    *(void **)curr = next;
    curr = next;
  }
  *(void **)curr = NULL;

  ink_atomic_increment(&f->allocated, chunk_size);
  ink_atomic_increment(&total_mem_in_byte, f->chunk_byte_size);

  pCache->free_chunk_list.push(pChunk);
  pCache->nr_free_chunks++;
  return pChunk;
}
00335 
static inline void
ink_chunk_delete(InkFreeList *f, InkThreadCache *pCache, InkChunkInfo *pChunk)
{
  /* Unlink a fully free chunk from pCache and return its memory to the
   * kernel.  Only callable when every item has been freed back to the
   * chunk's inner free list. */
  void *chunk_addr = pChunk->head;

  ink_assert(pChunk->allocated == 0);

  pCache->free_chunk_list.remove(pChunk);
  pCache->nr_free_chunks--;

  if (unlikely(munmap(chunk_addr, f->chunk_byte_size))) {
    ink_stack_trace_dump();
    ink_fatal(1, "Failed to munmap %u bytes, %s", f->chunk_byte_size, strerror(errno));
  }

  ink_atomic_increment((int *)&f->allocated, -f->chunk_size);

  /* total_mem_in_byte is shared across all threads/freelists, so the
   * decrement must be atomic even though the chunk itself was owned by
   * a single cache. */
  ink_atomic_decrement(&total_mem_in_byte, f->chunk_byte_size);
}
00363 
static inline void *
malloc_whole_chunk(InkFreeList *f, InkThreadCache *pCache, InkChunkInfo *pChunk)
{
  /* Consume an entirely free chunk: the first item is returned to the
   * caller, the remaining chunk_size - 1 items are published on the
   * cache's lock-free outer free list for later fast-path allocation. */
  uint32_t i;
  uint32_t type_size, chunk_size;
  void *next, *item;

  ink_assert(pChunk->allocated == 0);

  type_size = f->type_size;
  chunk_size = f->chunk_size;

  item = pChunk->head;
  for (i = 1; i < chunk_size; i++) {
    next = (void *)((char *)item + i * type_size);
    ink_atomic_increment(&pCache->nr_free, 1);
    ink_atomiclist_push(&pCache->outer_free_list, next);
  }

  /* From the chunk's perspective everything is now allocated (moved to
   * the outer list or handed out); its inner free list is exhausted. */
  pChunk->allocated += chunk_size;
  pChunk->inner_free_list = NULL;
  pCache->nr_total += chunk_size;

  return item;
}
00389 
00390 static inline void *
00391 malloc_from_chunk(InkFreeList * ,
00392                   InkThreadCache *pCache, InkChunkInfo *pChunk)
00393 {
00394   void *item;
00395 
00396   if ((item = pChunk->inner_free_list)) {
00397     pChunk->inner_free_list  = *(void **)item;
00398     pChunk->allocated++;
00399     pCache->nr_total++;
00400   }
00401 
00402   return item;
00403 }
00404 
static inline void
free_to_chunk(InkFreeList *f, InkThreadCache *pCache, void *item)
{
  /* Return item to its owning chunk's inner free list; unmap the chunk
   * entirely once its last outstanding item comes home. */
  InkChunkInfo *pChunk;

  pChunk = get_chunk_info_addr(f, item);
  pChunk->allocated--;
  pCache->nr_total--;

  /* Push onto the intrusive singly linked inner free list. */
  *(void **)item = pChunk->inner_free_list;
  pChunk->inner_free_list = item;

  if (pChunk->allocated == 0)
    ink_chunk_delete(f, pCache, pChunk);
}
00420 
static inline void *
malloc_from_cache(InkFreeList *f, InkThreadCache *pCache, uint32_t nr)
{
  /* Allocate nr items from pCache's chunks: the nr-th item is returned
   * to the caller, the first nr - 1 are published on the outer free
   * list to refill the fast path.  Creates a new chunk if the existing
   * ones cannot satisfy the request. */
  void *item;
  InkChunkInfo *pChunk;

  /* First drain the inner free lists of existing chunks. */
  pChunk = pCache->free_chunk_list.head;
  while (pChunk) {
    while ((item = malloc_from_chunk(f, pCache, pChunk))) {
      if (--nr == 0)
        return item;

      ink_atomic_increment(&pCache->nr_free, 1);
      ink_atomiclist_push(&pCache->outer_free_list, item);
    }
    pChunk = pChunk->link.next;
  }

  /* Existing chunks exhausted: map a fresh one.  If exactly a whole
   * chunk is still needed, take the optimized whole-chunk path. */
  pChunk = ink_chunk_create(f, pCache);
  if (nr == f->chunk_size)
    return malloc_whole_chunk(f, pCache, pChunk);

  while ((item = malloc_from_chunk(f, pCache, pChunk))) {
    if (--nr == 0)
      return item;

    ink_atomic_increment(&pCache->nr_free, 1);
    ink_atomiclist_push(&pCache->outer_free_list, item);
  }

  /* A fresh chunk holds chunk_size >= nr items, so reaching this point
   * indicates corrupted accounting. */
  ink_assert(0);
  return NULL;
}
00454 
static inline void
free_to_cache(InkFreeList *f, InkThreadCache *pCache, void *item, uint32_t nr)
{
  /* Drain up to nr items from the cache's lock-free outer free list
   * back into their chunks (possibly unmapping empty chunks).  item, if
   * non-NULL, is freed to its chunk first and is NOT counted against
   * nr_free, since it was never on the outer list. */
  uint32_t n = nr;

  if (item)
    free_to_chunk(f, pCache, item);

  while (n && (item = ink_atomiclist_pop(&pCache->outer_free_list))) {
    free_to_chunk(f, pCache, item);
    n--;
  }
  /* Exactly nr - n items actually left the outer list (the pop may run
   * dry early); adjust the counter once, atomically. */
  ink_atomic_increment((int *)&pCache->nr_free, -(nr - n));
}
00469 
00470 static inline void
00471 refresh_average_info(InkThreadCache *pCache)
00472 {
00473   uint32_t nr_free;
00474   float nr_average;
00475 
00476   nr_free = pCache->nr_free;
00477   nr_average = pCache->nr_average;
00478 
00479   if (pCache->status == 1 || nr_free < pCache->nr_min)
00480     pCache->nr_min = nr_free;
00481 
00482   pCache->nr_average = (nr_average * (1 - cfg_reclaim_factor)) +
00483                        (nr_free * cfg_reclaim_factor);
00484 }
00485 
00486 static inline bool
00487 need_to_reclaim(InkFreeList *f, InkThreadCache *pCache)
00488 {
00489   if (!cfg_enable_reclaim)
00490     return false;
00491 
00492   if(pCache->nr_free >= pCache->nr_average &&
00493      pCache->nr_total > f->chunk_size_base) {
00494     if (pCache->nr_overage++ >= cfg_max_overage) {
00495       pCache->nr_overage = 0;
00496       return true;
00497     }
00498     return false;
00499   }
00500 
00501   pCache->nr_overage = 0;
00502   return false;
00503 }
00504 
/*
 * Create a reclaimable freelist for items of type_size bytes (or share
 * an existing one of identical type_size), storing it in *fl.
 * chunk_size is the requested items-per-chunk; alignment must be a
 * power of two.  Registers the list on the global freelists chain.
 * NOTE(review): no locking guards the freelists walk/update here —
 * presumably all init calls happen before threads spawn; confirm.
 */
void
reclaimable_freelist_init(InkFreeList **fl, const char *name,
                          uint32_t type_size, uint32_t chunk_size,
                          uint32_t alignment)
{
  InkFreeList *f;
  ink_freelist_list *fll = freelists;

  /* alignment must be a power of two (x & (x-1) == 0). */
  ink_assert(!(alignment & (alignment - 1)));

  /* Freelists of the same type_size are shared: bump the refcount of an
   * existing match instead of creating a duplicate. */
  while (fll) {
    
    if (fll->fl->type_size == type_size) {
      fll->fl->refcnt++;
      *fl = fll->fl;
      return;
    }
    fll = fll->next;
  }

  /* No match: allocate a new freelist and prepend it to the global list. */
  f = (InkFreeList *)ats_memalign(alignment, sizeof(InkFreeList));
  fll = (ink_freelist_list *)ats_memalign(alignment, sizeof(ink_freelist_list));
  fll->fl = f;
  fll->next = freelists;
  freelists = fll;

  f->name = name;
  f->used = 0;
  f->allocated = 0;
  f->allocated_base = 0;
  f->used_base = 0;

  memory_alignment_init(f, type_size, chunk_size, alignment);

  f->refcnt = 1;
  f->pThreadCache = NULL;
  f->nr_thread_cache = 0;
  /* Each freelist gets a fixed slot in every thread's ThreadCaches[]. */
  f->thread_cache_idx = nr_freelist++;
  ink_assert(f->thread_cache_idx < MAX_NUM_FREELIST);
  ink_mutex_init(&f->lock, "InkFreeList Lock");

  *fl = f;
}
00552 
/*
 * Allocate one item from freelist f for the calling thread.
 * Allocation paths, in order:
 *   1. first call on this thread  -> build its thread cache
 *   2. fast path                  -> pop this thread's outer free list
 *   3. medium path                -> steal from a sibling thread's cache
 *   4. slow path                  -> reclaim hoarded memory, then carve
 *                                    items out of our own chunks
 */
void *
reclaimable_freelist_new(InkFreeList *f)
{
  void *ptr;
  uint32_t i, nr;
  uint32_t old_value;
  uint32_t num_to_move;
  InkChunkInfo *pChunk = NULL;
  InkThreadCache *pCache, *pNextCache;

  ink_atomic_increment(&f->used, 1);

  /* Path 1: first allocation from f on this thread — create and link
   * its thread-local cache. */
  if (unlikely((pCache = ThreadCaches[f->thread_cache_idx]) == NULL)) {
    pCache = (InkThreadCache *) ats_calloc(1, sizeof(InkThreadCache));

    pCache->f = f;
    pCache->free_chunk_list = DLL<InkChunkInfo>();

    /* f->lock serializes atomic-list init and insertion of this cache
     * into f's circular ring of thread caches. */
    ink_mutex_acquire(&f->lock);
    ink_atomiclist_init(&pCache->outer_free_list, f->name, 0);

    /* Pre-map enough chunks to cover chunk_size_base items; pChunk ends
     * up pointing at the last (still fully free) chunk. */
    nr = CEIL(f->chunk_size_base, f->chunk_size);
    for (i = 0; i < nr; i++) {
      pChunk = ink_chunk_create(f, pCache);
    }

    /* Account for the item we are about to return. */
    pCache->nr_malloc = 1;

    ThreadCaches[f->thread_cache_idx] = pCache;

    if (f->pThreadCache) {
      /* Splice into the existing circular doubly linked ring. */
      pCache->next = f->pThreadCache;
      pCache->prev = f->pThreadCache->prev;
      pCache->next->prev = pCache;
      pCache->prev->next = pCache;
    } else {
      pCache->next = pCache;
      pCache->prev = pCache;
    }

    f->pThreadCache = pCache;
    f->nr_thread_cache++;

    ink_mutex_release(&f->lock);

    /* Split the last chunk: return its first item, stock the rest on
     * the outer free list. */
    ptr = malloc_whole_chunk(f, pCache, pChunk);
    set_chunk_item_magic(f, pChunk, ptr);
    return ptr;
  }

  /* An allocation resets status so the next free triggers an EMA
   * refresh (see reclaimable_freelist_free). */
  pCache->status = 0;

  /* Path 2: fast path — pop from our own lock-free outer free list. */
  if ((ptr = ink_atomiclist_pop(&pCache->outer_free_list))) {
    old_value = ink_atomic_increment((int *)&pCache->nr_free, -1);
    ink_release_assert(old_value > 0);
    ink_atomic_increment(&pCache->nr_malloc, 1);
    set_chunk_item_magic(f, NULL, ptr);
    return ptr;
  }

  /* Path 3: walk the ring of sibling thread caches and steal a cached
   * item if any of them has one. */
  pNextCache = pCache->next;
  while (pNextCache != pCache) {
    if ((ptr = ink_atomiclist_pop(&pNextCache->outer_free_list))) {
      old_value = ink_atomic_increment((int *)&pNextCache->nr_free, -1);
      ink_release_assert(old_value > 0);
      ink_atomic_increment(&pNextCache->nr_malloc, 1);
      set_chunk_item_magic(f, NULL, ptr);
      return ptr;
    }
    pNextCache = pNextCache->next;
  }

  /* Path 4a: before mapping new memory, reclaim what this thread's
   * caches (across ALL freelists — ThreadCaches is thread-local) are
   * hoarding above their moving average. */
  for (i = 0; i < nr_freelist; i++) {
    if ((pNextCache = ThreadCaches[i]) == NULL)
      continue;

    if (need_to_reclaim(pNextCache->f, pNextCache)) {
      if (cfg_debug_filter & 0x1)
        show_info("F", pNextCache->f, pNextCache);

      num_to_move = MIN(pNextCache->nr_average, pNextCache->nr_free);

      free_to_cache(pNextCache->f, pNextCache, NULL, num_to_move);

      if (cfg_debug_filter & 0x1)
        show_info("-", pNextCache->f, pNextCache);

      refresh_average_info(pNextCache);
    }
  }

  /* Path 4b: carve a chunk's worth of items out of our own chunks
   * (mapping a new chunk if necessary). */
  if (cfg_debug_filter & 0x2)
    show_info("M", f, pCache);
  ptr = malloc_from_cache(f, pCache, f->chunk_size);
  if (cfg_debug_filter & 0x2)
    show_info("+", f, pCache);

  refresh_average_info(pCache);
  ink_atomic_increment(&pCache->nr_malloc, 1);
  set_chunk_item_magic(f, NULL, ptr);
  return ptr;
}
00664 
/*
 * Return item to freelist f.  The item goes back to the cache of the
 * thread that OWNS its chunk (which may not be the calling thread),
 * hence every counter update here is atomic.  NULL is a no-op.
 */
void
reclaimable_freelist_free(InkFreeList *f, void *item)
{
  InkChunkInfo *pChunk;
  InkThreadCache *pCache;

  if (item == NULL)
    return;

  pChunk = get_chunk_info_addr(f, item);
  clear_chunk_item_magic(f, pChunk, item);
  pCache = pChunk->pThreadCache;

  ink_atomic_increment((int *)&pCache->nr_malloc, -1);
  /* First free since the owner's last allocation (status 0 -> 1): one
   * thread wins the CAS and refreshes the average/low-water mark. */
  if (ink_atomic_cas((int *)&pCache->status, 0, 1))
    refresh_average_info(pCache);

  /* Publish on the owner's lock-free outer list for fast reuse. */
  ink_atomic_increment(&pCache->nr_free, 1);
  ink_atomiclist_push(&pCache->outer_free_list, item);
  ink_atomic_increment(&f->used, -1);
}
00686 #endif