#ifndef MARKSWEEP_HEAP_H
#define MARKSWEEP_HEAP_H

#include <stdint.h>  // uint8_t
#include <stdio.h>   // dprintf
#include <stdlib.h>

#include <algorithm>  // std::fill
#include <vector>

#include "mycpp/common.h"
#include "mycpp/gc_obj.h"

class MarkSet {
 public:
  MarkSet() : bits_() {
  }

  // ReInit() must be called at the start of MarkObjects().  Allocate() should
  // keep track of the maximum object ID.
  void ReInit(int max_obj_id) {
    // https://stackoverflow.com/questions/8848575/fastest-way-to-reset-every-value-of-stdvectorint-to-0
    std::fill(bits_.begin(), bits_.end(), 0);
    int max_byte_index = (max_obj_id >> 3) + 1;  // round up
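    // e.g. max_obj_id == 41: max_byte_index == (41 >> 3) + 1 == 6, i.e.
    // 48 bits, covering obj_ids 0..47 (illustrative values)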
    // log("ReInit max_byte_index %d", max_byte_index);
    bits_.resize(max_byte_index);
  }

  // Called by MarkObjects()
  void Mark(int obj_id) {
    DCHECK(obj_id >= 0);
    // log("obj id %d", obj_id);
    DCHECK(!IsMarked(obj_id));
    int byte_index = obj_id >> 3;  // 8 bits per byte
    int bit_index = obj_id & 0b111;
    // log("byte_index %d %d", byte_index, bit_index);
    bits_[byte_index] |= (1 << bit_index);
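    // Worked example (illustrative): obj_id 19 gives byte_index 19 >> 3 == 2
    // and bit_index 19 & 0b111 == 3, so this sets bits_[2] |= 0b1000.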
  }

  // Called by Sweep()
  bool IsMarked(int obj_id) {
    DCHECK(obj_id >= 0);
    int byte_index = obj_id >> 3;
    int bit_index = obj_id & 0b111;
    return bits_[byte_index] & (1 << bit_index);
  }

  void Debug() {
    int n = bits_.size();
    dprintf(2, "[ ");
    for (int i = 0; i < n; ++i) {
      dprintf(2, "%02x ", bits_[i]);
    }
    dprintf(2, "] (%d bytes) \n", n);
    dprintf(2, "[ ");
    int num_bits = 0;
    for (int i = 0; i < n; ++i) {
      for (int j = 0; j < 8; ++j) {
        int bit = (bits_[i] & (1 << j)) != 0;
        dprintf(2, "%d", bit);
        num_bits += bit;
      }
    }
    dprintf(2, " ] (%d bits set)\n", num_bits);
  }

  std::vector<uint8_t> bits_;  // bit vector indexed by obj_id
};
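
// A minimal sketch of how a MarkSet is driven over one collection cycle:
// ReInit() clears and sizes the bitmap, Mark() is called while tracing, and
// IsMarked() is queried while sweeping.  (Illustrative usage; the variable
// names are not part of this header.)
//
//   MarkSet mark_set;
//   mark_set.ReInit(greatest_obj_id);  // all bits zero, storage sized
//   mark_set.Mark(42);                 // trace phase: object 42 is reachable
//   if (!mark_set.IsMarked(42)) {      // sweep phase: reclaim if unmarked
//     ...
//   }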

// A simple pool allocator for small objects.  It maintains an ever-growing
// number of Blocks, each consisting of a fixed number of fixed-size Cells.
// Memory is handed out one Cell at a time.
//
// Note: within the context of the Pool allocator, we refer to object IDs as
// cell IDs, because in addition to identifying an object, they're also used
// to index into the Cell storage.
template <int CellsPerBlock, size_t CellSize>
class Pool {
 public:
  static constexpr size_t kMaxObjSize = CellSize;
  static constexpr int kBlockSize = CellSize * CellsPerBlock;

  Pool() = default;

  void* Allocate(int* obj_id) {
    num_allocated_++;

    if (!free_list_) {
      // Allocate a new Block and add every new Cell to the free list.
      Block* block = static_cast<Block*>(malloc(sizeof(Block)));
      blocks_.push_back(block);
      bytes_allocated_ += kBlockSize;
      num_free_ += CellsPerBlock;

      // The starting cell_id for Cells in this block.
      int cell_id = (blocks_.size() - 1) * CellsPerBlock;
      for (Cell& cell : block->cells) {
        FreeCell* free_cell = reinterpret_cast<FreeCell*>(cell);
        free_cell->id = cell_id++;
        free_cell->next = free_list_;
        free_list_ = free_cell;
      }
    }

    FreeCell* cell = free_list_;
    free_list_ = free_list_->next;
    num_free_--;
    *obj_id = cell->id;
    return cell;
  }

  void PrepareForGc() {
    DCHECK(!gc_underway_);
    gc_underway_ = true;
    mark_set_.ReInit(blocks_.size() * CellsPerBlock);
  }

  bool IsMarked(int cell_id) {
    DCHECK(gc_underway_);
    return mark_set_.IsMarked(cell_id);
  }

  void Mark(int cell_id) {
    DCHECK(gc_underway_);
    mark_set_.Mark(cell_id);
  }

  void Sweep() {
    DCHECK(gc_underway_);
    // Iterate over every Cell, linking the free ones into a new free list.
    num_free_ = 0;
    free_list_ = nullptr;
    int cell_id = 0;
    for (Block* block : blocks_) {
      for (Cell& cell : block->cells) {
        if (!mark_set_.IsMarked(cell_id)) {
          num_free_++;
          FreeCell* free_cell = reinterpret_cast<FreeCell*>(cell);
          free_cell->id = cell_id;
          free_cell->next = free_list_;
          free_list_ = free_cell;
        }
        cell_id++;
      }
    }
    gc_underway_ = false;
  }

  void Free() {
    for (Block* block : blocks_) {
      free(block);
    }
    blocks_.clear();
  }

  int num_allocated() {
    return num_allocated_;
  }

  int64_t bytes_allocated() {
    return bytes_allocated_;
  }

  int num_live() {
    return blocks_.size() * CellsPerBlock - num_free_;
  }

 private:
  using Cell = uint8_t[CellSize];

  struct Block {
    Cell cells[CellsPerBlock];
  };

  // Unused/free cells are tracked via a linked list of FreeCells.  The
  // FreeCells are stored in the unused Cells themselves, so tracking them
  // takes no extra memory.
  struct FreeCell {
    int id;
    FreeCell* next;
  };
  static_assert(CellSize >= sizeof(FreeCell), "CellSize is too small");

  // Whether a GC is underway, for asserting that calls are in order.
  bool gc_underway_ = false;

  FreeCell* free_list_ = nullptr;
  int num_free_ = 0;
  int num_allocated_ = 0;
  int64_t bytes_allocated_ = 0;
  std::vector<Block*> blocks_;
  MarkSet mark_set_;

  DISALLOW_COPY_AND_ASSIGN(Pool<CellsPerBlock COMMA CellSize>);
};
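
// A minimal sketch of one Pool allocate/collect cycle, following the protocol
// that the DCHECKs above enforce: PrepareForGc(), then Mark() for each live
// cell, then Sweep().  The template arguments and names are illustrative.
//
//   Pool<682, 24> pool;                 // 682 Cells of 24 bytes per Block
//   int cell_id;
//   void* p = pool.Allocate(&cell_id);  // hands out one 24-byte Cell
//
//   pool.PrepareForGc();                // resets the Pool's MarkSet
//   pool.Mark(cell_id);                 // keep this Cell across the sweep
//   pool.Sweep();                       // unmarked Cells rejoin the free list
//
//   pool.Free();                        // release the Blocks, e.g. at exit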

class MarkSweepHeap {
 public:
  // reserve 32 frames to start
  MarkSweepHeap() {
  }

  void Init();  // use default threshold
  void Init(int gc_threshold);

  void PushRoot(RawObject** p) {
    roots_.push_back(p);
  }

  void PopRoot() {
    roots_.pop_back();
  }

  void RootGlobalVar(void* root) {
    global_roots_.push_back(reinterpret_cast<RawObject*>(root));
  }

  void* Allocate(size_t num_bytes, int* obj_id, int* pool_id);

#if 0
  void* Reallocate(void* p, size_t num_bytes);
#endif
  int MaybeCollect();
  int Collect();

  void MaybeMarkAndPush(RawObject* obj);
  void TraceChildren();

  void Sweep();

  void PrintStats(int fd);  // public for testing

  void CleanProcessExit();  // do one last GC, used in unit tests
  void ProcessExit();       // main() lets the OS clean up, except in the ASAN variant

  int num_live() {
    return num_live_
#ifndef NO_POOL_ALLOC
           + pool1_.num_live() + pool2_.num_live()
#endif
        ;
  }

  bool is_initialized_ = true;  // mark/sweep doesn't need to be initialized

  // Runtime params

  // The threshold is a number of live objects, since we aren't keeping track
  // of total bytes.
  int gc_threshold_;

  // Show debug logging
  bool gc_verbose_ = false;

  // Current stats
  int num_live_ = 0;
  // Should we keep track of sizes?
  // int64_t bytes_live_ = 0;

  // Cumulative stats
  int max_survived_ = 0;  // max # live after a collection
  int num_allocated_ = 0;
  int64_t bytes_allocated_ = 0;  // avoid overflow
  int num_gc_points_ = 0;        // manual collection points
  int num_collections_ = 0;
  int num_growths_ = 0;
  double max_gc_millis_ = 0.0;
  double total_gc_millis_ = 0.0;

#ifndef NO_POOL_ALLOC
  // 16,384 / 24 bytes = 682 cells (rounded down), 16,368 bytes
  // 16,384 / 48 bytes = 341 cells (rounded down), 16,368 bytes
  // Conveniently, the glibc malloc header is 16 bytes, so each Block occupies
  // exactly 16 KiB including the header.
  Pool<682, 24> pool1_;
  Pool<341, 48> pool2_;
#endif

  std::vector<RawObject**> roots_;
  std::vector<RawObject*> global_roots_;

  // Allocate() appends live objects to this list, and Sweep() compacts it
  std::vector<ObjHeader*> live_objs_;
  // Allocate() lazily frees these, and Sweep() replenishes the list
  std::vector<ObjHeader*> to_free_;

  std::vector<ObjHeader*> gray_stack_;
  MarkSet mark_set_;

  int greatest_obj_id_ = 0;

 private:
  void FreeEverything();
  void MaybePrintStats();

  DISALLOW_COPY_AND_ASSIGN(MarkSweepHeap);
};
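
// A minimal sketch of the stack rooting protocol: push the address of each
// local that may hold a heap pointer before allocating, and pop it on scope
// exit, so a collection triggered mid-scope can trace through it.  'gHeap',
// 'Str', and 'Alloc<T>()' are stand-ins for the surrounding runtime, not
// defined in this header.
//
//   Str* s = nullptr;
//   gHeap.PushRoot(reinterpret_cast<RawObject**>(&s));
//   s = Alloc<Str>();  // may trigger a collection, which traces *(&s)
//   ...                // use s
//   gHeap.PopRoot();   // must balance the PushRoot above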

#endif  // MARKSWEEP_HEAP_H