Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 0 additions & 7 deletions src/gc-debug.c
Original file line number Diff line number Diff line change
Expand Up @@ -745,13 +745,6 @@ void gc_time_pool_end(int sweep_full)
sweep_full ? "full" : "quick");
}

// Print how long the system-image sweep took.
//
// `t0` is the jl_hrtime() reading taken by the caller before the sweep.
// The delta to the current reading is divided by 1e9 (i.e. it is treated as
// nanoseconds) and reported in milliseconds with two decimals.
void gc_time_sysimg_end(uint64_t t0)
{
    double sweep_sysimg_sec = (jl_hrtime() - t0) / 1e9;
    double sweep_sysimg_ms = sweep_sysimg_sec * 1000;
    jl_safe_printf("GC sweep sysimg end %.2f ms\n", sweep_sysimg_ms);
}

static int64_t big_total;
static int64_t big_freed;
static int64_t big_reset;
Expand Down
35 changes: 35 additions & 0 deletions src/gc-heap-snapshot.c
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ typedef struct {
size_t internal_root_idx; // node index of the internal root node
size_t _gc_root_idx; // node index of the GC roots node
size_t _gc_finlist_root_idx; // node index of the GC finlist roots node
size_t _gc_image_node_idx; // node index of the combined [image] node
} HeapSnapshot;

// global heap snapshot, mutated by garbage collector
Expand Down Expand Up @@ -339,6 +340,28 @@ static void _add_synthetic_root_entries(HeapSnapshot *snapshot) JL_NOTSAFEPOINT
snapshot->_gc_finlist_root_idx // to
};
serialize_edge(snapshot, root_to_gc_finlist_roots);

// Add a synthetic node representing all sysimage/pkgimage objects.
// Image objects are permanently marked and never traced by the GC mark
// phase, so we collapse them into a single node rather than recording
// their internal structure.
snapshot->_gc_image_node_idx = snapshot->num_nodes;
Node gc_image_node = {
(uint8_t)st_find_or_create(&snapshot->node_types, "synthetic"),
st_find_or_serialize(&snapshot->names, snapshot->strings, "[image]"), // name
snapshot->_gc_image_node_idx, // id
0, // size (image memory is not GC-managed)
0, // size_t trace_node_id (unused)
0 // int detachedness; // 0 - unknown, 1 - attached; 2 - detached
};
serialize_node(snapshot, gc_image_node);
Edge root_to_image = {
(uint8_t)st_find_or_create(&snapshot->edge_types, "internal"),
st_find_or_serialize(&snapshot->names, snapshot->strings, "[image]"), // edge label
snapshot->internal_root_idx, // from
snapshot->_gc_image_node_idx // to
};
serialize_edge(snapshot, root_to_image);
}

// mimicking https://github.com/nodejs/node/blob/5fd7a72e1c4fbaf37d3723c4c81dce35c149dc84/deps/v8/src/profiler/heap-snapshot-generator.cc#L597-L597
Expand Down Expand Up @@ -442,6 +465,18 @@ size_t record_node_to_gc_snapshot(jl_value_t *a) JL_NOTSAFEPOINT
if (ios_need_close)
ios_close(&str_);

// Image objects are permanently marked and never traced by the GC mark
// phase, so they would otherwise appear as orphan nodes. Root them under
// the synthetic [image] node with a label indicating which image they belong to.
if (jl_astaggedvalue(a)->bits.in_image) {
jl_value_t *top_mod = jl_object_top_module(a);
const char *label = "[image]";
if (top_mod != (jl_value_t*)jl_nothing && jl_is_module((jl_module_t*)top_mod))
label = jl_symbol_name_(((jl_module_t*)top_mod)->name);
_record_gc_just_edge("internal", g_snapshot->_gc_image_node_idx, idx,
st_find_or_serialize(&g_snapshot->names, g_snapshot->strings, label));
}

return idx;
}

Expand Down
48 changes: 39 additions & 9 deletions src/gc-stock.c
Original file line number Diff line number Diff line change
Expand Up @@ -1483,13 +1483,6 @@ static void gc_sweep_pool(void) JL_NOTSAFEPOINT
gc_time_pool_end(current_sweep_full);
}

// Sweep the system image and report the elapsed time.
// The timestamp is sampled before gc_sweep_sysimg() runs and handed to
// gc_time_sysimg_end() once the sweep has returned.
static void gc_sweep_perm_alloc(void) JL_NOTSAFEPOINT
{
    const uint64_t sweep_start = jl_hrtime();
    gc_sweep_sysimg();
    gc_time_sysimg_end(sweep_start);
}

// mark phase

JL_DLLEXPORT void jl_gc_queue_root(const jl_value_t *ptr)
Expand All @@ -1504,6 +1497,20 @@ JL_DLLEXPORT void jl_gc_queue_root(const jl_value_t *ptr)
if (header & GC_OLD) { // write barrier has not been triggered in this object yet
arraylist_push(&ptls->gc_tls.heap.remset, (jl_value_t*)ptr);
ptls->gc_tls.heap.remset_nptr++; // conservative
// Permanently-marked image objects that are mutated need to be
// persistently tracked, since they would otherwise be skipped
// during the mark phase. The image_remset_list is append-only, so
// from now on this object is re-queued by gc_queue_image_remset on
// every collection that follows a full sweep.
// The image_remset hash table deduplicates entries, which prevents
// unbounded growth from repeated mutations of the same image object.
if (__unlikely(o->bits.in_image)) {
JL_LOCK_NOGC(&image_remset_lock);
if (ptrhash_get(&image_remset, (void*)ptr) == HT_NOTFOUND) {
ptrhash_put(&image_remset, (void*)ptr, (void*)ptr);
arraylist_push(&image_remset_list, (void*)ptr);
}
JL_UNLOCK_NOGC(&image_remset_lock);
}
}
}

Expand Down Expand Up @@ -2827,6 +2834,22 @@ static void gc_queue_remset(jl_gc_markqueue_t *mq, jl_ptls_t ptls2) JL_NOTSAFEPO
ptls2->gc_tls.heap.remset_nptr = 0;
}

// Push every entry of image_remset_list onto the mark queue `mq`.
//
// The list holds image objects that jl_gc_queue_root recorded when they were
// mutated. Nothing here removes entries, so the same objects are queued again
// each time this runs. For every entry the gc bits are forced back to
// GC_OLD_MARKED, and the pointer is pushed with GC_REMSET_PTR_TAG or-ed in.
//
// NOTE(review): the list is read without taking image_remset_lock; presumably
// safe because mutator threads are stopped while this runs -- confirm.
static void gc_queue_image_remset(jl_gc_markqueue_t *mq) JL_NOTSAFEPOINT
{
    void **const entries = image_remset_list.items;
    const size_t count = image_remset_list.len;
    size_t k = 0;
    while (k < count) {
        void *obj = entries[k++];
        jl_astaggedvalue(obj)->bits.gc = GC_OLD_MARKED;
        uintptr_t tagged = (uintptr_t)obj | GC_REMSET_PTR_TAG;
        gc_ptr_queue_push(mq, (jl_value_t *)tagged);
    }
}

static void gc_check_all_remsets_are_empty(void) JL_NOTSAFEPOINT
{
for (int i = 0; i < gc_n_threads; i++) {
Expand Down Expand Up @@ -3086,6 +3109,12 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) JL_NOTS
}
}
gc_check_all_remsets_are_empty();
// 1.4. queue image objects with cross-heap references.
// Only needed after a full sweep (which clears non-image objects'
// mark bits). After quick sweeps, old objects retain their marks,
// so children of image_remset entries survive without re-tracing.
if (prev_sweep_full)
gc_queue_image_remset(mq);

// 2. walk roots
gc_mark_roots(mq);
Expand Down Expand Up @@ -3213,8 +3242,6 @@ static int _jl_gc_collect(jl_ptls_t ptls, jl_gc_collection_t collection) JL_NOTS
gc_scrub();
gc_verify_tags();
gc_sweep_pool();
if (sweep_full)
gc_sweep_perm_alloc();
}

JL_PROBE_GC_SWEEP_END();
Expand Down Expand Up @@ -3737,6 +3764,9 @@ void jl_gc_init(void)
{
JL_MUTEX_INIT(&heapsnapshot_lock, "heapsnapshot_lock");
JL_MUTEX_INIT(&finalizers_lock, "finalizers_lock");
JL_MUTEX_INIT(&image_remset_lock, "image_remset_lock");
htable_new(&image_remset, 0);
arraylist_new(&image_remset_list, 0);
uv_mutex_init(&page_profile_lock);
uv_mutex_init(&gc_perm_lock);
uv_mutex_init(&gc_pages_lock);
Expand Down
3 changes: 0 additions & 3 deletions src/gc-stock.h
Original file line number Diff line number Diff line change
Expand Up @@ -607,8 +607,6 @@ void gc_final_pause_end(int64_t t0, int64_t tend);
void gc_time_pool_start(void) JL_NOTSAFEPOINT;
void gc_time_count_page(int freedall, int pg_skpd) JL_NOTSAFEPOINT;
void gc_time_pool_end(int sweep_full) JL_NOTSAFEPOINT;
void gc_time_sysimg_end(uint64_t t0) JL_NOTSAFEPOINT;

void gc_time_big_start(void) JL_NOTSAFEPOINT;
void gc_time_count_big(int old_bits, int bits) JL_NOTSAFEPOINT;
void gc_time_big_end(void) JL_NOTSAFEPOINT;
Expand Down Expand Up @@ -641,7 +639,6 @@ STATIC_INLINE void gc_time_count_page(int freedall, int pg_skpd) JL_NOTSAFEPOINT
(void)pg_skpd;
}
#define gc_time_pool_end(sweep_full) (void)(sweep_full)
#define gc_time_sysimg_end(t0) (void)(t0)
#define gc_time_big_start()
STATIC_INLINE void gc_time_count_big(int old_bits, int bits) JL_NOTSAFEPOINT
{
Expand Down
21 changes: 12 additions & 9 deletions src/gc-wb-stock.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,9 @@ extern "C" {
// Write barrier to run after storing `ptr` into `parent`.
//
// `parent` is handed to jl_gc_queue_root when its gc bits equal
// 3 (GC_OLD_MARKED) and at least one of the following holds:
//   - `parent` has its in_image bit set (checked regardless of ptr's bits), or
//   - `ptr` does not have the GC_MARKED bit set.
//
// The previous two-operand condition (parent old-marked && ptr unmarked) must
// not be kept as an enclosing `if`: it would make the in_image case
// unreachable, because an image parent storing an already-marked child would
// be filtered out before the in_image test is evaluated.
STATIC_INLINE void jl_gc_wb(const void *parent, const void *ptr) JL_NOTSAFEPOINT
{
    // parent and ptr isa jl_value_t*
    if (__unlikely(jl_astaggedvalue(parent)->bits.gc == 3 /* GC_OLD_MARKED */ &&
                   (jl_astaggedvalue(parent)->bits.in_image || // image parents are never fully traced
                    (jl_astaggedvalue(ptr)->bits.gc & 1 /* GC_MARKED */) == 0))) // ptr is young
        jl_gc_queue_root((jl_value_t*)parent);
}

Expand All @@ -33,7 +34,7 @@ STATIC_INLINE void jl_gc_multi_wb(const void *parent, const jl_value_t *ptr) JL_
// ptr is an immutable object
if (__likely(jl_astaggedvalue(parent)->bits.gc != 3))
return; // parent is young or in remset
if (__likely(jl_astaggedvalue(ptr)->bits.gc == 3))
if (__likely(jl_astaggedvalue(ptr)->bits.gc == 3 && !jl_astaggedvalue(parent)->bits.in_image))
return; // ptr is old and not in remset (thus it does not point to young)
jl_datatype_t *dt = (jl_datatype_t*)jl_typeof(ptr);
const jl_datatype_layout_t *ly = dt->layout;
Expand All @@ -48,13 +49,14 @@ STATIC_INLINE void jl_gc_wb_genericmemory_copy_boxed(const jl_value_t *dest_owne
if (__unlikely(jl_astaggedvalue(dest_owner)->bits.gc == 3 /* GC_OLD_MARKED */ )) {
jl_value_t *src_owner = jl_genericmemory_owner(src);
size_t done = 0;
if (jl_astaggedvalue(src_owner)->bits.gc != 3 /* GC_OLD_MARKED */) {
int in_image = jl_astaggedvalue(dest_owner)->bits.in_image;
if (in_image || jl_astaggedvalue(src_owner)->bits.gc != 3 /* GC_OLD_MARKED */) {
if (dest_p < src_p || dest_p > src_p + (*n)) {
for (; done < (*n); done++) { // copy forwards
void *val = jl_atomic_load_relaxed(src_p + done);
jl_atomic_store_release(dest_p + done, val);
// `val` is young or old-unmarked
if (val && !(jl_astaggedvalue(val)->bits.gc & 1 /* GC_MARKED */)) {
// `val` is young or old-unmarked (or dest is image and val is non-image)
if (val && (in_image || !(jl_astaggedvalue(val)->bits.gc & 1 /* GC_MARKED */))) {
jl_gc_queue_root(dest_owner);
break;
}
Expand All @@ -66,8 +68,8 @@ STATIC_INLINE void jl_gc_wb_genericmemory_copy_boxed(const jl_value_t *dest_owne
for (; done < (*n); done++) { // copy backwards
void *val = jl_atomic_load_relaxed(src_p + (*n) - done - 1);
jl_atomic_store_release(dest_p + (*n) - done - 1, val);
// `val` is young or old-unmarked
if (val && !(jl_astaggedvalue(val)->bits.gc & 1 /* GC_MARKED */)) {
// `val` is young or old-unmarked (or dest is image and val is non-image)
if (val && (in_image || !(jl_astaggedvalue(val)->bits.gc & 1 /* GC_MARKED */))) {
jl_gc_queue_root(dest_owner);
break;
}
Expand All @@ -84,7 +86,8 @@ STATIC_INLINE void jl_gc_wb_genericmemory_copy_ptr(const jl_value_t *owner, jl_g
if (__unlikely(jl_astaggedvalue(owner)->bits.gc == 3 /* GC_OLD_MARKED */)) {
jl_value_t *src_owner = jl_genericmemory_owner(src);
size_t elsz = dt->layout->size;
if (jl_astaggedvalue(src_owner)->bits.gc != 3 /* GC_OLD_MARKED */) {
if (jl_astaggedvalue(owner)->bits.in_image ||
jl_astaggedvalue(src_owner)->bits.gc != 3 /* GC_OLD_MARKED */) {
dt = (jl_datatype_t*)jl_tparam1(dt);
for (size_t done = 0; done < n; done++) { // copy forwards
char* s = (char*)src_p+done*elsz;
Expand Down
4 changes: 3 additions & 1 deletion src/julia_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -534,7 +534,9 @@ jl_value_t *jl_gc_small_alloc_noinline(jl_ptls_t ptls, int offset,
int osize);
jl_value_t *jl_gc_big_alloc_noinline(jl_ptls_t ptls, size_t allocsz);
JL_DLLEXPORT int jl_gc_classify_pools(size_t sz, int *osize) JL_NOTSAFEPOINT;
void gc_sweep_sysimg(void) JL_NOTSAFEPOINT;
extern htable_t image_remset;
extern arraylist_t image_remset_list;
extern jl_mutex_t image_remset_lock;


// pools are 16376 bytes large (GC_POOL_SZ - GC_PAGE_OFFSET)
Expand Down
10 changes: 8 additions & 2 deletions src/llvm-final-gc-lowering-stock.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,11 +52,16 @@ void FinalLowerGC::lowerWriteBarrier(CallInst *target, Function &F) {
auto parent = target->getArgOperand(0);
IRBuilder<> builder(target);
builder.SetCurrentDebugLocation(target->getDebugLoc());
auto parBits = builder.CreateAnd(EmitLoadTag(builder, T_size, parent, tbaa_tag), GC_OLD_MARKED, "parent_bits");
auto parTag = EmitLoadTag(builder, T_size, parent, tbaa_tag);
auto parBits = builder.CreateAnd(parTag, GC_OLD_MARKED, "parent_bits");
auto parOldMarked = builder.CreateICmpEQ(parBits, ConstantInt::get(T_size, GC_OLD_MARKED), "parent_old_marked");
auto mayTrigTerm = SplitBlockAndInsertIfThen(parOldMarked, target, false);
builder.SetInsertPoint(mayTrigTerm);
mayTrigTerm->getParent()->setName("may_trigger_wb");
// Image parents are never fully traced by the mark phase, so we must
// always trigger the write barrier regardless of the child's mark bits.
auto parInImage = builder.CreateAnd(parTag, ConstantInt::get(T_size, GC_IN_IMAGE), "parent_in_image");
auto parIsImage = builder.CreateICmpNE(parInImage, ConstantInt::get(T_size, 0), "parent_is_image");
Value *anyChldNotMarked = NULL;
for (unsigned i = 1; i < target->arg_size(); i++) {
Value *child = target->getArgOperand(i);
Expand All @@ -65,9 +70,10 @@ void FinalLowerGC::lowerWriteBarrier(CallInst *target, Function &F) {
anyChldNotMarked = anyChldNotMarked ? builder.CreateOr(anyChldNotMarked, chldNotMarked) : chldNotMarked;
}
assert(anyChldNotMarked); // handled by all_of test above
auto shouldTrigger = builder.CreateOr(parIsImage, anyChldNotMarked, "should_trigger_wb");
MDBuilder MDB(parent->getContext());
SmallVector<uint32_t, 2> Weights{1, 9};
auto trigTerm = SplitBlockAndInsertIfThen(anyChldNotMarked, mayTrigTerm, false,
auto trigTerm = SplitBlockAndInsertIfThen(shouldTrigger, mayTrigTerm, false,
MDB.createBranchWeights(Weights));
trigTerm->getParent()->setName("trigger_wb");
builder.SetInsertPoint(trigTerm);
Expand Down
47 changes: 11 additions & 36 deletions src/staticdata.c
Original file line number Diff line number Diff line change
Expand Up @@ -2071,40 +2071,15 @@ static void jl_read_arraylist(ios_t *s, arraylist_t *list)
ios_read(s, (char*)list->items, list_len * sizeof(void*));
}

void gc_sweep_sysimg(void) JL_NOTSAFEPOINT
{
size_t nblobs = n_linkage_blobs();
if (nblobs == 0)
return;
for (size_t i = 0; i < nblobs; i++) {
image_metadata_t *meta = (image_metadata_t*)image_tree.ranges[i].data;
reloc_t *relocs = (reloc_t*)meta->relocs_base;
if (!relocs)
continue;
uintptr_t base = meta->base;
uintptr_t last_pos = 0;
uint8_t *current = (uint8_t *)relocs;
while (1) {
// Read the offset of the next object
size_t pos_diff = 0;
size_t cnt = 0;
while (1) {
int8_t c = *current++;
pos_diff |= ((size_t)c & 0x7F) << (7 * cnt++);
if ((c >> 7) == 0)
break;
}
if (pos_diff == 0)
break;

uintptr_t pos = last_pos + pos_diff;
last_pos = pos;
jl_taggedvalue_t *o = (jl_taggedvalue_t *)(base + pos);
o->bits.gc = GC_OLD;
assert(o->bits.in_image == 1);
}
}
}
// Persistent set of image objects that reference non-image objects.
// Queued as additional GC roots by gc_queue_image_remset; the visible call
// site in _jl_gc_collect does so only when the previous collection performed
// a full sweep (prev_sweep_full).
// Maintained incrementally by jl_gc_queue_root when image objects are mutated.
// Note: cross-heap refs created during pkgimage uniquing (types, method instances,
// bindings) don't need tracking here because the uniqued objects are always rooted
// through type caches, method specializations, or module binding tables.
htable_t image_remset; // membership table; jl_gc_queue_root consults it to avoid duplicate entries
arraylist_t image_remset_list; // the recorded objects in insertion order; iterated by gc_queue_image_remset
jl_mutex_t image_remset_lock; // held by jl_gc_queue_root while it updates the two containers above

// jl_write_value and jl_read_value are used for storing Julia objects that are adjuncts to
// the image proper. For example, new methods added to external callables require
Expand Down Expand Up @@ -3840,7 +3815,7 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image,
reloc_t *relocs_base = (reloc_t*)&relocs.buf[0];

s.s = &sysimg;
jl_read_reloclist(&s, s.link_ids_gctags, GC_OLD | GC_IN_IMAGE); // gctags
jl_read_reloclist(&s, s.link_ids_gctags, GC_OLD_MARKED | GC_IN_IMAGE); // gctags
size_t sizeof_tags = ios_pos(&relocs);
(void)sizeof_tags;
jl_read_reloclist(&s, s.link_ids_relocs, 0); // general relocs
Expand Down Expand Up @@ -3966,7 +3941,7 @@ static void jl_restore_system_image_from_stream_(ios_t *f, jl_image_t *image,
arraylist_push(&cleanup_list, (void*)obj);
}
if (tag == 1)
*pfld = (uintptr_t)newobj | GC_OLD | GC_IN_IMAGE;
*pfld = (uintptr_t)newobj | GC_OLD_MARKED | GC_IN_IMAGE;
else
*pfld = (uintptr_t)newobj;
assert(!(image_base < (char*)newobj && (char*)newobj <= image_base + sizeof_sysimg));
Expand Down
Loading