From 2919ad1ee3bf475c8f3aae44c2aec694a9843c4d Mon Sep 17 00:00:00 2001 From: Ian Lance Taylor Date: Thu, 13 Sep 2018 16:44:43 +0000 Subject: [PATCH] libgo: build roots index to speed up bulkBarrierPreWrite To reduce the amount of time spent in write barrier processing (specifically runtime.bulkBarrierPreWrite), add support for building a 'GC roots index', basically a sorted list of all roots, so as to allow more efficient lookups of gcdata structures for globals. The previous implementation worked on the raw (unsorted) roots list itself, which did not scale well. Reviewed-on: https://go-review.googlesource.com/132595 From-SVN: r264276 --- gcc/go/gofrontend/MERGE | 2 +- gcc/go/gofrontend/gogo.cc | 3 +- libgo/go/runtime/cgocall.go | 24 +++++---- libgo/go/runtime/mbitmap.go | 26 +++++----- libgo/go/runtime/mgc_gccgo.go | 92 +++++++++++++++++++++++++++++++++++ libgo/go/runtime/proc.go | 1 + 6 files changed, 125 insertions(+), 23 deletions(-) diff --git a/gcc/go/gofrontend/MERGE b/gcc/go/gofrontend/MERGE index 1afbcb48b34..ca47d87b427 100644 --- a/gcc/go/gofrontend/MERGE +++ b/gcc/go/gofrontend/MERGE @@ -1,4 +1,4 @@ -acf852f838e6b99f907d84648be853fa2c374393 +70bd9801911f8ed27df410d36a928166c37a68fd The first line of this file holds the git revision number of the last merge done from the gofrontend repository. diff --git a/gcc/go/gofrontend/gogo.cc b/gcc/go/gofrontend/gogo.cc index c16b40e56fd..dd6733fab8b 100644 --- a/gcc/go/gofrontend/gogo.cc +++ b/gcc/go/gofrontend/gogo.cc @@ -1535,7 +1535,8 @@ Gogo::write_globals() // Avoid putting runtime.gcRoots itself on the list. if (this->compiling_runtime() && this->package_name() == "runtime" - && Gogo::unpack_hidden_name(no->name()) == "gcRoots") + && (Gogo::unpack_hidden_name(no->name()) == "gcRoots" + || Gogo::unpack_hidden_name(no->name()) == "gcRootsIndex")) ; else var_gc.push_back(no); diff --git a/libgo/go/runtime/cgocall.go b/libgo/go/runtime/cgocall.go index d5794bc3341..67b2bce7eb6 100644 --- a/libgo/go/runtime/cgocall.go +++ b/libgo/go/runtime/cgocall.go @@ -243,17 +243,21 @@ func cgoCheckUnknownPointer(p unsafe.Pointer, msg string) (base, i uintptr) { return } - roots := gcRoots - for roots != nil { - for j := 0; j < roots.count; j++ { - pr := roots.roots[j] - addr := uintptr(pr.decl) - if cgoInRange(p, addr, addr+pr.size) { - cgoCheckBits(pr.decl, pr.gcdata, 0, pr.ptrdata) - return - } + lo := 0 + hi := len(gcRootsIndex) + for lo < hi { + m := lo + (hi-lo)/2 + pr := gcRootsIndex[m] + addr := uintptr(pr.decl) + if cgoInRange(p, addr, addr+pr.size) { + cgoCheckBits(pr.decl, pr.gcdata, 0, pr.ptrdata) + return + } + if uintptr(p) < addr { + hi = m + } else { + lo = m + 1 } - roots = roots.next } return diff --git a/libgo/go/runtime/mbitmap.go b/libgo/go/runtime/mbitmap.go index 191239ac547..c6c8e6a3da8 100644 --- a/libgo/go/runtime/mbitmap.go +++ b/libgo/go/runtime/mbitmap.go @@ -575,19 +575,23 @@ func bulkBarrierPreWrite(dst, src, size uintptr) { if !inheap(dst) { // If dst is a global, use the data or BSS bitmaps to // execute write barriers. - roots := gcRoots - for roots != nil { - for i := 0; i < roots.count; i++ { - pr := roots.roots[i] - addr := uintptr(pr.decl) - if addr <= dst && dst < addr+pr.size { - if dst < addr+pr.ptrdata { - bulkBarrierBitmap(dst, src, size, dst-addr, pr.gcdata) - } - return + lo := 0 + hi := len(gcRootsIndex) + for lo < hi { + m := lo + (hi-lo)/2 + pr := gcRootsIndex[m] + addr := uintptr(pr.decl) + if addr <= dst && dst < addr+pr.size { + if dst < addr+pr.ptrdata { + bulkBarrierBitmap(dst, src, size, dst-addr, pr.gcdata) } + return + } + if dst < addr { + hi = m + } else { + lo = m + 1 } - roots = roots.next } return } diff --git a/libgo/go/runtime/mgc_gccgo.go b/libgo/go/runtime/mgc_gccgo.go index 107a70a7898..cf7780c37f4 100644 --- a/libgo/go/runtime/mgc_gccgo.go +++ b/libgo/go/runtime/mgc_gccgo.go @@ -12,6 +12,7 @@ import ( ) // gcRoot is a single GC root: a variable plus a ptrmask. +//go:notinheap type gcRoot struct { decl unsafe.Pointer // Pointer to variable. size uintptr // Size of variable. @@ -32,6 +33,97 @@ type gcRootList struct { // The compiler keeps this variable itself off the list. var gcRoots *gcRootList +// Slice containing pointers to all reachable gcRoot's sorted by +// starting address (generated at init time from 'gcRoots'). +// The compiler also keeps this variable itself off the list. +// The storage backing this slice is allocated via persistentalloc(), the +// idea being that we don't want to treat the slice itself as a global +// variable, since it points to things that don't need to be scanned +// themselves. +var gcRootsIndex []*gcRoot + +// rootradixsort performs an in-place radix sort of the 'arr' rootptr slice. +// Note: not a stable sort, however we expect it to be called only on slices +// with no duplicate entries, so this should not matter. +func rootradixsort(arr []*gcRoot, lo, hi int, bit uint) { + // Partition the array into two bins based on the values at the + // specified bit position: 0's bin (grown from the left) and and + // 1's bin (grown from the right). We keep two boundary markers, + // the 0's boundary "zbb" (which grows to the right) and the 1's + // boundary "obb" (which grows to the left). At each step we + // examine the bit for the right-of-ZBB element: if it is zero, we + // leave it in place and move the ZBB to the right. If the bit is + // not zero, then we swap the ZBB and OBB elements and move the + // OBB to the left. When this is done, the two partitions are then + // sorted using the next lower bit. + + // 0's bin boundary, initially set to before the first element + zbb := lo - 1 + // 1's bin boundary, set to just beyond the last element + obb := hi + 1 + // mask to pick up bit of interest + bmask := uintptr(1) << bit + + for obb-zbb > 1 { + zbbval := uintptr(arr[zbb+1].decl) & bmask + if zbbval == 0 { + // Move zbb one to the right + zbb++ + } else { + // Move obb one to the left and swap + arr[obb-1], arr[zbb+1] = arr[zbb+1], arr[obb-1] + obb-- + } + } + + if bit != 0 { + // NB: in most cases there is just a single partition to visit + // so if we wanted to reduce stack space we could check for this + // and insert a goto back up to the top. + if zbb-lo > 0 { + rootradixsort(arr, lo, zbb, bit-1) + } + if hi-obb > 0 { + rootradixsort(arr, obb, hi, bit-1) + } + } +} + +//go:nowritebarrier +func createGcRootsIndex() { + // Count roots + nroots := 0 + gcr := gcRoots + for gcr != nil { + nroots += gcr.count + gcr = gcr.next + } + + // Construct the gcRootsIndex slice. Use non-heap storage for the array + // backing the slice. + sp := (*notInHeapSlice)(unsafe.Pointer(&gcRootsIndex)) + sp.array = (*notInHeap)(persistentalloc1(sys.PtrSize*uintptr(nroots), sys.PtrSize, &memstats.other_sys)) + if sp.array == nil { + throw("runtime: cannot allocate memory") + } + sp.len = nroots + sp.cap = nroots + + // Populate the roots index slice + gcr = gcRoots + k := 0 + for gcr != nil { + for i := 0; i < gcr.count; i++ { + gcRootsIndex[k] = &gcr.roots[i] + k++ + } + gcr = gcr.next + } + + // Sort it by starting address. + rootradixsort(gcRootsIndex, 0, nroots-1, sys.PtrSize*8-1) +} + // registerGCRoots is called by compiler-generated code. //go:linkname registerGCRoots runtime.registerGCRoots diff --git a/libgo/go/runtime/proc.go b/libgo/go/runtime/proc.go index 4c217cc1c0a..74325e3974b 100644 --- a/libgo/go/runtime/proc.go +++ b/libgo/go/runtime/proc.go @@ -207,6 +207,7 @@ func main() { fn := main_init // make an indirect call, as the linker doesn't know the address of the main package when laying down the runtime fn() + createGcRootsIndex() close(main_init_done) needUnlock = false -- 2.30.2