// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Garbage collector: type and heap bitmaps.
//
// Stack, data, and bss bitmaps
//
// Stack frames and global variables in the data and bss sections are
// described by bitmaps with 1 bit per pointer-sized word. A "1" bit
// means the word is a live pointer to be visited by the GC (referred to
// as "pointer"). A "0" bit means the word should be ignored by GC
// (referred to as "scalar", though it could be a dead pointer value).
//
// Heap bitmap
//
// The heap bitmap comprises 2 bits for each pointer-sized word in the heap,
// stored in the heapArena metadata backing each heap arena.
// That is, if ha is the heapArena for the arena starting at start,
// then ha.bitmap[0] holds the 2-bit entries for the four words start
// through start+3*ptrSize, ha.bitmap[1] holds the entries for
// start+4*ptrSize through start+7*ptrSize, and so on.
//
// In each 2-bit entry, the lower bit is a pointer/scalar bit, just
// like in the stack/data bitmaps described above. The upper bit
// indicates scan/dead: a "1" value ("scan") indicates that there may
// be pointers in later words of the allocation, and a "0" value
// ("dead") indicates there are no more pointers in the allocation. If
// the upper bit is 0, the lower bit must also be 0, and this
// indicates scanning can ignore the rest of the allocation.
//
// The 2-bit entries are split when written into the byte, so that the top half
// of the byte contains 4 high (scan) bits and the bottom half contains 4 low
// (pointer) bits. This form allows a copy from the 1-bit to the 4-bit form to
// keep the pointer bits contiguous, instead of having to space them out.
//
// The code makes use of the fact that the zero value for a heap
// bitmap means scalar/dead. This property must be preserved when
// modifying the encoding.
//
// The bitmap for noscan spans is not maintained. Code must ensure
// that an object is scannable before consulting its bitmap by
// checking either the noscan bit in the span or by consulting its
// type's information.
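
// Illustrative sketch (not part of the original file): under the layout
// described above, the bitmap location for a heap address can be derived as
// follows. The helper name exampleHeapBitsLocation is hypothetical; the real
// lookup is performed by heapBitsForAddr later in this file.
//
//	// ptrSize is the pointer size in bytes (8 on 64-bit platforms).
//	const ptrSize = 8
//
//	func exampleHeapBitsLocation(addr, arenaStart uintptr) (byteIndex uintptr, pointerMask, scanMask uint8) {
//		word := (addr - arenaStart) / ptrSize // pointer-sized word index within the arena
//		byteIndex = word / 4                  // each bitmap byte describes 4 heap words
//		shift := word % 4
//		pointerMask = 1 << shift    // low nibble holds the pointer/scalar bits
//		scanMask = 1 << (4 + shift) // high nibble holds the scan/dead bits
//		return
//	}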

package runtime

import (
	"runtime/internal/atomic"
	"runtime/internal/sys"
	"unsafe"
)

const (
	bitPointer = 1 << 0
	bitScan    = 1 << 4

	heapBitsShift      = 1     // shift offset between successive bitPointer or bitScan entries
	wordsPerBitmapByte = 8 / 2 // heap words described by one bitmap byte

	// all scan/pointer bits in a byte
	bitScanAll    = bitScan | bitScan<<heapBitsShift | bitScan<<(2*heapBitsShift) | bitScan<<(3*heapBitsShift)
	bitPointerAll = bitPointer | bitPointer<<heapBitsShift | bitPointer<<(2*heapBitsShift) | bitPointer<<(3*heapBitsShift)
)

// addb returns the byte pointer p+n.
//go:nowritebarrier
//go:nosplit
func addb(p *byte, n uintptr) *byte {
	// Note: wrote out full expression instead of calling add(p, n)
	// to reduce the number of temporaries generated by the
	// compiler for this trivial expression during inlining.
	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + n))
}

// subtractb returns the byte pointer p-n.
//go:nowritebarrier
//go:nosplit
func subtractb(p *byte, n uintptr) *byte {
	// Note: wrote out full expression instead of calling add(p, -n)
	// to reduce the number of temporaries generated by the
	// compiler for this trivial expression during inlining.
	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - n))
}

// add1 returns the byte pointer p+1.
//go:nowritebarrier
//go:nosplit
func add1(p *byte) *byte {
	// Note: wrote out full expression instead of calling addb(p, 1)
	// to reduce the number of temporaries generated by the
	// compiler for this trivial expression during inlining.
	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) + 1))
}

// subtract1 returns the byte pointer p-1.
//go:nowritebarrier
//
// nosplit because it is used during write barriers and must not be preempted.
//go:nosplit
func subtract1(p *byte) *byte {
	// Note: wrote out full expression instead of calling subtractb(p, 1)
	// to reduce the number of temporaries generated by the
	// compiler for this trivial expression during inlining.
	return (*byte)(unsafe.Pointer(uintptr(unsafe.Pointer(p)) - 1))
}

// heapBits provides access to the bitmap bits for a single heap word.
// The methods on heapBits take value receivers so that the compiler
// can more easily inline calls to those methods and registerize the
// struct fields independently.
type heapBits struct {
	bitp  *uint8
	shift uint32
	arena uint32 // Index of heap arena containing bitp
	last  *uint8 // Last byte of the arena's bitmap
}

// Make the compiler check that heapBits.arena is large enough to hold
// the maximum arena frame number.
var _ = heapBits{arena: (1<<heapAddrBits)/heapArenaBytes - 1}

// markBits provides access to the mark bit for an object in the heap.
// bytep points to the byte holding the mark bit.
// mask is a byte with a single bit set that can be &ed with *bytep
// to see if the bit has been set.
// *m.bytep&m.mask != 0 indicates the mark bit is set.
// index can be used along with span information to generate
// the address of the object in the heap.
// We maintain one set of mark bits for allocation and one for
// marking purposes.
type markBits struct {
	bytep *uint8
	mask  uint8
	index uintptr
}

//go:nosplit
func (s *mspan) allocBitsForIndex(allocBitIndex uintptr) markBits {
	bytep, mask := s.allocBits.bitp(allocBitIndex)
	return markBits{bytep, mask, allocBitIndex}
}
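
// Illustrative sketch (not part of the original file): allocBits records a 1
// for every allocated slot in a span, so once those bits are negated a
// count-trailing-zeros instruction points directly at the next free slot.
// The helper below is a hypothetical, simplified model of what
// refillAllocCache and nextFreeIndex (both defined next) compute together,
// using math/bits instead of the runtime's sys.Ctz64.
//
//	import "math/bits"
//
//	// exampleNextFree returns the index of the first free slot at or after
//	// base, given the 64 allocation bits that start at that index.
//	func exampleNextFree(allocBits uint64, base uintptr) (uintptr, bool) {
//		cache := ^allocBits // free slots become 1s
//		if n := bits.TrailingZeros64(cache); n < 64 {
//			return base + uintptr(n), true
//		}
//		return 0, false // no free slot in this 64-slot window
//	}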

// refillAllocCache takes 8 bytes of s.allocBits starting at whichByte
// and negates them so that ctz (count trailing zeros) instructions
// can be used. It then places these 8 bytes into the cached 64-bit
// s.allocCache.
func (s *mspan) refillAllocCache(whichByte uintptr) {
	bytes := (*[8]uint8)(unsafe.Pointer(s.allocBits.bytep(whichByte)))
	aCache := uint64(0)
	aCache |= uint64(bytes[0])
	aCache |= uint64(bytes[1]) << (1 * 8)
	aCache |= uint64(bytes[2]) << (2 * 8)
	aCache |= uint64(bytes[3]) << (3 * 8)
	aCache |= uint64(bytes[4]) << (4 * 8)
	aCache |= uint64(bytes[5]) << (5 * 8)
	aCache |= uint64(bytes[6]) << (6 * 8)
	aCache |= uint64(bytes[7]) << (7 * 8)
	s.allocCache = ^aCache
}

// nextFreeIndex returns the index of the next free object in s at
// or after s.freeindex.
// There are hardware instructions that can be used to make this
// faster if profiling warrants it.
func (s *mspan) nextFreeIndex() uintptr {
	sfreeindex := s.freeindex
	snelems := s.nelems
	if sfreeindex == snelems {
		return sfreeindex
	}
	if sfreeindex > snelems {
		throw("s.freeindex > s.nelems")
	}

	aCache := s.allocCache

	bitIndex := sys.Ctz64(aCache)
	for bitIndex == 64 {
		// Move index to start of next cached bits.
		sfreeindex = (sfreeindex + 64) &^ (64 - 1)
		if sfreeindex >= snelems {
			s.freeindex = snelems
			return snelems
		}
		whichByte := sfreeindex / 8
		// Refill s.allocCache with the next 64 alloc bits.
		s.refillAllocCache(whichByte)
		aCache = s.allocCache
		bitIndex = sys.Ctz64(aCache)
		// nothing available in cached bits
		// grab the next 8 bytes and try again.
	}
	result := sfreeindex + uintptr(bitIndex)
	if result >= snelems {
		s.freeindex = snelems
		return snelems
	}

	s.allocCache >>= uint(bitIndex + 1)
	sfreeindex = result + 1

	if sfreeindex%64 == 0 && sfreeindex != snelems {
		// We just incremented s.freeindex so it isn't 0.
		// As each 1 in s.allocCache was encountered and used for allocation
		// it was shifted away. At this point s.allocCache contains all 0s.
		// Refill s.allocCache so that it corresponds
		// to the bits at s.allocBits starting at s.freeindex.
		whichByte := sfreeindex / 8
		s.refillAllocCache(whichByte)
	}
	s.freeindex = sfreeindex
	return result
}

// isFree reports whether the index'th object in s is unallocated.
//
// The caller must ensure s.state is mSpanInUse, and there must have
// been no preemption points since ensuring this (which could allow a
// GC transition, which would allow the state to change).
func (s *mspan) isFree(index uintptr) bool {
	if index < s.freeindex {
		return false
	}
	bytep, mask := s.allocBits.bitp(index)
	return *bytep&mask == 0
}

func (s *mspan) objIndex(p uintptr) uintptr {
	byteOffset := p - s.base()
	if byteOffset == 0 {
		return 0
	}
	if s.baseMask != 0 {
		// s.baseMask is non-0, elemsize is a power of two, so shift by s.divShift
		return byteOffset >> s.divShift
	}
	return uintptr(((uint64(byteOffset) >> s.divShift) * uint64(s.divMul)) >> s.divShift2)
}

func markBitsForAddr(p uintptr) markBits {
	s := spanOf(p)
	objIndex := s.objIndex(p)
	return s.markBitsForIndex(objIndex)
}

func (s *mspan) markBitsForIndex(objIndex uintptr) markBits {
	bytep, mask := s.gcmarkBits.bitp(objIndex)
	return markBits{bytep, mask, objIndex}
}

func (s *mspan) markBitsForBase() markBits {
	return markBits{(*uint8)(s.gcmarkBits), uint8(1), 0}
}

// isMarked reports whether mark bit m is set.
func (m markBits) isMarked() bool {
	return *m.bytep&m.mask != 0
}

// setMarked sets the marked bit in the markbits, atomically.
func (m markBits) setMarked() {
	// Might be racing with other updates, so use atomic update always.
	// We used to be clever here and use a non-atomic update in certain
	// cases, but it's not worth the risk.
	atomic.Or8(m.bytep, m.mask)
}

// setMarkedNonAtomic sets the marked bit in the markbits, non-atomically.
func (m markBits) setMarkedNonAtomic() {
	*m.bytep |= m.mask
}
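
// Illustrative sketch (not part of the original file): the mark bits of eight
// neighboring objects share a single byte, so goroutines marking different
// objects may write the same byte concurrently. A plain read-modify-write can
// lose a mark, which is why setMarked above uses atomic.Or8. The snippet is a
// hypothetical model of that bit-set using sync/atomic on a 32-bit word.
//
//	import "sync/atomic"
//
//	func exampleSetBitAtomic(word *uint32, mask uint32) {
//		// Retry a compare-and-swap until the bit is set without
//		// clobbering concurrent updates to the other bits.
//		for {
//			old := atomic.LoadUint32(word)
//			if old&mask != 0 || atomic.CompareAndSwapUint32(word, old, old|mask) {
//				return
//			}
//		}
//	}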

// clearMarked clears the marked bit in the markbits, atomically.
func (m markBits) clearMarked() {
	// Might be racing with other updates, so use atomic update always.
	// We used to be clever here and use a non-atomic update in certain
	// cases, but it's not worth the risk.
	atomic.And8(m.bytep, ^m.mask)
}

// markBitsForSpan returns the markBits for the span base address base.
func markBitsForSpan(base uintptr) (mbits markBits) {
	mbits = markBitsForAddr(base)
	if mbits.mask != 1 {
		throw("markBitsForSpan: unaligned start")
	}
	return mbits
}

// advance advances the markBits to the next object in the span.
func (m *markBits) advance() {
	if m.mask == 1<<7 {
		m.bytep = (*uint8)(unsafe.Pointer(uintptr(unsafe.Pointer(m.bytep)) + 1))
		m.mask = 1
	} else {
		m.mask = m.mask << 1
	}
	m.index++
}

// heapBitsForAddr returns the heapBits for the address addr.
// The caller must ensure addr is in an allocated span.
// In particular, be careful not to point past the end of an object.
//
// nosplit because it is used during write barriers and must not be preempted.
//go:nosplit
func heapBitsForAddr(addr uintptr) (h heapBits) {
	// 2 bits per word, 4 pairs per byte, and a mask is hard coded.
	arena := arenaIndex(addr)
	ha := mheap_.arenas[arena.l1()][arena.l2()]
	// The compiler uses a load for nil checking ha, but in this
	// case we'll almost never hit that cache line again, so it
	// makes more sense to do a value check.
	if ha == nil {
		// addr is not in the heap. Return nil heapBits, which
		// we expect to crash in the caller.
		return
	}
	h.bitp = &ha.bitmap[(addr/(sys.PtrSize*4))%heapArenaBitmapBytes]
	h.shift = uint32((addr / sys.PtrSize) & 3)
	h.arena = uint32(arena)
	h.last = &ha.bitmap[len(ha.bitmap)-1]
	return
}

// badPointer throws a "found bad pointer in Go heap" panic for the pointer p.
func badPointer(s *mspan, p, refBase, refOff uintptr) {
	// Typically this indicates an incorrect use
	// of unsafe or cgo to store a bad pointer in
	// the Go heap. It may also indicate a runtime
	// bug.
	//
	// TODO(austin): We could be more aggressive
	// and detect pointers to unallocated objects
	// in allocated spans.
	printlock()
	print("runtime: pointer ", hex(p))
	state := s.state.get()
	if state != mSpanInUse {
		print(" to unallocated span")
	} else {
		print(" to unused region of span")
	}
	print(" span.base()=", hex(s.base()), " span.limit=", hex(s.limit), " span.state=", state, "\n")
	if refBase != 0 {
		print("runtime: found in object at *(", hex(refBase), "+", hex(refOff), ")\n")
		gcDumpObject("object", refBase, refOff)
	}
	getg().m.traceback = 2
	throw("found bad pointer in Go heap (incorrect use of unsafe or cgo?)")
}
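
// Illustrative sketch (not part of the original file): advance above walks a
// 1-bit-per-object bitmap by sliding a single-bit mask through each byte. The
// hypothetical helper below counts set bits the same way, operating on a plain
// byte slice rather than a span's gcmarkBits.
//
//	func exampleCountMarked(bits []byte, nobj int) int {
//		count := 0
//		bytep, mask := 0, byte(1)
//		for i := 0; i < nobj; i++ {
//			if bits[bytep]&mask != 0 {
//				count++
//			}
//			if mask == 1<<7 {
//				bytep, mask = bytep+1, 1 // move on to the next byte
//			} else {
//				mask <<= 1
//			}
//		}
//		return count
//	}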

// findObject returns the base address for the heap object containing
// the address p, the object's span, and the index of the object in s.
// If p does not point into a heap object, it returns base == 0.
//
// If p is an invalid heap pointer and debug.invalidptr != 0,
// findObject panics.
//
// refBase and refOff optionally give the base address of the object
// in which the pointer p was found and the byte offset at which it
// was found. These are used for error reporting.
//
// It is nosplit so it is safe for p to be a pointer to the current goroutine's stack.
// Since p is a uintptr, it would not be adjusted if the stack were to move.
//go:nosplit
func findObject(p, refBase, refOff uintptr) (base uintptr, s *mspan, objIndex uintptr) {
	s = spanOf(p)
	// If s is nil, the virtual address has never been part of the heap.
	// This pointer may be to some mmap'd region, so we allow it.
	if s == nil {
		return
	}
	// If p is a bad pointer, it may not be in s's bounds.
	//
	// Check s.state to synchronize with span initialization
	// before checking other fields. See also spanOfHeap.
	if state := s.state.get(); state != mSpanInUse || p < s.base() || p >= s.limit {
		// Pointers into stacks are also ok, the runtime manages these explicitly.
		if state == mSpanManual {
			return
		}
		// The following ensures that we are rigorous about what data
		// structures hold valid pointers.
		if debug.invalidptr != 0 {
			badPointer(s, p, refBase, refOff)
		}
		return
	}
	// If this span holds objects of a power-of-2 size, just mask off the bits to
	// the interior of the object. Otherwise use the size to get the base.
	if s.baseMask != 0 {
		// optimize for power of 2 sized objects.
		base = s.base()
		base = base + (p-base)&uintptr(s.baseMask)
		objIndex = (base - s.base()) >> s.divShift
		// base = p & s.baseMask is faster for small spans,
		// but doesn't work for large spans.
		// Overall, it's faster to use the more general computation above.
	} else {
		base = s.base()
		if p-base >= s.elemsize {
			// n := (p - base) / s.elemsize, using division by multiplication
			objIndex = uintptr(p-base) >> s.divShift * uintptr(s.divMul) >> s.divShift2
			base += objIndex * s.elemsize
		}
	}
	return
}

// next returns the heapBits describing the next pointer-sized word in memory.
// That is, if h describes address p, h.next() describes p+ptrSize.
// Note that next does not modify h. The caller must record the result.
//
// nosplit because it is used during write barriers and must not be preempted.
//go:nosplit
func (h heapBits) next() heapBits {
	if h.shift < 3*heapBitsShift {
		h.shift += heapBitsShift
	} else if h.bitp != h.last {
		h.bitp, h.shift = add1(h.bitp), 0
	} else {
		// Move to the next arena.
		return h.nextArena()
	}
	return h
}

// nextArena advances h to the beginning of the next heap arena.
//
// This is a slow-path helper to next. gc's inliner knows that
// heapBits.next can be inlined even though it calls this. This is
// marked noinline so it doesn't get inlined into next and cause next
// to be too big to inline.
//
//go:nosplit
//go:noinline
func (h heapBits) nextArena() heapBits {
	h.arena++
	ai := arenaIdx(h.arena)
	l2 := mheap_.arenas[ai.l1()]
	if l2 == nil {
		// We just passed the end of the object, which
		// was also the end of the heap. Poison h. It
		// should never be dereferenced at this point.
		return heapBits{}
	}
	ha := l2[ai.l2()]
	if ha == nil {
		return heapBits{}
	}
	h.bitp, h.shift = &ha.bitmap[0], 0
	h.last = &ha.bitmap[len(ha.bitmap)-1]
	return h
}

// forward returns the heapBits describing n pointer-sized words ahead of h in memory.
// That is, if h describes address p, h.forward(n) describes p+n*ptrSize.
// h.forward(1) is equivalent to h.next(), just slower.
// Note that forward does not modify h. The caller must record the result.
//go:nosplit
func (h heapBits) forward(n uintptr) heapBits {
	n += uintptr(h.shift) / heapBitsShift
	nbitp := uintptr(unsafe.Pointer(h.bitp)) + n/4
	h.shift = uint32(n%4) * heapBitsShift
	if nbitp <= uintptr(unsafe.Pointer(h.last)) {
		h.bitp = (*uint8)(unsafe.Pointer(nbitp))
		return h
	}

	// We're in a new heap arena.
	past := nbitp - (uintptr(unsafe.Pointer(h.last)) + 1)
	h.arena += 1 + uint32(past/heapArenaBitmapBytes)
	ai := arenaIdx(h.arena)
	if l2 := mheap_.arenas[ai.l1()]; l2 != nil && l2[ai.l2()] != nil {
		a := l2[ai.l2()]
		h.bitp = &a.bitmap[past%heapArenaBitmapBytes]
		h.last = &a.bitmap[len(a.bitmap)-1]
	} else {
		h.bitp, h.last = nil, nil
	}
	return h
}
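
// Illustrative sketch (not part of the original file): for spans whose element
// size is a power of two, findObject above recovers an object's base address
// from an interior pointer with a single mask. The helper below is a
// hypothetical, span-free model of that computation.
//
//	// exampleObjectBase assumes elemSize is a power of two and that p points
//	// into the span starting at spanBase.
//	func exampleObjectBase(p, spanBase, elemSize uintptr) uintptr {
//		return spanBase + (p-spanBase)&^(elemSize-1)
//	}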

// forwardOrBoundary is like forward, but stops at boundaries between
// contiguous sections of the bitmap. It returns the number of words
// advanced over, which will be <= n.
func (h heapBits) forwardOrBoundary(n uintptr) (heapBits, uintptr) {
	maxn := 4 * ((uintptr(unsafe.Pointer(h.last)) + 1) - uintptr(unsafe.Pointer(h.bitp)))
	if n > maxn {
		n = maxn
	}
	return h.forward(n), n
}

// bits returns the heap bits for the current word.
// The caller can test morePointers and isPointer by &-ing with bitScan and bitPointer.
// The result includes in its higher bits the bits for subsequent words
// described by the same bitmap byte.
//
// nosplit because it is used during write barriers and must not be preempted.
//go:nosplit
func (h heapBits) bits() uint32 {
	// The (shift & 31) eliminates a test and conditional branch
	// from the generated code.
	return uint32(*h.bitp) >> (h.shift & 31)
}

// morePointers reports whether this word or any later word in the object
// may hold a pointer, i.e. whether the scan bit is set.
// h must not describe the second word of the object.
func (h heapBits) morePointers() bool {
	return h.bits()&bitScan != 0
}

// isPointer reports whether the heap bits describe a pointer word.
//
// nosplit because it is used during write barriers and must not be preempted.
//go:nosplit
func (h heapBits) isPointer() bool {
	return h.bits()&bitPointer != 0
}
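
// Illustrative sketch (not part of the original file): a scanner walks an
// object word by word, stops as soon as morePointers reports false, and only
// follows words whose pointer bit is set. The hypothetical helper below models
// that loop over a pre-decoded slice of 2-bit entries instead of live heapBits.
//
//	// Each entry packs the two bits for one word: bit 0 is pointer, bit 1 is scan.
//	func exampleScanWords(entries []uint8, visit func(wordIndex int)) {
//		for i, e := range entries {
//			if e&0b10 == 0 {
//				return // scan bit clear ("dead"): no pointers in this or later words
//			}
//			if e&0b01 != 0 {
//				visit(i) // pointer bit set: this word holds a pointer
//			}
//		}
//	}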

// bulkBarrierPreWrite executes a write barrier
// for every pointer slot in the memory range [src, src+size),
// using pointer/scalar information from [dst, dst+size).
// This executes the write barriers necessary before a memmove.
// src, dst, and size must be pointer-aligned.
// The range [dst, dst+size) must lie within a single object.
// It does not perform the actual writes.
//
// As a special case, src == 0 indicates that this is being used for a
// memclr. bulkBarrierPreWrite will pass 0 for the src of each write
// barrier.
//
// Callers should call bulkBarrierPreWrite immediately before
// calling memmove(dst, src, size). This function is marked nosplit
// to avoid being preempted; the GC must not stop the goroutine
// between the memmove and the execution of the barriers.
// The caller is also responsible for cgo pointer checks if this
// may be writing Go pointers into non-Go memory.
//
// The pointer bitmap is not maintained for allocations containing
// no pointers at all; any caller of bulkBarrierPreWrite must first
// make sure the underlying allocation contains pointers, usually
// by checking typ.ptrdata.
//
// Callers must perform cgo checks if writeBarrier.cgo.
//
//go:nosplit
func bulkBarrierPreWrite(dst, src, size uintptr) {
	if (dst|src|size)&(sys.PtrSize-1) != 0 {
		throw("bulkBarrierPreWrite: unaligned arguments")
	}
	if !writeBarrier.needed {
		return
	}
	if s := spanOf(dst); s == nil {
		// If dst is a global, use the data or BSS bitmaps to
		// execute write barriers.
		for _, datap := range activeModules() {
			if datap.data <= dst && dst < datap.edata {
				bulkBarrierBitmap(dst, src, size, dst-datap.data, datap.gcdatamask.bytedata)
				return
			}
		}
		for _, datap := range activeModules() {
			if datap.bss <= dst && dst < datap.ebss {
				bulkBarrierBitmap(dst, src, size, dst-datap.bss, datap.gcbssmask.bytedata)
				return
			}
		}
		return
	} else if s.state.get() != mSpanInUse || dst < s.base() || s.limit <= dst {
		// dst was heap memory at some point, but isn't now.
		// It can't be a global. It must be either our stack,
		// or in the case of direct channel sends, it could be
		// another stack. Either way, no need for barriers.
		// This will also catch if dst is in a freed span,
		// though that should never happen.
		return
	}

	buf := &getg().m.p.ptr().wbBuf
	h := heapBitsForAddr(dst)
	if src == 0 {
		for i := uintptr(0); i < size; i += sys.PtrSize {
			if h.isPointer() {
				dstx := (*uintptr)(unsafe.Pointer(dst + i))
				if !buf.putFast(*dstx, 0) {
					wbBufFlush(nil, 0)
				}
			}
			h = h.next()
		}
	} else {
		for i := uintptr(0); i < size; i += sys.PtrSize {
			if h.isPointer() {
				dstx := (*uintptr)(unsafe.Pointer(dst + i))
				srcx := (*uintptr)(unsafe.Pointer(src + i))
				if !buf.putFast(*dstx, *srcx) {
					wbBufFlush(nil, 0)
				}
			}
			h = h.next()
		}
	}
}

// bulkBarrierPreWriteSrcOnly is like bulkBarrierPreWrite but
// does not execute write barriers for [dst, dst+size).
//
// In addition to the requirements of bulkBarrierPreWrite
// callers need to ensure [dst, dst+size) is zeroed.
//
// This is used for special cases where e.g. dst was just
// created and zeroed with malloc.
//go:nosplit
func bulkBarrierPreWriteSrcOnly(dst, src, size uintptr) {
	if (dst|src|size)&(sys.PtrSize-1) != 0 {
		throw("bulkBarrierPreWrite: unaligned arguments")
	}
	if !writeBarrier.needed {
		return
	}
	buf := &getg().m.p.ptr().wbBuf
	h := heapBitsForAddr(dst)
	for i := uintptr(0); i < size; i += sys.PtrSize {
		if h.isPointer() {
			srcx := (*uintptr)(unsafe.Pointer(src + i))
			if !buf.putFast(0, *srcx) {
				wbBufFlush(nil, 0)
			}
		}
		h = h.next()
	}
}

// bulkBarrierBitmap executes write barriers for copying from [src,
// src+size) to [dst, dst+size) using a 1-bit pointer bitmap. src is
// assumed to start maskOffset bytes into the data covered by the
// bitmap in bits (which may not be a multiple of 8).
//
// This is used by bulkBarrierPreWrite for writes to data and BSS.
//
//go:nosplit
func bulkBarrierBitmap(dst, src, size, maskOffset uintptr, bits *uint8) {
	word := maskOffset / sys.PtrSize
	bits = addb(bits, word/8)
	mask := uint8(1) << (word % 8)

	buf := &getg().m.p.ptr().wbBuf
	for i := uintptr(0); i < size; i += sys.PtrSize {
		if mask == 0 {
			bits = addb(bits, 1)
			if *bits == 0 {
				// Skip 8 words.
				i += 7 * sys.PtrSize
				continue
			}
			mask = 1
		}
		if *bits&mask != 0 {
			dstx := (*uintptr)(unsafe.Pointer(dst + i))
			if src == 0 {
				if !buf.putFast(*dstx, 0) {
					wbBufFlush(nil, 0)
				}
			} else {
				srcx := (*uintptr)(unsafe.Pointer(src + i))
				if !buf.putFast(*dstx, *srcx) {
					wbBufFlush(nil, 0)
				}
			}
		}
		mask <<= 1
	}
}

// typeBitsBulkBarrier executes a write barrier for every
// pointer that would be copied from [src, src+size) to [dst,
// dst+size) by a memmove using the type bitmap to locate those
// pointer slots.
//
// The type typ must correspond exactly to [src, src+size) and [dst, dst+size).
// dst, src, and size must be pointer-aligned.
// The type typ must have a plain bitmap, not a GC program.
// The only use of this function is in channel sends, and the
// 64 kB channel element limit takes care of this for us.
//
// Must not be preempted because it typically runs right before memmove,
// and the GC must observe the barriers and the memmove as a single atomic action.
//
// Callers must perform cgo checks if writeBarrier.cgo.
//
//go:nosplit
func typeBitsBulkBarrier(typ *_type, dst, src, size uintptr) {
	if typ == nil {
		throw("runtime: typeBitsBulkBarrier without type")
	}
	if typ.size != size {
		println("runtime: typeBitsBulkBarrier with type ", typ.string(), " of size ", typ.size, " but memory size", size)
		throw("runtime: invalid typeBitsBulkBarrier")
	}
	if typ.kind&kindGCProg != 0 {
		println("runtime: typeBitsBulkBarrier with type ", typ.string(), " with GC prog")
		throw("runtime: invalid typeBitsBulkBarrier")
	}
	if !writeBarrier.needed {
		return
	}
	ptrmask := typ.gcdata
	buf := &getg().m.p.ptr().wbBuf
	var bits uint32
	for i := uintptr(0); i < typ.ptrdata; i += sys.PtrSize {
		if i&(sys.PtrSize*8-1) == 0 {
			bits = uint32(*ptrmask)
			ptrmask = addb(ptrmask, 1)
		} else {
			bits = bits >> 1
		}
		if bits&1 != 0 {
			dstx := (*uintptr)(unsafe.Pointer(dst + i))
			srcx := (*uintptr)(unsafe.Pointer(src + i))
			if !buf.putFast(*dstx, *srcx) {
				wbBufFlush(nil, 0)
			}
		}
	}
}

// The methods operating on spans all require that h has been returned
// by heapBitsForSpan and that size, n, total are the span layout description
// returned by the mspan's layout method.
// If total > size*n, it means
that there is extra leftover memory in the span,// usually due to rounding.//// TODO(rsc): Perhaps introduce a different heapBitsSpan type.// initSpan initializes the heap bitmap for a span.// If this is a span of pointer-sized objects, it initializes all// words to pointer/scan.// Otherwise, it initializes all words to scalar/dead.func ( heapBits) ( *mspan) {// Clear bits corresponding to objects. := (.npages << _PageShift) / sys.PtrSizeif %wordsPerBitmapByte != 0 {throw("initSpan: unaligned length") }if .shift != 0 {throw("initSpan: unaligned base") } := sys.PtrSize == 8 && .elemsize == sys.PtrSizefor > 0 { , := .forwardOrBoundary() := / wordsPerBitmapByteif { := .bitpfor := uintptr(0); < ; ++ { * = bitPointerAll | bitScanAll = add1() } } else {memclrNoHeapPointers(unsafe.Pointer(.bitp), ) } = -= }}// countAlloc returns the number of objects allocated in span s by// scanning the allocation bitmap.func ( *mspan) () int { := 0 := divRoundUp(.nelems, 8)// Iterate over each 8-byte chunk and count allocations // with an intrinsic. Note that newMarkBits guarantees that // gcmarkBits will be 8-byte aligned, so we don't have to // worry about edge cases, irrelevant bits will simply be zero.for := uintptr(0); < ; += 8 {// Extract 64 bits from the byte pointer and get a OnesCount. // Note that the unsafe cast here doesn't preserve endianness, // but that's OK. We only care about how many bits are 1, not // about the order we discover them in. := *(*uint64)(unsafe.Pointer(.gcmarkBits.bytep())) += sys.OnesCount64() }return}// heapBitsSetType records that the new allocation [x, x+size)// holds in [x, x+dataSize) one or more values of type typ.// (The number of values is given by dataSize / typ.size.)// If dataSize < size, the fragment [x+dataSize, x+size) is// recorded as non-pointer data.// It is known that the type has pointers somewhere;// malloc does not call heapBitsSetType when there are no pointers,// because all free objects are marked as noscan during// heapBitsSweepSpan.//// There can only be one allocation from a given span active at a time,// and the bitmap for a span always falls on byte boundaries,// so there are no write-write races for access to the heap bitmap.// Hence, heapBitsSetType can access the bitmap without atomics.//// There can be read-write races between heapBitsSetType and things// that read the heap bitmap like scanobject. However, since// heapBitsSetType is only used for objects that have not yet been// made reachable, readers will ignore bits being modified by this// function. This does mean this function cannot transiently modify// bits that belong to neighboring objects. Also, on weakly-ordered// machines, callers must execute a store/store (publication) barrier// between calling this function and making the object reachable.func (, , uintptr, *_type) {const = false// slow but helpful; enable to test modifications to this codeconst ( = bitPointer | bitScan// 00010001 = bitPointer | bitScan | <<heapBitsShift// 00110011 = bitPointer | bitScan | <<heapBitsShift// 01110111 )// dataSize is always size rounded up to the next malloc size class, // except in the case of allocating a defer block, in which case // size is sizeof(_defer{}) (at least 6 words) and dataSize may be // arbitrarily larger. // // The checks for size == sys.PtrSize and size == 2*sys.PtrSize can therefore // assume that dataSize == size without checking it explicitly.ifsys.PtrSize == 8 && == sys.PtrSize {// It's one word and it has pointers, it must be a pointer. 
// Since all allocated one-word objects are pointers // (non-pointers are aggregated into tinySize allocations), // initSpan sets the pointer bits for us. Nothing to do here.if { := heapBitsForAddr()if !.isPointer() {throw("heapBitsSetType: pointer bit missing") }if !.morePointers() {throw("heapBitsSetType: scan bit missing") } }return } := heapBitsForAddr() := .gcdata// start of 1-bit pointer mask (or GC program, handled below)// 2-word objects only have 4 bitmap bits and 3-word objects only have 6 bitmap bits. // Therefore, these objects share a heap bitmap byte with the objects next to them. // These are called out as a special case primarily so the code below can assume all // objects are at least 4 words long and that their bitmaps start either at the beginning // of a bitmap byte, or half-way in (h.shift of 0 and 2 respectively).if == 2*sys.PtrSize {if .size == sys.PtrSize {// We're allocating a block big enough to hold two pointers. // On 64-bit, that means the actual object must be two pointers, // or else we'd have used the one-pointer-sized block. // On 32-bit, however, this is the 8-byte block, the smallest one. // So it could be that we're allocating one pointer and this was // just the smallest block available. Distinguish by checking dataSize. // (In general the number of instances of typ being allocated is // dataSize/typ.size.)ifsys.PtrSize == 4 && == sys.PtrSize {// 1 pointer object. On 32-bit machines clear the bit for the // unused second word. *.bitp &^= (bitPointer | bitScan | (bitPointer|bitScan)<<heapBitsShift) << .shift *.bitp |= (bitPointer | bitScan) << .shift } else {// 2-element array of pointer. *.bitp |= (bitPointer | bitScan | (bitPointer|bitScan)<<heapBitsShift) << .shift }return }// Otherwise typ.size must be 2*sys.PtrSize, // and typ.kind&kindGCProg == 0.if {if .size != 2*sys.PtrSize || .kind&kindGCProg != 0 {print("runtime: heapBitsSetType size=", , " but typ.size=", .size, " gcprog=", .kind&kindGCProg != 0, "\n")throw("heapBitsSetType") } } := uint32(*) := & 3 |= bitScanAll & ((bitScan << (.ptrdata / sys.PtrSize)) - 1)// Clear the bits for this object so we can set the // appropriate ones. *.bitp &^= (bitPointer | bitScan | ((bitPointer | bitScan) << heapBitsShift)) << .shift *.bitp |= uint8( << .shift)return } elseif == 3*sys.PtrSize { := uint8(*)if {if == 0 {println("runtime: invalid type ", .string())throw("heapBitsSetType: called with non-pointer type") }ifsys.PtrSize != 8 {throw("heapBitsSetType: unexpected 3 pointer wide size class on 32 bit") }if .kind&kindGCProg != 0 {throw("heapBitsSetType: unexpected GC prog for 3 pointer wide size class") }if .size == 2*sys.PtrSize {print("runtime: heapBitsSetType size=", , " but typ.size=", .size, "\n")throw("heapBitsSetType: inconsistent object sizes") } }if .size == sys.PtrSize {// The type contains a pointer otherwise heapBitsSetType wouldn't have been called. // Since the type is only 1 pointer wide and contains a pointer, its gcdata must be exactly 1.if && *.gcdata != 1 {print("runtime: heapBitsSetType size=", , " typ.size=", .size, "but *typ.gcdata", *.gcdata, "\n")throw("heapBitsSetType: unexpected gcdata for 1 pointer wide type size in 3 pointer wide size class") }// 3 element array of pointers. Unrolling ptrmask 3 times into p yields 00000111. = 7 } := & 7// Set bitScan bits for all pointers. |= << wordsPerBitmapByte// First bitScan bit is always set since the type contains pointers. |= bitScan// Second bitScan bit needs to also be set if the third bitScan bit is set. 
|= & (bitScan << (2 * heapBitsShift)) >> 1// For h.shift > 1 heap bits cross a byte boundary and need to be written part // to h.bitp and part to the next h.bitp.switch .shift {case0: *.bitp &^= << 0 *.bitp |= << 0case1: *.bitp &^= << 1 *.bitp |= << 1case2: *.bitp &^= << 2 *.bitp |= ( & ) << 2// Two words written to the first byte. // Advance two words to get to the next byte. = .next().next() *.bitp &^= *.bitp |= ( >> 2) & case3: *.bitp &^= << 3 *.bitp |= ( & ) << 3// One word written to the first byte. // Advance one word to get to the next byte. = .next() *.bitp &^= *.bitp |= ( >> 1) & }return }// Copy from 1-bit ptrmask into 2-bit bitmap. // The basic approach is to use a single uintptr as a bit buffer, // alternating between reloading the buffer and writing bitmap bytes. // In general, one load can supply two bitmap byte writes. // This is a lot of lines of code, but it compiles into relatively few // machine instructions. := falseifarenaIndex(+-1) != arenaIdx(.arena) || ( && fastrand()%2 == 0) {// This object spans heap arenas, so the bitmap may be // discontiguous. Unroll it into the object instead // and then copy it out. // // In doubleCheck mode, we randomly do this anyway to // stress test the bitmap copying path. = true .bitp = (*uint8)(unsafe.Pointer()) .last = nil }var (// Ptrmask input. *byte// last ptrmask byte readuintptr// ptrmask bits already loadeduintptr// number of bits in b at next read *byte// final ptrmask byte to read (then repeat)uintptr// number of valid bits in *endpuintptr// alternate source of bits// Heap bitmap output.uintptr// words processeduintptr// number of words to process *byte// next heap bitmap byte to writeuintptr// bits being prepared for *hbitp ) = .bitp// Handle GC program. Delayed until this part of the code // so that we can use the same double-checking mechanism // as the 1-bit case. Nothing above could have encountered // GC programs: the cases were all too small.if .kind&kindGCProg != 0 {heapBitsSetTypeGCProg(, .ptrdata, .size, , , addb(.gcdata, 4))if {// Double-check the heap bits written by GC program // by running the GC program to create a 1-bit pointer mask // and then jumping to the double-check code below. // This doesn't catch bugs shared between the 1-bit and 4-bit // GC program execution, but it does catch mistakes specific // to just one of those and bugs in heapBitsSetTypeGCProg's // implementation of arrays.lock(&debugPtrmask.lock)ifdebugPtrmask.data == nil {debugPtrmask.data = (*byte)(persistentalloc(1<<20, 1, &memstats.other_sys)) } = debugPtrmask.datarunGCProg(addb(.gcdata, 4), nil, , 1) }goto }// Note about sizes: // // typ.size is the number of words in the object, // and typ.ptrdata is the number of words in the prefix // of the object that contains pointers. That is, the final // typ.size - typ.ptrdata words contain no pointers. // This allows optimization of a common pattern where // an object has a small header followed by a large scalar // buffer. If we know the pointers are over, we don't have // to scan the buffer's heap bitmap at all. // The 1-bit ptrmasks are sized to contain only bits for // the typ.ptrdata prefix, zero padded out to a full byte // of bitmap. This code sets nw (below) so that heap bitmap // bits are only written for the typ.ptrdata prefix; if there is // more room in the allocated object, the next heap bitmap // entry is a 00, indicating that there are no more pointers // to scan. So only the ptrmask for the ptrdata bytes is needed. 
// // Replicated copies are not as nice: if there is an array of // objects with scalar tails, all but the last tail does have to // be initialized, because there is no way to say "skip forward". // However, because of the possibility of a repeated type with // size not a multiple of 4 pointers (one heap bitmap byte), // the code already must handle the last ptrmask byte specially // by treating it as containing only the bits for endnb pointers, // where endnb <= 4. We represent large scalar tails that must // be expanded in the replication by setting endnb larger than 4. // This will have the effect of reading many bits out of b, // but once the real bits are shifted out, b will supply as many // zero bits as we try to read, which is exactly what we need. = if .size < {// Filling in bits for an array of typ. // Set up for repetition of ptrmask during main loop. // Note that ptrmask describes only a prefix ofconst = sys.PtrSize*8 - 7if .ptrdata/sys.PtrSize <= {// Entire ptrmask fits in uintptr with room for a byte fragment. // Load into pbits and never read from ptrmask again. // This is especially important when the ptrmask has // fewer than 8 bits in it; otherwise the reload in the middle // of the Phase 2 loop would itself need to loop to gather // at least 8 bits.// Accumulate ptrmask into b. // ptrmask is sized to describe only typ.ptrdata, but we record // it as describing typ.size bytes, since all the high bits are zero. = .ptrdata / sys.PtrSizefor := uintptr(0); < ; += 8 { |= uintptr(*) << = add1() } = .size / sys.PtrSize// Replicate ptrmask to fill entire pbits uintptr. // Doubling and truncating is fewer steps than // iterating by nb each time. (nb could be 1.) // Since we loaded typ.ptrdata/sys.PtrSize bits // but are pretending to have typ.size/sys.PtrSize, // there might be no replication necessary/possible. = = if + <= {for <= sys.PtrSize*8 { |= << += }// Truncate to a multiple of original ptrmask. // Because nb+nb <= maxBits, nb fits in a byte. // Byte division is cheaper than uintptr division. = uintptr(/byte()) * &= 1<< - 1 = = }// Clear p and endp as sentinel for using pbits. // Checked during Phase 2 loop. = nil = nil } else {// Ptrmask is larger. Read it multiple times. := (.ptrdata/sys.PtrSize+7)/8 - 1 = addb(, ) = .size/sys.PtrSize - *8 } }if != nil { = uintptr(*) = add1() = 8 }if .size == {// Single entry: can stop once we reach the non-pointer data. = .ptrdata / sys.PtrSize } else {// Repeated instances of typ in an array. // Have to process first N-1 entries in full, but can stop // once we reach the non-pointer data in the final entry. = ((/.size-1)*.size + .ptrdata) / sys.PtrSize }if == 0 {// No pointers! Caller was supposed to check.println("runtime: invalid type ", .string())throw("heapBitsSetType: called with non-pointer type")return }// Phase 1: Special case for leading byte (shift==0) or half-byte (shift==2). // The leading byte is special because it contains the bits for word 1, // which does not have the scan bit set. // The leading half-byte is special because it's a half a byte, // so we have to be careful with the bits already there.switch {default:throw("heapBitsSetType: unexpected shift")case .shift == 0:// Ptrmask and heap bitmap are aligned. // // This is a fast path for small objects. // // The first byte we write out covers the first four // words of the object. The scan/dead bit on the first // word must be set to scan since there are pointers // somewhere in the object. 
// In all following words, we set the scan/dead // appropriately to indicate that the object continues // to the next 2-bit entry in the bitmap. // // We set four bits at a time here, but if the object // is fewer than four words, phase 3 will clear // unnecessary bits. = & bitPointerAll |= bitScanAllif += 4; >= {goto } * = uint8() = add1() >>= 4 -= 4case .shift == 2:// Ptrmask and heap bitmap are misaligned. // // On 32 bit architectures only the 6-word object that corresponds // to a 24 bytes size class can start with h.shift of 2 here since // all other non 16 byte aligned size classes have been handled by // special code paths at the beginning of heapBitsSetType on 32 bit. // // Many size classes are only 16 byte aligned. On 64 bit architectures // this results in a heap bitmap position starting with a h.shift of 2. // // The bits for the first two words are in a byte shared // with another object, so we must be careful with the bits // already there. // // We took care of 1-word, 2-word, and 3-word objects above, // so this is at least a 6-word object. = ( & (bitPointer | bitPointer<<heapBitsShift)) << (2 * heapBitsShift) |= bitScan << (2 * heapBitsShift)if > 1 { |= bitScan << (3 * heapBitsShift) } >>= 2 -= 2 * &^= uint8((bitPointer | bitScan | ((bitPointer | bitScan) << heapBitsShift)) << (2 * heapBitsShift)) * |= uint8() = add1()if += 2; >= {// We know that there is more data, because we handled 2-word and 3-word objects above. // This must be at least a 6-word object. If we're out of pointer words, // mark no scan in next bitmap byte and finish. = 0 += 4goto } }// Phase 2: Full bytes in bitmap, up to but not including write to last byte (full or partial) in bitmap. // The loop computes the bits for that last write but does not execute the write; // it leaves the bits in hb for processing by phase 3. // To avoid repeated adjustment of nb, we subtract out the 4 bits we're going to // use in the first half of the loop right now, and then we only adjust nb explicitly // if the 8 bits used by each iteration isn't balanced by 8 bits loaded mid-loop. -= 4for {// Emit bitmap byte. // b has at least nb+4 bits, with one exception: // if w+4 >= nw, then b has only nw-w bits, // but we'll stop at the break and then truncate // appropriately in Phase 3. = & bitPointerAll |= bitScanAllif += 4; >= {break } * = uint8() = add1() >>= 4// Load more bits. b has nb right now.if != {// Fast path: keep reading from ptrmask. // nb unmodified: we just loaded 8 bits, // and the next iteration will consume 8 bits, // leaving us with the same nb the next time we're here.if < 8 { |= uintptr(*) << = add1() } else {// Reduce the number of bits in b. // This is important if we skipped // over a scalar tail, since nb could // be larger than the bit width of b. -= 8 } } elseif == nil {// Almost as fast path: track bit count and refill from pbits. // For short repetitions.if < 8 { |= << += } -= 8// for next iteration } else {// Slow path: reached end of ptrmask. // Process final partial byte and rewind to start. |= uintptr(*) << += if < 8 { |= uintptr(*) << = add1() } else { -= 8 = } }// Emit bitmap byte. = & bitPointerAll |= bitScanAllif += 4; >= {break } * = uint8() = add1() >>= 4 }:// Phase 3: Write last byte or partial byte and zero the rest of the bitmap entries.if > {// Counting the 4 entries in hb not yet written to memory, // there are more entries than possible pointer slots. // Discard the excess entries (can't be more than 3). 
:= uintptr(1)<<(4-(-)) - 1 &= | <<4// apply mask to both pointer bits and scan bits }// Change nw from counting possibly-pointer words to total words in allocation. = / sys.PtrSize// Write whole bitmap bytes. // The first is hb, the rest are zero.if <= { * = uint8() = add1() = 0// for possible final half-byte belowfor += 4; <= ; += 4 { * = 0 = add1() } }// Write final partial bitmap byte if any. // We know w > nw, or else we'd still be in the loop above. // It can be bigger only due to the 4 entries in hb that it counts. // If w == nw+4 then there's nothing left to do: we wrote all nw entries // and can discard the 4 sitting in hb. // But if w == nw+2, we need to write first two in hb. // The byte is shared with the next object, so be careful with // existing bits.if == +2 { * = *&^(bitPointer|bitScan|(bitPointer|bitScan)<<heapBitsShift) | uint8() }:// Phase 4: Copy unrolled bitmap to per-arena bitmaps, if necessary.if {// TODO: We could probably make this faster by // handling [x+dataSize, x+size) specially. := heapBitsForAddr()// cnw is the number of heap words, or bit pairs // remaining (like nw above). := / sys.PtrSize := (*uint8)(unsafe.Pointer())// We know the first and last byte of the bitmap are // not the same, but it's still possible for small // objects span arenas, so it may share bitmap bytes // with neighboring objects. // // Handle the first byte specially if it's shared. See // Phase 1 for why this is the only special case we need.if {if !(.shift == 0 || .shift == 2) {print("x=", , " size=", , " cnw=", .shift, "\n")throw("bad start shift") } }if .shift == 2 { *.bitp = *.bitp&^((bitPointer|bitScan|(bitPointer|bitScan)<<heapBitsShift)<<(2*heapBitsShift)) | * = .next().next() -= 2 = addb(, 1) }// We're now byte aligned. Copy out to per-arena // bitmaps until the last byte (which may again be // partial).for >= 4 {// This loop processes four words at a time, // so round cnw down accordingly. , := .forwardOrBoundary( / 4 * 4)// n is the number of bitmap bytes to copy. := / 4memmove(unsafe.Pointer(.bitp), unsafe.Pointer(), ) -= = = addb(, ) }if && .shift != 0 {print("cnw=", , " h.shift=", .shift, "\n")throw("bad shift after block copy") }// Handle the last byte if it's shared.if == 2 { *.bitp = *.bitp&^(bitPointer|bitScan|(bitPointer|bitScan)<<heapBitsShift) | * = addb(, 1) = .next().next() }if {ifuintptr(unsafe.Pointer()) > + {throw("copy exceeded object size") }if !( == 0 || == 2) {print("x=", , " size=", , " cnw=", , "\n")throw("bad number of remaining words") }// Set up hbitp so doubleCheck code below can check it. = .bitp }// Zero the object where we wrote the bitmap.memclrNoHeapPointers(unsafe.Pointer(), uintptr(unsafe.Pointer())-) }// Double check the whole bitmap.if {// x+size may not point to the heap, so back up one // word and then advance it the way we do above. := heapBitsForAddr( + - sys.PtrSize)if {// In out-of-place copying, we just advance // using next. = .next() } else {// Don't use next because that may advance to // the next arena and the in-place logic // doesn't do that. 
.shift += heapBitsShiftif .shift == 4*heapBitsShift { .bitp, .shift = add1(.bitp), 0 } }if .kind&kindGCProg == 0 && ( != .bitp || ( == +2) != (.shift == 2)) {println("ended at wrong bitmap byte for", .string(), "x", /.size)print("typ.size=", .size, " typ.ptrdata=", .ptrdata, " dataSize=", , " size=", , "\n")print("w=", , " nw=", , " b=", hex(), " nb=", , " hb=", hex(), "\n") := heapBitsForAddr()print("initial bits h0.bitp=", .bitp, " h0.shift=", .shift, "\n")print("ended at hbitp=", , " but next starts at bitp=", .bitp, " shift=", .shift, "\n")throw("bad heapBitsSetType") }// Double-check that bits to be written were written correctly. // Does not check that other bits were not written, unfortunately. := heapBitsForAddr() := .ptrdata / sys.PtrSize := .size / sys.PtrSize := / .size := ((-1)*.size + .ptrdata) / sys.PtrSizefor := uintptr(0); < /sys.PtrSize; ++ { := % var , uint8 = (*.bitp >> .shift) & (bitPointer | bitScan)if >= {if .kind&kindGCProg != 0 && < (+3)/4*4 {// heapBitsSetTypeGCProg always fills // in full nibbles of bitScan. = bitScan } } else {if < && (*addb(, /8)>>(%8))&1 != 0 { |= bitPointer } |= bitScan }if != {println("mismatch writing bits for", .string(), "x", /.size)print("typ.size=", .size, " typ.ptrdata=", .ptrdata, " dataSize=", , " size=", , "\n")print("kindGCProg=", .kind&kindGCProg != 0, " outOfPlace=", , "\n")print("w=", , " nw=", , " b=", hex(), " nb=", , " hb=", hex(), "\n") := heapBitsForAddr()print("initial bits h0.bitp=", .bitp, " h0.shift=", .shift, "\n")print("current bits h.bitp=", .bitp, " h.shift=", .shift, " *h.bitp=", hex(*.bitp), "\n")print("ptrmask=", , " p=", , " endp=", , " endnb=", , " pbits=", hex(), " b=", hex(), " nb=", , "\n")println("at word", , "offset", *sys.PtrSize, "have", hex(), "want", hex())if .kind&kindGCProg != 0 {println("GC program:")dumpGCProg(addb(.gcdata, 4)) }throw("bad heapBitsSetType") } = .next() }if == debugPtrmask.data {unlock(&debugPtrmask.lock) } }}vardebugPtrmaskstruct { lock mutex data *byte}// heapBitsSetTypeGCProg implements heapBitsSetType using a GC program.// progSize is the size of the memory described by the program.// elemSize is the size of the element that the GC program describes (a prefix of).// dataSize is the total size of the intended data, a multiple of elemSize.// allocSize is the total size of the allocated memory.//// GC programs are only used for large allocations.// heapBitsSetType requires that allocSize is a multiple of 4 words,// so that the relevant bitmap bytes are not shared with surrounding// objects.func ( heapBits, , , , uintptr, *byte) {ifsys.PtrSize == 8 && %(4*sys.PtrSize) != 0 {// Alignment will be wrong.throw("heapBitsSetTypeGCProg: small allocation") }varuintptrif == { = runGCProg(, nil, .bitp, 2)if *sys.PtrSize != {println("runtime: heapBitsSetTypeGCProg: total bits", , "but progSize", )throw("heapBitsSetTypeGCProg: unexpected bit count") } } else { := / // Piece together program trailer to run after prog that does: // literal(0) // repeat(1, elemSize-progSize-1) // zeros to fill element size // repeat(elemSize, count-1) // repeat that element for count // This zero-pads the data remaining in the first element and then // repeats that first element to fill the array.var [40]byte// 3 varints (max 10 each) + some bytes := 0if := /sys.PtrSize - /sys.PtrSize; > 0 {// literal(0) [] = 0x01 ++ [] = 0 ++if > 1 {// repeat(1, n-1) [] = 0x81 ++ --for ; >= 0x80; >>= 7 { [] = byte( | 0x80) ++ } [] = byte() ++ } }// repeat(elemSize/ptrSize, count-1) [] = 0x80 ++ := / sys.PtrSizefor ; >= 0x80; >>= 7 
{ [] = byte( | 0x80) ++ } [] = byte() ++ = - 1for ; >= 0x80; >>= 7 { [] = byte( | 0x80) ++ } [] = byte() ++ [] = 0 ++runGCProg(, &[0], .bitp, 2)// Even though we filled in the full array just now, // record that we only filled in up to the ptrdata of the // last element. This will cause the code below to // memclr the dead section of the final array element, // so that scanobject can stop early in the final element. = (*(-1) + ) / sys.PtrSize } := unsafe.Pointer(addb(.bitp, (+3)/4)) := unsafe.Pointer(addb(.bitp, /sys.PtrSize/wordsPerBitmapByte))memclrNoHeapPointers(, uintptr()-uintptr())}// progToPointerMask returns the 1-bit pointer mask output by the GC program prog.// size the size of the region described by prog, in bytes.// The resulting bitvector will have no more than size/sys.PtrSize bits.func ( *byte, uintptr) bitvector { := (/sys.PtrSize + 7) / 8 := (*[1 << 30]byte)(persistentalloc(+1, 1, &memstats.buckhash_sys))[:+1] [len()-1] = 0xa1// overflow check sentinel = runGCProg(, nil, &[0], 1)if [len()-1] != 0xa1 {throw("progToPointerMask: overflow") }returnbitvector{int32(), &[0]}}// Packed GC pointer bitmaps, aka GC programs.//// For large types containing arrays, the type information has a// natural repetition that can be encoded to save space in the// binary and in the memory representation of the type information.//// The encoding is a simple Lempel-Ziv style bytecode machine// with the following instructions://// 00000000: stop// 0nnnnnnn: emit n bits copied from the next (n+7)/8 bytes// 10000000 n c: repeat the previous n bits c times; n, c are varints// 1nnnnnnn c: repeat the previous n bits c times; c is a varint// runGCProg executes the GC program prog, and then trailer if non-nil,// writing to dst with entries of the given size.// If size == 1, dst is a 1-bit pointer mask laid out moving forward from dst.// If size == 2, dst is the 2-bit heap bitmap, and writes move backward// starting at dst (because the heap bitmap does). In this case, the caller guarantees// that only whole bytes in dst need to be written.//// runGCProg returns the number of 1- or 2-bit entries written to memory.func (, , *byte, int) uintptr { := // Bits waiting to be written to memory.varuintptrvaruintptr := :for {// Flush accumulated full bytes. // The rest of the loop assumes that nbits <= 7.for ; >= 8; -= 8 {if == 1 { * = uint8() = add1() >>= 8 } else { := &bitPointerAll | bitScanAll * = uint8() = add1() >>= 4 = &bitPointerAll | bitScanAll * = uint8() = add1() >>= 4 } }// Process one instruction. := uintptr(*) = add1() := & 0x7Fif &0x80 == 0 {// Literal bits; n == 0 means end of program.if == 0 {// Program is over; continue in trailer if present.if != nil { = = nilcontinue }break } := / 8for := uintptr(0); < ; ++ { |= uintptr(*) << = add1()if == 1 { * = uint8() = add1() >>= 8 } else { := &0xf | bitScanAll * = uint8() = add1() >>= 4 = &0xf | bitScanAll * = uint8() = add1() >>= 4 } }if %= 8; > 0 { |= uintptr(*) << = add1() += }continue }// Repeat. If n == 0, it is encoded in a varint in the next bytes.if == 0 {for := uint(0); ; += 7 { := uintptr(*) = add1() |= ( & 0x7F) << if &0x80 == 0 {break } } }// Count is encoded in a varint in the next bytes. := uintptr(0)for := uint(0); ; += 7 { := uintptr(*) = add1() |= ( & 0x7F) << if &0x80 == 0 {break } } *= // now total number of bits to copy// If the number of bits being repeated is small, load them // into a register and use that register for the entire loop // instead of repeatedly reading from memory. 
// Handling fewer than 8 bits here makes the general loop simpler. // The cutoff is sys.PtrSize*8 - 7 to guarantee that when we add // the pattern to a bit buffer holding at most 7 bits (a partial byte) // it will not overflow. := const = sys.PtrSize*8 - 7if <= {// Start with bits in output buffer. := := // If we need more bits, fetch them from memory.if == 1 { = subtract1()for < { <<= 8 |= uintptr(*) = subtract1() += 8 } } else { = subtract1()for < { <<= 4 |= uintptr(*) & 0xf = subtract1() += 4 } }// We started with the whole bit output buffer, // and then we loaded bits from whole bytes. // Either way, we might now have too many instead of too few. // Discard the extra.if > { >>= - = }// Replicate pattern to at most maxBits.if == 1 {// One bit being repeated. // If the bit is 1, make the pattern all 1s. // If the bit is 0, the pattern is already all 0s, // but we can claim that the number of bits // in the word is equal to the number we need (c), // because right shift of bits will zero fill.if == 1 { = 1<< - 1 = } else { = } } else { := := if + <= {// Double pattern until the whole uintptr is filled.for <= sys.PtrSize*8 { |= << += }// Trim away incomplete copy of original pattern in high bits. // TODO(rsc): Replace with table lookup or loop on systems without divide? = / * &= 1<< - 1 = = } }// Add pattern to bit buffer and flush bit buffer, c/npattern times. // Since pattern contains >8 bits, there will be full bytes to flush // on each iteration.for ; >= ; -= { |= << += if == 1 {for >= 8 { * = uint8() = add1() >>= 8 -= 8 } } else {for >= 4 { * = uint8(&0xf | bitScanAll) = add1() >>= 4 -= 4 } } }// Add final fragment to bit buffer.if > 0 { &= 1<< - 1 |= << += }continue }// Repeat; n too large to fit in a register. // Since nbits <= 7, we know the first few bytes of repeated data // are already written to memory. := - // n > nbits because n > maxBits and nbits <= 7if == 1 {// Leading src fragment. = subtractb(, (+7)/8)if := & 7; != 0 { |= uintptr(*) >> (8 - ) << = add1() += -= }// Main loop: load one byte, write another. // The bits are rotating through the bit buffer.for := / 8; > 0; -- { |= uintptr(*) << = add1() * = uint8() = add1() >>= 8 }// Final src fragment.if %= 8; > 0 { |= (uintptr(*) & (1<< - 1)) << += } } else {// Leading src fragment. = subtractb(, (+3)/4)if := & 3; != 0 { |= (uintptr(*) & 0xf) >> (4 - ) << = add1() += -= }// Main loop: load one byte, write another. // The bits are rotating through the bit buffer.for := / 4; > 0; -- { |= (uintptr(*) & 0xf) << = add1() * = uint8(&0xf | bitScanAll) = add1() >>= 4 }// Final src fragment.if %= 4; > 0 { |= (uintptr(*) & (1<< - 1)) << += } } }// Write any final bits out, using full-byte writes, even for the final byte.varuintptrif == 1 { = (uintptr(unsafe.Pointer())-uintptr(unsafe.Pointer()))*8 + += - & 7for ; > 0; -= 8 { * = uint8() = add1() >>= 8 } } else { = (uintptr(unsafe.Pointer())-uintptr(unsafe.Pointer()))*4 + += - & 3for ; > 0; -= 4 { := &0xf | bitScanAll * = uint8() = add1() >>= 4 } }return}// materializeGCProg allocates space for the (1-bit) pointer bitmask// for an object of size ptrdata. Then it fills that space with the// pointer bitmask specified by the program prog.// The bitmask starts at s.startAddr.// The result must be deallocated with dematerializeGCProg.func ( uintptr, *byte) *mspan {// Each word of ptrdata needs one bit in the bitmap. := divRoundUp(, 8*sys.PtrSize)// Compute the number of pages needed for bitmapBytes. 
:= divRoundUp(, pageSize) := mheap_.allocManual(, spanAllocPtrScalarBits)runGCProg(addb(, 4), nil, (*byte)(unsafe.Pointer(.startAddr)), 1)return}func ( *mspan) {mheap_.freeManual(, spanAllocPtrScalarBits)}func ( *byte) { := 0for { := * = add1()if == 0 {print("\t", , " end\n")break }if &0x80 == 0 {print("\t", , " lit ", , ":") := int(+7) / 8for := 0; < ; ++ {print(" ", hex(*)) = add1() }print("\n") += int() } else { := int( &^ 0x80)if == 0 {for := uint(0); ; += 7 { := * = add1() |= int(&0x7f) << if &0x80 == 0 {break } } } := 0for := uint(0); ; += 7 { := * = add1() |= int(&0x7f) << if &0x80 == 0 {break } }print("\t", , " repeat ", , " × ", , "\n") += * } }}// Testing.func ( *stkframe, unsafe.Pointer) bool { := (*stkframe)()if .sp <= .sp && .sp < .varp { * = *returnfalse }returntrue}// gcbits returns the GC type info for x, for testing.// The result is the bitmap entries (0 or 1), one entry per byte.//go:linkname reflect_gcbits reflect.gcbitsfunc ( interface{}) []byte { := getgcmask() := (*ptrtype)(unsafe.Pointer(efaceOf(&)._type)).elem := .ptrdata / sys.PtrSizeforuintptr(len()) > && [len()-1] == 0 { = [:len()-1] }return}// Returns GC type info for the pointer stored in ep for testing.// If ep points to the stack, only static live information will be returned// (i.e. not for objects which are only dynamically live stack objects).func ( interface{}) ( []byte) { := *efaceOf(&) := .data := ._type// data or bssfor , := rangeactiveModules() {// dataif .data <= uintptr() && uintptr() < .edata { := .gcdatamask.bytedata := (*ptrtype)(unsafe.Pointer()).elem.size = make([]byte, /sys.PtrSize)for := uintptr(0); < ; += sys.PtrSize { := (uintptr() + - .data) / sys.PtrSize [/sys.PtrSize] = (*addb(, /8) >> ( % 8)) & 1 }return }// bssif .bss <= uintptr() && uintptr() < .ebss { := .gcbssmask.bytedata := (*ptrtype)(unsafe.Pointer()).elem.size = make([]byte, /sys.PtrSize)for := uintptr(0); < ; += sys.PtrSize { := (uintptr() + - .bss) / sys.PtrSize [/sys.PtrSize] = (*addb(, /8) >> ( % 8)) & 1 }return } }// heapif , , := findObject(uintptr(), 0, 0); != 0 { := heapBitsForAddr() := .elemsize = make([]byte, /sys.PtrSize)for := uintptr(0); < ; += sys.PtrSize {if .isPointer() { [/sys.PtrSize] = 1 }if !.morePointers() { = [:/sys.PtrSize]break } = .next() }return }// stackif := getg(); .m.curg.stack.lo <= uintptr() && uintptr() < .m.curg.stack.hi {varstkframe .sp = uintptr() := getg()gentraceback(.m.curg.sched.pc, .m.curg.sched.sp, 0, .m.curg, 0, nil, 1000, getgcmaskcb, noescape(unsafe.Pointer(&)), 0)if .fn.valid() { , , := getStackMap(&, nil, false)if .n == 0 {return } := uintptr(.n) * sys.PtrSize := (*ptrtype)(unsafe.Pointer()).elem.size = make([]byte, /sys.PtrSize)for := uintptr(0); < ; += sys.PtrSize { := (uintptr() + - .varp + ) / sys.PtrSize [/sys.PtrSize] = .ptrbit() } }return }// otherwise, not something the GC knows about. // possibly read-only data, like malloc(0). // must not have pointersreturn}
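
// Illustrative sketch (not part of the original file): the data and bss
// bitmaps described at the top of this file pack one bit per pointer-sized
// word, which is also the form getgcmask above consults for globals. The
// hypothetical helper below shows how such a packed mask is read.
//
//	// exampleWordIsPointer reports whether word i of the region covered by
//	// mask holds a pointer.
//	func exampleWordIsPointer(mask []byte, i uintptr) bool {
//		return mask[i/8]>>(i%8)&1 != 0
//	}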