From 07f7626dfcf7b2cb8fea6d9de279ad37931bf7e4 Mon Sep 17 00:00:00 2001
From: Egor Bogatov
Date: Sun, 12 May 2024 08:34:37 +0200
Subject: [PATCH] ARM64: avoid memory barrier in InlinedMemmoveGCRefsHelper if
 dest is not on heap (#102084)

* don't put full barrier if dest is not on the heap

* Address feedback

---------

Co-authored-by: Jan Kotas
---
 .../classlibnative/bcltype/arraynative.inl | 12 +++++-
 .../nativeaot/Runtime/GCMemoryHelpers.cpp  | 23 +++++++++---
 .../nativeaot/Runtime/GCMemoryHelpers.inl  | 11 ++----
 src/coreclr/vm/gchelpers.cpp               | 37 +------------------
 src/coreclr/vm/gchelpers.h                 |  1 -
 src/coreclr/vm/gchelpers.inl               |  8 +---
 src/mono/mono/metadata/icall.c             |  3 ++
 7 files changed, 36 insertions(+), 59 deletions(-)

diff --git a/src/coreclr/classlibnative/bcltype/arraynative.inl b/src/coreclr/classlibnative/bcltype/arraynative.inl
index 913b8c64939b84..632b83905849f5 100644
--- a/src/coreclr/classlibnative/bcltype/arraynative.inl
+++ b/src/coreclr/classlibnative/bcltype/arraynative.inl
@@ -307,7 +307,12 @@ FORCEINLINE void InlinedMemmoveGCRefsHelper(void *dest, const void *src, size_t
     _ASSERTE(CheckPointer(dest));
     _ASSERTE(CheckPointer(src));
 
-    GCHeapMemoryBarrier();
+    const bool notInHeap = ((BYTE*)dest < g_lowest_address || (BYTE*)dest >= g_highest_address);
+
+    if (!notInHeap)
+    {
+        GCHeapMemoryBarrier();
+    }
 
     // To be able to copy forwards, the destination buffer cannot start inside the source buffer
     if ((size_t)dest - (size_t)src >= len)
@@ -319,7 +324,10 @@ FORCEINLINE void InlinedMemmoveGCRefsHelper(void *dest, const void *src, size_t
         InlinedBackwardGCSafeCopyHelper(dest, src, len);
     }
 
-    InlinedSetCardsAfterBulkCopyHelper((Object**)dest, len);
+    if (!notInHeap)
+    {
+        InlinedSetCardsAfterBulkCopyHelper((Object**)dest, len);
+    }
 }
 
 #endif // !_ARRAYNATIVE_INL_
diff --git a/src/coreclr/nativeaot/Runtime/GCMemoryHelpers.cpp b/src/coreclr/nativeaot/Runtime/GCMemoryHelpers.cpp
index 0154bd1fde5077..b34c41c37bb34b 100644
--- a/src/coreclr/nativeaot/Runtime/GCMemoryHelpers.cpp
+++ b/src/coreclr/nativeaot/Runtime/GCMemoryHelpers.cpp
@@ -44,17 +44,28 @@ FCIMPLEND
 
 FCIMPL3(void, RhBulkMoveWithWriteBarrier, uint8_t* pDest, uint8_t* pSrc, size_t cbDest)
 {
-    // It is possible that the bulk write is publishing object references accessible so far only
-    // by the current thread to shared memory.
-    // The memory model requires that writes performed by current thread are observable no later
-    // than the writes that will actually publish the references.
-    GCHeapMemoryBarrier();
+    if (cbDest == 0 || pDest == pSrc)
+        return;
+
+    const bool notInHeap = pDest < g_lowest_address || pDest >= g_highest_address;
+
+    if (!notInHeap)
+    {
+        // It is possible that the bulk write is publishing object references accessible so far only
+        // by the current thread to shared memory.
+        // The memory model requires that writes performed by current thread are observable no later
+        // than the writes that will actually publish the references.
+        GCHeapMemoryBarrier();
+    }
 
     if (pDest <= pSrc || pSrc + cbDest <= pDest)
         InlineForwardGCSafeCopy(pDest, pSrc, cbDest);
     else
         InlineBackwardGCSafeCopy(pDest, pSrc, cbDest);
 
-    InlinedBulkWriteBarrier(pDest, cbDest);
+    if (!notInHeap)
+    {
+        InlinedBulkWriteBarrier(pDest, cbDest);
+    }
 }
 FCIMPLEND
diff --git a/src/coreclr/nativeaot/Runtime/GCMemoryHelpers.inl b/src/coreclr/nativeaot/Runtime/GCMemoryHelpers.inl
index cb4adbfc6a275f..033b83c738ce25 100644
--- a/src/coreclr/nativeaot/Runtime/GCMemoryHelpers.inl
+++ b/src/coreclr/nativeaot/Runtime/GCMemoryHelpers.inl
@@ -228,14 +228,9 @@ FORCEINLINE void InlineCheckedWriteBarrier(void * dst, void * ref)
 
 FORCEINLINE void InlinedBulkWriteBarrier(void* pMemStart, size_t cbMemSize)
 {
-    // Check whether the writes were even into the heap. If not there's no card update required.
-    // Also if the size is smaller than a pointer, no write barrier is required.
-    // This case can occur with universal shared generic code where the size
-    // is not known at compile time.
-    if (pMemStart < g_lowest_address || (pMemStart >= g_highest_address) || (cbMemSize < sizeof(uintptr_t)))
-    {
-        return;
-    }
+    // Caller is expected to check whether the writes were even into the heap
+    ASSERT(cbMemSize >= sizeof(uintptr_t));
+    ASSERT((pMemStart >= g_lowest_address) && (pMemStart < g_highest_address));
 
 #ifdef WRITE_BARRIER_CHECK
     // Perform shadow heap updates corresponding to the gc heap updates that immediately preceded this helper
diff --git a/src/coreclr/vm/gchelpers.cpp b/src/coreclr/vm/gchelpers.cpp
index ecbf8f09ce293d..63754563b496b4 100644
--- a/src/coreclr/vm/gchelpers.cpp
+++ b/src/coreclr/vm/gchelpers.cpp
@@ -1484,39 +1484,4 @@ void ErectWriteBarrierForMT(MethodTable **dst, MethodTable *ref)
             }
         }
     }
-}
-
-//----------------------------------------------------------------------------
-//
-// Write Barrier Support for bulk copy ("Clone") operations
-//
-// StartPoint is the target bulk copy start point
-// len is the length of the bulk copy (in bytes)
-//
-//
-// Performance Note:
-//
-// This is implemented somewhat "conservatively", that is we
-// assume that all the contents of the bulk copy are object
-// references. If they are not, and the value lies in the
-// ephemeral range, we will set false positives in the card table.
-//
-// We could use the pointer maps and do this more accurately if necessary
-
-#if defined(_MSC_VER) && defined(TARGET_X86)
-#pragma optimize("y", on)        // Small critical routines, don't put in EBP frame
-#endif //_MSC_VER && TARGET_X86
-
-void
-SetCardsAfterBulkCopy(Object **start, size_t len)
-{
-    // If the size is smaller than a pointer, no write barrier is required.
-    if (len >= sizeof(uintptr_t))
-    {
-        InlinedSetCardsAfterBulkCopyHelper(start, len);
-    }
-}
-
-#if defined(_MSC_VER) && defined(TARGET_X86)
-#pragma optimize("", on)        // Go back to command line default optimizations
-#endif //_MSC_VER && TARGET_X86
+}
\ No newline at end of file
diff --git a/src/coreclr/vm/gchelpers.h b/src/coreclr/vm/gchelpers.h
index 242a0156964d81..6e5a696dd2ccd4 100644
--- a/src/coreclr/vm/gchelpers.h
+++ b/src/coreclr/vm/gchelpers.h
@@ -87,7 +87,6 @@ extern void ThrowOutOfMemoryDimensionsExceeded();
 //========================================================================
 
 void ErectWriteBarrier(OBJECTREF* dst, OBJECTREF ref);
-void SetCardsAfterBulkCopy(Object **start, size_t len);
 
 void PublishFrozenObject(Object*& orObject);
 
diff --git a/src/coreclr/vm/gchelpers.inl b/src/coreclr/vm/gchelpers.inl
index e56bcd16a1ed43..60b93bfad94d99 100644
--- a/src/coreclr/vm/gchelpers.inl
+++ b/src/coreclr/vm/gchelpers.inl
@@ -31,13 +31,9 @@
 
 FORCEINLINE void InlinedSetCardsAfterBulkCopyHelper(Object **start, size_t len)
 {
-    // Check whether the writes were even into the heap. If not there's no card update required.
-    // Also if the size is smaller than a pointer, no write barrier is required.
+    // Caller is expected to check whether the writes were even into the heap
     _ASSERTE(len >= sizeof(uintptr_t));
-    if ((BYTE*)start < g_lowest_address || (BYTE*)start >= g_highest_address)
-    {
-        return;
-    }
+    _ASSERTE(((BYTE*)start >= g_lowest_address) && ((BYTE*)start < g_highest_address));
 
     // Don't optimize the Generation 0 case if we are checking for write barrier violations
     // since we need to update the shadow heap even in the generation 0 case.
diff --git a/src/mono/mono/metadata/icall.c b/src/mono/mono/metadata/icall.c
index 0e29ed10ffedee..a1ab947afd36c1 100644
--- a/src/mono/mono/metadata/icall.c
+++ b/src/mono/mono/metadata/icall.c
@@ -957,6 +957,9 @@ ves_icall_System_Runtime_RuntimeImports_Memmove (guint8 *destination, guint8 *so
 void
 ves_icall_System_Buffer_BulkMoveWithWriteBarrier (guint8 *destination, guint8 *source, size_t len, MonoType *type)
 {
+	if (len == 0 || destination == source)
+		return;
+
 	if (MONO_TYPE_IS_REFERENCE (type))
 		mono_gc_wbarrier_arrayref_copy_internal (destination, source, (guint)len);
 	else
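The patch works because the runtime reserves the GC heap as one contiguous address range, bounded by g_lowest_address (inclusive) and g_highest_address (exclusive): a destination outside that range cannot contain object references the GC tracks, so the pre-copy memory barrier (a full barrier on ARM64, which is the cost the title refers to) and the post-copy card-table update can both be skipped. Below is a minimal standalone C++ sketch of that shape, with stand-in globals and a plain memmove in place of the runtime's GC-safe copy helpers; the names are illustrative, not the runtime's actual code.

    #include <atomic>
    #include <cstddef>
    #include <cstring>

    // Stand-ins for the runtime's GC heap bounds: the heap is one contiguous
    // reserved range, lower bound inclusive, upper bound exclusive.
    static unsigned char* g_lowest_address;
    static unsigned char* g_highest_address;

    void BulkMoveWithWriteBarrierSketch(void* dest, const void* src, std::size_t len)
    {
        if (len == 0 || dest == src)
            return;

        // A destination outside the GC heap range cannot contain object
        // references the GC tracks, so neither the barrier nor the card
        // update is needed for it.
        const bool notInHeap = (unsigned char*)dest < g_lowest_address
                            || (unsigned char*)dest >= g_highest_address;

        if (!notInHeap)
        {
            // Stand-in for GCHeapMemoryBarrier(): writes made so far must be
            // observable no later than the writes that publish the references.
            std::atomic_thread_fence(std::memory_order_seq_cst);
        }

        // Stand-in for the forward/backward GC-safe copy helpers.
        std::memmove(dest, src, len);

        if (!notInHeap)
        {
            // Card-table update (InlinedSetCardsAfterBulkCopyHelper or
            // InlinedBulkWriteBarrier in the patch) would go here.
        }
    }

The len == 0 / dest == src early-out mirrors the guard the patch adds to RhBulkMoveWithWriteBarrier and to the Mono icall: a no-op move needs neither the barrier nor the card update.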