@@ -948,6 +948,180 @@ _PyMem_Strdup(const char *str)
     return copy;
 }
 
+/***********************************************/
+/* Delayed freeing support for Py_GIL_DISABLED */
+/***********************************************/
+
+// So that sizeof(struct _mem_work_chunk) is 4096 bytes on 64-bit platforms.
+#define WORK_ITEMS_PER_CHUNK 254
+
+// A pointer to be freed once the QSBR read sequence reaches qsbr_goal.
+struct _mem_work_item {
+    void *ptr;
+    uint64_t qsbr_goal;
+};
+
+// A fixed-size buffer of pointers to be freed
+struct _mem_work_chunk {
+    // Linked list node of chunks in queue
+    struct llist_node node;
+
+    Py_ssize_t rd_idx;  // index of next item to read
+    Py_ssize_t wr_idx;  // index of next item to write
+    struct _mem_work_item array[WORK_ITEMS_PER_CHUNK];
+};
+
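+// Schedule `ptr` to be released with PyMem_Free() once it is safe to do so:
+// the pointer is queued in the thread's free queue together with a QSBR goal
+// and is freed only after the QSBR read sequence has reached that goal.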
+void
+_PyMem_FreeDelayed(void *ptr)
+{
+#ifndef Py_GIL_DISABLED
+    PyMem_Free(ptr);
+#else
+    if (_PyRuntime.stoptheworld.world_stopped) {
+        // Free immediately if the world is stopped, including during
+        // interpreter shutdown.
+        PyMem_Free(ptr);
+        return;
+    }
+
+    _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET();
+    struct llist_node *head = &tstate->mem_free_queue;
+
+    struct _mem_work_chunk *buf = NULL;
+    if (!llist_empty(head)) {
+        // Try to re-use the last buffer
+        buf = llist_data(head->prev, struct _mem_work_chunk, node);
+        if (buf->wr_idx == WORK_ITEMS_PER_CHUNK) {
+            // already full
+            buf = NULL;
+        }
+    }
+
+    if (buf == NULL) {
+        buf = PyMem_Calloc(1, sizeof(*buf));
+        if (buf != NULL) {
+            llist_insert_tail(head, &buf->node);
+        }
+    }
+
+    if (buf == NULL) {
+        // failed to allocate a buffer, free immediately
+        _PyEval_StopTheWorld(tstate->base.interp);
+        PyMem_Free(ptr);
+        _PyEval_StartTheWorld(tstate->base.interp);
+        return;
+    }
+
+    assert(buf != NULL && buf->wr_idx < WORK_ITEMS_PER_CHUNK);
+    uint64_t seq = _Py_qsbr_deferred_advance(tstate->qsbr);
+    buf->array[buf->wr_idx].ptr = ptr;
+    buf->array[buf->wr_idx].qsbr_goal = seq;
+    buf->wr_idx++;
+
+    if (buf->wr_idx == WORK_ITEMS_PER_CHUNK) {
+        _PyMem_ProcessDelayed((PyThreadState *)tstate);
+    }
+#endif
+}
+
+static struct _mem_work_chunk *
+work_queue_first(struct llist_node *head)
+{
+    return llist_data(head->next, struct _mem_work_chunk, node);
+}
+
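+// Free the items queued at `head` whose QSBR goal has been reached, removing
+// fully drained chunks along the way.  Stop at the first item that is not yet
+// safe to free; it will be retried on a later call.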
+static void
+process_queue(struct llist_node *head, struct _qsbr_thread_state *qsbr)
+{
+    while (!llist_empty(head)) {
+        struct _mem_work_chunk *buf = work_queue_first(head);
+
+        if (buf->rd_idx == buf->wr_idx) {
+            llist_remove(&buf->node);
+            PyMem_Free(buf);
+            continue;
+        }
+
+        struct _mem_work_item *item = &buf->array[buf->rd_idx];
+        if (!_Py_qsbr_poll(qsbr, item->qsbr_goal)) {
+            return;
+        }
+
+        PyMem_Free(item->ptr);
+        buf->rd_idx++;
+    }
+}
+
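+// Process the interpreter's shared free queue.  The queue is skipped entirely
+// when `has_work` is clear, and the mutex is only try-locked so that callers
+// never block waiting for another thread that is already processing it.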
+static void
+process_interp_queue(struct _Py_mem_interp_free_queue *queue,
+                     struct _qsbr_thread_state *qsbr)
+{
+    if (!_Py_atomic_load_int_relaxed(&queue->has_work)) {
+        return;
+    }
+
+    // Try to acquire the lock, but don't block if it's already held.
+    if (_PyMutex_LockTimed(&queue->mutex, 0, 0) == PY_LOCK_ACQUIRED) {
+        process_queue(&queue->head, qsbr);
+
+        int more_work = !llist_empty(&queue->head);
+        _Py_atomic_store_int_relaxed(&queue->has_work, more_work);
+
+        PyMutex_Unlock(&queue->mutex);
+    }
+}
+
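+// Free any delayed allocations whose QSBR goal has been reached, first from
+// the calling thread's own queue and then from the interpreter-wide queue
+// populated by _PyMem_AbandonDelayed().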
+void
+_PyMem_ProcessDelayed(PyThreadState *tstate)
+{
+    PyInterpreterState *interp = tstate->interp;
+    _PyThreadStateImpl *tstate_impl = (_PyThreadStateImpl *)tstate;
+
+    // Process thread-local work
+    process_queue(&tstate_impl->mem_free_queue, tstate_impl->qsbr);
+
+    // Process shared interpreter work
+    process_interp_queue(&interp->mem_free_queue, tstate_impl->qsbr);
+}
+
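+// Hand off this thread's pending delayed frees to the interpreter's shared
+// queue so that other threads can release them once their QSBR goals are met.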
+void
+_PyMem_AbandonDelayed(PyThreadState *tstate)
+{
+    PyInterpreterState *interp = tstate->interp;
+    struct llist_node *queue = &((_PyThreadStateImpl *)tstate)->mem_free_queue;
+
+    if (llist_empty(queue)) {
+        return;
+    }
+
+    // Merge the thread's work queue into the interpreter's work queue.
+    PyMutex_Lock(&interp->mem_free_queue.mutex);
+    llist_concat(&interp->mem_free_queue.head, queue);
+    _Py_atomic_store_int_relaxed(&interp->mem_free_queue.has_work, 1);
+    PyMutex_Unlock(&interp->mem_free_queue.mutex);
+
+    assert(llist_empty(queue));  // the thread's queue is now empty
+}
+
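+// During interpreter shutdown, drain the shared queue: free every remaining
+// queued pointer without waiting for QSBR, then free the chunks themselves.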
+void
+_PyMem_FiniDelayed(PyInterpreterState *interp)
+{
+    struct llist_node *head = &interp->mem_free_queue.head;
+    while (!llist_empty(head)) {
+        struct _mem_work_chunk *buf = work_queue_first(head);
+
+        while (buf->rd_idx < buf->wr_idx) {
+            // Free the remaining items immediately. There should be no other
+            // threads accessing the memory at this point during shutdown.
+            struct _mem_work_item *item = &buf->array[buf->rd_idx];
+            PyMem_Free(item->ptr);
+            buf->rd_idx++;
+        }
+
+        llist_remove(&buf->node);
+        PyMem_Free(buf);
+    }
+}
 
 /**************************/
 /* the "object" allocator */