-
-
Notifications
You must be signed in to change notification settings - Fork 1.8k
New issue
Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? # to your account
MDEV-36226 Stall and crash when page cleaner fails to generate free p… #3885
base: 10.6
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -602,20 +602,66 @@ static void buf_dblwr_check_block(const buf_page_t *bpage) noexcept | |
} | ||
#endif /* UNIV_DEBUG */ | ||
|
||
ATTRIBUTE_COLD void buf_dblwr_t::print_info() const noexcept | ||
{ | ||
mysql_mutex_assert_owner(&mutex); | ||
const slot *flush_slot= active_slot == &slots[0] ? &slots[1] : &slots[0]; | ||
|
||
sql_print_information("Innodb: Double Write State\n" | ||
"-------------------\n" | ||
"Batch running : %s\n" | ||
"Active Slot - first_free: %lu reserved: %lu\n" | ||
"Flush Slot - first_free: %lu reserved: %lu\n" | ||
"-------------------", | ||
(batch_running ? "true" : "false"), | ||
active_slot->first_free, active_slot->reserved, | ||
flush_slot->first_free, flush_slot->reserved); | ||
Comment on lines
+610
to
+618
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Error log messages should start with sql_print_information("InnoDB: doublewrite %s (active: %zu, %zu) (flush: %zu, %zu)",
batch_running ? "active" : "idle",
active_slot->first_free, active_slot->reserved,
flush_slot->first_free, flush_slot->reserved); There is a type mismatch between More verbose output is not necessarily any more readable to an end user. When analyzing the output, it should not be much of an extra hurdle to consult the source code for decoding the output. |
||
} | ||
|
||
bool buf_dblwr_t::flush_buffered_writes(const ulint size) noexcept | ||
{ | ||
mysql_mutex_assert_owner(&mutex); | ||
ut_ad(size == block_size()); | ||
|
||
for (;;) | ||
const ulong max_count= 60 * 60; | ||
const ulong first_log_count= 30; | ||
const ulong fatal_threshold= srv_fatal_semaphore_wait_threshold; | ||
ulong log_count= first_log_count; | ||
|
||
for (ulong count= 0;;) | ||
Comment on lines
+626
to
+631
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Any use of the types |
||
{ | ||
if (!active_slot->first_free) | ||
return false; | ||
if (!batch_running) | ||
break; | ||
my_cond_wait(&cond, &mutex.m_mutex); | ||
} | ||
|
||
timespec abstime; | ||
set_timespec(abstime, 1); | ||
my_cond_timedwait(&cond, &mutex.m_mutex, &abstime); | ||
Comment on lines
-616
to
+640
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Shouldn’t we avoid the timeout handling when if (!my_cond_timedwait(&cond, &mutex.m_mutex, &abstime))
{
/* buf_dblwr_t::write_completed()` had been invoked */
count= 0;
continue;
} Can we set a longer timeout than 1 second? Could it be somehow based on |
||
|
||
if (count > fatal_threshold) | ||
{ | ||
buf_pool.print_flush_info(); | ||
print_info(); | ||
ib::fatal() << "Innodb: Long wait (" << count | ||
<< " seconds) for double-write buffer flush."; | ||
} | ||
else if (++count < first_log_count && !(count % 5)) | ||
{ | ||
sql_print_information("Innodb: Long wait (%lu seconds) for double-write" | ||
" buffer flush.", count); | ||
buf_pool.print_flush_info(); | ||
print_info(); | ||
} | ||
else if (!(count % log_count)) | ||
{ | ||
sql_print_warning("Innodb: Long wait (%lu seconds) for double-write" | ||
" buffer flush.", count); | ||
buf_pool.print_flush_info(); | ||
print_info(); | ||
log_count= log_count >= max_count ? max_count : log_count * 2; | ||
} | ||
} | ||
ut_ad(active_slot->reserved == active_slot->first_free); | ||
ut_ad(!flushing_buffered_writes); | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1374,7 +1374,10 @@ static void buf_flush_LRU_list_batch(ulint max, flush_counters_t *n) noexcept | |
break; | ||
} | ||
|
||
if (neighbors && space->is_rotational()) | ||
if (neighbors && space->is_rotational() && | ||
/* Skip neighbourhood flush from LRU list if we haven't yet reached | ||
half of the free page target. */ | ||
UT_LIST_GET_LEN(buf_pool.free) * 2 >= free_limit) | ||
n->flushed+= buf_flush_try_neighbors(space, page_id, bpage, | ||
neighbors == 1, | ||
n->flushed, max); | ||
|
@@ -1763,8 +1766,16 @@ static ulint buf_flush_LRU(ulint max_n) noexcept | |
buf_do_LRU_batch(max_n, &n); | ||
|
||
ulint pages= n.flushed; | ||
ulint evicted= n.evicted; | ||
|
||
if (n.evicted) | ||
/* If we have exhausted flush quota, it is likely we exited before | ||
generating enough free pages. Call once more with 0 flush to generate | ||
free pages immediately as required. */ | ||
if (pages == max_n) | ||
buf_do_LRU_batch(0, &n); | ||
Comment on lines
+1771
to
+1775
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. For safety, could the condition be |
||
|
||
evicted+= n.evicted; | ||
if (evicted) | ||
{ | ||
buf_pool.try_LRU_scan= true; | ||
pthread_cond_broadcast(&buf_pool.done_free); | ||
|
@@ -2331,6 +2342,11 @@ static void buf_flush_page_cleaner() noexcept | |
DBUG_EXECUTE_IF("ib_page_cleaner_sleep", | ||
{ | ||
std::this_thread::sleep_for(std::chrono::seconds(1)); | ||
/* Cover the logging code in debug mode. */ | ||
buf_pool.print_flush_info(); | ||
buf_dblwr.lock(); | ||
buf_dblwr.print_info(); | ||
buf_dblwr.unlock(); | ||
}); | ||
lsn_limit= buf_flush_sync_lsn; | ||
|
||
|
@@ -2535,6 +2551,10 @@ static void buf_flush_page_cleaner() noexcept | |
|
||
n= srv_max_io_capacity; | ||
n= n >= n_flushed ? n - n_flushed : 0; | ||
/* It is critical to generate free pages to keep the system alive. Make | ||
sure we are not hindered by dirty pages in LRU tail. */ | ||
n= std::max<ulint>(n, std::min<ulint>(srv_max_io_capacity, | ||
buf_pool.LRU_scan_depth)); | ||
goto LRU_flush; | ||
} | ||
|
||
|
@@ -2573,10 +2593,11 @@ ATTRIBUTE_COLD void buf_pool_t::LRU_warn() noexcept | |
mysql_mutex_assert_owner(&mutex); | ||
try_LRU_scan= false; | ||
if (!LRU_warned.test_and_set(std::memory_order_acquire)) | ||
{ | ||
sql_print_warning("InnoDB: Could not free any blocks in the buffer pool!" | ||
" %zu blocks are in use and %zu free." | ||
" Consider increasing innodb_buffer_pool_size.", | ||
UT_LIST_GET_LEN(LRU), UT_LIST_GET_LEN(free)); | ||
" Consider increasing innodb_buffer_pool_size."); | ||
buf_pool.print_flush_info(); | ||
} | ||
} | ||
|
||
/** Initialize page_cleaner. */ | ||
|
@@ -2656,6 +2677,53 @@ void buf_flush_sync() noexcept | |
thd_wait_end(nullptr); | ||
} | ||
|
||
ATTRIBUTE_COLD void buf_pool_t::print_flush_info() const noexcept | ||
{ | ||
/* We do dirty read of UT_LIST count variable. */ | ||
ulint lru_size= UT_LIST_GET_LEN(LRU); | ||
ulint dirty_size= UT_LIST_GET_LEN(flush_list); | ||
ulint free_size= UT_LIST_GET_LEN(free); | ||
ulint dirty_pct= lru_size ? dirty_size * 100 / (lru_size + free_size) : 0; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
sql_print_information("Innodb: Buffer Pool pages \n" | ||
"-------------------\n" | ||
"LRU Pages: %lu\n" | ||
"Free Pages: %lu\n" | ||
"Dirty Pages: %lu : %lu%%\n" | ||
"-------------------", | ||
lru_size, free_size, dirty_size, dirty_pct); | ||
Comment on lines
+2687
to
+2693
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Like I pointed out about Because this very often should precede a crash report, it would be natural to model this after It seems that each call of |
||
|
||
lsn_t lsn= log_sys.get_lsn(); | ||
lsn_t clsn= log_sys.last_checkpoint_lsn; | ||
sql_print_information("Innodb: LSN flush parameters\n" | ||
"-------------------\n" | ||
"System LSN : %" PRIu64 "\n" | ||
"Checkpoint LSN: %" PRIu64 "\n" | ||
"Flush ASync LSN: %" PRIu64 "\n" | ||
"Flush Sync LSN: %" PRIu64 "\n" | ||
"-------------------", | ||
lsn, clsn, buf_flush_async_lsn.load(), buf_flush_sync_lsn.load()); | ||
|
||
lsn_t age= lsn - clsn; | ||
lsn_t age_pct= log_sys.max_checkpoint_age | ||
? age * 100 / log_sys.max_checkpoint_age : 0; | ||
sql_print_information("Innodb: LSN age parameters\n" | ||
"-------------------\n" | ||
"Current Age : %" PRIu64 " : %" PRIu64 "%%\n" | ||
"Max Age(Async): %" PRIu64 "\n" | ||
"Max Age(Sync) : %" PRIu64 "\n" | ||
"Capacity : %" PRIu64 "\n" | ||
"-------------------", | ||
age, age_pct, log_sys.max_modified_age_async, log_sys.max_checkpoint_age, | ||
log_sys.log_capacity); | ||
|
||
sql_print_information("Innodb: Pending IO count\n" | ||
"-------------------\n" | ||
"Pending Read: %zu\n" | ||
"Pending Write: %zu\n" | ||
"-------------------", | ||
os_aio_pending_reads_approx(), os_aio_pending_writes_approx()); | ||
Comment on lines
+2697
to
+2724
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I’d still prefer packing as much information per line as possible, and I would omit any math that can be calculated from the numbers afterwards. These would include sql_print_information("InnoDB: LSN " LSN_PF "; checkpoint " LSN_PF
", flush " LSN_PF "," LSN_PF
"; max age " LSN_PF "," LSN_PF
"; capacity " LSN_PF,
lsn, clsn,
buf_flush_async_lsn.load(), buf_flush_sync_lsn.load(),
log_sys.max_modified_age_async,
log_sys.max_checkpoint_age,
log_sys.log_capacity);
sql_print_information("InnoDB: Pending page reads: %zu, writes: %zu",
os_aio_pending_reads_approx(), os_aio_pending_writes_approx()); These numbers are not saying much to an end user anyway; a subject matter expert would be needed for decoding this, and the code that produces it is open source. So, we would not seem to lose that much by having slightly less verbose output. Having more data per line would make it easier to filter the output with tools like |
||
} | ||
|
||
#ifdef UNIV_DEBUG | ||
/** Functor to validate the flush list. */ | ||
struct Check { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The added pattern seems to be incorrect and should therefore not have the desired effect of preventing sporadic test failures in a heavily loaded environment. The regular expression group delimiters
()
are metacharacters in Perl regular expressions, as you can see 2 lines below the addition.