Skip to content

Commit

Permalink
Attempt to limit the impact of bug #5687.
Browse files Browse the repository at this point in the history
With this commit, a failing merge will not restart the indexing pipeline.
This should prevent the split of death loop.

This also include a tantivy fix, that adds a panic handler on the merge thread pool
  • Loading branch information
fulmicoton-dd committed Feb 19, 2025
1 parent ea6739f commit 9f3e803
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 21 deletions.
18 changes: 9 additions & 9 deletions quickwit/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion quickwit/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,9 @@ quickwit-serve = { path = "quickwit-serve" }
quickwit-storage = { path = "quickwit-storage" }
quickwit-telemetry = { path = "quickwit-telemetry" }

tantivy = { git = "https://github.com/quickwit-oss/tantivy/", rev = "71cf198", default-features = false, features = [
#tantivy = { git = "https://github.com/quickwit-oss/tantivy/", rev = "71cf198", default-features = false, features = [

tantivy = { git = "https://github.com/quickwit-oss/tantivy/", rev = "82b510b88bca638fea7959b4d2a858e381ccf227", default-features = false, features = [
"lz4-compression",
"mmap",
"quickwit",
Expand Down
45 changes: 34 additions & 11 deletions quickwit/quickwit-indexing/src/actors/merge_executor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,9 @@ use tracing::{debug, error, info, instrument, warn};
use crate::actors::Packager;
use crate::controlled_directory::ControlledDirectory;
use crate::merge_policy::MergeOperationType;
use crate::models::{IndexedSplit, IndexedSplitBatch, MergeScratch, PublishLock, SplitAttrs};
use crate::models::{
IndexedSplit, IndexedSplitBatch, MergeScratch, PublishLock, SplitAttrs,
};

#[derive(Clone)]
pub struct MergeExecutor {
Expand Down Expand Up @@ -95,16 +97,37 @@ impl Handler<MergeScratch> for MergeExecutor {
let start = Instant::now();
let merge_task = merge_scratch.merge_task;
let indexed_split_opt: Option<IndexedSplit> = match merge_task.operation_type {
MergeOperationType::Merge => Some(
self.process_merge(
merge_task.merge_split_id.clone(),
merge_task.splits.clone(),
merge_scratch.tantivy_dirs,
merge_scratch.merge_scratch_directory,
ctx,
)
.await?,
),
MergeOperationType::Merge => {
let merge_res = self
.process_merge(
merge_task.merge_split_id.clone(),
merge_task.splits.clone(),
merge_scratch.tantivy_dirs,
merge_scratch.merge_scratch_directory,
ctx,
)
.await;
match merge_res {
Ok(indexed_split) => Some(indexed_split),
Err(err) => {
// A failure in a merge is a bit special.
//
// Instead of failing the pipeline, we just log it.
// The idea is to limit the risk associated with a potential split of death.
//
// Such a split is now not tracked by the merge planner and won't undergo a
// merge until the merge pipeline is restarted.
//
// With a merge policy that marks splits as mature after a day or so, this
// limits the noise associated to those failed
// merges.
//
// This logic is part of a temporary firefighting fix for #5687
error!(merge_task=?merge_task, split_err=?err, "failed to merge splits");
return Ok(());
}
}
}
MergeOperationType::DeleteAndMerge => {
assert_eq!(
merge_task.splits.len(),
Expand Down

0 comments on commit 9f3e803

Please # to comment.