Skip to content
New issue

Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? # to your account

Fix reset workflow in replication reapply #3449

Merged
merged 2 commits into from
Oct 1, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 6 additions & 20 deletions service/history/nDCTransactionMgr.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,6 @@ import (
"context"
"time"

"go.temporal.io/server/common"

"go.temporal.io/server/common/persistence/serialization"

"github.com/pborman/uuid"
commonpb "go.temporal.io/api/common/v1"
enumspb "go.temporal.io/api/enums/v1"
Expand All @@ -45,6 +41,7 @@ import (
"go.temporal.io/server/common/metrics"
"go.temporal.io/server/common/namespace"
"go.temporal.io/server/common/persistence"
"go.temporal.io/server/common/persistence/serialization"
"go.temporal.io/server/common/persistence/versionhistory"
"go.temporal.io/server/service/history/shard"
"go.temporal.io/server/service/history/workflow"
Expand Down Expand Up @@ -323,18 +320,6 @@ func (r *nDCTransactionMgrImpl) backfillWorkflowEventsReapply(
baseRunID := baseMutableState.GetExecutionState().GetRunId()
resetRunID := uuid.New()
baseRebuildLastEventID := baseMutableState.GetPreviousStartedEventID()

// TODO when https://github.com/uber/cadence/issues/2420 is finished, remove this block,
// since cannot reapply event to a finished workflow which had no workflow task started
if baseRebuildLastEventID == common.EmptyEventID {
r.logger.Warn("cannot reapply event to a finished workflow",
tag.WorkflowNamespaceID(namespaceID.String()),
tag.WorkflowID(workflowID),
)
r.metricsClient.IncCounter(metrics.HistoryReapplyEventsScope, metrics.EventReapplySkippedCount)
return persistence.UpdateWorkflowModeBypassCurrent, workflow.TransactionPolicyPassive, nil
}

baseVersionHistories := baseMutableState.GetExecutionInfo().GetVersionHistories()
baseCurrentVersionHistory, err := versionhistory.GetCurrentVersionHistory(baseVersionHistories)
if err != nil {
Expand Down Expand Up @@ -367,14 +352,15 @@ func (r *nDCTransactionMgrImpl) backfillWorkflowEventsReapply(
case *serviceerror.InvalidArgument:
// no-op. Usually this is due to reset workflow with pending child workflows
r.logger.Warn("Cannot reset workflow. Ignoring reapply events.", tag.Error(err))
// the target workflow is not reset so it is still the current workflow. It need to persist updated version histories.
return persistence.UpdateWorkflowModeUpdateCurrent, workflow.TransactionPolicyPassive, nil
case nil:
// no-op
// after the reset of target workflow (current workflow) with additional events to be reapplied
// target workflow is no longer the current workflow
return persistence.UpdateWorkflowModeBypassCurrent, workflow.TransactionPolicyPassive, nil
default:
return 0, workflow.TransactionPolicyActive, err
}
// after the reset of target workflow (current workflow) with additional events to be reapplied
// target workflow is no longer the current workflow
return persistence.UpdateWorkflowModeBypassCurrent, workflow.TransactionPolicyPassive, nil
}

// case 2
Expand Down
78 changes: 78 additions & 0 deletions service/history/nDCTransactionMgr_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,84 @@ func (s *nDCTransactionMgrSuite) TestBackfillWorkflow_CurrentWorkflow_Active_Clo
s.True(releaseCalled)
}

func (s *nDCTransactionMgrSuite) TestBackfillWorkflow_CurrentWorkflow_Closed_ResetFailed() {
ctx := context.Background()
now := time.Now().UTC()

namespaceID := namespace.ID("some random namespace ID")
workflowID := "some random workflow ID"
runID := "some random run ID"
lastWorkflowTaskStartedEventID := int64(9999)
nextEventID := lastWorkflowTaskStartedEventID * 2
lastWorkflowTaskStartedVersion := s.namespaceEntry.FailoverVersion()
versionHistory := versionhistory.NewVersionHistory([]byte("branch token"), []*historyspb.VersionHistoryItem{
{EventId: lastWorkflowTaskStartedEventID, Version: lastWorkflowTaskStartedVersion},
})
histories := versionhistory.NewVersionHistories(versionHistory)

releaseCalled := false

targetWorkflow := NewMocknDCWorkflow(s.controller)
weContext := workflow.NewMockContext(s.controller)
mutableState := workflow.NewMockMutableState(s.controller)
var releaseFn workflow.ReleaseCacheFunc = func(error) { releaseCalled = true }

workflowEvents := &persistence.WorkflowEvents{}

targetWorkflow.EXPECT().getContext().Return(weContext).AnyTimes()
targetWorkflow.EXPECT().getMutableState().Return(mutableState).AnyTimes()
targetWorkflow.EXPECT().getReleaseFn().Return(releaseFn).AnyTimes()

s.mockClusterMetadata.EXPECT().ClusterNameForFailoverVersion(s.namespaceEntry.IsGlobalNamespace(), s.namespaceEntry.FailoverVersion()).Return(cluster.TestCurrentClusterName).AnyTimes()
s.mockClusterMetadata.EXPECT().GetCurrentClusterName().Return(cluster.TestCurrentClusterName).AnyTimes()

mutableState.EXPECT().IsCurrentWorkflowGuaranteed().Return(false).AnyTimes()
mutableState.EXPECT().IsWorkflowExecutionRunning().Return(false).AnyTimes()
mutableState.EXPECT().GetNamespaceEntry().Return(s.namespaceEntry).AnyTimes()
mutableState.EXPECT().GetExecutionInfo().Return(&persistencespb.WorkflowExecutionInfo{
NamespaceId: namespaceID.String(),
WorkflowId: workflowID,
VersionHistories: histories,
}).AnyTimes()
mutableState.EXPECT().GetExecutionState().Return(&persistencespb.WorkflowExecutionState{
RunId: runID,
}).AnyTimes()
mutableState.EXPECT().GetNextEventID().Return(nextEventID).AnyTimes()
mutableState.EXPECT().GetPreviousStartedEventID().Return(lastWorkflowTaskStartedEventID)

s.mockWorkflowResetter.EXPECT().resetWorkflow(
ctx,
namespaceID,
workflowID,
runID,
versionHistory.GetBranchToken(),
lastWorkflowTaskStartedEventID,
lastWorkflowTaskStartedVersion,
nextEventID,
gomock.Any(),
gomock.Any(),
targetWorkflow,
eventsReapplicationResetWorkflowReason,
workflowEvents.Events,
enumspb.RESET_REAPPLY_TYPE_SIGNAL,
).Return(serviceerror.NewInvalidArgument("reset fail"))

s.mockExecutionMgr.EXPECT().GetCurrentExecution(gomock.Any(), &persistence.GetCurrentExecutionRequest{
ShardID: s.mockShard.GetShardID(),
NamespaceID: namespaceID.String(),
WorkflowID: workflowID,
}).Return(&persistence.GetCurrentExecutionResponse{RunID: runID}, nil)

weContext.EXPECT().PersistWorkflowEvents(gomock.Any(), workflowEvents).Return(int64(0), nil)
weContext.EXPECT().UpdateWorkflowExecutionWithNew(
gomock.Any(), now, persistence.UpdateWorkflowModeUpdateCurrent, nil, nil, workflow.TransactionPolicyPassive, (*workflow.TransactionPolicy)(nil),
).Return(nil)

err := s.transactionMgr.backfillWorkflow(ctx, now, targetWorkflow, workflowEvents)
s.NoError(err)
s.True(releaseCalled)
}

func (s *nDCTransactionMgrSuite) TestBackfillWorkflow_CurrentWorkflow_Passive_Open() {
ctx := context.Background()
now := time.Now().UTC()
Expand Down