Skip to content

Commit

Permalink
Use explicit startup/shutdown order for development servers (#5459)
Browse files Browse the repository at this point in the history
## What changed?
Instead of starting all services in one process in parallel, serialize
the startup in a defined order, and the shutdown in the reverse order.

## Why?
This reduces noisy error logs during both startup and shutdown.

I originally thought that serializing startup would be vulnerable to the
problem fixed in #3100 but I couldn't reproduce it after a bunch of
testing, so I think some other change in between must have also
addressed that. If that problem reappears, though, an easy fix would be
to start matching and history at the same time, and wait for them before
moving on to frontend.

## How did you test it?
local server startup/shutdown

## Potential risks
Generally multiple services in one process is only used in development,
so this shouldn't have any effect on production.
  • Loading branch information
dnr authored Feb 28, 2024
1 parent f67201e commit 39ec799
Showing 1 changed file with 30 additions and 33 deletions.
63 changes: 30 additions & 33 deletions temporal/server_impl.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,11 +25,12 @@
package temporal

import (
"cmp"
"context"
"fmt"
"sync"

"go.uber.org/multierr"
"golang.org/x/exp/slices"

"go.temporal.io/server/common/cluster"
"go.temporal.io/server/common/config"
Expand Down Expand Up @@ -59,6 +60,17 @@ type (
}
)

// When starting multiple services in one process (typically a development server), start them
// in this order and stop them in the reverse order. This most important part here is that the
// worker depends on the frontend, which depends on matching and history.
var initOrder = map[primitives.ServiceName]int{
primitives.MatchingService: 1,
primitives.HistoryService: 2,
primitives.InternalFrontendService: 3,
primitives.FrontendService: 3,
primitives.WorkerService: 4,
}

// NewServerFxImpl returns a new instance of server that serves one or many services.
func NewServerFxImpl(
opts *serverOptions,
Expand Down Expand Up @@ -110,19 +122,16 @@ func (s *ServerImpl) Start(ctx context.Context) error {
}

func (s *ServerImpl) Stop(ctx context.Context) error {
var wg sync.WaitGroup
wg.Add(len(s.servicesMetadata))
close(s.stoppedCh)

for _, svcMeta := range s.servicesMetadata {
go func(svc *ServicesMetadata) {
svc.Stop(ctx)
wg.Done()
}(svcMeta)
svcs := slices.Clone(s.servicesMetadata)
slices.SortFunc(svcs, func(a, b *ServicesMetadata) int {
return -cmp.Compare(initOrder[a.serviceName], initOrder[b.serviceName]) // note negative
})
for _, svc := range svcs {
svc.Stop(ctx)
}

wg.Wait()

if s.so.metricHandler != nil {
s.so.metricHandler.Stop(s.logger)
}
Expand All @@ -135,32 +144,20 @@ func (s *ServerImpl) startServices() error {
timeout := max(serviceStartTimeout, 2*s.so.config.Global.Membership.MaxJoinDuration)
ctx, cancel := context.WithTimeout(context.Background(), timeout)
defer cancel()
results := make(chan startServiceResult, len(s.servicesMetadata))
for _, svcMeta := range s.servicesMetadata {
go func(svcMeta *ServicesMetadata) {
err := svcMeta.app.Start(ctx)
results <- startServiceResult{
svc: svcMeta,
err: err,
}
}(svcMeta)
}
return s.readResults(results)
}

func (s *ServerImpl) readResults(results chan startServiceResult) (err error) {
for range s.servicesMetadata {
r := <-results
if r.err != nil {
err = multierr.Combine(err, fmt.Errorf("failed to start service %v: %w", r.svc.serviceName, r.err))
svcs := slices.Clone(s.servicesMetadata)
slices.SortFunc(svcs, func(a, b *ServicesMetadata) int {
return cmp.Compare(initOrder[a.serviceName], initOrder[b.serviceName])
})

var allErrs error
for _, svc := range svcs {
err := svc.app.Start(ctx)
if err != nil {
allErrs = multierr.Append(allErrs, fmt.Errorf("failed to start service %v: %w", svc.serviceName, err))
}
}
return
}

type startServiceResult struct {
svc *ServicesMetadata
err error
return allErrs
}

func initSystemNamespaces(
Expand Down

0 comments on commit 39ec799

Please # to comment.