Skip to content

Commit

Permalink
feat(cli): cdk rollback (#31407)
Browse files Browse the repository at this point in the history
Add a CLI feature to roll a stuck change back.

This is mostly useful for deployments performed using `--no-rollback`: if a failure occurs, the stack gets stuck in an `UPDATE_FAILED` state from which there are 2 options:

- Try again using a new template
- Roll back to the last stable state

There used to be no way to perform the second operation using the CDK CLI, but there now is.

`cdk rollback` works in 2 situations:

- A paused fail state; it will initiating a fresh rollback (on `CREATE_FAILED`, `UPDATE_FAILED`).
- A paused rollback state; it will retry the rollback, optionally skipping some resources (on `UPDATE_ROLLBACK_FAILED` -- it seems there is no way to continue a rollback in `ROLLBACK_FAILED` state).

`cdk rollback --orphan <logicalid>` can be used to skip resource rollbacks that are causing problems.

`cdk rollback --force` will look up all failed resources and continue skipping them until the rollback has finished.

This change requires new bootstrap permissions, so the bootstrap stack is updated to add the following IAM permissions to the `deploy-action` role:

```
                  - cloudformation:RollbackStack
                  - cloudformation:ContinueUpdateRollback
```

These are necessary to call the 2 CloudFormation APIs that start and continue a rollback. 

Relates to (but does not close yet) #30546.

----

*By submitting this pull request, I confirm that my contribution is made under the terms of the Apache-2.0 license*
  • Loading branch information
rix0rrr authored Oct 2, 2024
1 parent 7e80cc9 commit 0755561
Show file tree
Hide file tree
Showing 20 changed files with 1,121 additions and 218 deletions.
2 changes: 1 addition & 1 deletion packages/@aws-cdk-testing/cli-integ/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ Test suites are written as a collection of Jest tests, and they are run using Je

### Setup

Building the @aws-cdk-testing package is not very different from building the rest of the CDK. However, If you are having issues with the tests, you can ensure your enviornment is built properly by following the steps below:
Building the @aws-cdk-testing package is not very different from building the rest of the CDK. However, If you are having issues with the tests, you can ensure your environment is built properly by following the steps below:

```shell
yarn install # Install dependencies
Expand Down
19 changes: 17 additions & 2 deletions packages/@aws-cdk-testing/cli-integ/lib/with-cdk-app.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ export const EXTENDED_TEST_TIMEOUT_S = 30 * 60;
* For backwards compatibility with existing tests (so we don't have to change
* too much) the inner block is expected to take a `TestFixture` object.
*/
export function withCdkApp(
export function withSpecificCdkApp(
appName: string,
block: (context: TestFixture) => Promise<void>,
): (context: TestContext & AwsContext & DisableBootstrapContext) => Promise<void> {
return async (context: TestContext & AwsContext & DisableBootstrapContext) => {
Expand All @@ -36,7 +37,7 @@ export function withCdkApp(
context.output.write(` Test directory: ${integTestDir}\n`);
context.output.write(` Region: ${context.aws.region}\n`);

await cloneDirectory(path.join(RESOURCES_DIR, 'cdk-apps', 'app'), integTestDir, context.output);
await cloneDirectory(path.join(RESOURCES_DIR, 'cdk-apps', appName), integTestDir, context.output);
const fixture = new TestFixture(
integTestDir,
stackNamePrefix,
Expand Down Expand Up @@ -87,6 +88,16 @@ export function withCdkApp(
};
}

/**
* Like `withSpecificCdkApp`, but uses the default integration testing app with a million stacks in it
*/
export function withCdkApp(
block: (context: TestFixture) => Promise<void>,
): (context: TestContext & AwsContext & DisableBootstrapContext) => Promise<void> {
// 'app' is the name of the default integration app in the `cdk-apps` directory
return withSpecificCdkApp('app', block);
}

export function withCdkMigrateApp<A extends TestContext>(language: string, block: (context: TestFixture) => Promise<void>) {
return async (context: A) => {
const stackName = `cdk-migrate-${language}-integ-${context.randomString}`;
Expand Down Expand Up @@ -188,6 +199,10 @@ export function withDefaultFixture(block: (context: TestFixture) => Promise<void
return withAws(withTimeout(DEFAULT_TEST_TIMEOUT_S, withCdkApp(block)));
}

export function withSpecificFixture(appName: string, block: (context: TestFixture) => Promise<void>) {
return withAws(withTimeout(DEFAULT_TEST_TIMEOUT_S, withSpecificCdkApp(appName, block)));
}

export function withExtendedTimeoutFixture(block: (context: TestFixture) => Promise<void>) {
return withAws(withTimeout(EXTENDED_TEST_TIMEOUT_S, withCdkApp(block)));
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
const cdk = require('aws-cdk-lib');
const lambda = require('aws-cdk-lib/aws-lambda');
const cr = require('aws-cdk-lib/custom-resources');

/**
* This stack will be deployed in multiple phases, to achieve a very specific effect
*
* It contains resources r1 and r2, where r1 gets deployed first.
*
* - PHASE = 1: both resources deploy regularly.
* - PHASE = 2a: r1 gets updated, r2 will fail to update
* - PHASE = 2b: r1 gets updated, r2 will fail to update, and r1 will fail its rollback.
*
* To exercise this app:
*
* ```
* env PHASE=1 npx cdk deploy
* env PHASE=2b npx cdk deploy --no-rollback
* # This will leave the stack in UPDATE_FAILED
*
* env PHASE=2b npx cdk rollback
* # This will start a rollback that will fail because r1 fails its rollabck
*
* env PHASE=2b npx cdk rollback --force
* # This will retry the rollabck and skip r1
* ```
*/
class RollbacktestStack extends cdk.Stack {
constructor(scope, id, props) {
super(scope, id, props);

let r1props = {};
let r2props = {};

const phase = process.env.PHASE;
switch (phase) {
case '1':
// Normal deployment
break;
case '2a':
// r1 updates normally, r2 fails updating
r2props.FailUpdate = true;
break;
case '2b':
// r1 updates normally, r2 fails updating, r1 fails rollback
r1props.FailRollback = true;
r2props.FailUpdate = true;
break;
}

const fn = new lambda.Function(this, 'Fun', {
runtime: lambda.Runtime.NODEJS_LATEST,
code: lambda.Code.fromInline(`exports.handler = async function(event, ctx) {
const key = \`Fail\${event.RequestType}\`;
if (event.ResourceProperties[key]) {
throw new Error(\`\${event.RequestType} fails!\`);
}
if (event.OldResourceProperties?.FailRollback) {
throw new Error('Failing rollback!');
}
return {};
}`),
handler: 'index.handler',
timeout: cdk.Duration.minutes(1),
});
const provider = new cr.Provider(this, "MyProvider", {
onEventHandler: fn,
});

const r1 = new cdk.CustomResource(this, 'r1', {
serviceToken: provider.serviceToken,
properties: r1props,
});
const r2 = new cdk.CustomResource(this, 'r2', {
serviceToken: provider.serviceToken,
properties: r2props,
});
r2.node.addDependency(r1);
}
}

const app = new cdk.App({
context: {
'@aws-cdk/core:assetHashSalt': process.env.CODEBUILD_BUILD_ID, // Force all assets to be unique, but consistent in one build
},
});

const defaultEnv = {
account: process.env.CDK_DEFAULT_ACCOUNT,
region: process.env.CDK_DEFAULT_REGION
};

const stackPrefix = process.env.STACK_NAME_PREFIX;
if (!stackPrefix) {
throw new Error(`the STACK_NAME_PREFIX environment variable is required`);
}

// Sometimes we don't want to synthesize all stacks because it will impact the results
new RollbacktestStack(app, `${stackPrefix}-test-rollback`, { env: defaultEnv });
app.synth();
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
"app": "node app.js",
"versionReporting": false,
"context": {
"aws-cdk:enableDiffNoFail": "true"
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import {
withCDKMigrateFixture,
withExtendedTimeoutFixture,
randomString,
withSpecificFixture,
withoutBootstrap,
} from '../../lib';

Expand Down Expand Up @@ -2260,6 +2261,84 @@ integTest(
}),
);

integTest(
'test cdk rollback',
withSpecificFixture('rollback-test-app', async (fixture) => {
let phase = '1';

// Should succeed
await fixture.cdkDeploy('test-rollback', {
options: ['--no-rollback'],
modEnv: { PHASE: phase },
verbose: false,
});
try {
phase = '2a';

// Should fail
const deployOutput = await fixture.cdkDeploy('test-rollback', {
options: ['--no-rollback'],
modEnv: { PHASE: phase },
verbose: false,
allowErrExit: true,
});
expect(deployOutput).toContain('UPDATE_FAILED');

// Rollback
await fixture.cdk(['rollback'], {
modEnv: { PHASE: phase },
verbose: false,
});
} finally {
await fixture.cdkDestroy('test-rollback');
}
}),
);

integTest(
'test cdk rollback --force',
withSpecificFixture('rollback-test-app', async (fixture) => {
let phase = '1';

// Should succeed
await fixture.cdkDeploy('test-rollback', {
options: ['--no-rollback'],
modEnv: { PHASE: phase },
verbose: false,
});
try {
phase = '2b'; // Fail update and also fail rollback

// Should fail
const deployOutput = await fixture.cdkDeploy('test-rollback', {
options: ['--no-rollback'],
modEnv: { PHASE: phase },
verbose: false,
allowErrExit: true,
});

expect(deployOutput).toContain('UPDATE_FAILED');

// Should still fail
const rollbackOutput = await fixture.cdk(['rollback'], {
modEnv: { PHASE: phase },
verbose: false,
allowErrExit: true,
});

expect(rollbackOutput).toContain('Failing rollback');

// Rollback and force cleanup
await fixture.cdk(['rollback', '--force'], {
modEnv: { PHASE: phase },
verbose: false,
});
} finally {
await fixture.cdkDestroy('test-rollback');
}
}),
);

integTest('cdk notices are displayed correctly', withDefaultFixture(async (fixture) => {

const cache = {
Expand Down
Loading

0 comments on commit 0755561

Please # to comment.