zenko 5050: Test replication of objects with replicationStatus=failed #2249

Draft: wants to merge 2 commits into base: bug/ZENKO-5030

1 change: 1 addition & 0 deletions .github/scripts/end2end/configure-e2e.sh
@@ -96,6 +96,7 @@ kubectl run ${POD_NAME} \
--env=AWS_SECRET_KEY=${AWS_SECRET_KEY} \
--env=AWS_ENDPOINT=${AWS_ENDPOINT} \
--env=AWS_FAIL_BUCKET_NAME=${AWS_FAIL_BUCKET_NAME} \
--env=AWS_REPLICATION_FAIL_CTST_BUCKET_NAME=${AWS_REPLICATION_FAIL_CTST_BUCKET_NAME} \
--env=AZURE_BACKEND_DESTINATION_LOCATION=${AZURE_BACKEND_DESTINATION_LOCATION} \
--env=AZURE_BACKEND_ENDPOINT=${AZURE_BACKEND_ENDPOINT} \
--env=AZURE_BACKEND_QUEUE_ENDPOINT=${AZURE_BACKEND_QUEUE_ENDPOINT} \
1 change: 1 addition & 0 deletions .github/scripts/end2end/patch-coredns.sh
@@ -14,6 +14,7 @@ corefile="
rewrite name exact ci-zenko-aws-crr-target-bucket.aws-mock.zenko.local ingress-nginx-controller.ingress-nginx.svc.cluster.local
rewrite name exact ci-zenko-aws-fail-target-bucket.aws-mock.zenko.local ingress-nginx-controller.ingress-nginx.svc.cluster.local
rewrite name exact ci-zenko-aws-target-bucket.aws-mock.zenko.local ingress-nginx-controller.ingress-nginx.svc.cluster.local
rewrite name exact ci-zenko-aws-replication-fail-ctst-bucket.aws-mock.zenko.local ingress-nginx-controller.ingress-nginx.svc.cluster.local
rewrite name exact aws-mock.zenko.local ingress-nginx-controller.ingress-nginx.svc.cluster.local
rewrite name exact azure-mock.zenko.local ingress-nginx-controller.ingress-nginx.svc.cluster.local
rewrite name exact blob.azure-mock.zenko.local ingress-nginx-controller.ingress-nginx.svc.cluster.local
2 changes: 2 additions & 0 deletions .github/workflows/end2end.yaml
@@ -57,6 +57,7 @@ env:
AWS_BACKEND_SOURCE_LOCATION: awsbackend
AWS_BACKEND_DESTINATION_LOCATION: awsbackendmismatch
AWS_BACKEND_DESTINATION_FAIL_LOCATION: awsbackendfail
AWS_BACKEND_DESTINATION_REPLICATION_FAIL_CTST_LOCATION: awsbackendreplicationctstfail
GCP_BACKEND_DESTINATION_LOCATION: gcpbackendmismatch
AZURE_BACKEND_DESTINATION_LOCATION: azurebackendmismatch
COLD_BACKEND_DESTINATION_LOCATION: e2e-cold
@@ -66,6 +67,7 @@ env:
AWS_BUCKET_NAME: ci-zenko-aws-target-bucket
AWS_CRR_BUCKET_NAME: ci-zenko-aws-crr-target-bucket
AWS_FAIL_BUCKET_NAME: ci-zenko-aws-fail-target-bucket
AWS_REPLICATION_FAIL_CTST_BUCKET_NAME: ci-zenko-aws-replication-fail-ctst-bucket
AZURE_CRR_BUCKET_NAME: ci-zenko-azure-crr-target-bucket
AZURE_ARCHIVE_BUCKET_NAME: ci-zenko-azure-archive-target-bucket
AZURE_ARCHIVE_BUCKET_NAME_2: ci-zenko-azure-archive-target-bucket-2
17 changes: 15 additions & 2 deletions tests/ctst/common/hooks.ts
@@ -3,6 +3,7 @@ import {
After,
setParallelCanAssign,
parallelCanAssignHelpers,
ITestCaseHookParameter,
} from '@cucumber/cucumber';
import Zenko from '../world/Zenko';
import { CacheHelper, Identity } from 'cli-testing';
@@ -19,13 +20,25 @@ import {
process.env.NODE_TLS_REJECT_UNAUTHORIZED = '0';

const { atMostOnePicklePerTag } = parallelCanAssignHelpers;
const noParallelRun = atMostOnePicklePerTag(['@AfterAll', '@PRA', '@ColdStorage']);

export const replicationLockTags = [
'@Lockawsbackendreplicationctstfail',
];
const noParallelRun = atMostOnePicklePerTag([
'@AfterAll',
'@PRA',
'@ColdStorage',
...replicationLockTags
]);

setParallelCanAssign(noParallelRun);

Before(async function (this: Zenko) {
Before(async function (this: Zenko, scenario: ITestCaseHookParameter) {
this.resetSaved();
Identity.resetIdentity();
// Store scenario tags for access in step definitions
const scenarioTags = scenario.pickle.tags?.map(tag => tag.name) || [];
this.addToSaved('scenarioTags', scenarioTags);
await Zenko.init(this.parameters);
});

19 changes: 17 additions & 2 deletions tests/ctst/features/crrReplicationS3utils.feature
@@ -9,6 +9,21 @@ Feature: Replication
Given an existing bucket "source-bucket" "with" versioning, "without" ObjectLock "without" retention mode
And an object "source-object-1" that "exists"
And a replication configuration to "awsbackendmismatch" location
When I run the job to replicate existing objects with status "NEW"
Then the object should eventually be replicated
When the job to replicate existing objects with status "NEW" is executed
Then the object should eventually "be" replicated
And the replicated object should be the same as the source object

@2.12.0
@PreMerge
@ReplicationTest
@Lockawsbackendreplicationctstfail
Scenario Outline: Re-replicate objects that failed to replicate
Given an existing bucket "source-bucket2" "with" versioning, "without" ObjectLock "without" retention mode
And a replication configuration to "awsbackendreplicationctstfail" location
And a deleted destination bucket on that location
And an object "source-object-2" that "exists"
Then the object should eventually "fail to be" replicated
When the destination bucket on the location is created again
And the job to replicate existing objects with status "FAILED" is executed
Then the object should eventually "be" replicated
And the replicated object should be the same as the source object
76 changes: 66 additions & 10 deletions tests/ctst/steps/replication.ts
@@ -1,13 +1,19 @@
import { When, Then } from '@cucumber/cucumber';
import { Given, When, Then } from '@cucumber/cucumber';
import Zenko from '../world/Zenko';
import { createAndRunPod, getMongoDBConfig, getZenkoVersion } from 'steps/utils/kubernetes';
import assert from 'assert';
import { GetObjectCommand } from '@aws-sdk/client-s3';
import {
GetObjectCommand,
DeleteBucketCommand,
CreateBucketCommand,
PutBucketVersioningCommand
} from '@aws-sdk/client-s3';
import { v4 as uuidv4 } from 'uuid';
import { getObject, headObject, getReplicationLocationConfig } from 'steps/utils/utils';
import { safeJsonParse } from 'common/utils';
import { replicationLockTags } from 'common/hooks';

When('I run the job to replicate existing objects with status {string}',
When('the job to replicate existing objects with status {string} is executed',
{ timeout: 600000 },
async function (
this: Zenko,
Expand Down Expand Up @@ -83,12 +89,12 @@ When('I run the job to replicate existing objects with status {string}',
await createAndRunPod(this, podManifest);
});

Then('the object should eventually be replicated',
async function (this: Zenko) {
Then('the object should eventually {string} replicated', { timeout: 360_000 },
async function (this: Zenko, replicate: 'be' | 'fail to be') {
Contributor

nit: alternatively, we could make this a bit more generic by specifying the replication status in the test?

Suggested change
Then('the object should eventually {string} replicated', { timeout: 360_000 },
Then('the object replication state should eventually be {string}', { timeout: 360_000 },

where the string can be COMPLETED or FAILED (or PENDING, but that could create flakiness)

It avoids the "be"/"fail to be" mapping; I'm not sure whether it is more useful or just makes the tests easier to read...
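
For what it's worth, a minimal sketch of what that more generic step could look like, reusing the existing step's polling loop; the `headObject` call signature below is an assumption, since the corresponding lines of the current step are elided in this diff:

```ts
// Sketch only: a status-driven variant of the step above. The headObject call
// and the shape of its parsed output are assumptions based on the existing
// step, whose polling body is partly elided in this diff.
import { Then } from '@cucumber/cucumber';
import assert from 'assert';
import Zenko from '../world/Zenko';
import { headObject } from 'steps/utils/utils';
import { safeJsonParse } from 'common/utils';

Then('the object replication state should eventually be {string}', { timeout: 360_000 },
    async function (this: Zenko, expectedStatus: 'COMPLETED' | 'FAILED') {
        const objectName = this.getSaved<string>('objectName');
        const bucketSource = this.getSaved<string>('bucketName');
        const startTime = Date.now();
        const replicationTimeoutMs = 300_000;
        while (Date.now() - startTime < replicationTimeoutMs) {
            await new Promise(resolve => setTimeout(resolve, 3000));
            // assumed signature: the existing step's headObject call is not visible here
            const response = await headObject(this, bucketSource, objectName);
            const parsed = safeJsonParse<{
                ReplicationStatus?: string,
            }>(response.stdout || '{}');
            assert(parsed.ok);
            const replicationStatus = parsed.result?.ReplicationStatus;
            if (replicationStatus === expectedStatus) {
                return; // reached the expected terminal status
            }
            if (replicationStatus === 'COMPLETED' || replicationStatus === 'FAILED') {
                // the other terminal status: fail fast instead of waiting for the timeout
                assert.fail(`object ${objectName} reached ${replicationStatus}, expected ${expectedStatus}`);
            }
            // PENDING / PROCESSING: keep polling
        }
        assert.fail(`Timeout: object '${objectName}' never reached ${expectedStatus}`);
    });
```

With that shape, the feature file would say `Then the object replication state should eventually be "FAILED"` instead of mapping "be"/"fail to be" onto statuses.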

Contributor Author

I will reconsider this one in the next PR, which adds another test, because that test will introduce a third status value

const objectName = this.getSaved<string>('objectName');
const bucketSource = this.getSaved<string>('bucketName');
const startTime = Date.now();
const replicationTimeoutMs = 90_000;
const replicationTimeoutMs = 300_000;
while (Date.now() - startTime < replicationTimeoutMs) {
await new Promise(resolve => setTimeout(resolve, 3000));

@@ -105,15 +111,27 @@ Then('the object should eventually be replicated',
}>(response.stdout || '{}');
assert(parsed.ok);
const replicationStatus = parsed.result?.ReplicationStatus;
assert.notStrictEqual(replicationStatus, 'FAILED', `replication failed for object ${objectName}`);
if (replicationStatus === 'COMPLETED') {
return;

if (replicate === 'be') {
assert.notStrictEqual(replicationStatus, 'FAILED', `replication failed for object ${objectName}`);
if (replicationStatus === 'COMPLETED') {
return;
}
} else if (replicate === 'fail to be') {
assert.notStrictEqual(
replicationStatus,
'COMPLETED',
`expected replication to fail for object ${objectName}`
);
if (replicationStatus === 'FAILED') {
return;
}
}
if (replicationStatus === 'PENDING' || replicationStatus === 'PROCESSING') {
continue;
}
}
assert.fail(`Timeout: Object '${objectName}' was not replicated successfully until timeout`);
assert.fail(`Timeout: Object '${objectName}' is still pending/processing after timeout`);
});

Then(
@@ -173,3 +191,41 @@ Then(
'REPLICA'
);
});

Given('a deleted destination bucket on that location', async function (this: Zenko) {
const replicationLocation = this.getSaved<string>('replicationLocation');
Contributor

this approach will not "scale" well if multiple tests use that location, since we will not be able to run tests targeting it in parallel (which is why you created a new, dedicated location)

I don't really see an easy alternative (except introducing "faults" in the system, either via code changes or network tinkering), and this is probably acceptable for now; but I think we should explicitly document this limitation and prepare for the case where this location is reused, to avoid introducing flakiness later:

  • Maybe add a comment in e2e-config.yaml.template, so that future readers can understand how this location is used;
  • Already create a tag for tests using this location, and prevent tests carrying this tag from running in parallel;
  • Could we add an assertion in the test (maybe in a replication configuration to "{string}" location step) to verify that the tag is set as expected? (A sketch follows this comment.)

What do you think?
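
For the last bullet, a minimal sketch of what such a guard could look like, assuming the scenario tags saved by the Before hook in this PR; the helper name and its placement in the replication-configuration step are hypothetical:

```ts
// Hypothetical guard for the `a replication configuration to "{string}" location`
// step: if the target location is lock-protected in common/hooks.ts, the scenario
// must carry the matching @Lock<location> tag, otherwise fail immediately.
import assert from 'assert';
import Zenko from '../world/Zenko';
import { replicationLockTags } from 'common/hooks';

export function assertLockTagIfRequired(world: Zenko, replicationLocation: string) {
    const lockTag = `@Lock${replicationLocation}`;
    if (!replicationLockTags.includes(lockTag)) {
        return; // location is not lock-protected, nothing to verify
    }
    const scenarioTags = world.getSaved<string[]>('scenarioTags') || [];
    assert(
        scenarioTags.includes(lockTag),
        `scenarios using the ${replicationLocation} location must be tagged ${lockTag}`,
    );
}
```

Calling it from the replication-configuration step would catch a missing tag at setup time, before any bucket is deleted.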

Contributor Author

Yes, investigating all 3 remarks.

Contributor Author

Yeah, I added a tag which enforces a non-parallel run of the tests that use it. The tag is then read in the "delete bucket" step; if it is absent, the step fails with an error.

const scenarioTags = this.getSaved<string[]>('scenarioTags') || [];
const lockTag = `@Lock${replicationLocation}`;
const hasTestLock = scenarioTags.includes(lockTag);
assert.strictEqual(
hasTestLock, true,
`This step can only be run when the tag ${lockTag} is configured`
);
assert.strictEqual(
true, replicationLockTags.includes(lockTag),
`The tag ${lockTag} must be added to the replicationLockTags array in common/hooks.ts`
);

const { destinationBucket, awsS3Client } =
await getReplicationLocationConfig(this, replicationLocation);
const command = new DeleteBucketCommand({
Bucket: destinationBucket,
});
await awsS3Client.send(command);
});

When('the destination bucket on the location is created again', async function (this: Zenko) {
const { destinationBucket, awsS3Client } =
await getReplicationLocationConfig(this, this.getSaved<string>('replicationLocation'));
const command = new CreateBucketCommand({
Bucket: destinationBucket,
});
await awsS3Client.send(command);
const versioningCommand = new PutBucketVersioningCommand({
Bucket: destinationBucket,
VersioningConfiguration: {
Status: 'Enabled',
},
});
await awsS3Client.send(versioningCommand);
});
5 changes: 4 additions & 1 deletion tests/zenko_tests/create_buckets.py
@@ -116,8 +116,9 @@ def create_aws_buckets():
VERIFY_CERTIFICATES = get_env('VERIFY_CERTIFICATES', False)
AWS_ACCESS_KEY = get_env('AWS_ACCESS_KEY')
AWS_SECRET_KEY = get_env('AWS_SECRET_KEY')
AWS_FAIL_BUCKET_NAME = get_env('AWS_FAIL_BUCKET_NAME')
AWS_ENDPOINT = get_env('AWS_ENDPOINT')
AWS_FAIL_BUCKET_NAME = get_env('AWS_FAIL_BUCKET_NAME')
AWS_REPLICATION_FAIL_CTST_BUCKET_NAME = get_env('AWS_REPLICATION_FAIL_CTST_BUCKET_NAME')

s3c = Session(aws_access_key_id=AWS_ACCESS_KEY,
aws_secret_access_key=AWS_SECRET_KEY)
@@ -127,7 +128,9 @@ def create_aws_buckets():
## Creating AWS buckets
_log.info('Creating AWS buckets...')
bucket_safe_create(aws_s3c_client.Bucket(AWS_FAIL_BUCKET_NAME))
bucket_safe_create(aws_s3c_client.Bucket(AWS_REPLICATION_FAIL_CTST_BUCKET_NAME))
aws_s3c_client.Bucket(AWS_FAIL_BUCKET_NAME).Versioning().enable()
aws_s3c_client.Bucket(AWS_REPLICATION_FAIL_CTST_BUCKET_NAME).Versioning().enable()

def create_azure_containers():
AZURE_BACKEND_ENDPOINT = get_env("AZURE_BACKEND_ENDPOINT")
11 changes: 11 additions & 0 deletions tests/zenko_tests/e2e-config.yaml.template
@@ -30,6 +30,17 @@ locations:
secretKey: "${AWS_SECRET_KEY}"
bucketMatch: yes
repoId: []
- name: "${AWS_BACKEND_DESTINATION_REPLICATION_FAIL_CTST_LOCATION}"
# This location is used for a specific test that temporarily removes its bucket
# Consider using other locations first
locationType: "location-aws-s3-v1"
details:
bucketName: "${AWS_REPLICATION_FAIL_CTST_BUCKET_NAME}"
endpoint: "${AWS_ENDPOINT}"
accessKey: "${AWS_ACCESS_KEY}"
secretKey: "${AWS_SECRET_KEY}"
bucketMatch: no
repoId: []
- name: ${AZURE_BACKEND_DESTINATION_LOCATION}
locationType: location-azure-v1
details: