diff --git a/common/cluster/metadata.go b/common/cluster/metadata.go index 748403ddde8..48f97e865f9 100644 --- a/common/cluster/metadata.go +++ b/common/cluster/metadata.go @@ -113,9 +113,9 @@ func NewMetadata( return m } -// GetNextFailoverVersion return the next failover version based on input -func (m Metadata) GetNextFailoverVersion(cluster string, currentFailoverVersion int64, domainName string) int64 { - initialFailoverVersion := m.getInitialFailoverVersion(cluster, domainName) +// GetNextFailoverVersion returns the next valid FailoverVersion for a domain +func (m Metadata) GetNextFailoverVersion(targetClusterName string, currentFailoverVersion int64, domainName string) int64 { + initialFailoverVersion := m.getInitialFailoverVersion(targetClusterName, domainName) failoverVersion := currentFailoverVersion/m.failoverVersionIncrement*m.failoverVersionIncrement + initialFailoverVersion if failoverVersion < currentFailoverVersion { return failoverVersion + m.failoverVersionIncrement diff --git a/common/cluster/metadata_test.go b/common/cluster/metadata_test.go index 63043fd32c6..e3ae5e223f9 100644 --- a/common/cluster/metadata_test.go +++ b/common/cluster/metadata_test.go @@ -42,6 +42,8 @@ func TestMetadataBehaviour(t *testing.T) { const initialFailoverVersionC1 = 0 const clusterName2 = "c2" const initialFailoverVersionC2 = 2 + const clusterName3 = "c3" + const initialFailoverVersionC3 = 4 const failoverVersionIncrement = 100 @@ -60,12 +62,17 @@ func TestMetadataBehaviour(t *testing.T) { currentVersion: 0, expectedOut: 2, }, - "a subsequent failover back": { + "a failover to c3 should set the failover version to be based on c3": { + failoverCluster: clusterName3, + currentVersion: 2, + expectedOut: 4, + }, + "a subsequent failover back to c1 should increment the failover version by failoverVersionIncrement": { failoverCluster: clusterName1, currentVersion: 2, expectedOut: 100, }, - "and a duplicate": { + "when the current failover version matches the target cluster it should not increment the failover version": { failoverCluster: clusterName1, currentVersion: 100, expectedOut: 100, @@ -75,6 +82,11 @@ func TestMetadataBehaviour(t *testing.T) { currentVersion: 100, expectedOut: 102, }, + "and a subsequent fail back over to c1 should skip over c3": { + failoverCluster: clusterName1, + currentVersion: 102, + expectedOut: 200, + }, } for name, td := range tests { @@ -88,10 +100,14 @@ func TestMetadataBehaviour(t *testing.T) { clusterName2: { InitialFailoverVersion: initialFailoverVersionC2, }, + clusterName3: { + InitialFailoverVersion: initialFailoverVersionC3, + }, }, versionToClusterName: map[int64]string{ initialFailoverVersionC1: clusterName1, initialFailoverVersionC2: clusterName2, + initialFailoverVersionC3: clusterName3, }, useNewFailoverVersionOverride: func(domain string) bool { return false }, metrics: metrics.NewNoopMetricsClient().Scope(0), diff --git a/common/domain/attrValidator.go b/common/domain/attrValidator.go index c6dc5cf1afb..f48efd57096 100644 --- a/common/domain/attrValidator.go +++ b/common/domain/attrValidator.go @@ -101,6 +101,10 @@ func (d *AttrValidatorImpl) validateDomainReplicationConfigForGlobalDomain( clusters := replicationConfig.Clusters activeClusters := replicationConfig.ActiveClusters + if activeCluster == "" { + return errActiveClusterNameRequired + } + for _, clusterConfig := range clusters { if err := d.validateClusterName(clusterConfig.ClusterName); err != nil { return err @@ -116,7 +120,16 @@ func (d *AttrValidatorImpl) validateDomainReplicationConfigForGlobalDomain( return false } + if err := d.validateClusterName(activeCluster); err != nil { + return err + } + + if !isInClusters(activeCluster) { + return errActiveClusterNotInClusters + } + if replicationConfig.IsActiveActive() { + // For active-active domains, also validate that all clusters in ActiveClustersByRegion are valid for _, cluster := range activeClusters.ActiveClustersByRegion { if err := d.validateClusterName(cluster.ActiveClusterName); err != nil { return err @@ -126,14 +139,6 @@ func (d *AttrValidatorImpl) validateDomainReplicationConfigForGlobalDomain( return errActiveClusterNotInClusters } } - } else { - if err := d.validateClusterName(activeCluster); err != nil { - return err - } - - if !isInClusters(activeCluster) { - return errActiveClusterNotInClusters - } } return nil diff --git a/common/domain/attrValidator_test.go b/common/domain/attrValidator_test.go index 6208b0e4987..f542de119de 100644 --- a/common/domain/attrValidator_test.go +++ b/common/domain/attrValidator_test.go @@ -237,6 +237,55 @@ func (s *attrValidatorSuite) TestValidateDomainReplicationConfigForGlobalDomain( }, ) s.NoError(err) + + // When ActiveClusterName is not provided, and ActiveClusters are not provided, it should return an error + err = s.validator.validateDomainReplicationConfigForGlobalDomain( + &persistence.DomainReplicationConfig{ + ActiveClusterName: "", + Clusters: []*persistence.ClusterReplicationConfig{ + {ClusterName: cluster.TestCurrentClusterName}, + }, + }, + ) + s.Error(err) + s.IsType(&types.BadRequestError{}, err) + + // When ActiveClusterName and ActiveClusters are provided, it should not return an error + err = s.validator.validateDomainReplicationConfigForGlobalDomain( + &persistence.DomainReplicationConfig{ + ActiveClusterName: cluster.TestCurrentClusterName, + Clusters: []*persistence.ClusterReplicationConfig{ + {ClusterName: cluster.TestCurrentClusterName}, + {ClusterName: cluster.TestAlternativeClusterName}, + }, + ActiveClusters: &types.ActiveClusters{ + ActiveClustersByRegion: map[string]types.ActiveClusterInfo{ + cluster.TestRegion1: {ActiveClusterName: cluster.TestCurrentClusterName}, + cluster.TestRegion2: {ActiveClusterName: cluster.TestAlternativeClusterName}, + }, + }, + }, + ) + s.NoError(err) + + // When ActiveClusterName is not provided, and ActiveClusters are provided, it should return an error + err = s.validator.validateDomainReplicationConfigForGlobalDomain( + &persistence.DomainReplicationConfig{ + ActiveClusterName: "", + Clusters: []*persistence.ClusterReplicationConfig{ + {ClusterName: cluster.TestCurrentClusterName}, + {ClusterName: cluster.TestAlternativeClusterName}, + }, + ActiveClusters: &types.ActiveClusters{ + ActiveClustersByRegion: map[string]types.ActiveClusterInfo{ + cluster.TestRegion1: {ActiveClusterName: cluster.TestCurrentClusterName}, + cluster.TestRegion2: {ActiveClusterName: cluster.TestAlternativeClusterName}, + }, + }, + }, + ) + s.Error(err) + s.IsType(&types.BadRequestError{}, err) } func (s *attrValidatorSuite) TestValidateDomainReplicationConfigClustersDoesNotRemove() { diff --git a/common/domain/errors.go b/common/domain/errors.go index 4d639a33106..1a52346b457 100644 --- a/common/domain/errors.go +++ b/common/domain/errors.go @@ -32,6 +32,7 @@ var ( errGracefulFailoverInActiveCluster = &types.BadRequestError{Message: "Cannot start the graceful failover from an active cluster to an active cluster."} errOngoingGracefulFailover = &types.BadRequestError{Message: "Cannot start concurrent graceful failover."} errInvalidGracefulFailover = &types.BadRequestError{Message: "Cannot start graceful failover without updating active cluster or in local domain."} + errActiveClusterNameRequired = &types.BadRequestError{Message: "ActiveClusterName is required for all global domains."} errInvalidRetentionPeriod = &types.BadRequestError{Message: "A valid retention period is not set on request."} errInvalidArchivalConfig = &types.BadRequestError{Message: "Invalid to enable archival without specifying a uri."} diff --git a/common/domain/handler.go b/common/domain/handler.go index 0fb2612a2a5..a73f64f47c9 100644 --- a/common/domain/handler.go +++ b/common/domain/handler.go @@ -268,12 +268,6 @@ func (d *handlerImpl) RegisterDomain( return err } - if activeClusters != nil { - // TODO: Leave a default activeClusterName for active-active domains - // active-active domain, activeClusterName is not used - activeClusterName = "" - } - replicationConfig := &persistence.DomainReplicationConfig{ ActiveClusterName: activeClusterName, Clusters: clusters, @@ -299,8 +293,7 @@ func (d *handlerImpl) RegisterDomain( } failoverVersion := constants.EmptyVersion - if registerRequest.GetIsGlobalDomain() && !replicationConfig.IsActiveActive() { - // assign failover version for active-passive domain + if registerRequest.GetIsGlobalDomain() { failoverVersion = d.clusterMetadata.GetNextFailoverVersion(activeClusterName, 0, registerRequest.Name) } @@ -594,10 +587,12 @@ func (d *handlerImpl) UpdateDomain( // we increment failover version so top level failoverVersion is updated and domain data is replicated. failoverVersion = d.clusterMetadata.GetNextFailoverVersion( replicationConfig.ActiveClusterName, + // TODO(active-active): This should be incremented in the same way as an active-passive domain failoverVersion+1, updateRequest.Name, ) + // TODO(active-active): Increment all ClusterAttributes that have changed // we also use the new failover version belonging to currentActiveCluster for the corresponding ActiveClustersByRegion map entry for region, clusterInfo := range replicationConfig.ActiveClusters.ActiveClustersByRegion { if clusterInfo.ActiveClusterName == currentActiveCluster { @@ -631,6 +626,7 @@ func (d *handlerImpl) UpdateDomain( // to indicate there was a change in replication config failoverVersion = d.clusterMetadata.GetNextFailoverVersion( d.clusterMetadata.GetCurrentClusterName(), + // TODO(active-active): If the domain level ActiveCluster has changed this should be incremented in the same way as an active-passive domain failoverVersion+1, updateRequest.Name, ) @@ -645,7 +641,7 @@ func (d *handlerImpl) UpdateDomain( now, failoverType, ¤tActiveCluster, - nil, + updateRequest.ActiveClusterName, currentActiveClusters, replicationConfig.ActiveClusters, )) diff --git a/common/domain/handler_test.go b/common/domain/handler_test.go index d349f768d7f..e7ea38699ee 100644 --- a/common/domain/handler_test.go +++ b/common/domain/handler_test.go @@ -366,7 +366,31 @@ func TestRegisterDomain(t *testing.T) { expectedErr: &types.BadRequestError{}, }, { - name: "active-active domain successfully registered", + name: "active-active domain successfully registered with explicit ActiveClusterName", + request: &types.RegisterDomainRequest{ + Name: "active-active-domain", + IsGlobalDomain: true, + ActiveClusterName: cluster.TestCurrentClusterName, + ActiveClustersByRegion: map[string]string{ + cluster.TestRegion1: cluster.TestCurrentClusterName, + cluster.TestRegion2: cluster.TestAlternativeClusterName, + }, + Clusters: []*types.ClusterReplicationConfiguration{ + {ClusterName: cluster.TestCurrentClusterName}, + {ClusterName: cluster.TestAlternativeClusterName}, + }, + WorkflowExecutionRetentionPeriodInDays: 3, + }, + isPrimaryCluster: true, + mockSetup: func(mockDomainMgr *persistence.MockDomainManager, mockReplicator *MockReplicator, request *types.RegisterDomainRequest) { + mockDomainMgr.EXPECT().GetDomain(gomock.Any(), &persistence.GetDomainRequest{Name: request.Name}).Return(nil, &types.EntityNotExistsError{}) + mockDomainMgr.EXPECT().CreateDomain(gomock.Any(), gomock.Any()).Return(&persistence.CreateDomainResponse{ID: "test-domain-id"}, nil) + mockReplicator.EXPECT().HandleTransmissionTask(gomock.Any(), types.DomainOperationCreate, gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), commonconstants.InitialPreviousFailoverVersion, true).Return(nil) + }, + wantErr: false, + }, + { + name: "active-active domain successfully registered without explicit ActiveClusterName (uses current cluster)", request: &types.RegisterDomainRequest{ Name: "active-active-domain", IsGlobalDomain: true, @@ -1640,6 +1664,7 @@ func TestHandler_UpdateDomain(t *testing.T) { setupMock: func(domainManager *persistence.MockDomainManager, updateRequest *types.UpdateDomainRequest, archivalMetadata *archiver.MockArchivalMetadata, timeSource clock.MockedTimeSource, domainReplicator *MockReplicator) { domainResponse := &persistence.GetDomainResponse{ ReplicationConfig: &persistence.DomainReplicationConfig{ + ActiveClusterName: cluster.TestCurrentClusterName, Clusters: []*persistence.ClusterReplicationConfig{ {ClusterName: cluster.TestCurrentClusterName}, {ClusterName: cluster.TestAlternativeClusterName}, @@ -1692,6 +1717,7 @@ func TestHandler_UpdateDomain(t *testing.T) { Info: domainResponse.Info, Config: domainResponse.Config, ReplicationConfig: &persistence.DomainReplicationConfig{ + ActiveClusterName: cluster.TestCurrentClusterName, Clusters: []*persistence.ClusterReplicationConfig{ {ClusterName: cluster.TestCurrentClusterName}, {ClusterName: cluster.TestAlternativeClusterName}, @@ -1747,6 +1773,7 @@ func TestHandler_UpdateDomain(t *testing.T) { response: func(timeSource clock.MockedTimeSource) *types.UpdateDomainResponse { data, _ := json.Marshal([]FailoverEvent{{ EventTime: timeSource.Now(), + FromCluster: cluster.TestCurrentClusterName, FailoverType: commonconstants.FailoverType(commonconstants.FailoverTypeForce).String(), FromActiveClusters: types.ActiveClusters{ ActiveClustersByRegion: map[string]types.ActiveClusterInfo{ @@ -1792,6 +1819,7 @@ func TestHandler_UpdateDomain(t *testing.T) { AsyncWorkflowConfig: &types.AsyncWorkflowConfiguration{Enabled: true}, }, ReplicationConfiguration: &types.DomainReplicationConfiguration{ + ActiveClusterName: cluster.TestCurrentClusterName, Clusters: []*types.ClusterReplicationConfiguration{ {ClusterName: cluster.TestCurrentClusterName}, {ClusterName: cluster.TestAlternativeClusterName}, diff --git a/common/persistence/data_manager_interfaces.go b/common/persistence/data_manager_interfaces.go index 9c307e1b66b..8b5583b4a52 100644 --- a/common/persistence/data_manager_interfaces.go +++ b/common/persistence/data_manager_interfaces.go @@ -1148,12 +1148,14 @@ type ( Clusters []*ClusterReplicationConfig // ActiveClusterName is the name of the cluster that the domain is active in. - // Applicable for active-passive domains. + // Required for all global domains (both active-passive and active-active). + // For active-passive domains, this is the single active cluster. + // For active-active domains this is the default cluster whenever a ClusterAttribute is not provided ActiveClusterName string - // TODO(c-warren): Update documentation once ActiveClusterName is the default for active-active domains. // ActiveClusters is only applicable for active-active domains. - // If this is set, ActiveClusterName is ignored. + // When this is set, the domain is considered active-active and workflows are routed + // based on their ClusterAttributes. ActiveClusters *types.ActiveClusters }