-
Notifications
You must be signed in to change notification settings - Fork 111
Online Indexer: replace the synchronized runner with a heartbeat #3530
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
c5c9998
27a9095
38833cf
bcbdc8f
79c3622
69638b9
c99bacc
77a0fb2
9db0abe
6e8bdeb
94c81c4
5d8db10
819c232
a8e853d
933c921
063f506
001b5e6
2a9f7e5
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,206 @@ | ||
/* | ||
* IndexingHeartbeat.java | ||
* | ||
* This source file is part of the FoundationDB open source project | ||
* | ||
* Copyright 2015-2025 Apple Inc. and the FoundationDB project authors | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package com.apple.foundationdb.record.provider.foundationdb; | ||
|
||
import com.apple.foundationdb.KeyValue; | ||
import com.apple.foundationdb.annotation.API; | ||
import com.apple.foundationdb.async.AsyncIterator; | ||
import com.apple.foundationdb.async.AsyncUtil; | ||
import com.apple.foundationdb.record.IndexBuildProto; | ||
import com.apple.foundationdb.record.logging.KeyValueLogMessage; | ||
import com.apple.foundationdb.record.logging.LogMessageKeys; | ||
import com.apple.foundationdb.record.metadata.Index; | ||
import com.apple.foundationdb.synchronizedsession.SynchronizedSessionLockedException; | ||
import com.google.protobuf.InvalidProtocolBufferException; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
|
||
import javax.annotation.Nonnull; | ||
import java.util.HashMap; | ||
import java.util.Map; | ||
import java.util.UUID; | ||
import java.util.concurrent.CompletableFuture; | ||
import java.util.concurrent.atomic.AtomicInteger; | ||
|
||
/** | ||
* Indexing Shared Heartbeats can be used to define and handle "active" indexing processes. | ||
* Every indexer should update its unique heartbeat during its indexing iteration. If the indexing session is optimized for | ||
* non-mutual (as defined by the indexing type, see {@link IndexBuildProto.IndexBuildIndexingStamp}), detecting an existing | ||
* active heartbeat will help preventing concurrent, conflicting, indexing attempts. | ||
* In addition, the heartbeats can be used by users to query activity status of ongoing indexing sessions. | ||
*/ | ||
@API(API.Status.INTERNAL) | ||
public class IndexingHeartbeat { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This seems like a bit of the code could be more generic. Particularly, I see that the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Going in a completely different direction, would it make sense to combine the heartbeats more with the type stamp? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The I am not sure that I understand your The type stamp is complementary to the heartbeat. Initially I thought about making a copy of the type stamp to each heartbeat, but it seems easier - for integrity reasons - to keep the type stamp as is. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We discussed offline, and decided to take a balanced approach of changing the method to
ScottDugas marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// [prefix, indexerId] -> [indexing-type, genesis time, heartbeat time] | ||
@Nonnull | ||
private static final Logger logger = LoggerFactory.getLogger(IndexingHeartbeat.class); | ||
public static final String INVALID_HEARTBEAT_INFO = "<< Invalid Heartbeat >>"; | ||
|
||
final UUID indexerId; | ||
final String info; | ||
final long genesisTimeMilliseconds; | ||
final long leaseLength; | ||
final boolean allowMutual; | ||
|
||
public IndexingHeartbeat(final UUID indexerId, String info, long leaseLength, boolean allowMutual) { | ||
this.indexerId = indexerId; | ||
this.info = info; | ||
this.leaseLength = leaseLength; | ||
this.allowMutual = allowMutual; | ||
this.genesisTimeMilliseconds = nowMilliseconds(); | ||
} | ||
|
||
public void updateHeartbeat(@Nonnull FDBRecordStore store, @Nonnull Index index) { | ||
byte[] key = IndexingSubspaces.indexHeartbeatSubspace(store, index, indexerId).pack(); | ||
byte[] value = IndexBuildProto.IndexBuildHeartbeat.newBuilder() | ||
.setInfo(info) | ||
.setGenesisTimeMilliseconds(genesisTimeMilliseconds) | ||
.setHeartbeatTimeMilliseconds(nowMilliseconds()) | ||
.build().toByteArray(); | ||
store.ensureContextActive().set(key, value); | ||
} | ||
|
||
public CompletableFuture<Void> checkAndUpdateHeartbeat(@Nonnull FDBRecordStore store, @Nonnull Index index) { | ||
// complete exceptionally if non-mutual, other exists | ||
if (allowMutual) { | ||
updateHeartbeat(store, index); | ||
return AsyncUtil.DONE; | ||
} | ||
|
||
final AsyncIterator<KeyValue> iterator = heartbeatsIterator(store, index); | ||
final long now = nowMilliseconds(); | ||
return AsyncUtil.whileTrue(() -> iterator.onHasNext() | ||
.thenApply(hasNext -> { | ||
if (!hasNext) { | ||
return false; | ||
} | ||
final KeyValue kv = iterator.next(); | ||
try { | ||
final UUID otherIndexerId = heartbeatKeyToIndexerId(store, index, kv.getKey()); | ||
if (!otherIndexerId.equals(this.indexerId)) { | ||
final IndexBuildProto.IndexBuildHeartbeat otherHeartbeat = IndexBuildProto.IndexBuildHeartbeat.parseFrom(kv.getValue()); | ||
final long age = now - otherHeartbeat.getHeartbeatTimeMilliseconds(); | ||
if (age > 0 && age < leaseLength) { | ||
ScottDugas marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// For practical reasons, this exception is backward compatible to the Synchronized Lock one | ||
throw new SynchronizedSessionLockedException("Failed to initialize the session because of an existing session in progress") | ||
ScottDugas marked this conversation as resolved.
Show resolved
Hide resolved
|
||
.addLogInfo(LogMessageKeys.INDEXER_ID, indexerId) | ||
.addLogInfo(LogMessageKeys.EXISTING_INDEXER_ID, otherIndexerId) | ||
.addLogInfo(LogMessageKeys.AGE_MILLISECONDS, age) | ||
.addLogInfo(LogMessageKeys.TIME_LIMIT_MILLIS, leaseLength); | ||
Check warning on line 107 in fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/provider/foundationdb/IndexingHeartbeat.java
|
||
} | ||
} | ||
} catch (InvalidProtocolBufferException e) { | ||
if (logger.isWarnEnabled()) { | ||
logger.warn(KeyValueLogMessage.of("Bad indexing heartbeat item", | ||
LogMessageKeys.KEY, kv.getKey(), | ||
LogMessageKeys.VALUE, kv.getValue())); | ||
} | ||
} | ||
return true; | ||
})) | ||
.thenApply(ignore -> { | ||
updateHeartbeat(store, index); | ||
return null; | ||
}); | ||
Check warning on line 122 in fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/provider/foundationdb/IndexingHeartbeat.java
|
||
} | ||
|
||
public void clearHeartbeat(@Nonnull FDBRecordStore store, @Nonnull Index index) { | ||
store.ensureContextActive().clear(IndexingSubspaces.indexHeartbeatSubspace(store, index, indexerId).pack()); | ||
} | ||
|
||
public static void clearAllHeartbeats(@Nonnull FDBRecordStore store, @Nonnull Index index) { | ||
store.ensureContextActive().clear(IndexingSubspaces.indexHeartbeatSubspace(store, index).range()); | ||
} | ||
|
||
public static CompletableFuture<Map<UUID, IndexBuildProto.IndexBuildHeartbeat>> getIndexingHeartbeats(FDBRecordStore store, Index index, int maxCount) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You could remove the need for this having a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Wouldn't that be over engineering? The |
||
final Map<UUID, IndexBuildProto.IndexBuildHeartbeat> ret = new HashMap<>(); | ||
final AsyncIterator<KeyValue> iterator = heartbeatsIterator(store, index); | ||
final AtomicInteger iterationCount = new AtomicInteger(0); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Ignoring my comment above about using a There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What advantage does |
||
return AsyncUtil.whileTrue(() -> iterator.onHasNext() | ||
.thenApply(hasNext -> { | ||
if (!hasNext) { | ||
return false; | ||
} | ||
if (maxCount > 0 && maxCount < iterationCount.incrementAndGet()) { | ||
return false; | ||
} | ||
final KeyValue kv = iterator.next(); | ||
final UUID otherIndexerId = heartbeatKeyToIndexerId(store, index, kv.getKey()); | ||
try { | ||
final IndexBuildProto.IndexBuildHeartbeat otherHeartbeat = IndexBuildProto.IndexBuildHeartbeat.parseFrom(kv.getValue()); | ||
ret.put(otherIndexerId, otherHeartbeat); | ||
} catch (InvalidProtocolBufferException e) { | ||
// Let the caller know about this invalid heartbeat. | ||
ret.put(otherIndexerId, IndexBuildProto.IndexBuildHeartbeat.newBuilder() | ||
.setInfo(INVALID_HEARTBEAT_INFO) | ||
.setGenesisTimeMilliseconds(0) | ||
.setHeartbeatTimeMilliseconds(0) | ||
.build()); | ||
} | ||
return true; | ||
})) | ||
.thenApply(ignore -> ret); | ||
} | ||
|
||
public static CompletableFuture<Integer> clearIndexingHeartbeats(@Nonnull FDBRecordStore store, @Nonnull Index index, long minAgeMilliseconds, int maxIteration) { | ||
final AsyncIterator<KeyValue> iterator = heartbeatsIterator(store, index); | ||
final AtomicInteger deleteCount = new AtomicInteger(0); | ||
final AtomicInteger iterationCount = new AtomicInteger(0); | ||
final long now = nowMilliseconds(); | ||
return AsyncUtil.whileTrue(() -> iterator.onHasNext() | ||
.thenApply(hasNext -> { | ||
if (!hasNext) { | ||
return false; | ||
} | ||
if (maxIteration > 0 && maxIteration < iterationCount.incrementAndGet()) { | ||
return false; | ||
} | ||
final KeyValue kv = iterator.next(); | ||
boolean shouldRemove; | ||
try { | ||
final IndexBuildProto.IndexBuildHeartbeat otherHeartbeat = IndexBuildProto.IndexBuildHeartbeat.parseFrom(kv.getValue()); | ||
// remove heartbeat if too old | ||
shouldRemove = now >= otherHeartbeat.getHeartbeatTimeMilliseconds() + minAgeMilliseconds; | ||
} catch (InvalidProtocolBufferException e) { | ||
// remove heartbeat if invalid | ||
shouldRemove = true; | ||
Comment on lines
+183
to
+184
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does this introduce a risk if we're running in an environment with future versions of the record layer? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The function should only be triggered by the user, and only if something went wrong while clearing the heartbeat (session end, or whenever the index because readable). If a protobuf upgrade will make it inconsistent (which is unthinkable while you are still around:) ) it is much more likely that a new code be called to clear old unusable heartbeats from a previous version. |
||
} | ||
if (shouldRemove) { | ||
store.ensureContextActive().clear(kv.getKey()); | ||
deleteCount.incrementAndGet(); | ||
} | ||
return true; | ||
})) | ||
.thenApply(ignore -> deleteCount.get()); | ||
} | ||
|
||
private static AsyncIterator<KeyValue> heartbeatsIterator(FDBRecordStore store, Index index) { | ||
return store.getContext().ensureActive().getRange(IndexingSubspaces.indexHeartbeatSubspace(store, index).range()).iterator(); | ||
} | ||
|
||
private static UUID heartbeatKeyToIndexerId(FDBRecordStore store, Index index, byte[] key) { | ||
return IndexingSubspaces.indexHeartbeatSubspace(store, index).unpack(key).getUUID(0); | ||
} | ||
|
||
private static long nowMilliseconds() { | ||
return System.currentTimeMillis(); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -27,6 +27,7 @@ | |
import com.apple.foundationdb.tuple.Tuple; | ||
|
||
import javax.annotation.Nonnull; | ||
import java.util.UUID; | ||
|
||
/** | ||
* List of subspaces related to the indexing/index-scrubbing processes. | ||
|
@@ -40,6 +41,7 @@ public final class IndexingSubspaces { | |
private static final Object INDEX_SCRUBBED_RECORDS_RANGES_ZERO = 4L; | ||
private static final Object INDEX_SCRUBBED_RECORDS_RANGES = 5L; | ||
private static final Object INDEX_SCRUBBED_INDEX_RANGES = 6L; | ||
private static final Object INDEX_BUILD_HEARTBEAT_PREFIX = 7L; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
|
||
private IndexingSubspaces() { | ||
throw new IllegalStateException("Utility class"); | ||
|
@@ -83,6 +85,29 @@ public static Subspace indexBuildTypeSubspace(@Nonnull FDBRecordStoreBase<?> sto | |
return indexBuildSubspace(store, index, INDEX_BUILD_TYPE_VERSION); | ||
} | ||
|
||
/** | ||
* Subspace that stores the indexing heartbeat. | ||
* @param store store | ||
* @param index index | ||
* @return subspace | ||
*/ | ||
@Nonnull | ||
public static Subspace indexHeartbeatSubspace(@Nonnull FDBRecordStoreBase<?> store, @Nonnull Index index) { | ||
return indexBuildSubspace(store, index, INDEX_BUILD_HEARTBEAT_PREFIX); | ||
} | ||
|
||
/** | ||
* Subspace that stores the indexing heartbeat. | ||
* @param store store | ||
* @param index index | ||
* @param indexerId session id | ||
* @return subspace | ||
*/ | ||
@Nonnull | ||
public static Subspace indexHeartbeatSubspace(@Nonnull FDBRecordStoreBase<?> store, @Nonnull Index index, @Nonnull UUID indexerId) { | ||
return indexHeartbeatSubspace(store, index).subspace(Tuple.from(indexerId)); | ||
} | ||
|
||
/** | ||
* Subspace that stores scrubbed records ranges of the zero range-id. This subspace is backward compatible | ||
* to record ranges scrubbed before range-id was introduced. | ||
|
@@ -184,5 +209,7 @@ public static void eraseAllIndexingDataButTheLock(@Nonnull FDBRecordContext cont | |
eraseAllIndexingScrubbingData(context, store, index); | ||
context.clear(Range.startsWith(indexBuildScannedRecordsSubspace(store, index).pack())); | ||
context.clear(Range.startsWith(indexBuildTypeSubspace(store, index).pack())); | ||
// The heartbeats, unlike the sync lock, may be erased here. If needed, an appropriate heartbeat will be set after this clear & within the same transaction. | ||
context.clear(Range.startsWith(indexHeartbeatSubspace(store, index).pack())); | ||
} | ||
} |
Uh oh!
There was an error while loading. Please reload this page.