Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
/*
* RecordCoreTimeoutException.java
*
* This source file is part of the FoundationDB open source project
*
* Copyright 2015-2025 Apple Inc. and the FoundationDB project authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.apple.foundationdb.record;

import com.apple.foundationdb.annotation.API;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;

/**
* This exception extends {@link RecordCoreException} and is specifically used to indicate
* timeout-related failures.
*/
@API(API.Status.UNSTABLE)
@SuppressWarnings("serial")

Check warning on line 33 in fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/RecordCoreTimeoutException.java

View check run for this annotation

fdb.teamscale.io / Teamscale | Findings

fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/RecordCoreTimeoutException.java#L33

Warning of type 'serial' should not be suppressed https://fdb.teamscale.io/findings/details/foundationdb-fdb-record-layer?t=FORK_MR%2F3732%2Fjjezra%2Fmerger_retry_by_default%3AHEAD&id=FC3AC47CF5E199C0BC8EEBA7B6187E3F
public class RecordCoreTimeoutException extends RecordCoreException {
public RecordCoreTimeoutException(@Nonnull String msg, @Nullable Object ... keyValues) {
super(msg, keyValues);
}

Check warning on line 37 in fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/RecordCoreTimeoutException.java

View check run for this annotation

fdb.teamscale.io / Teamscale | Test Gaps

fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/RecordCoreTimeoutException.java#L35-L37

[Test Gap] Added method `RecordCoreTimeoutException` has not been tested. https://fdb.teamscale.io/metrics/code/foundationdb-fdb-record-layer/fdb-record-layer-core%2Fsrc%2Fmain%2Fjava%2Fcom%2Fapple%2Ffoundationdb%2Frecord%2FRecordCoreTimeoutException.java?coverage-mode=test-gap&t=FORK_MR%2F3732%2Fjjezra%2Fmerger_retry_by_default%3AHEAD&selection=char-1157-1281&merge-request=FoundationDB%2Ffdb-record-layer%2F3732

public RecordCoreTimeoutException(Throwable cause) {
super(cause);
}

Check warning on line 41 in fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/RecordCoreTimeoutException.java

View check run for this annotation

fdb.teamscale.io / Teamscale | Test Gaps

fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/RecordCoreTimeoutException.java#L39-L41

[Test Gap] Added method `RecordCoreTimeoutException` has not been tested. https://fdb.teamscale.io/metrics/code/foundationdb-fdb-record-layer/fdb-record-layer-core%2Fsrc%2Fmain%2Fjava%2Fcom%2Fapple%2Ffoundationdb%2Frecord%2FRecordCoreTimeoutException.java?coverage-mode=test-gap&t=FORK_MR%2F3732%2Fjjezra%2Fmerger_retry_by_default%3AHEAD&selection=char-1288-1367&merge-request=FoundationDB%2Ffdb-record-layer%2F3732

public RecordCoreTimeoutException(@Nonnull String msg, @Nullable Throwable cause) {
super(msg, cause);
}

public RecordCoreTimeoutException(@Nonnull String msg) {
super(msg);
}

protected RecordCoreTimeoutException(String message, Throwable cause,
boolean enableSuppression,
boolean writableStackTrace) {
super(message, cause, enableSuppression, writableStackTrace);
}

Check warning on line 55 in fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/RecordCoreTimeoutException.java

View check run for this annotation

fdb.teamscale.io / Teamscale | Test Gaps

fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/RecordCoreTimeoutException.java#L51-L55

[Test Gap] Added method `RecordCoreTimeoutException` has not been tested. https://fdb.teamscale.io/metrics/code/foundationdb-fdb-record-layer/fdb-record-layer-core%2Fsrc%2Fmain%2Fjava%2Fcom%2Fapple%2Ffoundationdb%2Frecord%2FRecordCoreTimeoutException.java?coverage-mode=test-gap&t=FORK_MR%2F3732%2Fjjezra%2Fmerger_retry_by_default%3AHEAD&selection=char-1584-1867&merge-request=FoundationDB%2Ffdb-record-layer%2F3732
}

Check warning on line 56 in fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/RecordCoreTimeoutException.java

View check run for this annotation

fdb.teamscale.io / Teamscale | Findings

fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/RecordCoreTimeoutException.java#L34-L56

`RecordCoreTimeoutException` has inheritance depth of 4 which is deeper than maximum of 2 https://fdb.teamscale.io/findings/details/foundationdb-fdb-record-layer?t=FORK_MR%2F3732%2Fjjezra%2Fmerger_retry_by_default%3AHEAD&id=CFF4F2803157E8F0D0F99B997C346C10
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@

package com.apple.foundationdb.record.provider.foundationdb;

import com.apple.foundationdb.FDBError;
import com.apple.foundationdb.FDBException;
import com.apple.foundationdb.MutationType;
import com.apple.foundationdb.annotation.API;
Expand Down Expand Up @@ -59,7 +58,6 @@
import java.time.DateTimeException;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
Expand Down Expand Up @@ -1213,22 +1211,6 @@ protected static <T> T findException(@Nullable Throwable ex, Class<T> classT) {
}
return null;
}

protected static boolean shouldLessenWork(@Nullable FDBException ex) {
// These error codes represent a list of errors that can occur if there is too much work to be done
// in a single transaction.
if (ex == null) {
return false;
}
final Set<Integer> lessenWorkCodes = new HashSet<>(Arrays.asList(
FDBError.TIMED_OUT.code(),
FDBError.TRANSACTION_TOO_OLD.code(),
FDBError.NOT_COMMITTED.code(),
FDBError.TRANSACTION_TIMED_OUT.code(),
FDBError.COMMIT_READ_INCOMPLETE.code(),
FDBError.TRANSACTION_TOO_LARGE.code()));
return lessenWorkCodes.contains(ex.getCode());
}
}


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@

import com.apple.foundationdb.FDBException;
import com.apple.foundationdb.async.AsyncUtil;
import com.apple.foundationdb.record.RecordCoreTimeoutException;
import com.apple.foundationdb.record.logging.KeyValueLogMessage;
import com.apple.foundationdb.record.logging.LogMessageKeys;
import com.apple.foundationdb.record.metadata.Index;
Expand All @@ -37,6 +38,7 @@
import java.util.List;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
Expand Down Expand Up @@ -148,11 +150,10 @@ private CompletableFuture<Boolean> handleFailure(final IndexDeferredMaintenanceC
// merges. Not perfect, but as long as it's rare the impact should be minimal.

mergeControl.mergeHadFailed(); // report to adjust stats
final FDBException ex = IndexingBase.findException(e, FDBException.class);
final IndexDeferredMaintenanceControl.LastStep lastStep = mergeControl.getLastStep();
if (!IndexingBase.shouldLessenWork(ex)) {
if (shouldAbort(e)) {
giveUpMerging(mergeControl, e);
}
final IndexDeferredMaintenanceControl.LastStep lastStep = mergeControl.getLastStep();
switch (lastStep) {
case REPARTITION:
// Here: this exception might be resolved by reducing the number of documents to move during repartitioning
Expand All @@ -175,6 +176,13 @@ private CompletableFuture<Boolean> handleFailure(final IndexDeferredMaintenanceC
return AsyncUtil.READY_TRUE; // and retry
}

private boolean shouldAbort(@Nullable Throwable e) {
return e == null ||
(IndexingBase.findException(e, RecordCoreTimeoutException.class) == null &&
IndexingBase.findException(e, TimeoutException.class) == null &&
IndexingBase.findException(e, FDBException.class) == null);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looking at the error codes
It seems like probably the following should not be retried:

  • batch_transaction_throttled -- this indicates the cluster is overloaded and trying not to do batch-level work. Retrying increases the load
  • tag_throttled -- Similar, but for a tag (I'm not sure if record layer supports tags, but if we don't know, we may in the future)

There's definitely a long list of errors in there that would be pointless to retry, which is why the original code had a list of error codes where it should lessen work, rather than a list where it shouldn't.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

}

private void handleRepartitioningFailure(final IndexDeferredMaintenanceControl mergeControl, Throwable e) {
repartitionDocumentCount = mergeControl.getRepartitionDocumentCount();
if (repartitionDocumentCount == -1) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

package com.apple.foundationdb.record.provider.foundationdb;

import com.apple.foundationdb.FDBError;
import com.apple.foundationdb.FDBException;
import com.apple.foundationdb.annotation.API;
import com.apple.foundationdb.async.AsyncUtil;
Expand Down Expand Up @@ -60,6 +61,13 @@
public class IndexingThrottle {

@Nonnull private static final Logger LOGGER = LoggerFactory.getLogger(IndexingThrottle.class);
@Nonnull private static final Set<Integer> lessenWorkCodes = new HashSet<>(Arrays.asList(
FDBError.TIMED_OUT.code(),
FDBError.TRANSACTION_TOO_OLD.code(),
FDBError.NOT_COMMITTED.code(),
FDBError.TRANSACTION_TIMED_OUT.code(),
FDBError.COMMIT_READ_INCOMPLETE.code(),
FDBError.TRANSACTION_TOO_LARGE.code()));
@Nonnull private final IndexingCommon common;
@Nonnull private final Booker booker;
private final boolean isScrubber;
Expand Down Expand Up @@ -133,7 +141,7 @@ boolean mayRetryAfterHandlingException(@Nullable FDBException fdbException,
@Nullable List<Object> additionalLogMessageKeyValues,
int currTries,
final boolean adjustLimits) {
if (currTries >= common.config.getMaxRetries() || !IndexingBase.shouldLessenWork(fdbException)) {
if (currTries >= common.config.getMaxRetries() || !shouldLessenWork(fdbException)) {
// Here: should not retry or no more retries. There is no real need to handle limits.
return false;
}
Expand All @@ -144,6 +152,15 @@ boolean mayRetryAfterHandlingException(@Nullable FDBException fdbException,
return true;
}

private static boolean shouldLessenWork(@Nullable FDBException ex) {
// These error codes represent a list of errors that can occur if there is too much work to be done
// in a single transaction.
if (ex == null) {
return false;
}
return lessenWorkCodes.contains(ex.getCode());
}

void decreaseLimit(@Nonnull FDBException fdbException,
@Nullable List<Object> additionalLogMessageKeyValues) {
// TODO: decrease the limit only for certain errors
Expand Down
Loading