Skip to content

Commit 190eafc

Browse files
authored
Add the ability to run header repair as part of record repair (#3780)
To support repairing a store that may have both a corrupt header and corrupt records, this PR adds the ability to repair the header as part of record repair (without committing the repair when running in dry-run mode).
- Add read-only mode to the transactional runner and the retrying iterator
- Add read-only mode to the repair runner
- Create a repair-if-necessary store builder
- Add read-only and repair ro repair runners
Resolves #3779
1 parent 65b4806 commit 190eafc

File tree

8 files changed

+277
-17
lines changed

8 files changed

+277
-17
lines changed

fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/provider/foundationdb/recordrepair/RecordRepair.java

Lines changed: 37 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
import com.apple.foundationdb.record.logging.LogMessageKeys;
2828
import com.apple.foundationdb.record.provider.foundationdb.FDBDatabase;
2929
import com.apple.foundationdb.record.provider.foundationdb.FDBRecordStore;
30+
import com.apple.foundationdb.record.provider.foundationdb.FormatVersion;
3031
import com.apple.foundationdb.record.provider.foundationdb.runners.throttled.CursorFactory;
3132
import com.apple.foundationdb.record.provider.foundationdb.runners.throttled.ThrottledRetryingIterator;
3233
import com.apple.foundationdb.tuple.Tuple;
@@ -96,15 +97,18 @@ public enum ValidationKind { RECORD_VALUE, RECORD_VALUE_AND_VERSION }
9697
private final ValidationKind validationKind;
9798
@Nonnull
9899
private final ThrottledRetryingIterator<Tuple> throttledIterator;
100+
private final boolean allowRepair;
99101

100-
protected RecordRepair(@Nonnull final Builder config) {
102+
protected RecordRepair(@Nonnull final Builder config, boolean allowRepair) {
101103
this.database = config.database;
102104
this.storeBuilder = config.getStoreBuilder();
103105
this.validationKind = config.getValidationKind();
104106
ThrottledRetryingIterator.Builder<Tuple> iteratorBuilder =
105107
ThrottledRetryingIterator.builder(database, cursorFactory(), this::handleOneItem)
106108
.withMdcContext(MDC.getCopyOfContextMap());
107-
throttledIterator = configureThrottlingIterator(iteratorBuilder, config).build();
109+
this.allowRepair = allowRepair;
110+
// This will also ensure the transaction only commits when needed
111+
throttledIterator = configureThrottlingIterator(iteratorBuilder, config, allowRepair).build();
108112
}
109113

110114
/**
@@ -171,15 +175,16 @@ protected CompletableFuture<RecordRepairResult> validateInternal(@Nonnull final
171175
});
172176
}
173177

174-
private ThrottledRetryingIterator.Builder<Tuple> configureThrottlingIterator(ThrottledRetryingIterator.Builder<Tuple> builder, Builder config) {
178+
private ThrottledRetryingIterator.Builder<Tuple> configureThrottlingIterator(ThrottledRetryingIterator.Builder<Tuple> builder, Builder config, boolean allowRepair) {
175179
return builder
176180
.withTransactionInitNotification(this::logStartTransaction)
177181
.withTransactionSuccessNotification(this::logCommitTransaction)
178182
.withTransactionTimeQuotaMillis(config.getTransactionTimeQuotaMillis())
179183
.withMaxRecordsDeletesPerTransaction(config.getMaxRecordDeletesPerTransaction())
180184
.withMaxRecordsScannedPerSec(config.getMaxRecordScannedPerSec())
181185
.withMaxRecordsDeletesPerSec(config.getMaxRecordDeletesPerSec())
182-
.withNumOfRetries(config.getNumOfRetries());
186+
.withNumOfRetries(config.getNumOfRetries())
187+
.withCommitWhenDone(allowRepair);
183188
}
184189

185190
@SuppressWarnings("PMD.UnusedFormalParameter")
@@ -191,7 +196,8 @@ private void logStartTransaction(ThrottledRetryingIterator.QuotaManager quotaMan
191196

192197
private void logCommitTransaction(ThrottledRetryingIterator.QuotaManager quotaManager) {
193198
if (logger.isDebugEnabled()) {
194-
logger.debug(KeyValueLogMessage.of("RecordRepairRunner: transaction committed",
199+
String message = allowRepair ? "RecordRepairRunner: transaction committed" : "RecordRepairRunner: transaction ended";
200+
logger.debug(KeyValueLogMessage.of(message,
195201
LogMessageKeys.RECORDS_SCANNED, quotaManager.getScannedCount(),
196202
LogMessageKeys.RECORDS_DELETED, quotaManager.getDeletesCount()));
197203
}
@@ -215,6 +221,8 @@ public static class Builder {
215221
private int maxRecordScannedPerSec = 0;
216222
private int maxRecordDeletesPerSec = 1000;
217223
private int numOfRetries = 4;
224+
private int userVersion;
225+
private @Nullable FormatVersion minimumPossibleFormatVersion;
218226

219227
/**
220228
* Constructor.
@@ -236,6 +244,7 @@ public RecordRepairStatsRunner buildStatsRunner() {
236244

237245
/**
238246
* Finalize the build and create a repair runner.
247+
* @param allowRepair whether to repair the found issues (TRUE) or run in read-only mode (FALSE)
239248
* @return the newly created repair runner
240249
*/
241250
public RecordRepairValidateRunner buildRepairRunner(boolean allowRepair) {
@@ -329,14 +338,36 @@ public Builder withNumOfRetries(final int numOfRetries) {
329338
return this;
330339
}
331340

341+
/**
342+
* Set the store header repair parameters.
343+
* If set, the runner will try to repair the store header (See {@link FDBRecordStore.Builder#repairMissingHeader(int, FormatVersion)})
344+
* as part of the repair operation in case the store fails to open.
345+
* If the runner is running in dry-run mode (repair not allowed), its transactions are never committed, so any
346+
* header repair is discarded and the stored header is left unchanged.
347+
* @param userVersion the user version for the header repair
348+
* @param minimumPossibleFormatVersion the minimum store format version to use for the repair
349+
* (default: {@code null}, in which case the runner will not attempt to repair the header)
350+
* @return this builder
351+
*/
352+
public Builder withHeaderRepairParameters(int userVersion, @Nullable FormatVersion minimumPossibleFormatVersion) {
353+
this.userVersion = userVersion;
354+
this.minimumPossibleFormatVersion = minimumPossibleFormatVersion;
355+
return this;
356+
}
357+
332358
@Nonnull
333359
public FDBDatabase getDatabase() {
334360
return database;
335361
}
336362

337363
@Nonnull
338364
public FDBRecordStore.Builder getStoreBuilder() {
339-
return storeBuilder;
365+
if (minimumPossibleFormatVersion != null) {
366+
// override the store builder to repair the header if necessary
367+
return new StoreBuilderWithRepair(storeBuilder, userVersion, minimumPossibleFormatVersion);
368+
} else {
369+
return storeBuilder;
370+
}
340371
}
341372

342373
@Nonnull

fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/provider/foundationdb/recordrepair/RecordRepairStatsRunner.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,8 @@ public class RecordRepairStatsRunner extends RecordRepair {
3838
private final RepairStatsResults statsResult;
3939

4040
RecordRepairStatsRunner(@Nonnull final Builder config) {
41-
super(config);
41+
// stats runner never commits a transaction
42+
super(config, false);
4243
statsResult = new RepairStatsResults();
4344
}
4445

fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/provider/foundationdb/recordrepair/RecordRepairValidateRunner.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ public class RecordRepairValidateRunner extends RecordRepair {
4949
private final AtomicBoolean earlyReturn;
5050

5151
RecordRepairValidateRunner(@Nonnull final Builder config, boolean allowRepair) {
52-
super(config);
52+
super(config, allowRepair);
5353
this.allowRepair = allowRepair;
5454
this.maxResultsReturned = config.getMaxResultsReturned();
5555

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
/*
2+
* StoreBuilderWithRepair.java
3+
*
4+
* This source file is part of the FoundationDB open source project
5+
*
6+
* Copyright 2015-2025 Apple Inc. and the FoundationDB project authors
7+
*
8+
* Licensed under the Apache License, Version 2.0 (the "License");
9+
* you may not use this file except in compliance with the License.
10+
* You may obtain a copy of the License at
11+
*
12+
* http://www.apache.org/licenses/LICENSE-2.0
13+
*
14+
* Unless required by applicable law or agreed to in writing, software
15+
* distributed under the License is distributed on an "AS IS" BASIS,
16+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17+
* See the License for the specific language governing permissions and
18+
* limitations under the License.
19+
*/
20+
21+
package com.apple.foundationdb.record.provider.foundationdb.recordrepair;
22+
23+
import com.apple.foundationdb.annotation.API;
24+
import com.apple.foundationdb.record.provider.foundationdb.FDBRecordStore;
25+
import com.apple.foundationdb.record.provider.foundationdb.FormatVersion;
26+
import com.apple.foundationdb.record.util.pair.NonnullPair;
27+
28+
import javax.annotation.Nonnull;
29+
import java.util.concurrent.CompletableFuture;
30+
31+
/**
32+
* A flavor of {@link FDBRecordStore.Builder} that can handle the case where the store cannot be opened.
33+
* In case the given store builder fails to open the store due to a missing header, the {@link FDBRecordStore.Builder#repairMissingHeader(int, FormatVersion)}
34+
* method is called and the repaired store is returned.
35+
*/
36+
@API(API.Status.INTERNAL)
37+
public class StoreBuilderWithRepair extends FDBRecordStore.Builder {
38+
private final int userVersion;
39+
private final FormatVersion minimumPossibleFormatVersion;
40+
41+
/**
42+
* Constructor.
43+
* @param other the source store builder to delegate to
44+
* @param userVersion the userVersion to use for repairing the header if necessary
45+
* @param minimumPossibleFormatVersion the minimumPossibleFormatVersion to use if necessary
46+
*/
47+
public StoreBuilderWithRepair(@Nonnull FDBRecordStore.Builder other,
48+
final int userVersion,
49+
@Nonnull FormatVersion minimumPossibleFormatVersion) {
50+
super(other);
51+
this.userVersion = userVersion;
52+
this.minimumPossibleFormatVersion = minimumPossibleFormatVersion;
53+
}
54+
55+
/**
56+
* Override the {@link FDBRecordStore.Builder#openAsync()} method to add support for repairing the header.
57+
* In case the store fails to be opened normally, try to repair it given the provided repair
58+
* parameters.
59+
*
60+
* @return a future that will contain the opened store if successful
61+
*/
62+
@Nonnull
63+
@Override
64+
public CompletableFuture<FDBRecordStore> openAsync() {
65+
return repairMissingHeader(userVersion, minimumPossibleFormatVersion)
66+
.thenApply(NonnullPair::getRight);
67+
}
68+
}

fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/provider/foundationdb/runners/TransactionalRunner.java

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -98,11 +98,37 @@ public TransactionalRunner(@Nonnull FDBDatabase database,
9898
@SuppressWarnings({"PMD.CloseResource", "PMD.UseTryWithResources"})
9999
public <T> CompletableFuture<T> runAsync(final boolean clearWeakReadSemantics,
100100
@Nonnull Function<? super FDBRecordContext, CompletableFuture<? extends T>> runnable) {
101+
return runAsync(clearWeakReadSemantics, true, runnable);
102+
}
103+
104+
/**
105+
* A flavor of the {@link #runAsync(boolean, Function)} method that supports read-only transactions.
106+
* @param clearWeakReadSemantics whether to clear the {@link FDBRecordContextConfig#getWeakReadSemantics()} before
107+
* creating the transaction. These should be cleared if retrying a transaction, particularly in response to a
108+
* conflict, because reusing the old read version would just cause it to re-conflict.
109+
* @param commitWhenDone if FALSE the transaction will not be committed. If TRUE, behaves the same as described in {@link #runAsync(boolean, Function)}
110+
* @param runnable some code to run that uses an {@link FDBRecordContext}
111+
* @param <T> the type of the value returned by the future
112+
* @return a future containing the result of the runnable, if successfully committed (or, when {@code commitWhenDone} is FALSE, once the runnable completes, without committing).
113+
* Note: the future will not be {@code null}, but if the runnable returns a future containing {@code null} then
114+
* so will the future returned here.
115+
*/
116+
@Nonnull
117+
@SuppressWarnings({"PMD.CloseResource", "PMD.UseTryWithResources"})
118+
public <T> CompletableFuture<T> runAsync(final boolean clearWeakReadSemantics,
119+
boolean commitWhenDone,
120+
@Nonnull Function<? super FDBRecordContext, CompletableFuture<? extends T>> runnable) {
101121
FDBRecordContext context = openContext(clearWeakReadSemantics);
102122
boolean returnedFuture = false;
103123
try {
104124
CompletableFuture<T> future = runnable.apply(context)
105-
.thenCompose((T val) -> context.commitAsync().thenApply(vignore -> val));
125+
.thenCompose((T val) -> {
126+
if (commitWhenDone) {
127+
return context.commitAsync().thenApply(vignore -> val);
128+
} else {
129+
return CompletableFuture.completedFuture(val);
130+
}
131+
});
106132
returnedFuture = true;
107133
return future.whenComplete((result, exception) -> context.close());
108134
} finally {

fdb-record-layer-core/src/main/java/com/apple/foundationdb/record/provider/foundationdb/runners/throttled/ThrottledRetryingIterator.java

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ public class ThrottledRetryingIterator<T> implements AutoCloseable {
9292
@Nullable
9393
private final Consumer<QuotaManager> transactionInitNotification;
9494
private final int numOfRetries;
95+
private final boolean commitWhenDone;
9596

9697
private boolean closed = false;
9798
/** Starting time of the current/most-recent transaction. */
@@ -117,6 +118,7 @@ public ThrottledRetryingIterator(Builder<T> builder) {
117118
this.transactionInitNotification = builder.transactionInitNotification;
118119
this.cursorRowsLimit = 0;
119120
this.numOfRetries = builder.numOfRetries;
121+
this.commitWhenDone = builder.commitWhenDone;
120122
futureManager = new FutureAutoClose();
121123
}
122124

@@ -175,7 +177,7 @@ private CompletableFuture<RecordCursorResult<T>> iterateOneRange(FDBRecordStore.
175177
QuotaManager singleIterationQuotaManager) {
176178
AtomicReference<RecordCursorResult<T>> cont = new AtomicReference<>();
177179

178-
return transactionalRunner.runAsync(true, transaction -> {
180+
return transactionalRunner.runAsync(true, commitWhenDone, transaction -> {
179181
// this layer returns last cursor result
180182
singleIterationQuotaManager.init();
181183

@@ -418,6 +420,7 @@ public static class Builder<T> {
418420
private int maxRecordScannedPerSec;
419421
private int maxRecordDeletesPerSec;
420422
private int numOfRetries;
423+
private boolean commitWhenDone;
421424

422425
private Builder(FDBDatabase database, FDBRecordContextConfig.Builder contextConfigBuilder, CursorFactory<T> cursorCreator, ItemHandler<T> singleItemHandler) {
423426
// Mandatory fields are set in the constructor. Everything else is optional.
@@ -431,6 +434,7 @@ private Builder(FDBDatabase database, FDBRecordContextConfig.Builder contextConf
431434
this.maxRecordScannedPerSec = 0;
432435
this.maxRecordDeletesPerSec = 0;
433436
this.numOfRetries = NUMBER_OF_RETRIES;
437+
this.commitWhenDone = false;
434438
}
435439

436440
/**
@@ -520,6 +524,19 @@ public Builder<T> withNumOfRetries(int numOfRetries) {
520524
return this;
521525
}
522526

527+
/**
528+
* Set whether to commit the transaction when done.
529+
* Setting this to TRUE will commit each transaction once its work is done, before the next one is created. Setting
530+
* it to FALSE will close each transaction without committing, discarding any changes made in it.
531+
* Defaults to FALSE.
532+
* @param commitWhenDone whether to commit or roll back the transactions created
533+
* @return this builder
534+
*/
535+
public Builder<T> withCommitWhenDone(boolean commitWhenDone) {
536+
this.commitWhenDone = commitWhenDone;
537+
return this;
538+
}
539+
523540
/**
524541
* Set the MDC context for the runner/executor.
525542
* This MDC context will be carried out into the runner and executor and will allow them to pass that down to

fdb-record-layer-core/src/test/java/com/apple/foundationdb/record/provider/foundationdb/recordrepair/RecordValidateAndRepairTest.java

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,17 +23,21 @@
2323
import com.apple.foundationdb.record.RecordMetaData;
2424
import com.apple.foundationdb.record.RecordMetaDataBuilder;
2525
import com.apple.foundationdb.record.RecordMetaDataProto;
26+
import com.apple.foundationdb.record.RecordMetaDataProvider;
2627
import com.apple.foundationdb.record.ScanProperties;
2728
import com.apple.foundationdb.record.TestRecords1Proto;
2829
import com.apple.foundationdb.record.TupleRange;
2930
import com.apple.foundationdb.record.provider.foundationdb.FDBDatabaseRunner;
3031
import com.apple.foundationdb.record.provider.foundationdb.FDBRecordContext;
3132
import com.apple.foundationdb.record.provider.foundationdb.FDBRecordStore;
33+
import com.apple.foundationdb.record.provider.foundationdb.FDBRecordStoreKeyspace;
3234
import com.apple.foundationdb.record.provider.foundationdb.FDBRecordStoreTestBase;
3335
import com.apple.foundationdb.record.provider.foundationdb.FDBStoredRecord;
3436
import com.apple.foundationdb.record.provider.foundationdb.FormatVersion;
37+
import com.apple.foundationdb.record.provider.foundationdb.RecordStoreNoInfoAndNotEmptyException;
3538
import com.apple.foundationdb.record.provider.foundationdb.SplitHelper;
3639
import com.apple.foundationdb.tuple.Tuple;
40+
import com.apple.test.BooleanSource;
3741
import com.apple.test.ParameterizedTestUtils;
3842
import com.google.protobuf.Message;
3943
import org.assertj.core.api.Assertions;
@@ -93,6 +97,49 @@ void testValidateRecordsNoIssue(boolean splitLongRecords, FormatVersion formatVe
9397
validateNormalScan(hook, formatVersion, NUM_RECORDS, storeVersions);
9498
}
9599

100+
@ParameterizedTest
101+
@BooleanSource({"allowRepair", "repairHeader"})
102+
void testCorruptStoreHeaderNoCorruptRecords(final boolean allowRepair, final boolean repairHeader) throws Exception {
103+
final boolean splitLongRecords = true;
104+
final FormatVersion storeVersion = FormatVersion.SAVE_VERSION_WITH_RECORD;
105+
final RecordMetaDataHook hook = ValidationTestUtils.getRecordMetaDataHook(splitLongRecords, true);
106+
saveRecords(splitLongRecords, storeVersion, hook);
107+
108+
FDBRecordStore.Builder storeBuilder;
109+
try (FDBRecordContext context = openContext()) {
110+
final FDBRecordStore store = openSimpleRecordStore(context, hook, storeVersion);
111+
storeBuilder = store.asBuilder();
112+
}
113+
clearStoreHeader(simpleMetaData(hook));
114+
115+
RecordRepair.Builder builder = RecordRepair.builder(fdb, storeBuilder)
116+
.withValidationKind(RecordRepair.ValidationKind.RECORD_VALUE_AND_VERSION);
117+
if (repairHeader) {
118+
// This will allow the runner to repair the header before repairing records
119+
builder = builder.withHeaderRepairParameters(1, storeVersion);
120+
}
121+
// Run validation and repair
122+
try (RecordRepairValidateRunner runner = builder.buildRepairRunner(allowRepair)) {
123+
RepairValidationResults repairResults = runner.run().join();
124+
if (repairHeader) {
125+
ValidationTestUtils.assertCompleteResults(repairResults, NUM_RECORDS);
126+
// Verify records: all is OK.
127+
ValidationTestUtils.assertNoInvalidResults(repairResults.getInvalidResults());
128+
} else {
129+
Assertions.assertThat(repairResults.getCaughtException()).hasCauseInstanceOf(RecordStoreNoInfoAndNotEmptyException.class);
130+
}
131+
}
132+
133+
if (repairHeader && allowRepair) {
134+
validateNormalScan(hook, storeVersion, NUM_RECORDS, true);
135+
} else {
136+
try (FDBRecordContext context = openContext()) {
137+
Assertions.assertThatThrownBy(() -> openSimpleRecordStore(context, hook, storeVersion))
138+
.isInstanceOf(RecordStoreNoInfoAndNotEmptyException.class);
139+
}
140+
}
141+
}
142+
96143
public static Stream<Arguments> splitNumberFormatVersion() {
97144
return ParameterizedTestUtils.cartesianProduct(
98145
Stream.of(0, 1, 2, 3),
@@ -742,7 +789,6 @@ private List<FDBStoredRecord<Message>> saveRecords(int initialId, int totalRecor
742789
return result;
743790
}
744791

745-
746792
private void validateNormalScan(final RecordMetaDataHook hook, final FormatVersion formatVersion, final int numRecords, Boolean hasVersion) throws Exception {
747793
// Load the records again to make sure they are all there
748794
try (FDBRecordContext context = openContext()) {
@@ -758,4 +804,12 @@ private void validateNormalScan(final RecordMetaDataHook hook, final FormatVersi
758804
}
759805
}
760806
}
807+
808+
private void clearStoreHeader(final RecordMetaDataProvider metaData) {
809+
try (FDBRecordContext context = openContext()) {
810+
recordStore = getStoreBuilder(context, metaData, path).createOrOpen();
811+
context.ensureActive().clear(recordStore.getSubspace().pack(FDBRecordStoreKeyspace.STORE_INFO.key()));
812+
commit(context);
813+
}
814+
}
761815
}

0 commit comments

Comments
 (0)