Skip to content

Commit 992ae43

Browse files
ywangdDaveCTurner
authored andcommitted
Update shardGenerations for all indices on snapshot finalization (elastic#128650)
If an index is deleted after a snapshot has written its shardGenerations file but before the snapshot is finalized, we exclude this index from the snapshot because its indexMetadata is no longer available. However, the shardGenerations file is still valid in that it is the latest copy with all necessary information despite it containing an extra snapshot entry. This is OK. Instead of dropping this shardGenerations file, this PR changes to carry it forward by updating RepositoryData and relevant in-progress snapshots so that the next finalization builds on top of this one. Co-authored-by: David Turner <david.turner@elastic.co>
1 parent f73e6e4 commit 992ae43

File tree

14 files changed

+360
-53
lines changed

14 files changed

+360
-53
lines changed

docs/changelog/128650.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 128650
2+
summary: Update shardGenerations for all indices on snapshot finalization
3+
area: Snapshot/Restore
4+
type: enhancement
5+
issues:
6+
- 108907

server/src/main/java/org/elasticsearch/repositories/FinalizeSnapshotContext.java

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@
2727
*/
2828
public final class FinalizeSnapshotContext extends DelegatingActionListener<RepositoryData, RepositoryData> {
2929

30-
private final ShardGenerations updatedShardGenerations;
30+
private final UpdatedShardGenerations updatedShardGenerations;
3131

3232
/**
3333
* Obsolete shard generations map computed from the cluster state update that this finalization executed in
@@ -46,7 +46,7 @@ public final class FinalizeSnapshotContext extends DelegatingActionListener<Repo
4646
private final Runnable onDone;
4747

4848
/**
49-
* @param updatedShardGenerations updated shard generations
49+
* @param updatedShardGenerations updated shard generations for both live and deleted indices
5050
* @param repositoryStateId the unique id identifying the state of the repository when the snapshot began
5151
* @param clusterMetadata cluster metadata
5252
* @param snapshotInfo SnapshotInfo instance to write for this snapshot
@@ -57,7 +57,7 @@ public final class FinalizeSnapshotContext extends DelegatingActionListener<Repo
5757
* once all cleanup operations after snapshot completion have executed
5858
*/
5959
public FinalizeSnapshotContext(
60-
ShardGenerations updatedShardGenerations,
60+
UpdatedShardGenerations updatedShardGenerations,
6161
long repositoryStateId,
6262
Metadata clusterMetadata,
6363
SnapshotInfo snapshotInfo,
@@ -78,7 +78,7 @@ public long repositoryStateId() {
7878
return repositoryStateId;
7979
}
8080

81-
public ShardGenerations updatedShardGenerations() {
81+
public UpdatedShardGenerations updatedShardGenerations() {
8282
return updatedShardGenerations;
8383
}
8484

@@ -120,4 +120,20 @@ public void onDone() {
120120
public void onResponse(RepositoryData repositoryData) {
121121
delegate.onResponse(repositoryData);
122122
}
123+
124+
/**
125+
* A record used to track the new shard generations that have been written for each shard in a snapshot.
126+
* An index may be deleted after the shard generation is written but before the snapshot is finalized.
127+
* In this case, its shard generation is tracked in {@link #deletedIndices} because it's still a valid
128+
* shard generation blob that exists in the repository and may be used by subsequent snapshots, even though
129+
* the index will not be included in the snapshot being finalized. Otherwise, it is tracked in
130+
* {@link #liveIndices}.
131+
*/
132+
public record UpdatedShardGenerations(ShardGenerations liveIndices, ShardGenerations deletedIndices) {
133+
public static final UpdatedShardGenerations EMPTY = new UpdatedShardGenerations(ShardGenerations.EMPTY, ShardGenerations.EMPTY);
134+
135+
public boolean hasShardGen(RepositoryShardId repositoryShardId) {
136+
return liveIndices.hasShardGen(repositoryShardId) || deletedIndices.hasShardGen(repositoryShardId);
137+
}
138+
}
123139
}

server/src/main/java/org/elasticsearch/repositories/RepositoryData.java

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@
2525
import org.elasticsearch.index.IndexVersions;
2626
import org.elasticsearch.logging.LogManager;
2727
import org.elasticsearch.logging.Logger;
28+
import org.elasticsearch.repositories.FinalizeSnapshotContext.UpdatedShardGenerations;
2829
import org.elasticsearch.snapshots.SnapshotId;
2930
import org.elasticsearch.snapshots.SnapshotInfo;
3031
import org.elasticsearch.snapshots.SnapshotState;
@@ -405,16 +406,16 @@ public Map<IndexId, Collection<String>> indexMetaDataToRemoveAfterRemovingSnapsh
405406
*
406407
* @param snapshotId Id of the new snapshot
407408
* @param details Details of the new snapshot
408-
* @param shardGenerations Updated shard generations in the new snapshot. For each index contained in the snapshot an array of new
409-
* generations indexed by the shard id they correspond to must be supplied.
409+
* @param updatedShardGenerations Updated shard generations in the new snapshot, including both indices that are included
410+
* in the given snapshot and those got deleted while finalizing.
410411
* @param indexMetaBlobs Map of index metadata blob uuids
411412
* @param newIdentifiers Map of new index metadata blob uuids keyed by the identifiers of the
412413
* {@link IndexMetadata} in them
413414
*/
414415
public RepositoryData addSnapshot(
415416
final SnapshotId snapshotId,
416417
final SnapshotDetails details,
417-
final ShardGenerations shardGenerations,
418+
final UpdatedShardGenerations updatedShardGenerations,
418419
@Nullable final Map<IndexId, String> indexMetaBlobs,
419420
@Nullable final Map<String, String> newIdentifiers
420421
) {
@@ -424,12 +425,13 @@ public RepositoryData addSnapshot(
424425
// the new master, so we make the operation idempotent
425426
return this;
426427
}
428+
final var liveIndexIds = updatedShardGenerations.liveIndices().indices();
427429
Map<String, SnapshotId> snapshots = new HashMap<>(snapshotIds);
428430
snapshots.put(snapshotId.getUUID(), snapshotId);
429431
Map<String, SnapshotDetails> newSnapshotDetails = new HashMap<>(snapshotsDetails);
430432
newSnapshotDetails.put(snapshotId.getUUID(), details);
431433
Map<IndexId, List<SnapshotId>> allIndexSnapshots = new HashMap<>(indexSnapshots);
432-
for (final IndexId indexId : shardGenerations.indices()) {
434+
for (final IndexId indexId : liveIndexIds) {
433435
final List<SnapshotId> snapshotIds = allIndexSnapshots.get(indexId);
434436
if (snapshotIds == null) {
435437
allIndexSnapshots.put(indexId, List.of(snapshotId));
@@ -445,11 +447,8 @@ public RepositoryData addSnapshot(
445447
: "Index meta generations should have been empty but was [" + indexMetaDataGenerations + "]";
446448
newIndexMetaGenerations = IndexMetaDataGenerations.EMPTY;
447449
} else {
448-
assert indexMetaBlobs.isEmpty() || shardGenerations.indices().equals(indexMetaBlobs.keySet())
449-
: "Shard generations contained indices "
450-
+ shardGenerations.indices()
451-
+ " but indexMetaData was given for "
452-
+ indexMetaBlobs.keySet();
450+
assert indexMetaBlobs.isEmpty() || liveIndexIds.equals(indexMetaBlobs.keySet())
451+
: "Shard generations contained indices " + liveIndexIds + " but indexMetaData was given for " + indexMetaBlobs.keySet();
453452
newIndexMetaGenerations = indexMetaDataGenerations.withAddedSnapshot(snapshotId, indexMetaBlobs, newIdentifiers);
454453
}
455454

@@ -459,7 +458,7 @@ public RepositoryData addSnapshot(
459458
snapshots,
460459
newSnapshotDetails,
461460
allIndexSnapshots,
462-
ShardGenerations.builder().putAll(this.shardGenerations).putAll(shardGenerations).build(),
461+
ShardGenerations.builder().putAll(this.shardGenerations).update(updatedShardGenerations).build(),
463462
newIndexMetaGenerations,
464463
clusterUUID
465464
);

server/src/main/java/org/elasticsearch/repositories/ShardGenerations.java

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@
2828
import java.util.regex.Pattern;
2929
import java.util.stream.Collectors;
3030

31+
import static org.elasticsearch.repositories.FinalizeSnapshotContext.UpdatedShardGenerations;
32+
3133
/**
3234
* Represents the current {@link ShardGeneration} for each shard in a repository.
3335
*/
@@ -231,6 +233,14 @@ public Builder putAll(ShardGenerations shardGenerations) {
231233
return this;
232234
}
233235

236+
public Builder update(UpdatedShardGenerations updatedShardGenerations) {
237+
putAll(updatedShardGenerations.liveIndices());
238+
// For deleted indices, we only update the generations if they are present in the existing generations, i.e.
239+
// they are referenced by other snapshots.
240+
updateIfPresent(updatedShardGenerations.deletedIndices());
241+
return this;
242+
}
243+
234244
public Builder put(IndexId indexId, int shardId, SnapshotsInProgress.ShardSnapshotStatus status) {
235245
// only track generations for successful shard status values
236246
return put(indexId, shardId, status.state().failed() ? null : status.generation());
@@ -244,6 +254,20 @@ public Builder put(IndexId indexId, int shardId, ShardGeneration generation) {
244254
return this;
245255
}
246256

257+
private void updateIfPresent(ShardGenerations shardGenerations) {
258+
shardGenerations.shardGenerations.forEach((indexId, gens) -> {
259+
final Map<Integer, ShardGeneration> existingShardGens = generations.get(indexId);
260+
if (existingShardGens != null) {
261+
for (int i = 0; i < gens.size(); i++) {
262+
final ShardGeneration gen = gens.get(i);
263+
if (gen != null) {
264+
existingShardGens.put(i, gen);
265+
}
266+
}
267+
}
268+
});
269+
}
270+
247271
private boolean noDuplicateIndicesWithSameName(IndexId newId) {
248272
for (IndexId id : generations.keySet()) {
249273
if (id.getName().equals(newId.getName()) && id.equals(newId) == false) {
@@ -254,6 +278,9 @@ private boolean noDuplicateIndicesWithSameName(IndexId newId) {
254278
}
255279

256280
public ShardGenerations build() {
281+
if (generations.isEmpty()) {
282+
return EMPTY;
283+
}
257284
return new ShardGenerations(generations.entrySet().stream().collect(Collectors.toMap(Map.Entry::getKey, entry -> {
258285
final Set<Integer> shardIds = entry.getValue().keySet();
259286
assert shardIds.isEmpty() == false;

server/src/main/java/org/elasticsearch/repositories/blobstore/BlobStoreRepository.java

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1749,11 +1749,10 @@ int sizeInBytes() {
17491749
public void finalizeSnapshot(final FinalizeSnapshotContext finalizeSnapshotContext) {
17501750
assert ThreadPool.assertCurrentThreadPool(ThreadPool.Names.SNAPSHOT);
17511751
final long repositoryStateId = finalizeSnapshotContext.repositoryStateId();
1752-
final ShardGenerations shardGenerations = finalizeSnapshotContext.updatedShardGenerations();
17531752
final SnapshotInfo snapshotInfo = finalizeSnapshotContext.snapshotInfo();
17541753
assert repositoryStateId > RepositoryData.UNKNOWN_REPO_GEN
17551754
: "Must finalize based on a valid repository generation but received [" + repositoryStateId + "]";
1756-
final Collection<IndexId> indices = shardGenerations.indices();
1755+
final Collection<IndexId> indices = finalizeSnapshotContext.updatedShardGenerations().liveIndices().indices();
17571756
final SnapshotId snapshotId = snapshotInfo.snapshotId();
17581757
// Once we are done writing the updated index-N blob we remove the now unreferenced index-${uuid} blobs in each shard
17591758
// directory if all nodes are at least at version SnapshotsService#SHARD_GEN_IN_REPO_DATA_VERSION
@@ -1867,7 +1866,7 @@ record RootBlobUpdateResult(RepositoryData oldRepositoryData, RepositoryData new
18671866
existingRepositoryData.addSnapshot(
18681867
snapshotId,
18691868
snapshotDetails,
1870-
shardGenerations,
1869+
finalizeSnapshotContext.updatedShardGenerations(),
18711870
metadataWriteResult.indexMetas(),
18721871
metadataWriteResult.indexMetaIdentifiers()
18731872
),

0 commit comments

Comments
 (0)