Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
305 changes: 304 additions & 1 deletion yarn-project/archiver/src/archiver-sync.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ describe('Archiver Sync', () => {
// beforeEach default instance and by tests that need a second archiver with a different config.
const buildArchiver = async (
storeName: string,
configOverrides: { skipOrphanProposedBlockPruning?: boolean } = {},
configOverrides: { skipOrphanProposedBlockPruning?: boolean; batchSize?: number } = {},
): Promise<{ archiver: Archiver; synchronizer: ArchiverL1Synchronizer; archiverStore: ArchiverDataStores }> => {
const store = createArchiverDataStores(await openTmpStore(storeName), GENESIS_BLOCK_HEADER_HASH);

Expand Down Expand Up @@ -920,6 +920,251 @@ describe('Archiver Sync', () => {
expect(rejectedBad).toBeDefined();
expect(rejectedValid).toBeDefined();
}, 15_000);

it('rejects a checkpoint with invalid attestations even when its blob data is malformed', async () => {
  // Regression test for A-1252. Checkpoint blobs used to be fetched and decoded before committee
  // attestations were validated, so a checkpoint with BOTH invalid attestations and malformed blob
  // data failed with BlobDeserializationError before the invalid-attestation skip path could run.
  // It was never recorded as rejected and sync retried it forever, dragging down the valid CP1 that
  // shared its batch. Attestations have to be validated from calldata first, so that the malformed
  // blob is never fetched or decoded.
  const startingTip = await archiver.getCheckpointNumber();
  expect(startingTip).toEqual(CheckpointNumber(0));

  // Three-member committee.
  fake.setTargetCommitteeSize(3);
  const committeeSigners = times(3, Secp256k1Signer.random);
  epochCache.getCommitteeForEpoch.mockResolvedValue({
    committee: committeeSigners.map(member => member.address),
  } as EpochCommitteeInfo);

  const onInvalidAttestations = jest.fn();
  archiver.events.on(L2BlockSourceEvents.InvalidAttestationsCheckpointDetected, onInvalidAttestations);

  // CP1 is fully valid: committee attestations and well-formed blobs.
  await fake.addCheckpoint(CheckpointNumber(1), {
    l1BlockNumber: 70n,
    messagesL1BlockNumber: 50n,
    numL1ToL2Messages: 3,
    signers: committeeSigners,
  });

  // CP2 carries BAD attestations: its signers are random and not members of the committee.
  const outsiders = times(3, Secp256k1Signer.random);
  const cp2Result = await fake.addCheckpoint(CheckpointNumber(2), {
    l1BlockNumber: 80n,
    numL1ToL2Messages: 0,
    signers: outsiders,
  });
  const badCp2 = cp2Result.checkpoint;

  // Corrupt ONLY CP2's blob and leave CP1's real blobs untouched. The default mock resolves the blob
  // sidecar of a checkpoint from its L1 block hash (Buffer32 of the L1 block number).
  const corruptedBlockId = Buffer32.fromBigInt(80n).toString();
  const corruptedBlob = await makeRandomBlob(3);
  const originalGetBlobSidecar = blobClient.getBlobSidecar.getMockImplementation()!;
  blobClient.getBlobSidecar.mockImplementation((...args: Parameters<typeof blobClient.getBlobSidecar>) => {
    if (args[0] === corruptedBlockId) {
      return Promise.resolve([corruptedBlob]);
    }
    return originalGetBlobSidecar(...args);
  });

  fake.setL1BlockNumber(82n);

  // Sync has to succeed: attestations are checked from calldata before CP2's malformed blob is fetched.
  await expect(archiver.syncImmediate()).resolves.toBeUndefined();

  // CP1 lands; CP2 is rejected because of its attestations, not because of a blob-decode failure.
  const tipAfterSync = await archiver.getCheckpointNumber();
  expect(tipAfterSync).toEqual(CheckpointNumber(1));

  const invalidCp2Validation = expect.objectContaining({
    valid: false,
    checkpoint: expect.objectContaining({ checkpointNumber: 2 }),
  });
  expect(onInvalidAttestations).toHaveBeenCalledWith(
    expect.objectContaining({
      type: L2BlockSourceEvents.InvalidAttestationsCheckpointDetected,
      validationResult: invalidCp2Validation,
    }),
  );

  const rejectedRecord = await archiverStore.blocks.getRejectedCheckpointByArchiveRoot(badCp2.archive.root);
  expect(rejectedRecord).toBeDefined();

  // Polling again over the same L1 state must be stable. Without the fix the malformed CP2 blob throws
  // on every sync, the batch never commits, the valid CP1 stays unsynced, and the archiver keeps
  // re-querying the same L1 blocks because the sync point never moves past the throw. With the fix,
  // CP2 is rejected from calldata, CP1 is synced, and each further poll is a no-op.
  for (let poll = 1; poll <= 3; poll += 1) {
    await expect(archiver.syncImmediate()).resolves.toBeUndefined();
    expect(await archiver.getCheckpointNumber()).toEqual(CheckpointNumber(1));
  }
}, 20_000);

it('throws on a malformed blob with valid attestations while the epoch can still be proven', async () => {
  // A-1252 rows 4/5 boundary. When a checkpoint has VALID attestations but its blob cannot be fetched
  // or decoded, the failure stays fatal for as long as the epoch can still be proven (the rollup cannot
  // prune yet). The blob is canonical and must eventually become available, so the archiver keeps
  // retrying instead of skipping the checkpoint.
  const startingTip = await archiver.getCheckpointNumber();
  expect(startingTip).toEqual(CheckpointNumber(0));

  fake.setTargetCommitteeSize(3);
  const committeeSigners = times(3, Secp256k1Signer.random);
  epochCache.getCommitteeForEpoch.mockResolvedValue({
    committee: committeeSigners.map(member => member.address),
  } as EpochCommitteeInfo);

  // CP1: valid attestations, well-formed blobs.
  await fake.addCheckpoint(CheckpointNumber(1), {
    l1BlockNumber: 70n,
    messagesL1BlockNumber: 50n,
    numL1ToL2Messages: 3,
    signers: committeeSigners,
  });

  // CP2: VALID attestations (signed by the committee), but its blob is malformed.
  await fake.addCheckpoint(CheckpointNumber(2), {
    l1BlockNumber: 80n,
    numL1ToL2Messages: 0,
    signers: committeeSigners,
  });

  // Serve a random blob for CP2's L1 block and defer to the default mock for everything else.
  const corruptedBlockId = Buffer32.fromBigInt(80n).toString();
  const corruptedBlob = await makeRandomBlob(3);
  const originalGetBlobSidecar = blobClient.getBlobSidecar.getMockImplementation()!;
  blobClient.getBlobSidecar.mockImplementation((...args: Parameters<typeof blobClient.getBlobSidecar>) => {
    if (args[0] === corruptedBlockId) {
      return Promise.resolve([corruptedBlob]);
    }
    return originalGetBlobSidecar(...args);
  });

  // The epoch is still provable, so the rollup cannot prune and the malformed blob is fatal.
  fake.setCanPrune(false);
  fake.setL1BlockNumber(82n);

  await expect(archiver.syncImmediate()).rejects.toThrow();
}, 20_000);

it('skips a malformed-blob checkpoint and recovers once its epoch can be pruned', async () => {
  // A-1252 rows 4/5. A bribed-committee checkpoint (valid attestations) with a withheld blob used to
  // throw during blob decode on every iteration. That froze the L1 sync clock and halted every honest
  // proposer, so the prune that would recover the chain never fired. Once the proof window has expired
  // (the rollup can prune), the archiver has to skip the unfetchable checkpoint, advance its clock, and
  // let the epoch-prune machinery roll the pending chain back to the proven tip.
  const startingTip = await archiver.getCheckpointNumber();
  expect(startingTip).toEqual(CheckpointNumber(0));

  fake.setTargetCommitteeSize(3);
  const committeeSigners = times(3, Secp256k1Signer.random);
  epochCache.getCommitteeForEpoch.mockResolvedValue({
    committee: committeeSigners.map(member => member.address),
  } as EpochCommitteeInfo);

  // CP1 and CP2 both have valid attestations and well-formed blobs, so they sync normally.
  const cp1Result = await fake.addCheckpoint(CheckpointNumber(1), {
    l1BlockNumber: 70n,
    messagesL1BlockNumber: 50n,
    numL1ToL2Messages: 3,
    signers: committeeSigners,
  });
  const cp1 = cp1Result.checkpoint;
  await fake.addCheckpoint(CheckpointNumber(2), {
    l1BlockNumber: 80n,
    messagesL1BlockNumber: 60n,
    numL1ToL2Messages: 3,
    signers: committeeSigners,
  });

  fake.setL1BlockNumber(90n);
  await archiver.syncImmediate();
  expect(await archiver.getCheckpointNumber()).toEqual(CheckpointNumber(2));
  const timestampBeforeSkip = await archiver.getL1Timestamp();

  // Prove CP1 only; CP2 remains in the unproven epoch.
  fake.markCheckpointAsProven(CheckpointNumber(1));

  // CP3 shows up with VALID (committee-signed) attestations but a withheld/undecodable blob.
  await fake.addCheckpoint(CheckpointNumber(3), {
    l1BlockNumber: 100n,
    numL1ToL2Messages: 0,
    signers: committeeSigners,
  });
  const withheldBlockId = Buffer32.fromBigInt(100n).toString();
  const corruptedBlob = await makeRandomBlob(3);
  const originalGetBlobSidecar = blobClient.getBlobSidecar.getMockImplementation()!;
  blobClient.getBlobSidecar.mockImplementation((...args: Parameters<typeof blobClient.getBlobSidecar>) => {
    if (args[0] === withheldBlockId) {
      return Promise.resolve([corruptedBlob]);
    }
    return originalGetBlobSidecar(...args);
  });

  const onPruneUnproven = jest.fn();
  archiver.events.on(L2BlockSourceEvents.L2PruneUnproven, onPruneUnproven);

  // The proof window has expired: the rollup would prune on the next L1 block.
  fake.setCanPrune(true);
  fake.setL1BlockNumber(101n);

  // Sync has to succeed: CP3 is unfetchable but sits in a prunable epoch, so it is skipped, not fatal.
  await expect(archiver.syncImmediate()).resolves.toBeUndefined();

  // CP3 is never ingested and the unproven CP2 is rolled back, leaving the proven tip (CP1).
  expect(await archiver.getCheckpointNumber()).toEqual(CheckpointNumber(1));
  expect(await archiver.getProvenCheckpointNumber()).toEqual(CheckpointNumber(1));
  const fromCp3 = await archiver.getCheckpoints({ from: CheckpointNumber(3), limit: 1 });
  expect(fromCp3).toEqual([]);
  expect(onPruneUnproven).toHaveBeenCalled();

  // The sync clock moved forward instead of freezing on the blob error, so honest proposers stay live.
  const timestampAfterSkip = await archiver.getL1Timestamp();
  expect(timestampAfterSkip).toEqual(fake.getTimestampAtL1Block(101n));
  expect(timestampAfterSkip!).toBeGreaterThan(timestampBeforeSkip!);

  // L2Tips must report the rollback to checkpoint 1.
  const { checkpointed } = await archiver.getL2Tips();
  const lastCp1Block = cp1.blocks[cp1.blocks.length - 1];
  expect(checkpointed.checkpoint.number).toEqual(CheckpointNumber(1));
  expect(checkpointed.block.number).toEqual(lastCp1Block.number);

  archiver.events.off(L2BlockSourceEvents.L2PruneUnproven, onPruneUnproven);
}, 20_000);

it('does not ingest a later-batch checkpoint that builds on a skipped prunable one', async () => {
  // Covers the loop-break (stopAfterBatch) in handleCheckpoints, distinct from the in-batch filter:
  // once a prunable blob failure skips checkpoint N, every later checkpoint this iteration must be
  // skipped too — including ones that land in a *later* L1-block batch. If such a checkpoint (valid
  // attestations, fetchable blob, but building on the skipped N) were pulled in, addCheckpoints would
  // throw InitialCheckpointNumberNotSequentialError on the gap and re-freeze sync. We use a tiny batch
  // size so CP2 (bad blob) and CP3 (good blob, builds on CP2) fall in separate batches.
  const { archiver: smallBatchArchiver } = await buildArchiver('archiver_small_batch', { batchSize: 1 });

  // Set once the sidecar mock has been overridden. It is invoked from `finally` so the override is
  // undone even when an assertion fails or the sync rejects part-way through the test.
  let restoreBlobSidecar: (() => void) | undefined;
  try {
    fake.setTargetCommitteeSize(3);
    const signers = times(3, Secp256k1Signer.random);
    epochCache.getCommitteeForEpoch.mockResolvedValue({
      committee: signers.map(s => s.address),
    } as EpochCommitteeInfo);

    // CP1: valid attestations + good blob. Synced and proven — the tip we expect to roll back to.
    await fake.addCheckpoint(CheckpointNumber(1), {
      l1BlockNumber: 2n,
      messagesL1BlockNumber: 1n,
      numL1ToL2Messages: 3,
      signers,
    });
    fake.setL1BlockNumber(3n);
    await smallBatchArchiver.syncImmediate();
    expect(await smallBatchArchiver.getCheckpointNumber()).toEqual(CheckpointNumber(1));
    fake.markCheckpointAsProven(CheckpointNumber(1));

    // CP2 (valid attestations, malformed blob) and CP3 (valid attestations, good blob, chains off CP2),
    // spaced >2 L1 blocks apart so they land in separate batches given batchSize 1 (2 L1 blocks/batch).
    await fake.addCheckpoint(CheckpointNumber(2), { l1BlockNumber: 5n, numL1ToL2Messages: 0, signers });
    await fake.addCheckpoint(CheckpointNumber(3), { l1BlockNumber: 8n, numL1ToL2Messages: 0, signers });
    const cp2BlockId = Buffer32.fromBigInt(5n).toString();
    const malformedBlob = await makeRandomBlob(3);
    const defaultGetBlobSidecar = blobClient.getBlobSidecar.getMockImplementation()!;
    restoreBlobSidecar = () => {
      blobClient.getBlobSidecar.mockImplementation(defaultGetBlobSidecar);
    };
    blobClient.getBlobSidecar.mockImplementation((...args: Parameters<typeof blobClient.getBlobSidecar>) =>
      args[0] === cp2BlockId ? Promise.resolve([malformedBlob]) : defaultGetBlobSidecar(...args),
    );

    fake.setCanPrune(true);
    fake.setL1BlockNumber(10n);

    // Must not throw: CP2 is skipped as prunable, and CP3 (in a later batch, building on CP2) must not
    // be pulled in — otherwise its ingestion would hit the sequential-number gap and rethrow.
    await expect(smallBatchArchiver.syncImmediate()).resolves.toBeUndefined();

    // Neither CP2 nor CP3 ingested; the chain stayed at the proven tip CP1.
    expect(await smallBatchArchiver.getCheckpointNumber()).toEqual(CheckpointNumber(1));
    expect(await smallBatchArchiver.getProvenCheckpointNumber()).toEqual(CheckpointNumber(1));
    expect(await smallBatchArchiver.getCheckpoints({ from: CheckpointNumber(2), limit: 2 })).toEqual([]);
  } finally {
    // Same order as the passing path had before: put the default mock back first, then stop the
    // extra archiver.
    restoreBlobSidecar?.();
    await smallBatchArchiver.stop();
  }
}, 20_000);
});

describe('reorg handling', () => {
Expand Down Expand Up @@ -1666,6 +1911,64 @@ describe('Archiver Sync', () => {
expect(checkpointedBlocks[0].checkpointNumber).toEqual(2);
}, 10_000);

// Scenario: CP1 is synced from L1, then CP2 is published on L1 while its blocks and a matching proposed
// checkpoint already exist in the local store. The final sync must move the tip to CP2 without ever
// requesting the blob sidecar for CP2's L1 block.
it('promotes a matching local checkpoint even when its on-chain blob is malformed', async () => {

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm on the fence on whether this is a good idea. I agree we should promote if blob data is unreachable. But if it's incorrect, shouldn't we fail loudly? It means the on-chain data is corrupt.

No need to change on this PR though, since it's how it worked before.

// A-1252 row 2: a checkpoint with a withheld/malformed blob is immune to the blob-decode stall when a
// matching local proposed copy exists, because it is promoted from local blocks and the blob fetch is
// skipped entirely. This must hold regardless of the blob being unfetchable.
await fake.addCheckpoint(CheckpointNumber(1), {
l1BlockNumber: 70n,
messagesL1BlockNumber: 60n,
numL1ToL2Messages: 3,
});

fake.setL1BlockNumber(100n);
await archiver.syncImmediate();
expect(await archiver.getCheckpointNumber()).toEqual(CheckpointNumber(1));

// Checkpoint 2 on L1 at a far-future block, with a malformed blob that would throw if ever fetched.
const { checkpoint: cp2 } = await fake.addCheckpoint(CheckpointNumber(2), {
l1BlockNumber: 5000n,
messagesL1BlockNumber: 4990n,
numL1ToL2Messages: 3,
});
const cp2BlockId = Buffer32.fromBigInt(5000n).toString();
const malformedBlob = await makeRandomBlob(3);
const defaultGetBlobSidecar = blobClient.getBlobSidecar.getMockImplementation()!;
blobClient.getBlobSidecar.mockImplementation((...args: Parameters<typeof blobClient.getBlobSidecar>) =>
args[0] === cp2BlockId ? Promise.resolve([malformedBlob]) : defaultGetBlobSidecar(...args),
);

// Register checkpoint 2's blocks and a matching proposed checkpoint directly on the store, so the
// archiver has a local copy to promote (the archive root is computed from the stored blocks). We go
// through the store rather than archiver.addBlock/addProposedCheckpoint to avoid those methods firing
// background sync runs that would race with the explicit sync below.
for (const block of cp2.blocks) {
await archiverStore.blocks.addProposedBlock(block);
}
await archiverStore.blocks.addProposedCheckpoint({
checkpointNumber: CheckpointNumber(2),
header: cp2.header,
startBlock: cp2.blocks[0].number,
blockCount: cp2.blocks.length,
totalManaUsed: 0n,
feeAssetPriceModifier: cp2.feeAssetPriceModifier,
});

// Forget the sidecar calls recorded so far, so the not.toHaveBeenCalledWith assertion below only
// considers calls made by the final sync. The mock implementation itself is left in place.
blobClient.getBlobSidecar.mockClear();

fake.setL1BlockNumber(5010n);
await expect(archiver.syncImmediate()).resolves.toBeUndefined();

// Checkpoint 2 is ingested via promotion; its malformed blob was never fetched.
expect(await archiver.getCheckpointNumber()).toEqual(CheckpointNumber(2));
// NOTE(review): pruneSpy is not declared in this test; presumably it is registered for prune events in
// the enclosing describe's setup — confirm against the surrounding file.
expect(pruneSpy).not.toHaveBeenCalled();
expect(blobClient.getBlobSidecar).not.toHaveBeenCalledWith(cp2BlockId, expect.anything(), expect.anything());

const tips = await archiver.getL2Tips();
expect(tips.checkpointed.checkpoint.number).toEqual(CheckpointNumber(2));
expect(tips.checkpointed.block.number).toEqual(cp2.blocks[cp2.blocks.length - 1].number);
}, 10_000);

it('rejects adding blocks that are already checkpointed', async () => {
// First, sync checkpoint 1 from L1 to establish a baseline
const { checkpoint: cp1 } = await fake.addCheckpoint(CheckpointNumber(1), {
Expand Down
Loading
Loading