Skip to content

Commit aa0ee8b

Browse files
committed
POC of prefiltering.
1 parent 9d13136 commit aa0ee8b

10 files changed

Lines changed: 249 additions & 167 deletions

File tree

modules/module-mongodb-storage/src/storage/implementation/MergedSyncRules.ts

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -105,19 +105,19 @@ export class MergedSyncRules implements RowProcessor {
105105
* @param table The source database table definition, _not_ the individually derived SourceTables.
106106
* @returns
107107
*/
108-
getMatchingSources(table: SourceTableInterface): {
108+
getMatchingSources(pattern: TablePattern): {
109109
bucketDataSources: BucketDataSource[];
110110
parameterIndexLookupCreators: ParameterIndexLookupCreator[];
111111
} {
112112
const bucketDataSources = [...this.resolvedDataSources.values()]
113113
.map((dataSource) => dataSource.source)
114-
.filter((ds) => ds.tableSyncsData(table));
114+
.filter((ds) => ds.getSourceTables().some((table) => table.equals(pattern)));
115115

116116
const parameterIndexLookupCreators: ParameterIndexLookupCreator[] = [
117117
...this.resolvedParameterLookupSources.values()
118118
]
119119
.map((dataSource) => dataSource.source)
120-
.filter((ds) => ds.tableSyncsParameters(table));
120+
.filter((ds) => ds.getSourceTables().some((table) => table.equals(pattern)));
121121
return {
122122
bucketDataSources,
123123
parameterIndexLookupCreators
@@ -128,7 +128,6 @@ export class MergedSyncRules implements RowProcessor {
128128
compatibility: CompatibilityContext = CompatibilityContext.FULL_BACKWARDS_COMPATIBILITY;
129129

130130
getSourceTables(): TablePattern[] {
131-
console.log('tables', this.sourcePatterns);
132131
return this.sourcePatterns;
133132
}
134133

modules/module-mongodb-storage/src/storage/implementation/MongoBucketBatch.ts

Lines changed: 5 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -284,7 +284,8 @@ export class MongoBucketDataWriter implements storage.BucketDataWriter {
284284
replicaIdColumns: ref.replicaIdColumns,
285285
snapshotComplete: doc.snapshot_done ?? true,
286286
bucketDataSourceIds: doc.bucket_data_source_ids ?? [],
287-
parameterLookupSourceIds: doc.parameter_lookup_source_ids ?? []
287+
parameterLookupSourceIds: doc.parameter_lookup_source_ids ?? [],
288+
pattern: ref.pattern
288289
});
289290
sourceTable.snapshotStatus =
290291
doc.snapshot_status == null
@@ -302,11 +303,7 @@ export class MongoBucketDataWriter implements storage.BucketDataWriter {
302303
}
303304

304305
async resolveTables(options: storage.ResolveTablesOptions): Promise<storage.ResolveTablesResult> {
305-
const sources = this.rowProcessor.getMatchingSources({
306-
connectionTag: options.connection_tag,
307-
name: options.entity_descriptor.name,
308-
schema: options.entity_descriptor.schema
309-
});
306+
const sources = this.rowProcessor.getMatchingSources(options.pattern);
310307
const bucketDataSourceIds = sources.bucketDataSources.map((source) => this.mapping.bucketSourceId(source));
311308
const parameterLookupSourceIds = sources.parameterIndexLookupCreators.map((source) =>
312309
this.mapping.parameterLookupId(source)
@@ -390,7 +387,8 @@ export class MongoBucketDataWriter implements storage.BucketDataWriter {
390387
replicaIdColumns: replicaIdColumns,
391388
snapshotComplete: doc.snapshot_done ?? true,
392389
bucketDataSourceIds: doc.bucket_data_source_ids ?? [],
393-
parameterLookupSourceIds: doc.parameter_lookup_source_ids ?? []
390+
parameterLookupSourceIds: doc.parameter_lookup_source_ids ?? [],
391+
pattern: options.pattern
394392
});
395393
sourceTable.snapshotStatus =
396394
doc.snapshot_status == null

modules/module-mongodb/src/replication/ChangeStream.ts

Lines changed: 66 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,8 @@ import {
3434
} from './MongoRelation.js';
3535
import { MongoSnapshotter } from './MongoSnapshotter.js';
3636
import { CHECKPOINTS_COLLECTION, timestampToDate } from './replication-utils.js';
37+
import { staticFilterToMongoExpression } from './staticFilters.js';
38+
import { inspect } from 'node:util';
3739

3840
export interface ChangeStreamOptions {
3941
connections: MongoManager;
@@ -212,6 +214,7 @@ export class ChangeStream {
212214
private getSourceNamespaceFilters(writer: BucketDataWriter): {
213215
$match: any;
214216
multipleDatabases: boolean;
217+
filters: any[];
215218
} {
216219
const sourceTables = writer.rowProcessor.getSourceTables();
217220

@@ -230,20 +233,31 @@ export class ChangeStream {
230233
multipleDatabases = true;
231234
}
232235

236+
let filterExpression =
237+
tablePattern.filter == null
238+
? { $literal: true }
239+
: staticFilterToMongoExpression(tablePattern.filter, { columnPrefix: '$fullDocument.' });
240+
233241
if (tablePattern.isWildcard) {
234242
$refilters.push({
235243
'ns.db': tablePattern.schema,
236244
'ns.coll': new RegExp('^' + escapeRegExp(tablePattern.tablePrefix))
237245
});
238246
filters.push({
239247
'ns.db': tablePattern.schema,
240-
'ns.coll': new RegExp('^' + escapeRegExp(tablePattern.tablePrefix))
248+
'ns.coll': new RegExp('^' + escapeRegExp(tablePattern.tablePrefix)),
249+
$expr: filterExpression
241250
});
242251
} else {
243252
$inFilters.push({
244253
db: tablePattern.schema,
245254
coll: tablePattern.name
246255
});
256+
filters.push({
257+
'ns.db': tablePattern.schema,
258+
'ns.coll': tablePattern.name,
259+
$expr: filterExpression
260+
});
247261
}
248262
}
249263

@@ -265,9 +279,9 @@ export class ChangeStream {
265279
: // collection-level: filter on coll only
266280
{ 'ns.coll': { $in: $inFilters.map((ns) => ns.coll) } };
267281
if ($refilters.length > 0) {
268-
return { $match: { $or: [nsFilter, ...$refilters] }, multipleDatabases };
282+
return { $match: { $or: [nsFilter, ...$refilters] }, multipleDatabases, filters };
269283
}
270-
return { $match: nsFilter, multipleDatabases };
284+
return { $match: nsFilter, multipleDatabases, filters };
271285
}
272286

273287
private async checkPostImages(db: string, collectionInfo: mongo.CollectionInfo) {
@@ -420,6 +434,12 @@ export class ChangeStream {
420434
{
421435
$match: filters.$match
422436
},
437+
// Not working currently - getting "resumeToken not found"
438+
// {
439+
// $match: {
440+
// $or: filters.filters
441+
// }
442+
// },
423443
{ $changeStreamSplitLargeEvent: {} }
424444
];
425445

@@ -485,37 +505,56 @@ export class ChangeStream {
485505
// Ignore the postImages check in this case.
486506
}
487507

488-
const result = await writer.resolveTables({
489-
connection_id: this.connection_id,
490-
connection_tag: this.connections.connectionTag,
491-
entity_descriptor: descriptor
508+
// What happens here:
509+
// 1. We see a new collection that we haven't observed before.
510+
// 2. We check which table pattern(s) match this collection, _regardless of specific row filters_.
511+
// 3. We resolve the tables for those patterns.
512+
513+
// FIXME: don't scan through it all
514+
// FIXME: handle wildcards
515+
const patterns = writer.rowProcessor.getSourceTables().filter((t) => {
516+
return (
517+
t.connectionTag == this.connections.connectionTag && t.name == descriptor.name && t.schema == descriptor.schema
518+
);
492519
});
493520

494-
const snapshot = options.snapshot;
495-
this.relationCache.set(getCacheIdentifier(descriptor), result.tables);
521+
let allTables: SourceTable[] = [];
522+
for (let pattern of patterns) {
523+
const result = await writer.resolveTables({
524+
connection_id: this.connection_id,
525+
connection_tag: this.connections.connectionTag,
526+
entity_descriptor: descriptor,
527+
pattern: pattern
528+
});
496529

497-
// Drop conflicting collections.
498-
// This is generally not expected for MongoDB source dbs, so we log an error.
499-
if (result.dropTables.length > 0) {
500-
this.logger.error(
501-
`Conflicting collections found for ${JSON.stringify(descriptor)}. Dropping: ${result.dropTables.map((t) => t.id).join(', ')}`
502-
);
503-
await writer.drop(result.dropTables);
504-
}
530+
const snapshot = options.snapshot;
531+
this.relationCache.set(getCacheIdentifier(descriptor), result.tables);
505532

506-
// Snapshot if:
507-
// 1. Snapshot is requested (false for initial snapshot, since that process handles it elsewhere)
508-
// 2. Snapshot is not already done, AND:
509-
// 3. The table is used in sync rules.
510-
for (let table of result.tables) {
511-
const shouldSnapshot = snapshot && !table.snapshotComplete && table.syncAny;
512-
if (shouldSnapshot) {
513-
this.logger.info(`New collection: ${descriptor.schema}.${descriptor.name}`);
514-
await this.snapshotter.queueSnapshot(writer, table);
533+
// Drop conflicting collections.
534+
// This is generally not expected for MongoDB source dbs, so we log an error.
535+
if (result.dropTables.length > 0) {
536+
this.logger.error(
537+
`Conflicting collections found for ${JSON.stringify(descriptor)}. Dropping: ${result.dropTables.map((t) => t.id).join(', ')}`
538+
);
539+
await writer.drop(result.dropTables);
515540
}
541+
542+
// Snapshot if:
543+
// 1. Snapshot is requested (false for initial snapshot, since that process handles it elsewhere)
544+
// 2. Snapshot is not already done, AND:
545+
// 3. The table is used in sync rules.
546+
for (let table of result.tables) {
547+
const shouldSnapshot = snapshot && !table.snapshotComplete && table.syncAny;
548+
if (shouldSnapshot) {
549+
this.logger.info(`New collection: ${descriptor.schema}.${descriptor.name}`);
550+
await this.snapshotter.queueSnapshot(writer, table);
551+
}
552+
}
553+
554+
allTables.push(...result.tables);
516555
}
517556

518-
return result.tables;
557+
return allTables;
519558
}
520559

521560
private async drop(writer: storage.BucketDataWriter, entity: SourceEntityDescriptor): Promise<void> {

modules/module-mongodb/src/replication/MongoSnapshotQuery.ts

Lines changed: 23 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,8 @@
11
import { mongo } from '@powersync/lib-service-mongodb';
22
import { ReplicationAssertionError } from '@powersync/lib-services-framework';
33
import { bson } from '@powersync/service-core';
4+
import { MongoExpression, StaticFilter } from '@powersync/service-sync-rules';
5+
import { staticFilterToMongoExpression } from './staticFilters.js';
46

57
/**
68
* Performs a collection snapshot query, chunking by ranges of _id.
@@ -13,12 +15,21 @@ export class ChunkedSnapshotQuery implements AsyncDisposable {
1315
private lastCursor: mongo.FindCursor | null = null;
1416
private collection: mongo.Collection;
1517
private batchSize: number;
18+
private filter: MongoExpression | null = null;
1619

17-
public constructor(options: { collection: mongo.Collection; batchSize: number; key?: Uint8Array | null }) {
20+
public constructor(options: {
21+
collection: mongo.Collection;
22+
batchSize: number;
23+
key?: Uint8Array | null;
24+
filter?: StaticFilter;
25+
}) {
1826
this.lastKey = options.key ? bson.deserialize(options.key, { useBigInt64: true })._id : null;
1927
this.lastCursor = null;
2028
this.collection = options.collection;
2129
this.batchSize = options.batchSize;
30+
if (options.filter) {
31+
this.filter = staticFilterToMongoExpression(options.filter);
32+
}
2233
}
2334

2435
async nextChunk(): Promise<{ docs: mongo.Document[]; lastKey: Uint8Array } | { docs: []; lastKey: null }> {
@@ -35,8 +46,17 @@ export class ChunkedSnapshotQuery implements AsyncDisposable {
3546
// any parsing as an operator.
3647
// Starting in MongoDB 5.0, this filter can use the _id index. Source:
3748
// https://www.mongodb.com/docs/manual/release-notes/5.0/#general-aggregation-improvements
38-
const filter: mongo.Filter<mongo.Document> =
39-
this.lastKey == null ? {} : { $expr: { $gt: ['$_id', { $literal: this.lastKey }] } };
49+
let filter: mongo.Filter<mongo.Document>;
50+
if (this.lastKey == null) {
51+
filter = this.filter == null ? {} : { $expr: this.filter };
52+
} else {
53+
if (this.filter == null) {
54+
filter = { $expr: { $gt: ['$_id', { $literal: this.lastKey }] } };
55+
} else {
56+
filter = { $and: [{ $expr: { $gt: ['$_id', { $literal: this.lastKey }] } }, { $expr: this.filter }] };
57+
}
58+
}
59+
4060
cursor = this.collection.find(filter, {
4161
readConcern: 'majority',
4262
limit: this.batchSize,

modules/module-mongodb/src/replication/MongoSnapshotter.ts

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -266,7 +266,8 @@ export class MongoSnapshotter {
266266
const sourceTables = await writer.resolveTables({
267267
connection_id: this.connection_id,
268268
connection_tag: this.connections.connectionTag,
269-
entity_descriptor: getMongoRelation({ db: schema, coll: collection.name })
269+
entity_descriptor: getMongoRelation({ db: schema, coll: collection.name }),
270+
pattern: tablePattern
270271
});
271272
// TODO: dropTables?
272273
result.push(...sourceTables.tables);
@@ -280,10 +281,12 @@ export class MongoSnapshotter {
280281
let at = table.snapshotStatus?.replicatedCount ?? 0;
281282
const db = this.client.db(table.schema);
282283
const collection = db.collection(table.name);
284+
console.log('snapshot with filter', table.pattern?.filter, table.pattern);
283285
await using query = new ChunkedSnapshotQuery({
284286
collection,
285287
key: table.snapshotStatus?.lastKey,
286-
batchSize: this.snapshotChunkLength
288+
batchSize: this.snapshotChunkLength,
289+
filter: table.pattern?.filter
287290
});
288291
if (query.lastKey != null) {
289292
this.logger.info(

0 commit comments

Comments
 (0)