- Add storage module with SQLite persistence via better-sqlite3 - Add deduplication module for feed item dedup - Add infrastructure directory for deployment config - Add .env.example for environment variables - Update dependencies: kysely, better-sqlite3, pg
56 lines
1.4 KiB
TypeScript
56 lines
1.4 KiB
TypeScript
/**
|
|
* Deduplication module implementation.
|
|
* Tracks seen item IDs to filter duplicates from feeds.
|
|
*/
|
|
|
|
import type { Kysely } from 'kysely';
|
|
import type { FeedItem } from '../../interfaces/feed.types.js';
|
|
import type { IDedup } from '../../interfaces/dedup.interface.js';
|
|
import type { Database, SeenIdTable } from '../../infrastructure/db/database.js';
|
|
|
|
/**
 * Database-backed implementation of {@link IDedup}.
 *
 * Remembers the IDs of feed items that were already processed in the
 * `seen_ids` table and uses that table to drop previously seen items.
 *
 * Both operations are split into bounded chunks so that a large batch never
 * binds more parameters in one statement than the database accepts.
 */
export class DatabaseDedup implements IDedup {
  /**
   * Upper bound on bound parameters sent in a single statement.
   *
   * Kept well below 999, the default per-statement variable limit of SQLite
   * builds older than 3.32, so the same value is safe on every backend.
   */
  private static readonly MAX_PARAMS_PER_STATEMENT = 500;

  /** Number of columns bound per inserted `seen_ids` row (`id`, `seen_at`). */
  private static readonly COLUMNS_PER_SEEN_ROW = 2;

  /**
   * @param db Kysely instance used for all reads and writes of `seen_ids`.
   */
  constructor(private readonly db: Kysely<Database>) {}

  /**
   * Returns the items whose ID is not yet recorded in `seen_ids`.
   *
   * The relative order of the input is preserved. This method only reads; it
   * does not record anything as seen.
   *
   * NOTE(review): an ID that occurs several times within `items` itself and is
   * not in the table yet is returned every time it occurs — confirm whether
   * the `IDedup` contract expects intra-batch duplicates to be collapsed.
   *
   * @param items Candidate feed items.
   * @returns The subset of `items` that has not been marked as seen.
   */
  async filter(items: FeedItem[]): Promise<FeedItem[]> {
    if (items.length === 0) {
      return [];
    }

    // Repeated IDs would only waste bound parameters in the IN list.
    const uniqueIds = [...new Set(items.map((item) => item.id))];
    const seenIds = new Set<SeenIdTable['id']>();

    // Query chunk by chunk so the IN list never exceeds the parameter limit.
    for (const idChunk of DatabaseDedup.chunk(
      uniqueIds,
      DatabaseDedup.MAX_PARAMS_PER_STATEMENT,
    )) {
      const seenRows = await this.db
        .selectFrom('seen_ids')
        .select('id')
        .where('id', 'in', idChunk)
        .execute();

      for (const row of seenRows) {
        seenIds.add(row.id);
      }
    }

    return items.filter((item) => !seenIds.has(item.id));
  }

  /**
   * Records the IDs of the given items in `seen_ids`.
   *
   * IDs that are already present are left untouched (insert-or-ignore), so the
   * call is idempotent. All rows written by one call share the same `seen_at`
   * timestamp. Large batches are written with several statements; because the
   * insert is idempotent, retrying after a partial failure is safe.
   *
   * @param items Feed items to mark as seen.
   */
  async markSeen(items: FeedItem[]): Promise<void> {
    if (items.length === 0) {
      return;
    }

    // One timestamp for the whole batch instead of one clock read per item.
    const seenAt = new Date().toISOString();
    const uniqueIds = [...new Set(items.map((item) => item.id))];
    const rows: SeenIdTable[] = uniqueIds.map((id) => ({
      id,
      seen_at: seenAt,
    }));

    const rowsPerStatement = Math.floor(
      DatabaseDedup.MAX_PARAMS_PER_STATEMENT / DatabaseDedup.COLUMNS_PER_SEEN_ROW,
    );

    for (const rowChunk of DatabaseDedup.chunk(rows, rowsPerStatement)) {
      // Insert or ignore (idempotent)
      await this.db
        .insertInto('seen_ids')
        .values(rowChunk)
        .onConflict((oc) => oc.column('id').doNothing())
        .execute();
    }
  }

  /** Splits `values` into consecutive slices of at most `size` elements. */
  private static chunk<T>(values: readonly T[], size: number): T[][] {
    const chunks: T[][] = [];
    for (let start = 0; start < values.length; start += size) {
      chunks.push(values.slice(start, start + size));
    }
    return chunks;
  }
}
|