/**
 * Deduplication module implementation.
 * Tracks seen item IDs to filter duplicates from feeds.
 */

import type { Kysely } from 'kysely';
import type { FeedItem } from '../../interfaces/feed.types.js';
import type { IDedup } from '../../interfaces/dedup.interface.js';
import type { Database, SeenIdTable } from '../../infrastructure/db/database.js';

/**
 * {@link IDedup} implementation backed by the `seen_ids` table.
 *
 * An item counts as "seen" once a row with its `id` exists in `seen_ids`.
 */
export class DatabaseDedup implements IDedup {
  private readonly db: Kysely<Database>;

  /**
   * @param db - Kysely instance used for all reads and writes of `seen_ids`.
   */
  constructor(db: Kysely<Database>) {
    this.db = db;
  }

  /**
   * Returns the items whose `id` has no row in `seen_ids`.
   *
   * Input order is preserved. Only the table is consulted: items that share
   * an `id` within the same batch are not collapsed against each other.
   * This method does not write to the table; call {@link markSeen} for that.
   *
   * @param items - Candidate feed items.
   * @returns The subset of `items` not yet recorded as seen.
   */
  async filter(items: FeedItem[]): Promise<FeedItem[]> {
    if (items.length === 0) {
      return [];
    }

    // De-duplicate the lookup keys so repeated IDs in the batch do not
    // inflate the number of bind parameters in the IN (...) list.
    const uniqueIds = [...new Set(items.map((item) => item.id))];

    // Query which IDs are already in the seen table
    const seenRows = await this.db
      .selectFrom('seen_ids')
      .select('id')
      .where('id', 'in', uniqueIds)
      .execute();

    const seenIds = new Set(seenRows.map((row) => row.id));

    // Return only items NOT in seen table
    return items.filter((item) => !seenIds.has(item.id));
  }

  /**
   * Records the given items' IDs in `seen_ids`.
   *
   * Rows whose `id` already exists are left untouched (conflicts on `id` are
   * ignored), so calling this repeatedly with the same items is safe.
   *
   * @param items - Items to record; an empty array is a no-op.
   */
  async markSeen(items: FeedItem[]): Promise<void> {
    if (items.length === 0) {
      return;
    }

    // One timestamp per call so every row in the batch shares the same seen_at.
    const seenAt = new Date().toISOString();
    const rows: SeenIdTable[] = items.map((item) => ({
      id: item.id,
      seen_at: seenAt,
    }));

    // Insert or ignore (idempotent)
    await this.db
      .insertInto('seen_ids')
      .values(rows)
      .onConflict((oc) => oc.column('id').doNothing())
      .execute();
  }
}