import type { FeedItem } from '../../interfaces/feed.types.js';
import type { IParser } from '../../interfaces/parser.interface.js';
import { XMLParser } from 'fast-xml-parser';
import { generateId, parseDate, isValidXml } from './utils.js';

/** Subset of an RSS `<channel>` element that this parser reads. */
interface RssChannel {
  title?: string;
  link?: string;
  description?: string;
  /** A single `<item>` is parsed as an object, several as an array. */
  item?: RssItem[] | RssItem;
}

/** Subset of an RSS `<item>` element that this parser reads. */
interface RssItem {
  title?: string;
  link?: string;
  description?: string;
  'content:encoded'?: string;
  pubDate?: string;
  guid?: string;
}

/** Shape of the parsed document root expected for an RSS feed. */
interface RssFeed {
  rss?: {
    channel?: RssChannel;
  };
}

/**
 * Parser for RSS 2.0 feeds.
 */
export class RssParser implements IParser {
  private readonly xmlParser: XMLParser;

  constructor() {
    this.xmlParser = new XMLParser({
      ignoreAttributes: false,
      attributeNamePrefix: '@_',
      parseAttributeValue: false,
      // Keep element text as strings. With the default (true) a numeric-looking
      // value such as <title>2024</title> is returned as a number, which breaks
      // the string handling in parseItem/cleanText.
      parseTagValue: false,
      trimValues: true,
    });
  }

  /**
   * Parses an RSS document into feed items.
   *
   * @param xml - Raw XML text of the feed.
   * @param source - Identifier copied onto every returned item's `source` field.
   * @returns One `FeedItem` per `<item>` in the channel; an empty array when the
   *   channel has no items.
   * @throws Error when `isValidXml` rejects the input, when the XML parser
   *   fails, when the `<rss>` root or its `<channel>` is missing, or when an
   *   item lacks a title or link.
   */
  async parse(xml: string, source: string): Promise<FeedItem[]> {
    if (!isValidXml(xml)) {
      throw new Error('Invalid XML: does not appear to be valid RSS/XML');
    }

    let parsed: RssFeed;
    try {
      parsed = this.xmlParser.parse(xml) as RssFeed;
    } catch (error: unknown) {
      const reason = error instanceof Error ? error.message : String(error);
      throw new Error(`XML parsing failed: ${reason}`);
    }

    if (!parsed.rss) {
      throw new Error('Invalid RSS: missing <rss> root element');
    }

    const channel = parsed.rss.channel;
    if (!channel) {
      throw new Error('Invalid RSS: missing <channel> element');
    }

    const items = channel.item;
    if (!items) {
      return [];
    }

    // A lone <item> is not wrapped in an array by the parser; normalise it.
    const itemArray = Array.isArray(items) ? items : [items];
    return itemArray.map((item) => this.parseItem(item, source));
  }

  /**
   * Converts one parsed `<item>` into a `FeedItem`.
   *
   * @param item - Parsed item element.
   * @param source - Identifier stored on the resulting item.
   * @returns The feed item; `summary` and `content` are `undefined` when the
   *   corresponding element is absent or empty.
   * @throws Error when the item has no title or no link.
   */
  private parseItem(item: RssItem, source: string): FeedItem {
    if (!item.title) {
      throw new Error('RSS item missing required field: title');
    }
    if (!item.link) {
      throw new Error('RSS item missing required field: link');
    }

    // Items without a pubDate are stamped with the time of parsing.
    const publishedAt = item.pubDate ? parseDate(item.pubDate) : new Date();
    const url = item.link;

    return {
      id: generateId(url, publishedAt),
      source,
      title: this.cleanText(item.title),
      url,
      publishedAt,
      summary: item.description ? this.cleanText(item.description) : undefined,
      content: item['content:encoded']
        ? this.cleanText(item['content:encoded'])
        : undefined,
    };
  }

  /**
   * Trims text and strips a literal CDATA wrapper if one is still present.
   *
   * @param text - Text taken from a parsed element.
   * @returns The cleaned text, or an empty string for empty input.
   */
  private cleanText(text: string): string {
    if (!text) return '';

    // Trim first so the anchored patterns see the wrapper even when it is
    // surrounded by whitespace, then trim again for padding inside the wrapper.
    return text
      .trim()
      .replace(/^<!\[CDATA\[/, '')
      .replace(/\]\]>$/, '')
      .trim();
  }

  /**
   * Reports whether this parser handles the given content type.
   *
   * @param contentType - Content type string to test.
   * @returns `true` when the value contains "rss", compared case-insensitively.
   */
  supports(contentType: string): boolean {
    return contentType.toLowerCase().includes('rss');
  }
}