import { describe, it, expect } from 'vitest';
import { RssParser } from './rss.parser.js';

/**
 * Unit tests for RssParser.
 *
 * `parse(xml, sourceUrl)` is exercised with inline RSS 2.0 documents and
 * `supports(contentType)` with a handful of MIME types.
 *
 * NOTE(review): the XML/HTML markup inside the fixtures (and inside a few
 * expected strings) was reconstructed from what the assertions require, because
 * the angle-bracketed tokens were missing from the previous revision of this
 * file. Element names follow RSS 2.0 plus the `content:encoded` module; confirm
 * attribute details (e.g. enclosure `type`/`length`) against the parser.
 */
describe('RssParser', () => {
  const parser = new RssParser();

  describe('parse', () => {
    it('parses valid RSS 2.0 feed with all fields', async () => {
      const xml = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/">
  <channel>
    <item>
      <title>Test Article</title>
      <link>https://example.com/article</link>
      <description>This is a summary</description>
      <content:encoded><![CDATA[<p>Full content</p>]]></content:encoded>
      <pubDate>Mon, 06 Sep 2024 09:00:00 GMT</pubDate>
    </item>
  </channel>
</rss>`;

      const items = await parser.parse(xml, 'https://example.com/feed.xml');

      expect(items).toHaveLength(1);
      expect(items[0].title).toBe('Test Article');
      expect(items[0].url).toBe('https://example.com/article');
      expect(items[0].summary).toBe('This is a summary');
      expect(items[0].content).toBe('<p>Full content</p>');
      expect(items[0].publishedAt).toEqual(new Date('Mon, 06 Sep 2024 09:00:00 GMT'));
      expect(items[0].source).toBe('https://example.com/feed.xml');
      expect(items[0].id).toBeDefined();
    });

    it('parses RSS with only required fields', async () => {
      const xml = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
  <channel>
    <item>
      <title>Minimal Article</title>
      <link>https://example.com/minimal</link>
    </item>
  </channel>
</rss>`;

      const items = await parser.parse(xml, 'https://example.com/feed.xml');

      expect(items).toHaveLength(1);
      expect(items[0].title).toBe('Minimal Article');
      expect(items[0].url).toBe('https://example.com/minimal');
      expect(items[0].summary).toBeUndefined();
      expect(items[0].content).toBeUndefined();
      // No pubDate in the fixture: only the type of the value is pinned here.
      expect(items[0].publishedAt).toBeInstanceOf(Date);
    });

    it('parses multiple items', async () => {
      const xml = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
  <channel>
    <item>
      <title>Article 1</title>
      <link>https://example.com/1</link>
      <pubDate>Mon, 06 Sep 2024 09:00:00 GMT</pubDate>
    </item>
    <item>
      <title>Article 2</title>
      <link>https://example.com/2</link>
      <pubDate>Tue, 07 Sep 2024 10:00:00 GMT</pubDate>
    </item>
  </channel>
</rss>`;

      const items = await parser.parse(xml, 'https://example.com/feed.xml');

      expect(items).toHaveLength(2);
      expect(items[0].title).toBe('Article 1');
      expect(items[1].title).toBe('Article 2');
    });

    it('returns empty array when no items', async () => {
      const xml = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
  <channel>
    <title>Empty Feed</title>
  </channel>
</rss>`;

      const items = await parser.parse(xml, 'https://example.com/feed.xml');

      expect(items).toHaveLength(0);
    });

    it('throws on missing title', async () => {
      const xml = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
  <channel>
    <item>
      <link>https://example.com/article</link>
    </item>
  </channel>
</rss>`;

      await expect(parser.parse(xml, 'https://example.com/feed.xml')).rejects.toThrow(
        'missing required field: title'
      );
    });

    it('throws on missing link', async () => {
      const xml = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
  <channel>
    <item>
      <title>Article Without Link</title>
    </item>
  </channel>
</rss>`;

      await expect(parser.parse(xml, 'https://example.com/feed.xml')).rejects.toThrow(
        'missing required field: link'
      );
    });

    it('throws on invalid XML', async () => {
      const xml = 'not xml at all';

      await expect(parser.parse(xml, 'https://example.com/feed.xml')).rejects.toThrow(
        'Invalid XML'
      );
    });

    it('throws on missing rss root element', async () => {
      // Well-formed XML whose root is not <rss>.
      const xml = '<?xml version="1.0" encoding="UTF-8"?><feed></feed>';

      // NOTE(review): the expected message previously read
      // 'missing root element'; an angle-bracketed '<rss>' token may have been
      // lost from it. The pattern accepts both spellings - tighten it once the
      // parser's exact wording is confirmed.
      await expect(parser.parse(xml, 'https://example.com/feed.xml')).rejects.toThrow(
        /missing (<rss> )?root element/
      );
    });

    it('parses ISO 8601 date as fallback', async () => {
      const xml = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
  <channel>
    <item>
      <title>Test</title>
      <link>https://example.com/article</link>
      <pubDate>2024-09-06T09:00:00Z</pubDate>
    </item>
  </channel>
</rss>`;

      const items = await parser.parse(xml, 'https://example.com/feed.xml');

      expect(items[0].publishedAt).toEqual(new Date('2024-09-06T09:00:00Z'));
    });

    it('generates deterministic IDs', async () => {
      const xml = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
  <channel>
    <item>
      <title>Test</title>
      <link>https://example.com/article</link>
      <pubDate>Mon, 06 Sep 2024 09:00:00 GMT</pubDate>
    </item>
  </channel>
</rss>`;

      // Parsing the same document twice must yield the same id.
      const items1 = await parser.parse(xml, 'https://example.com/feed.xml');
      const items2 = await parser.parse(xml, 'https://example.com/feed.xml');

      expect(items1[0].id).toBe(items2[0].id);
    });

    it('uses description as content when no content:encoded and description contains HTML', async () => {
      // Simulates feeds like NOS that put full HTML content in description
      const xml = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
  <channel>
    <item>
      <title><![CDATA[Iran ontkent aanvallen VAE]]></title>
      <link>https://nos.nl/l/2613264</link>
      <description><![CDATA[<p>Iran ontkent aanvallen te hebben uitgevoerd op de Verenigde Arabische Emiraten.</p>
<p>Gisteren werden er ook al aanvallen gemeld door de VAE.</p>
<h2>Onderhandelingen onmogelijk</h2>
<p>Iraanse staatsmedia melden dat de Iraanse president Pezeshkian heeft gezegd dat de VS aan de ene kant de druk op Iran opvoert.</p>
]]></description>
      <pubDate>Tue, 5 May 2026 21:44:46 +0200</pubDate>
    </item>
  </channel>
</rss>`;

      const items = await parser.parse(xml, 'https://feeds.nos.nl/nosnieuwsalgemeen');

      expect(items).toHaveLength(1);
      expect(items[0].title).toBe('Iran ontkent aanvallen VAE');

      // Content should contain the full HTML
      expect(items[0].content).toContain('<p>Iran ontkent aanvallen');
      expect(items[0].content).toContain('<h2>Onderhandelingen onmogelijk</h2>');

      // Summary should be extracted from content
      expect(items[0].summary).toBeDefined();
      expect(items[0].summary).toContain('Iran ontkent aanvallen');
      expect(items[0].summary?.length).toBeLessThanOrEqual(210); // 200 + "..."
    });

    it('uses description as summary when it looks like plain text summary', async () => {
      const xml = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
  <channel>
    <item>
      <title>Short Summary Article</title>
      <link>https://example.com/article</link>
      <description>This is just a brief summary without HTML tags</description>
      <pubDate>Mon, 06 Sep 2024 09:00:00 GMT</pubDate>
    </item>
  </channel>
</rss>`;

      const items = await parser.parse(xml, 'https://example.com/feed.xml');

      expect(items).toHaveLength(1);
      expect(items[0].summary).toBe('This is just a brief summary without HTML tags');
      expect(items[0].content).toBeUndefined();
    });

    it('strips CDATA wrappers from description and content', async () => {
      // NOTE(review): the inline tag around "formatting" is assumed to be
      // <strong>; confirm against the parser's expectations.
      const xml = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
  <channel>
    <item>
      <title><![CDATA[CDATA Title]]></title>
      <link>https://example.com/article</link>
      <description><![CDATA[<p>This is a very long content with <strong>formatting</strong> and lots of text to ensure it exceeds the 500 character threshold for being considered full content. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur.</p>
]]></description>
      <pubDate>Mon, 06 Sep 2024 09:00:00 GMT</pubDate>
    </item>
  </channel>
</rss>`;

      const items = await parser.parse(xml, 'https://example.com/feed.xml');

      expect(items[0].title).toBe('CDATA Title');
      expect(items[0].content).toContain('<p>This is a very long content');
      expect(items[0].content).toContain('<strong>formatting</strong>');
    });

    it('extracts image URL from enclosure', async () => {
      const xml = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
  <channel>
    <item>
      <title>Article with Image</title>
      <link>https://example.com/article</link>
      <description>Article summary</description>
      <enclosure url="https://example.com/image.jpg" type="image/jpeg" length="12345" />
      <pubDate>Mon, 06 Sep 2024 09:00:00 GMT</pubDate>
    </item>
  </channel>
</rss>`;

      const items = await parser.parse(xml, 'https://example.com/feed.xml');

      expect(items).toHaveLength(1);
      expect(items[0].imageUrl).toBe('https://example.com/image.jpg');
    });

    it('extracts first image from multiple enclosures', async () => {
      // NOTE(review): a non-image enclosure is placed first so that "first
      // image" differs from "first enclosure"; the original ordering and MIME
      // types are assumed.
      const xml = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
  <channel>
    <item>
      <title>Article with Multiple Enclosures</title>
      <link>https://example.com/article</link>
      <description>Article summary</description>
      <enclosure url="https://example.com/audio.mp3" type="audio/mpeg" length="99999" />
      <enclosure url="https://example.com/image.webp" type="image/webp" length="12345" />
      <enclosure url="https://example.com/image.jpg" type="image/jpeg" length="54321" />
      <pubDate>Mon, 06 Sep 2024 09:00:00 GMT</pubDate>
    </item>
  </channel>
</rss>`;

      const items = await parser.parse(xml, 'https://example.com/feed.xml');

      expect(items).toHaveLength(1);
      expect(items[0].imageUrl).toBe('https://example.com/image.webp');
    });

    it('handles items without enclosure', async () => {
      const xml = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
  <channel>
    <item>
      <title>Article without Image</title>
      <link>https://example.com/article</link>
      <description>Article summary</description>
      <pubDate>Mon, 06 Sep 2024 09:00:00 GMT</pubDate>
    </item>
  </channel>
</rss>`;

      const items = await parser.parse(xml, 'https://example.com/feed.xml');

      expect(items).toHaveLength(1);
      expect(items[0].imageUrl).toBeUndefined();
    });
  });

  describe('supports', () => {
    it('returns true for application/rss+xml', () => {
      expect(parser.supports('application/rss+xml')).toBe(true);
    });

    // Title corrected: the assertions reject generic text/xml and accept
    // application/rss, which the previous title stated the other way round.
    it('returns false for generic text/xml but true for application/rss', () => {
      expect(parser.supports('text/xml')).toBe(false); // Not strictly RSS
      expect(parser.supports('application/rss')).toBe(true);
    });

    it('returns false for atom content type', () => {
      expect(parser.supports('application/atom+xml')).toBe(false);
    });

    it('is case insensitive', () => {
      expect(parser.supports('APPLICATION/RSS+XML')).toBe(true);
    });
  });
});