');
expect(items[0].publishedAt).toEqual(new Date('Mon, 06 Sep 2024 09:00:00 GMT'));
expect(items[0].source).toBe('https://example.com/feed.xml');
expect(items[0].id).toBeDefined();
});
it('parses RSS with only required fields', async () => {
  // Smallest accepted item: a title and a link, nothing else.
  // NOTE(review): no XML tags are visible in this fixture — confirm the markup was not lost.
  const feedUrl = 'https://example.com/feed.xml';
  const minimalFeed = `
Minimal Article
https://example.com/minimal
`;

  const parsed = await parser.parse(minimalFeed, feedUrl);
  expect(parsed).toHaveLength(1);

  const article = parsed[0];
  expect(article.title).toBe('Minimal Article');
  expect(article.url).toBe('https://example.com/minimal');
  // Optional fields are expected to stay unset when the feed omits them.
  expect(article.summary).toBeUndefined();
  expect(article.content).toBeUndefined();
  // A Date is expected even though the fixture carries no publication date.
  expect(article.publishedAt).toBeInstanceOf(Date);
});
it('parses multiple items', async () => {
  // Two entries in one feed must come back as two items, in document order.
  // NOTE(review): no XML tags are visible in this fixture — confirm the markup was not lost.
  const twoItemFeed = `
Article 1
https://example.com/1
Mon, 06 Sep 2024 09:00:00 GMTArticle 2
https://example.com/2
Tue, 07 Sep 2024 10:00:00 GMT`;

  const parsed = await parser.parse(twoItemFeed, 'https://example.com/feed.xml');
  expect(parsed).toHaveLength(2);

  const leading = parsed[0];
  expect(leading.title).toBe('Article 1');
  const trailing = parsed[1];
  expect(trailing.title).toBe('Article 2');
});
it('returns empty array when no items', async () => {
  // A feed with no entries is not an error; the result is simply empty.
  // NOTE(review): no XML tags are visible in this fixture — confirm the markup was not lost.
  const itemlessFeed = `
Empty Feed`;
  const feedUrl = 'https://example.com/feed.xml';

  const parsed = await parser.parse(itemlessFeed, feedUrl);

  expect(parsed).toHaveLength(0);
});
it('throws on missing title', async () => {
  // An item that only has a link must be rejected with a title-specific message.
  // NOTE(review): no XML tags are visible in this fixture — confirm the markup was not lost.
  const untitledFeed = `
https://example.com/article
`;
  const expectedMessage = 'missing required field: title';

  const pendingParse = parser.parse(untitledFeed, 'https://example.com/feed.xml');

  await expect(pendingParse).rejects.toThrow(expectedMessage);
});
it('throws on missing link', async () => {
  // An item that only has a title must be rejected with a link-specific message.
  // NOTE(review): no XML tags are visible in this fixture — confirm the markup was not lost.
  const linklessFeed = `
Article Without Link`;
  const expectedMessage = 'missing required field: link';

  const pendingParse = parser.parse(linklessFeed, 'https://example.com/feed.xml');

  await expect(pendingParse).rejects.toThrow(expectedMessage);
});
it('throws on invalid XML', async () => {
  // Plain text that is not markup at all must surface as an "Invalid XML" rejection.
  const garbage = 'not xml at all';
  const feedUrl = 'https://example.com/feed.xml';

  const pendingParse = parser.parse(garbage, feedUrl);

  await expect(pendingParse).rejects.toThrow('Invalid XML');
});
it('throws on missing rss root element', async () => {
  // Input without the expected root must be rejected with a root-specific message.
  // NOTE(review): the fixture is an empty string as shown here — confirm that a
  // non-RSS XML document was not lost from this literal.
  const rootlessInput = '';
  const expectedMessage = 'missing root element';

  const pendingParse = parser.parse(rootlessInput, 'https://example.com/feed.xml');

  await expect(pendingParse).rejects.toThrow(expectedMessage);
});
it('parses ISO 8601 date as fallback', async () => {
  // The publication date is given in ISO 8601 form and must still be parsed.
  // NOTE(review): no XML tags are visible in this fixture — confirm the markup was not lost.
  const isoDatedFeed = `
Test
https://example.com/article
2024-09-06T09:00:00Z`;
  const expectedDate = new Date('2024-09-06T09:00:00Z');

  const parsed = await parser.parse(isoDatedFeed, 'https://example.com/feed.xml');

  const article = parsed[0];
  expect(article.publishedAt).toEqual(expectedDate);
});
it('generates deterministic IDs', async () => {
  // Parsing the same document twice must yield the same id for the same item.
  // NOTE(review): no XML tags are visible in this fixture — confirm the markup was not lost.
  const xml = `
Test
https://example.com/article
Mon, 06 Sep 2024 09:00:00 GMT`;
  // The two parses run one after the other on identical input.
  const items1 = await parser.parse(xml, 'https://example.com/feed.xml');
  const items2 = await parser.parse(xml, 'https://example.com/feed.xml');
  // Guard against a vacuous pass: two undefined ids would also satisfy toBe.
  expect(items1[0].id).toBeDefined();
  expect(items1[0].id).toBe(items2[0].id);
});
it('uses description as content when no content:encoded and description contains HTML', async () => {
// Checks that an HTML-bearing description is exposed as `content`, and that a
// summary is then derived from it.
// Simulates feeds like NOS that put full HTML content in description
// NOTE(review): the fixture below shows no XML/HTML tags, only a trailing "]]>";
// the markup looks to have been lost — confirm against the original file.
const xml = `
https://nos.nl/l/2613264
Iran ontkent aanvallen te hebben uitgevoerd op de Verenigde Arabische Emiraten.
Gisteren werden er ook al aanvallen gemeld door de VAE.
Onderhandelingen onmogelijk
Iraanse staatsmedia melden dat de Iraanse president Pezeshkian heeft gezegd dat de VS aan de ene kant de druk op Iran opvoert.
]]>Tue, 5 May 2026 21:44:46 +0200`;
const items = await parser.parse(xml, 'https://feeds.nos.nl/nosnieuwsalgemeen');
expect(items).toHaveLength(1);
expect(items[0].title).toBe('Iran ontkent aanvallen VAE');
// Content should contain the full HTML
// NOTE(review): the single-quoted literal below spans two lines, which is not
// valid JavaScript; the expected HTML snippet appears to be missing — restore it.
expect(items[0].content).toContain('
');
// Summary should be extracted from content
expect(items[0].summary).toBeDefined();
expect(items[0].summary).toContain('Iran ontkent aanvallen');
expect(items[0].summary?.length).toBeLessThanOrEqual(210); // 200 + "..."
});
it('uses description as summary when it looks like plain text summary', async () => {
  // A short, tag-free description is treated as the summary and leaves content unset.
  // NOTE(review): no XML tags are visible in this fixture — confirm the markup was not lost.
  const plainSummaryFeed = `
Short Summary Article
https://example.com/article
This is just a brief summary without HTML tagsMon, 06 Sep 2024 09:00:00 GMT`;
  const expectedSummary = 'This is just a brief summary without HTML tags';

  const parsed = await parser.parse(plainSummaryFeed, 'https://example.com/feed.xml');
  expect(parsed).toHaveLength(1);

  const article = parsed[0];
  expect(article.summary).toBe(expectedSummary);
  expect(article.content).toBeUndefined();
});
it('strips CDATA wrappers from description and content', async () => {
// Checks that CDATA-wrapped fields come back as their inner text: the title is
// compared exactly and the content is probed for two fragments.
// NOTE(review): the fixture below shows no XML tags or CDATA openers, only a
// trailing "]]>", and no 'CDATA Title' text — the markup looks to have been
// lost; confirm against the original file.
const xml = `
https://example.com/article
This is a very long content with formatting and lots of text to ensure it exceeds the 500 character threshold for being considered full content. Lorem ipsum dolor sit amet, consectetur adipiscing elit. Sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur.]]>Mon, 06 Sep 2024 09:00:00 GMT`;
const items = await parser.parse(xml, 'https://example.com/feed.xml');
expect(items[0].title).toBe('CDATA Title');
// NOTE(review): the single-quoted literal below spans two lines, which is not
// valid JavaScript; part of the expected snippet appears to be missing — restore it.
expect(items[0].content).toContain('
This is a very long content');
expect(items[0].content).toContain('formatting');
});
it('extracts image URL from enclosure', async () => {
  // The item's enclosure is expected to surface as `imageUrl`.
  // NOTE(review): no XML tags (and no enclosure) are visible in this fixture —
  // confirm the markup was not lost.
  const feedWithImage = `
Article with Image
https://example.com/article
Article summaryMon, 06 Sep 2024 09:00:00 GMT`;
  const feedUrl = 'https://example.com/feed.xml';

  const parsed = await parser.parse(feedWithImage, feedUrl);
  expect(parsed).toHaveLength(1);

  const article = parsed[0];
  expect(article.imageUrl).toBe('https://example.com/image.jpg');
});
it('extracts first image from multiple enclosures', async () => {
  // With several enclosures present, the first image one is expected to win.
  // NOTE(review): no XML tags (and no enclosures) are visible in this fixture —
  // confirm the markup was not lost.
  const feedWithEnclosures = `
Article with Multiple Enclosures
https://example.com/article
Article summaryMon, 06 Sep 2024 09:00:00 GMT`;
  const feedUrl = 'https://example.com/feed.xml';

  const parsed = await parser.parse(feedWithEnclosures, feedUrl);
  expect(parsed).toHaveLength(1);

  const article = parsed[0];
  expect(article.imageUrl).toBe('https://example.com/image.webp');
});
it('handles items without enclosure', async () => {
  // An item with no enclosure still parses; its `imageUrl` is simply unset.
  // NOTE(review): no XML tags are visible in this fixture — confirm the markup was not lost.
  const imagelessFeed = `
Article without Image
https://example.com/article
Article summaryMon, 06 Sep 2024 09:00:00 GMT`;
  const feedUrl = 'https://example.com/feed.xml';

  const parsed = await parser.parse(imagelessFeed, feedUrl);
  expect(parsed).toHaveLength(1);

  const article = parsed[0];
  expect(article.imageUrl).toBeUndefined();
});
});
// Content-type checks for `parser.supports`: RSS media types are accepted,
// generic XML and Atom are not, and matching ignores letter case.
describe('supports', () => {
  it('returns true for application/rss+xml', () => {
    expect(parser.supports('application/rss+xml')).toBe(true);
  });

  // Previously bundled under a title claiming text/xml returns true, which
  // contradicted the assertion; each expectation now has an accurate name.
  it('returns false for generic text/xml', () => {
    expect(parser.supports('text/xml')).toBe(false); // Not strictly RSS
  });

  it('returns true for application/rss', () => {
    expect(parser.supports('application/rss')).toBe(true);
  });

  it('returns false for atom content type', () => {
    expect(parser.supports('application/atom+xml')).toBe(false);
  });

  it('is case insensitive', () => {
    expect(parser.supports('APPLICATION/RSS+XML')).toBe(true);
  });
});
});