- Create separate RssParser and AtomParser implementing the IParser interface
- Add utility functions for ID generation (djb2 hash) and date parsing
- Support both RSS (RFC 822) and Atom (ISO 8601) date formats
- Handle Atom elements with attributes (type="html") via the #text property
- Map RSS <description> to summary and <content:encoded> to content
- Map Atom <summary> to summary and <content> to content
- Prefer Atom link[@rel="alternate"] for article URLs
- Throw descriptive errors for malformed XML and missing required fields
- Add comprehensive test coverage for both parsers (32 tests total)
193 lines
6.0 KiB
TypeScript
193 lines
6.0 KiB
TypeScript
import { describe, it, expect } from 'vitest';
|
|
import { RssParser } from './rss.parser.js';
|
|
|
|
/**
 * Unit tests for RssParser.
 *
 * `parse` cases assert how an RSS 2.0 document is turned into items:
 * title/link are required, <description> becomes `summary`,
 * <content:encoded> becomes `content`, <pubDate> becomes `publishedAt`,
 * and the feed URL passed to `parse` is reported as `source`.
 * `supports` cases assert content-type matching.
 */
describe('RssParser', () => {
  const parser = new RssParser();
  // Feed URL handed to every parse() call; the parser is expected to echo it as `source`.
  const feedUrl = 'https://example.com/feed.xml';

  describe('parse', () => {
    it('parses valid RSS 2.0 feed with all fields', async () => {
      // The content: prefix is declared so the fixture is namespace-well-formed,
      // and the weekday matches the calendar (06 Sep 2024 is a Friday).
      const xml = `<?xml version="1.0"?>
        <rss version="2.0" xmlns:content="http://purl.org/rss/1.0/modules/content/">
          <channel>
            <item>
              <title>Test Article</title>
              <link>https://example.com/article</link>
              <description>This is a summary</description>
              <content:encoded><![CDATA[<p>Full content</p>]]></content:encoded>
              <pubDate>Fri, 06 Sep 2024 09:00:00 GMT</pubDate>
            </item>
          </channel>
        </rss>`;

      const items = await parser.parse(xml, feedUrl);

      expect(items).toHaveLength(1);

      const [item] = items;
      expect(item.title).toBe('Test Article');
      expect(item.url).toBe('https://example.com/article');
      expect(item.summary).toBe('This is a summary');
      expect(item.content).toBe('<p>Full content</p>');
      // Expected instant is spelled in ISO 8601 so the assertion does not depend
      // on the same legacy date parsing the parser itself may be using.
      expect(item.publishedAt).toEqual(new Date('2024-09-06T09:00:00Z'));
      expect(item.source).toBe(feedUrl);
      expect(item.id).toBeDefined();
    });

    it('parses RSS with only required fields', async () => {
      const xml = `<?xml version="1.0"?>
        <rss version="2.0">
          <channel>
            <item>
              <title>Minimal Article</title>
              <link>https://example.com/minimal</link>
            </item>
          </channel>
        </rss>`;

      const items = await parser.parse(xml, feedUrl);

      expect(items).toHaveLength(1);

      const [item] = items;
      expect(item.title).toBe('Minimal Article');
      expect(item.url).toBe('https://example.com/minimal');
      expect(item.summary).toBeUndefined();
      expect(item.content).toBeUndefined();
      // No <pubDate> in the fixture: only the type of the fallback value is pinned.
      expect(item.publishedAt).toBeInstanceOf(Date);
    });

    it('parses multiple items', async () => {
      const xml = `<?xml version="1.0"?>
        <rss version="2.0">
          <channel>
            <item>
              <title>Article 1</title>
              <link>https://example.com/1</link>
              <pubDate>Fri, 06 Sep 2024 09:00:00 GMT</pubDate>
            </item>
            <item>
              <title>Article 2</title>
              <link>https://example.com/2</link>
              <pubDate>Sat, 07 Sep 2024 10:00:00 GMT</pubDate>
            </item>
          </channel>
        </rss>`;

      const items = await parser.parse(xml, feedUrl);

      // Document order must be preserved.
      expect(items).toHaveLength(2);
      expect(items[0].title).toBe('Article 1');
      expect(items[1].title).toBe('Article 2');
    });

    it('returns empty array when no items', async () => {
      const xml = `<?xml version="1.0"?>
        <rss version="2.0">
          <channel>
            <title>Empty Feed</title>
          </channel>
        </rss>`;

      const items = await parser.parse(xml, feedUrl);

      expect(items).toHaveLength(0);
    });

    it('throws on missing title', async () => {
      const xml = `<?xml version="1.0"?>
        <rss version="2.0">
          <channel>
            <item>
              <link>https://example.com/article</link>
            </item>
          </channel>
        </rss>`;

      await expect(parser.parse(xml, feedUrl)).rejects.toThrow(
        'missing required field: title'
      );
    });

    it('throws on missing link', async () => {
      const xml = `<?xml version="1.0"?>
        <rss version="2.0">
          <channel>
            <item>
              <title>Article Without Link</title>
            </item>
          </channel>
        </rss>`;

      await expect(parser.parse(xml, feedUrl)).rejects.toThrow(
        'missing required field: link'
      );
    });

    it('throws on invalid XML', async () => {
      const xml = 'not xml at all';

      await expect(parser.parse(xml, feedUrl)).rejects.toThrow('Invalid XML');
    });

    it('throws on missing rss root element', async () => {
      // Well-formed XML, but the root is <feed> rather than <rss>.
      const xml = '<?xml version="1.0"?><feed></feed>';

      await expect(parser.parse(xml, feedUrl)).rejects.toThrow(
        'missing <rss> root element'
      );
    });

    it('parses ISO 8601 date as fallback', async () => {
      const xml = `<?xml version="1.0"?>
        <rss version="2.0">
          <channel>
            <item>
              <title>Test</title>
              <link>https://example.com/article</link>
              <pubDate>2024-09-06T09:00:00Z</pubDate>
            </item>
          </channel>
        </rss>`;

      const items = await parser.parse(xml, feedUrl);

      expect(items[0].publishedAt).toEqual(new Date('2024-09-06T09:00:00Z'));
    });

    it('generates deterministic IDs', async () => {
      const xml = `<?xml version="1.0"?>
        <rss version="2.0">
          <channel>
            <item>
              <title>Test</title>
              <link>https://example.com/article</link>
              <pubDate>Fri, 06 Sep 2024 09:00:00 GMT</pubDate>
            </item>
          </channel>
        </rss>`;

      // Parsing the same document twice must yield the same item id.
      const firstRun = await parser.parse(xml, feedUrl);
      const secondRun = await parser.parse(xml, feedUrl);

      expect(firstRun[0].id).toBe(secondRun[0].id);
    });
  });

  describe('supports', () => {
    it('returns true for application/rss+xml', () => {
      expect(parser.supports('application/rss+xml')).toBe(true);
    });

    // Title now states what is actually asserted: generic text/xml is rejected.
    it('returns false for generic text/xml but true for application/rss', () => {
      expect(parser.supports('text/xml')).toBe(false); // Not strictly RSS
      expect(parser.supports('application/rss')).toBe(true);
    });

    it('returns false for atom content type', () => {
      expect(parser.supports('application/atom+xml')).toBe(false);
    });

    it('is case insensitive', () => {
      expect(parser.supports('APPLICATION/RSS+XML')).toBe(true);
    });
  });
});
|