/*
 * Commit summary:
 * - Create separate RssParser and AtomParser implementing the IParser interface
 * - Add utility functions for ID generation (djb2 hash) and date parsing
 * - Support both RSS (RFC 822) and Atom (ISO 8601) date formats
 * - Handle Atom elements with attributes (type="html") via the #text property
 * - Map RSS <description> to summary and <content:encoded> to content
 * - Map Atom <summary> to summary and <content> to content
 * - Prefer Atom link[@rel="alternate"] for article URLs
 * - Throw descriptive errors for malformed XML and missing required fields
 * - Add comprehensive test coverage for both parsers (32 tests total)
 *
 * File: 237 lines, 7.9 KiB, TypeScript
 */
import { describe, it, expect } from 'vitest';
import { AtomParser } from './atom.parser.js';

/**
 * Test suite for AtomParser.
 *
 * `parse` is exercised with inline Atom documents and checked for field
 * mapping, link selection, date selection, ID stability and the error
 * messages produced for malformed input. `supports` is checked against a
 * handful of content-type strings.
 */
describe('AtomParser', () => {
  // Feed URL handed to every parse() call; the first test asserts it is
  // echoed back as the item's `source`.
  const FEED_URL = 'https://example.com/feed.xml';

  const parser = new AtomParser();

  describe('parse', () => {
    it('parses valid Atom feed with all fields', async () => {
      // <summary> and <content> carry a type attribute and CDATA payloads;
      // the assertions expect the bare text, not a wrapper object.
      const xml = `<?xml version="1.0"?>
        <feed xmlns="http://www.w3.org/2005/Atom">
          <entry>
            <title>Test Article</title>
            <link rel="alternate" href="https://example.com/article"/>
            <summary type="html"><![CDATA[This is a summary]]></summary>
            <content type="html"><![CDATA[<p>Full content</p>]]></content>
            <published>2024-09-06T09:00:00Z</published>
          </entry>
        </feed>`;

      const items = await parser.parse(xml, FEED_URL);

      expect(items).toHaveLength(1);
      expect(items[0].title).toBe('Test Article');
      expect(items[0].url).toBe('https://example.com/article');
      expect(items[0].summary).toBe('This is a summary');
      expect(items[0].content).toBe('<p>Full content</p>');
      expect(items[0].publishedAt).toEqual(new Date('2024-09-06T09:00:00Z'));
      expect(items[0].source).toBe(FEED_URL);
      expect(items[0].id).toBeDefined();
    });

    it('parses Atom with only required fields', async () => {
      const xml = `<?xml version="1.0"?>
        <feed xmlns="http://www.w3.org/2005/Atom">
          <entry>
            <title>Minimal Article</title>
            <link href="https://example.com/minimal"/>
          </entry>
        </feed>`;

      const items = await parser.parse(xml, FEED_URL);

      expect(items).toHaveLength(1);
      expect(items[0].title).toBe('Minimal Article');
      expect(items[0].url).toBe('https://example.com/minimal');
      // Optional fields are absent from the entry and must stay undefined.
      expect(items[0].summary).toBeUndefined();
      expect(items[0].content).toBeUndefined();
      // No date element is present; only the type of the result is pinned.
      expect(items[0].publishedAt).toBeInstanceOf(Date);
    });

    it('parses multiple entries', async () => {
      const xml = `<?xml version="1.0"?>
        <feed xmlns="http://www.w3.org/2005/Atom">
          <entry>
            <title>Article 1</title>
            <link href="https://example.com/1"/>
            <published>2024-09-06T09:00:00Z</published>
          </entry>
          <entry>
            <title>Article 2</title>
            <link href="https://example.com/2"/>
            <published>2024-09-07T10:00:00Z</published>
          </entry>
        </feed>`;

      const items = await parser.parse(xml, FEED_URL);

      // Items are expected in document order.
      expect(items).toHaveLength(2);
      expect(items[0].title).toBe('Article 1');
      expect(items[1].title).toBe('Article 2');
    });

    it('returns empty array when no entries', async () => {
      const xml = `<?xml version="1.0"?>
        <feed xmlns="http://www.w3.org/2005/Atom">
          <title>Empty Feed</title>
        </feed>`;

      const items = await parser.parse(xml, FEED_URL);

      expect(items).toHaveLength(0);
    });

    it('prefers rel="alternate" link', async () => {
      // The rel="self" link comes first so that picking the first link
      // would yield the wrong URL.
      const xml = `<?xml version="1.0"?>
        <feed xmlns="http://www.w3.org/2005/Atom">
          <entry>
            <title>Test</title>
            <link rel="self" href="https://example.com/feed"/>
            <link rel="alternate" href="https://example.com/article"/>
          </entry>
        </feed>`;

      const items = await parser.parse(xml, FEED_URL);

      expect(items[0].url).toBe('https://example.com/article');
    });

    it('falls back to first non-self link', async () => {
      // No rel="alternate" here: the second link has no rel attribute.
      const xml = `<?xml version="1.0"?>
        <feed xmlns="http://www.w3.org/2005/Atom">
          <entry>
            <title>Test</title>
            <link rel="self" href="https://example.com/feed"/>
            <link href="https://example.com/article"/>
          </entry>
        </feed>`;

      const items = await parser.parse(xml, FEED_URL);

      expect(items[0].url).toBe('https://example.com/article');
    });

    it('throws on missing title', async () => {
      const xml = `<?xml version="1.0"?>
        <feed xmlns="http://www.w3.org/2005/Atom">
          <entry>
            <link href="https://example.com/article"/>
          </entry>
        </feed>`;

      await expect(parser.parse(xml, FEED_URL)).rejects.toThrow(
        'missing required field: title'
      );
    });

    it('throws on missing link with href', async () => {
      const xml = `<?xml version="1.0"?>
        <feed xmlns="http://www.w3.org/2005/Atom">
          <entry>
            <title>Article Without Link</title>
          </entry>
        </feed>`;

      await expect(parser.parse(xml, FEED_URL)).rejects.toThrow(
        'missing required field: link with href'
      );
    });

    it('throws on invalid XML', async () => {
      const xml = 'not xml at all';

      await expect(parser.parse(xml, FEED_URL)).rejects.toThrow(
        'Invalid XML'
      );
    });

    it('throws on missing feed root element', async () => {
      // Well-formed XML, but the root element is <rss> rather than <feed>.
      const xml = '<?xml version="1.0"?><rss></rss>';

      await expect(parser.parse(xml, FEED_URL)).rejects.toThrow(
        'missing <feed> root element'
      );
    });

    it('uses <updated> when <published> is missing', async () => {
      const xml = `<?xml version="1.0"?>
        <feed xmlns="http://www.w3.org/2005/Atom">
          <entry>
            <title>Test</title>
            <link href="https://example.com/article"/>
            <updated>2024-09-06T09:00:00Z</updated>
          </entry>
        </feed>`;

      const items = await parser.parse(xml, FEED_URL);

      expect(items[0].publishedAt).toEqual(new Date('2024-09-06T09:00:00Z'));
    });

    it('prefers <published> over <updated>', async () => {
      // The two timestamps differ so the assertion can tell which was used.
      const xml = `<?xml version="1.0"?>
        <feed xmlns="http://www.w3.org/2005/Atom">
          <entry>
            <title>Test</title>
            <link href="https://example.com/article"/>
            <published>2024-09-06T09:00:00Z</published>
            <updated>2024-09-07T10:00:00Z</updated>
          </entry>
        </feed>`;

      const items = await parser.parse(xml, FEED_URL);

      expect(items[0].publishedAt).toEqual(new Date('2024-09-06T09:00:00Z'));
    });

    it('generates deterministic IDs', async () => {
      const xml = `<?xml version="1.0"?>
        <feed xmlns="http://www.w3.org/2005/Atom">
          <entry>
            <title>Test</title>
            <link href="https://example.com/article"/>
            <published>2024-09-06T09:00:00Z</published>
          </entry>
        </feed>`;

      // Same document and same feed URL parsed twice must yield the same id.
      const items1 = await parser.parse(xml, FEED_URL);
      const items2 = await parser.parse(xml, FEED_URL);

      expect(items1[0].id).toBe(items2[0].id);
    });

    // NOTE(review): fixture and assertion are identical to
    // 'prefers rel="alternate" link' above. Presumably this is meant to cover
    // the case where repeated <link> elements reach the parser as an array;
    // confirm, and either differentiate the fixture or drop the duplicate.
    it('handles multiple links in array format', async () => {
      const xml = `<?xml version="1.0"?>
        <feed xmlns="http://www.w3.org/2005/Atom">
          <entry>
            <title>Test</title>
            <link rel="self" href="https://example.com/feed"/>
            <link rel="alternate" href="https://example.com/article"/>
          </entry>
        </feed>`;

      const items = await parser.parse(xml, FEED_URL);

      expect(items[0].url).toBe('https://example.com/article');
    });
  });

  describe('supports', () => {
    it('returns true for application/atom+xml', () => {
      expect(parser.supports('application/atom+xml')).toBe(true);
    });

    it('returns true for atom in content type', () => {
      expect(parser.supports('application/atom')).toBe(true);
    });

    it('returns false for rss content type', () => {
      expect(parser.supports('application/rss+xml')).toBe(false);
    });

    it('is case insensitive', () => {
      expect(parser.supports('APPLICATION/ATOM+XML')).toBe(true);
    });
  });
});