Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 24 additions & 6 deletions src/__tests__/commands/scrape.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ describe('executeScrape', () => {

await executeScrape({
url: 'https://example.com',
format: 'html',
formats: ['html'],
});

expect(mockClient.scrape).toHaveBeenCalledWith('https://example.com', {
Expand Down Expand Up @@ -97,7 +97,7 @@ describe('executeScrape', () => {

await executeScrape({
url: 'https://example.com',
format: 'markdown',
formats: ['markdown'],
screenshot: true,
});

Expand Down Expand Up @@ -172,7 +172,7 @@ describe('executeScrape', () => {

await executeScrape({
url: 'https://example.com',
format: 'markdown',
formats: ['markdown'],
screenshot: true,
onlyMainContent: true,
waitFor: 3000,
Expand Down Expand Up @@ -256,21 +256,39 @@ describe('executeScrape', () => {

describe('Type safety', () => {
it('should accept valid ScrapeFormat types', async () => {
const formats: Array<'markdown' | 'html' | 'rawHtml' | 'links'> = [
const formatList: Array<'markdown' | 'html' | 'rawHtml' | 'links'> = [
'markdown',
'html',
'rawHtml',
'links',
];

for (const format of formats) {
for (const format of formatList) {
mockClient.scrape.mockResolvedValue({ [format]: 'test' });
const result = await executeScrape({
url: 'https://example.com',
format,
formats: [format],
});
expect(result.success).toBe(true);
}
});

it('should accept multiple formats', async () => {
mockClient.scrape.mockResolvedValue({
markdown: '# Test',
links: ['http://a.com'],
images: ['http://img.com/a.png'],
});

const result = await executeScrape({
url: 'https://example.com',
formats: ['markdown', 'links', 'images'],
});

expect(result.success).toBe(true);
expect(mockClient.scrape).toHaveBeenCalledWith('https://example.com', {
formats: ['markdown', 'links', 'images'],
});
});
});
});
33 changes: 24 additions & 9 deletions src/commands/scrape.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@
*/

import type { FormatOption } from '@mendable/firecrawl-js';
import type { ScrapeOptions, ScrapeResult } from '../types/scrape';
import type {
ScrapeOptions,
ScrapeResult,
ScrapeFormat,
} from '../types/scrape';
import { getClient } from '../utils/client';
import { handleScrapeOutput } from '../utils/output';

Expand Down Expand Up @@ -51,15 +55,14 @@ export async function executeScrape(
// Build scrape options
const formats: FormatOption[] = [];

if (options.format) {
formats.push(options.format);
// Add requested formats
if (options.formats && options.formats.length > 0) {
formats.push(...options.formats);
}

if (options.screenshot) {
// Add screenshot format if not already included
if (!formats.includes('screenshot')) {
formats.push('screenshot');
}
// Add screenshot format if requested and not already included
if (options.screenshot && !formats.includes('screenshot')) {
formats.push('screenshot');
}

// If no formats specified, default to markdown
Expand Down Expand Up @@ -123,5 +126,17 @@ export async function handleScrapeCommand(
options: ScrapeOptions
): Promise<void> {
const result = await executeScrape(options);
handleScrapeOutput(result, options.format, options.output, options.pretty);

// Determine effective formats for output handling
const effectiveFormats: ScrapeFormat[] =
options.formats && options.formats.length > 0
? [...options.formats]
: ['markdown'];

// Add screenshot to effective formats if it was requested separately
if (options.screenshot && !effectiveFormats.includes('screenshot')) {
effectiveFormats.push('screenshot');
}

handleScrapeOutput(result, effectiveFormats, options.output, options.pretty);
}
4 changes: 2 additions & 2 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -54,8 +54,8 @@ function createScrapeCommand(): Command {
)
.option('-H, --html', 'Output raw HTML (shortcut for --format html)')
.option(
'-f, --format <format>',
'Output format: markdown, html, rawHtml, links, images, screenshot, summary, changeTracking, json, attributes, branding',
'-f, --format <formats>',
'Output format(s). Multiple formats can be specified with commas (e.g., "markdown,links,images"). Available: markdown, html, rawHtml, links, images, screenshot, summary, changeTracking, json, attributes, branding. Single format outputs raw content; multiple formats output JSON.',
'markdown'
)
.option('--only-main-content', 'Include only main content', false)
Expand Down
4 changes: 2 additions & 2 deletions src/types/scrape.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ export type ScrapeFormat =
export interface ScrapeOptions {
/** URL to scrape */
url: string;
/** Output format (markdown, html, etc.) */
format?: ScrapeFormat;
/** Output formats to request - always an array, even when only one format is wanted */
formats?: ScrapeFormat[];
/** Include only main content */
onlyMainContent?: boolean;
/** Wait time before scraping (ms) */
Expand Down
68 changes: 66 additions & 2 deletions src/utils/options.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,79 @@
* Option parsing utilities
*/

import type { ScrapeOptions } from '../types/scrape';
import type { ScrapeOptions, ScrapeFormat } from '../types/scrape';

/**
 * Valid scrape format values, spelled with the casing used when sending
 * them on to the scrape client.
 */
const VALID_FORMATS: ScrapeFormat[] = [
  'markdown',
  'html',
  'rawHtml',
  'links',
  'images',
  'screenshot',
  'summary',
  'changeTracking',
  'json',
  'attributes',
  'branding',
];

/**
 * Map from the lowercase spelling of each format to its canonical camelCase
 * spelling (e.g. "rawhtml" -> "rawHtml").
 *
 * This is a plain object, so it also inherits keys from Object.prototype;
 * lookups must be restricted to own properties.
 */
const FORMAT_MAP: Record<string, ScrapeFormat> = Object.fromEntries(
  VALID_FORMATS.map((f) => [f.toLowerCase(), f])
) as Record<string, ScrapeFormat>;

/**
 * Parse a comma-separated format string into an array of ScrapeFormat.
 *
 * Input is matched case-insensitively and surrounding whitespace is ignored;
 * empty segments (e.g. from a trailing comma) are dropped. Duplicates are
 * removed while preserving first-seen order.
 *
 * @param formatString - Comma-separated formats, e.g. "markdown,links,images"
 * @returns The canonical (camelCase) formats in the order first given; an
 *   empty array when the input contains no non-empty segments
 * @throws Error listing every unrecognized format (lowercased) together with
 *   the full list of valid formats
 */
export function parseFormats(formatString: string): ScrapeFormat[] {
  const inputFormats = formatString
    .split(',')
    .map((f) => f.trim().toLowerCase())
    .filter((f) => f.length > 0);

  // Validate and map to correct casing
  const invalidFormats: string[] = [];
  const validFormats: ScrapeFormat[] = [];

  for (const input of inputFormats) {
    // Only consult own keys: a bare `FORMAT_MAP[input]` would also resolve
    // inherited names such as "constructor" or "__proto__" to truthy
    // non-string values and let them through as "valid" formats.
    const mapped = Object.prototype.hasOwnProperty.call(FORMAT_MAP, input)
      ? FORMAT_MAP[input]
      : undefined;

    if (mapped !== undefined) {
      validFormats.push(mapped);
    } else {
      invalidFormats.push(input);
    }
  }

  if (invalidFormats.length > 0) {
    throw new Error(
      `Invalid format(s): ${invalidFormats.join(', ')}. Valid formats are: ${VALID_FORMATS.join(', ')}`
    );
  }

  // Remove duplicates while preserving order
  return [...new Set(validFormats)];
}

/**
* Convert commander options to ScrapeOptions
*/
export function parseScrapeOptions(options: any): ScrapeOptions {
// Parse formats from comma-separated string
let formats: ScrapeFormat[] | undefined;
if (options.format) {
formats = parseFormats(options.format);
}

return {
url: options.url,
format: options.format,
formats,
onlyMainContent: options.onlyMainContent,
waitFor: options.waitFor,
screenshot: options.screenshot,
Expand Down
Loading