Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 52 additions & 1 deletion src/cwv/cwv-audit-result.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,40 @@ const DAILY_THRESHOLD = 1000; // pageviews
const INTERVAL = 7; // days
// The number of top pages with issues that will be included in the report
const TOP_PAGES_COUNT = 15;
const HEAD_REQUEST_TIMEOUT_MS = 10000;

/**
* Performs a HEAD request to the URL and returns true if the response is 4xx
* or if the request fails (timeout/network). Used to skip such URLs from CWV opportunities.
* @param {string} url - The URL to check
* @param {Object} log - Logger instance
* @returns {Promise<boolean>} True if URL should be skipped (4xx or request failed)
*/
export async function isUrl4xxOrFailed(url, log) {
const controller = new AbortController();
const timeoutId = setTimeout(() => controller.abort(), HEAD_REQUEST_TIMEOUT_MS);
try {
const response = await fetch(url, {
method: 'HEAD',
redirect: 'follow',
signal: controller.signal,
headers: {
'User-Agent': 'Mozilla/5.0 (compatible; Spacecat-Audit/1.0)',
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if we should maybe centralize the user agents we use somewhere so everybody uses the same one.

I also noticed that the internal links also use a HEAD request approach to validate links:

async function checkLinkWithHead(url, log) {

Maybe worth thinking about creating a more centralized helper for both actually to avoid code duplication

},
});
clearTimeout(timeoutId);
const { status } = response;
if (status >= 400 && status < 500) {
log.debug(`[audit-worker-cwv] Skipping URL (4xx): ${url} status=${status}`);
return true;
}
return false;
} catch (err) {
clearTimeout(timeoutId);
log.debug(`[audit-worker-cwv] Skipping URL (HEAD failed): ${url} error=${err.message}`);
return true;
}
}

/**
* Checks if a CWV data entry URL matches a site baseURL.
Expand Down Expand Up @@ -87,9 +121,26 @@ export async function buildCWVAuditResult(context) {
+ `Pages above threshold: ${stats.thresholdCount}`,
);

// Exclude URL entries that return 4xx or HEAD failed from becoming opportunities
const urlEntries = filteredCwvData.filter((entry) => entry.type === 'url');
const skipFlags = await Promise.all(
urlEntries.map((entry) => isUrl4xxOrFailed(entry.url, log)),
);
const urlsToSkip = new Set(
urlEntries.filter((_, i) => skipFlags[i]).map((e) => e.url),
);
const after4xxFilter = filteredCwvData.filter(
(entry) => entry.type !== 'url' || !urlsToSkip.has(entry.url),
);
if (urlsToSkip.size > 0) {
log.info(
`[audit-worker-cwv] siteId: ${siteId} | Excluded ${urlsToSkip.size} URL(s) (4xx or HEAD failed): ${[...urlsToSkip].join(', ')}`,
);
}

return {
auditResult: {
cwv: filteredCwvData,
cwv: after4xxFilter,
auditContext: {
interval: INTERVAL,
},
Expand Down
5 changes: 2 additions & 3 deletions src/cwv/opportunity-sync.js
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,8 @@ export async function syncOpportunitiesAndSuggestions(context) {

// Sync suggestions
const buildKey = (data) => (data.type === 'url' ? data.url : data.pattern);
const maxOrganicForUrls = Math.max(
...auditResult.cwv.filter((entry) => entry.type === 'url').map((entry) => entry.pageviews),
);
const urlPageviews = auditResult.cwv.filter((entry) => entry.type === 'url').map((entry) => entry.pageviews);
const maxOrganicForUrls = urlPageviews.length > 0 ? Math.max(...urlPageviews) : 0;

await syncSuggestions({
opportunity,
Expand Down
42 changes: 41 additions & 1 deletion test/audits/cwv.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -82,10 +82,13 @@ describe('collectCWVDataAndImportCode Tests', () => {

beforeEach(() => {
context.rumApiClient.query = sandbox.stub().resolves(rumData);
// Stub fetch so 4xx filter does not remove URLs (HEAD returns 200)
sandbox.stub(globalThis, 'fetch').resolves({ status: 200 });
});

afterEach(() => {
nock.cleanAll();
sandbox.restore();
sinon.restore();
site.getDeliveryConfig.reset();
});
Expand Down Expand Up @@ -284,7 +287,7 @@ describe('collectCWVDataAndImportCode Tests', () => {
expect(homepageInResult.pageviews).to.equal(50);
});

it('does not treat grouped URLs as homepage', async () => {
it('does not treat grouped URLs as homepage', async () => {
const groupedData = {
type: 'group', // Not 'url' - should not match homepage logic
// url field is absent for grouped entries (they have pattern instead)
Expand Down Expand Up @@ -424,6 +427,43 @@ describe('collectCWVDataAndImportCode Tests', () => {
expect(suggestionsArg).to.be.an('array').with.lengthOf(4);
});

it('handles audit result with only group entries for maxOrganicForUrls coverage', async () => {
context.dataAccess.Opportunity.allBySiteIdAndStatus.resolves([]);
context.dataAccess.Opportunity.create.resolves(oppty);
sinon.stub(GoogleClient, 'createFrom').resolves({});

const auditResultWithGroupsOnly = {
cwv: [
{
type: 'group',
pattern: 'https://example.com/*',
name: 'Some pages',
pageviews: 5000,
organic: 3000,
metrics: [],
},
],
auditContext: { interval: 7 },
};
const mockAuditGroupsOnly = {
getSiteId: () => 'site-id',
getId: () => 'audit-id',
getAuditType: () => Audit.AUDIT_TYPES.CWV,
getAuditResult: () => auditResultWithGroupsOnly,
getFullAuditRef: () => auditUrl,
getAuditedAt: () => '2023-11-27T12:34:56.789Z',
};

const stepContext = { ...context, site, audit: mockAuditGroupsOnly, finalUrl: auditUrl };
await syncOpportunityAndSuggestionsStep(stepContext);

expect(context.dataAccess.Opportunity.create).to.have.been.calledOnce;
expect(oppty.addSuggestions).to.have.been.calledOnce;
const suggestionsArg = oppty.addSuggestions.getCall(0).args[0];
expect(suggestionsArg).to.have.lengthOf(1);
expect(suggestionsArg[0].data.type).to.equal('group');
});

it('creating a new opportunity object fails', async () => {
context.dataAccess.Opportunity.allBySiteIdAndStatus.resolves([]);
context.dataAccess.Opportunity.create.rejects(new Error('big error happened'));
Expand Down
138 changes: 138 additions & 0 deletions test/audits/cwv/cwv-audit-result.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
/*
* Copyright 2025 Adobe. All rights reserved.
* This file is licensed to you under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. You may obtain a copy
* of the License at http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under
* the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS
* OF ANY KIND, either express or implied. See the License for the specific language
* governing permissions and limitations under the License.
*/

/* eslint-env mocha */

import { expect } from 'chai';
import sinon from 'sinon';
import esmock from 'esmock';
import { buildCWVAuditResult, isUrl4xxOrFailed } from '../../../src/cwv/cwv-audit-result.js';

describe('CWV Audit Result', () => {
const sandbox = sinon.createSandbox();
let fetchStub;
let log;

beforeEach(() => {
log = {
info: sandbox.stub(),
debug: sandbox.stub(),
warn: sandbox.stub(),
error: sandbox.stub(),
};
fetchStub = sandbox.stub(globalThis, 'fetch');
});

afterEach(() => {
sandbox.restore();
});

describe('isUrl4xxOrFailed', () => {
it('returns true when response status is 404', async () => {
fetchStub.resolves({ status: 404 });
const result = await isUrl4xxOrFailed('https://example.com/404', log);
expect(result).to.be.true;
expect(fetchStub.calledOnceWith('https://example.com/404', sinon.match.has('method', 'HEAD'))).to.be.true;
});

it('returns true when response status is 403', async () => {
fetchStub.resolves({ status: 403 });
const result = await isUrl4xxOrFailed('https://example.com/forbidden', log);
expect(result).to.be.true;
});

it('returns true when response status is 410', async () => {
fetchStub.resolves({ status: 410 });
const result = await isUrl4xxOrFailed('https://example.com/gone', log);
expect(result).to.be.true;
});

it('returns false when response status is 200', async () => {
fetchStub.resolves({ status: 200 });
const result = await isUrl4xxOrFailed('https://example.com/ok', log);
expect(result).to.be.false;
});

it('returns false when response status is 500 (5xx not 4xx)', async () => {
fetchStub.resolves({ status: 500 });
const result = await isUrl4xxOrFailed('https://example.com/error', log);
expect(result).to.be.false;
});

it('returns true when fetch throws (e.g. timeout)', async () => {
fetchStub.rejects(new Error('network error'));
const result = await isUrl4xxOrFailed('https://example.com/timeout', log);
expect(result).to.be.true;
});
});

describe('buildCWVAuditResult', () => {
it('excludes URL entries that return 4xx from audit result', async () => {
const cwvDataFromRum = [
{ type: 'url', url: 'https://www.lexmark.com/ok', pageviews: 10000, organic: 5000, metrics: [] },
{ type: 'url', url: 'https://www.lexmark.com/etc.clientlibs/bad', pageviews: 8000, organic: 4000, metrics: [] },
];
const mockRumClient = { query: sandbox.stub().resolves(cwvDataFromRum) };
const mockRumClientClass = { createFrom: sandbox.stub().returns(mockRumClient) };

fetchStub
.onFirstCall().resolves({ status: 200 })
.onSecondCall().resolves({ status: 404 });

const { buildCWVAuditResult: build } = await esmock('../../../src/cwv/cwv-audit-result.js', {
'@adobe/spacecat-shared-rum-api-client': { default: mockRumClientClass },
});

const site = {
getId: () => 'site-1',
getBaseURL: () => 'https://www.lexmark.com',
getConfig: () => ({ getGroupedURLs: () => [] }),
};
const context = { site, finalUrl: 'www.lexmark.com', log, env: {} };

const result = await build(context);

const cwvUrls = result.auditResult.cwv.filter((e) => e.type === 'url').map((e) => e.url);
expect(cwvUrls).to.include('https://www.lexmark.com/ok');
expect(cwvUrls).to.not.include('https://www.lexmark.com/etc.clientlibs/bad');
expect(result.auditResult.cwv).to.have.length(1);
});

it('keeps group entries without HEAD check', async () => {
const cwvDataFromRum = [
{ type: 'url', url: 'https://www.example.com/', pageviews: 10000, organic: 5000, metrics: [] },
{ type: 'group', pattern: '/some/*', name: 'Some pages', pageviews: 5000, organic: 3000, metrics: [] },
];
const mockRumClient = { query: sandbox.stub().resolves(cwvDataFromRum) };
const mockRumClientClass = { createFrom: sandbox.stub().returns(mockRumClient) };

fetchStub.resolves({ status: 200 });

const { buildCWVAuditResult: build } = await esmock('../../../src/cwv/cwv-audit-result.js', {
'@adobe/spacecat-shared-rum-api-client': { default: mockRumClientClass },
});

const site = {
getId: () => 'site-1',
getBaseURL: () => 'https://www.example.com',
getConfig: () => ({ getGroupedURLs: () => [] }),
};
const context = { site, finalUrl: 'www.example.com', log, env: {} };

const result = await build(context);

expect(result.auditResult.cwv).to.have.length(2);
expect(result.auditResult.cwv.find((e) => e.type === 'group')).to.exist;
expect(fetchStub.callCount).to.equal(1);
});
});
});