From 0c37374328839864cf06dabae8624c869250abdd Mon Sep 17 00:00:00 2001 From: Yaashi Madan Date: Tue, 10 Mar 2026 16:53:32 +0530 Subject: [PATCH 1/5] feat: add backfill support for llm-error-pages --- src/llm-error-pages/guidance-handler.js | 16 +- src/llm-error-pages/handler.js | 339 +++++++++++--------- src/llm-error-pages/utils.js | 52 ++- test/audits/llm-error-pages/handler.test.js | 41 ++- test/audits/llm-error-pages/utils.test.js | 22 ++ 5 files changed, 265 insertions(+), 205 deletions(-) diff --git a/src/llm-error-pages/guidance-handler.js b/src/llm-error-pages/guidance-handler.js index 1c1e70e6b4..0bdbf48486 100644 --- a/src/llm-error-pages/guidance-handler.js +++ b/src/llm-error-pages/guidance-handler.js @@ -17,12 +17,22 @@ import { } from '../utils/report-uploader.js'; import { generateReportingPeriods, - generatePeriodIdentifier, getS3Config, toPathOnly, SPREADSHEET_COLUMNS, } from './utils.js'; +function derivePeriodFromBrokenLinks(brokenLinks = []) { + const periodRegex = /llm-404-suggestion-(w\d{2}-\d{4})-/; + for (const link of brokenLinks) { + const match = periodRegex.exec(link.suggestionId || ''); + if (match && match[1]) { + return match[1]; + } + } + return null; +} + /** * Handles Mystique responses for LLM error pages and updates suggestions with AI data * @param {Object} message - Message from Mystique with AI suggestions @@ -53,8 +63,8 @@ export default async function handler(message, context) { // Read-modify-write the weekly 404 Excel file in SharePoint try { const sharepointClient = await createLLMOSharepointClient(context); - const week = generateReportingPeriods().weeks[0]; - const derivedPeriod = generatePeriodIdentifier(week.startDate, week.endDate); + const derivedPeriod = derivePeriodFromBrokenLinks(brokenLinks) + || generateReportingPeriods(new Date(), [-1]).weeks[0].periodIdentifier; const llmoFolder = site.getConfig()?.getLlmoDataFolder?.() || s3Config.customerName; const outputDir = `${llmoFolder}/agentic-traffic`; const filename = `agentictraffic-errors-404-${derivedPeriod}.xlsx`; diff --git a/src/llm-error-pages/handler.js b/src/llm-error-pages/handler.js index 6ebc55576f..36d601dfb0 100644 --- a/src/llm-error-pages/handler.js +++ b/src/llm-error-pages/handler.js @@ -17,7 +17,6 @@ import { AuditBuilder } from '../common/audit-builder.js'; import { getS3Config, generateReportingPeriods, - generatePeriodIdentifier, processErrorPagesResults, buildLlmErrorPagesQuery, getAllLlmProviders, @@ -126,10 +125,12 @@ export async function submitForScraping(context) { } /** - * Step 3: Run audit, generate Excel reports, and send to Mystique + * Step 3: Run audit, generate Excel reports, and send to Mystique. + * Supports multiple weeks via auditContext.weekOffset for backfill. */ +/* eslint-disable no-await-in-loop */ export async function runAuditAndSendToMystique(context) { - const { log, site } = context; + const { log, site, auditContext = {} } = context; const s3Config = await getS3Config(site, context); const url = site.getBaseURL(); @@ -138,160 +139,169 @@ export async function runAuditAndSendToMystique(context) { try { const athenaClient = AWSAthenaClient.fromContext(context, s3Config.getAthenaTempLocation()); - const week = generateReportingPeriods().weeks[0]; - const { startDate, endDate } = week; - const periodIdentifier = generatePeriodIdentifier(startDate, endDate); - log.info(`[LLM-ERROR-PAGES] Running weekly audit for ${periodIdentifier}`); - - // Get site configuration - const filters = site.getConfig()?.getLlmoCdnlogsFilter?.() || []; - const siteFilters = buildSiteFilters(filters, site); - - // Build and execute query - const query = await buildLlmErrorPagesQuery({ - databaseName: s3Config.databaseName, - tableName: s3Config.tableName, - startDate, - endDate, - llmProviders: getAllLlmProviders(), - siteFilters, - site, - }); - - log.info('[LLM-ERROR-PAGES] Executing query...'); - const sqlQueryDescription = '[Athena Query] LLM error pages analysis'; - const results = await athenaClient.query( - query, - s3Config.databaseName, - sqlQueryDescription, - ); - - // Process results - const processedResults = processErrorPagesResults(results); - const categorizedResults = categorizeErrorsByStatusCode(processedResults.errorPages); - - // Prepare SharePoint client and output location - const sharepointClient = await createLLMOSharepointClient(context); - const llmoFolder = site.getConfig()?.getLlmoDataFolder?.() || s3Config.customerName; - const outputLocation = `${llmoFolder}/agentic-traffic`; - - const buildFilename = (code) => `agentictraffic-errors-${code}-${periodIdentifier}.xlsx`; - - const writeCategoryExcel = async (code, errors) => { - if (!errors || errors.length === 0) return; - - /* c8 ignore next */ - const sorted = [...errors].sort((a, b) => (b.total_requests || 0) - (a.total_requests || 0)); - - const workbook = new ExcelJS.Workbook(); - const sheet = workbook.addWorksheet('data'); - sheet.addRow(SPREADSHEET_COLUMNS); - - sorted.forEach((e) => { - sheet.addRow([ - e.agent_type || '', - e.user_agent || '', - e.total_requests || 0, - e.avg_ttfb_ms ?? '', - /* c8 ignore next */ - validateCountryCode(e.country_code), - /* c8 ignore next */ - e.url || '', - e.product || '', - e.category || '', - '', - '', - '', - ]); - }); + const isMonday = new Date().getUTCDay() === 1; + let weekOffsets; + if (auditContext.weekOffset !== undefined) { + weekOffsets = [auditContext.weekOffset]; + } else if (isMonday) { + weekOffsets = [-1, 0]; + } else { + weekOffsets = [0]; + } + + const { weeks } = generateReportingPeriods(new Date(), weekOffsets); + const auditResults = []; + + for (const week of weeks) { + const { startDate, endDate, periodIdentifier } = week; + log.info(`[LLM-ERROR-PAGES] Running weekly audit for ${periodIdentifier}`); - const filename = buildFilename(code); - await saveExcelReport({ - workbook, - outputLocation, - log, - sharepointClient, - filename, + const filters = site.getConfig()?.getLlmoCdnlogsFilter?.() || []; + const siteFilters = buildSiteFilters(filters, site); + + const query = await buildLlmErrorPagesQuery({ + databaseName: s3Config.databaseName, + tableName: s3Config.tableName, + startDate, + endDate, + llmProviders: getAllLlmProviders(), + siteFilters, + site, }); - log.info(`[LLM-ERROR-PAGES] Uploaded Excel for ${code}: ${filename} (${sorted.length} rows)`); - }; - // Generate and upload Excel files for each category - await Promise.all([ - writeCategoryExcel('404', categorizedResults[404]?.slice(0, 50)), - writeCategoryExcel('403', categorizedResults[403]), - writeCategoryExcel('5xx', categorizedResults['5xx']), - ]); - - log.info(`[LLM-ERROR-PAGES] Found ${processedResults.totalErrors} total errors across ${processedResults.summary.uniqueUrls} unique URLs`); - - // Send to Mystique if configured - const { - dataAccess, sqs, env, audit, - } = context; - - if (sqs && env?.QUEUE_SPACECAT_TO_MYSTIQUE) { - const errors404 = categorizedResults[404] || []; - - if (errors404.length > 0) { - const messageBaseUrl = site.getBaseURL?.() || ''; - const consolidated404 = consolidateErrorsByUrl(errors404); - const sorted404 = sortErrorsByTrafficVolume(consolidated404).slice(0, 50); - - // Try to get top agentic URLs from Athena first for alternative URLs - let alternativeUrls = await getTopAgenticUrlsFromAthena(site, context); - - // Fallback to Ahrefs if Athena returns no data - if (!alternativeUrls || alternativeUrls.length === 0) { - log.info('[LLM-ERROR-PAGES] No agentic URLs from Athena, falling back to Ahrefs'); - const { SiteTopPage } = dataAccess; - const topPages = await SiteTopPage.allBySiteIdAndSourceAndGeo(site.getId(), 'ahrefs', 'global'); - alternativeUrls = topPages.map((page) => page.getUrl()); - } + log.info('[LLM-ERROR-PAGES] Executing query...'); + const sqlQueryDescription = '[Athena Query] LLM error pages analysis'; + const results = await athenaClient.query( + query, + s3Config.databaseName, + sqlQueryDescription, + ); + + const processedResults = processErrorPagesResults(results); + const categorizedResults = categorizeErrorsByStatusCode(processedResults.errorPages); + + const sharepointClient = await createLLMOSharepointClient(context); + const llmoFolder = site.getConfig()?.getLlmoDataFolder?.() || s3Config.customerName; + const outputLocation = `${llmoFolder}/agentic-traffic`; + + const buildFilename = (code) => `agentictraffic-errors-${code}-${periodIdentifier}.xlsx`; + + const writeCategoryExcel = async (code, errors) => { + if (!errors || errors.length === 0) return; + + /* c8 ignore next 2 */ + const sorted = [...errors].sort( + (a, b) => (b.total_requests || 0) - (a.total_requests || 0), + ); + + const workbook = new ExcelJS.Workbook(); + const sheet = workbook.addWorksheet('data'); + sheet.addRow(SPREADSHEET_COLUMNS); + + sorted.forEach((e) => { + sheet.addRow([ + e.agent_type || '', + e.user_agent || '', + e.total_requests || 0, + e.avg_ttfb_ms ?? '', + /* c8 ignore next */ + validateCountryCode(e.country_code), + /* c8 ignore next */ + e.url || '', + e.product || '', + e.category || '', + '', + '', + '', + ]); + }); - // Consolidate by URL and combine user agents - const urlToUserAgentsMap = new Map(); - sorted404.forEach((errorPage) => { - const path = toPathOnly(errorPage.url, messageBaseUrl); - const fullUrl = messageBaseUrl ? new URL(path, messageBaseUrl).toString() : path; - if (!urlToUserAgentsMap.has(fullUrl)) { - urlToUserAgentsMap.set(fullUrl, new Set()); - } - urlToUserAgentsMap.get(fullUrl).add(errorPage.userAgent); + const filename = buildFilename(code); + await saveExcelReport({ + workbook, + outputLocation, + log, + sharepointClient, + filename, }); + log.info(`[LLM-ERROR-PAGES] Uploaded Excel for ${code}: ${filename} (${sorted.length} rows)`); + }; + + await Promise.all([ + writeCategoryExcel('404', categorizedResults[404]?.slice(0, 50)), + writeCategoryExcel('403', categorizedResults[403]), + writeCategoryExcel('5xx', categorizedResults['5xx']), + ]); + + log.info( + '[LLM-ERROR-PAGES] Found %d total errors across %d unique URLs', + processedResults.totalErrors, + processedResults.summary.uniqueUrls, + ); + + const { + dataAccess, sqs, env, audit, + } = context; + + if (sqs && env?.QUEUE_SPACECAT_TO_MYSTIQUE) { + const errors404 = categorizedResults[404] || []; + + if (errors404.length > 0) { + const messageBaseUrl = site.getBaseURL?.() || ''; + const consolidated404 = consolidateErrorsByUrl(errors404); + const sorted404 = sortErrorsByTrafficVolume(consolidated404).slice(0, 50); + + let alternativeUrls = await getTopAgenticUrlsFromAthena(site, context); + + if (!alternativeUrls || alternativeUrls.length === 0) { + log.info('[LLM-ERROR-PAGES] No agentic URLs from Athena, falling back to Ahrefs'); + const { SiteTopPage } = dataAccess; + const topPages = await SiteTopPage.allBySiteIdAndSourceAndGeo(site.getId(), 'ahrefs', 'global'); + alternativeUrls = topPages.map((page) => page.getUrl()); + } - const message = { - type: 'guidance:llm-error-pages', - siteId: site.getId(), - auditId: audit.getId() || 'llm-error-pages-audit', - deliveryType: site?.getDeliveryType?.() || 'aem_edge', - time: new Date().toISOString(), - data: { - brokenLinks: Array.from(urlToUserAgentsMap.entries()) - .map(([fullUrl, userAgents], index) => ({ - urlFrom: Array.from(userAgents).join(', '), - urlTo: fullUrl, - suggestionId: `llm-404-suggestion-${periodIdentifier}-${index}`, - })) - .filter((link) => link.urlFrom.length > 0), - alternativeUrls, - opportunityId: `llm-404-${periodIdentifier}`, - }, - }; - - await sqs.sendMessage(env.QUEUE_SPACECAT_TO_MYSTIQUE, message); - log.info(`[LLM-ERROR-PAGES] Sent ${urlToUserAgentsMap.size} consolidated 404 URLs to Mystique for AI processing`); + const urlToUserAgentsMap = new Map(); + sorted404.forEach((errorPage) => { + const path = toPathOnly(errorPage.url, messageBaseUrl); + const fullUrl = messageBaseUrl ? new URL(path, messageBaseUrl).toString() : path; + if (!urlToUserAgentsMap.has(fullUrl)) { + urlToUserAgentsMap.set(fullUrl, new Set()); + } + urlToUserAgentsMap.get(fullUrl).add(errorPage.userAgent); + }); + + const mystiqueMessage = { + type: 'guidance:llm-error-pages', + siteId: site.getId(), + auditId: audit?.getId() || 'llm-error-pages-audit', + deliveryType: site?.getDeliveryType?.() || 'aem_edge', + time: new Date().toISOString(), + data: { + brokenLinks: Array.from(urlToUserAgentsMap.entries()) + .map(([fullUrl, userAgents], index) => ({ + urlFrom: Array.from(userAgents).join(', '), + urlTo: fullUrl, + suggestionId: `llm-404-suggestion-${periodIdentifier}-${index}`, + })) + .filter((link) => link.urlFrom.length > 0), + alternativeUrls, + opportunityId: `llm-404-${periodIdentifier}`, + }, + }; + + await sqs.sendMessage(env.QUEUE_SPACECAT_TO_MYSTIQUE, mystiqueMessage); + log.info( + '[LLM-ERROR-PAGES] Sent %d consolidated 404 URLs to Mystique for AI processing', + urlToUserAgentsMap.size, + ); + } else { + log.warn('[LLM-ERROR-PAGES] No 404 errors found, skipping Mystique message'); + } } else { - log.warn('[LLM-ERROR-PAGES] No 404 errors found, skipping Mystique message'); + log.warn('[LLM-ERROR-PAGES] SQS or Mystique queue not configured, skipping message'); } - } else { - log.warn('[LLM-ERROR-PAGES] SQS or Mystique queue not configured, skipping message'); - } - return { - type: 'audit-result', - siteId: site.getId(), - auditResult: { + auditResults.push({ success: true, timestamp: new Date().toISOString(), periodIdentifier, @@ -306,7 +316,13 @@ export async function runAuditAndSendToMystique(context) { summary: processedResults.summary, errorPages: processedResults.errorPages, categorizedResults, - }, + }); + } + + return { + type: 'audit-result', + siteId: site.getId(), + auditResult: auditResults, fullAuditRef: url, }; } catch (error) { @@ -315,22 +331,41 @@ export async function runAuditAndSendToMystique(context) { return { type: 'audit-result', siteId: site.getId(), - auditResult: { + auditResult: [{ success: false, timestamp: new Date().toISOString(), error: error.message, database: s3Config?.databaseName, table: s3Config?.tableName, customer: s3Config?.customerName, - }, + }], fullAuditRef: url, }; } } +/* eslint-enable no-await-in-loop */ -export default new AuditBuilder() +const stepAudit = new AuditBuilder() .withUrlResolver(wwwUrlResolver) .addStep('import-top-pages', importTopPagesAndScrape, AUDIT_STEP_DESTINATIONS.IMPORT_WORKER) .addStep('submit-for-scraping', submitForScraping, AUDIT_STEP_DESTINATIONS.SCRAPE_CLIENT) .addStep('run-audit-and-send-to-mystique', runAuditAndSendToMystique) .build(); + +const backfillAudit = new AuditBuilder() + .withUrlResolver(wwwUrlResolver) + .withRunner(async (finalUrl, context, site, auditContext) => { + const enrichedContext = { ...context, site, auditContext }; + return runAuditAndSendToMystique(enrichedContext); + }) + .build(); + +export default { + run(message, context) { + const { auditContext = {} } = message; + if (auditContext.weekOffset !== undefined) { + return backfillAudit.run(message, context); + } + return stepAudit.run(message, context); + }, +}; diff --git a/src/llm-error-pages/utils.js b/src/llm-error-pages/utils.js index a945785f53..3bb7b61c09 100644 --- a/src/llm-error-pages/utils.js +++ b/src/llm-error-pages/utils.js @@ -10,7 +10,7 @@ * governing permissions and limitations under the License. */ -import { getStaticContent } from '@adobe/spacecat-shared-utils'; +import { getStaticContent, isoCalendarWeek } from '@adobe/spacecat-shared-utils'; import { resolveConsolidatedBucketName, extractCustomerDomain } from '../utils/cdn-utils.js'; import { buildUserAgentDisplaySQL, buildAgentTypeClassificationSQL } from '../common/user-agent-classification.js'; import { ELMO_LIVE_HOST } from '../common/constants.js'; @@ -268,14 +268,6 @@ export function formatDateString(date) { return date.toISOString().split('T')[0]; } -function getWeekNumber(date) { - const d = new Date(date); - d.setUTCHours(0, 0, 0, 0); - /* c8 ignore next */ - d.setUTCDate(d.getUTCDate() + 4 - (d.getUTCDay() || 7)); - const yearStart = new Date(Date.UTC(d.getUTCFullYear(), 0, 1)); - return Math.ceil(((d - yearStart) / 86400000 + 1) / 7); -} export function getWeekRange(offsetWeeks = 0, referenceDate = new Date()) { const refDate = new Date(referenceDate); @@ -318,36 +310,38 @@ export function generatePeriodIdentifier(startDate, endDate) { const diffDays = Math.ceil((endDate - startDate) / (24 * 60 * 60 * 1000)); if (diffDays === 7) { - const year = startDate.getUTCFullYear(); - const weekNum = getWeekNumber(startDate); + const { week: weekNum, year } = isoCalendarWeek(startDate); return `w${String(weekNum).padStart(2, '0')}-${year}`; } return `${start}_to_${end}`; } -export function generateReportingPeriods(referenceDate = new Date()) { - const { weekStart, weekEnd } = getWeekRange(-1, referenceDate); - - const weekNumber = getWeekNumber(weekStart); - const year = weekStart.getUTCFullYear(); - - const weeks = [{ - weekNumber, - year, - weekLabel: `Week ${weekNumber}`, - startDate: weekStart, - endDate: weekEnd, - dateRange: { - start: formatDateString(weekStart), - end: formatDateString(weekEnd), - }, - }]; +export function generateReportingPeriods(referenceDate = new Date(), weekOffsets = [-1]) { + const offsets = Array.isArray(weekOffsets) ? weekOffsets : [weekOffsets]; + + const weeks = offsets.map((offset) => { + const { weekStart, weekEnd } = getWeekRange(offset, referenceDate); + const { week: weekNumber, year } = isoCalendarWeek(weekStart); + + return { + weekNumber, + year, + weekLabel: `Week ${weekNumber}`, + startDate: weekStart, + endDate: weekEnd, + dateRange: { + start: formatDateString(weekStart), + end: formatDateString(weekEnd), + }, + periodIdentifier: `w${String(weekNumber).padStart(2, '0')}-${year}`, + }; + }); return { weeks, referenceDate: referenceDate.toISOString(), - columns: [`Week ${weekNumber}`], + columns: weeks.map((w) => w.weekLabel), }; } diff --git a/test/audits/llm-error-pages/handler.test.js b/test/audits/llm-error-pages/handler.test.js index e220ecdef9..b92a11e85a 100644 --- a/test/audits/llm-error-pages/handler.test.js +++ b/test/audits/llm-error-pages/handler.test.js @@ -110,6 +110,7 @@ describe('LLM Error Pages Handler', function () { year: 2025, startDate: new Date('2025-08-18T00:00:00Z'), endDate: new Date('2025-08-24T23:59:59Z'), + periodIdentifier: 'w34-2025', }], }); @@ -348,10 +349,10 @@ describe('LLM Error Pages Handler', function () { expect(result.type).to.equal('audit-result'); expect(result.siteId).to.equal('site-id-123'); - expect(result.auditResult.success).to.be.true; - expect(result.auditResult.periodIdentifier).to.match(/^w\d{2}-\d{4}$/); - expect(result.auditResult.totalErrors).to.equal(3); - expect(result.auditResult.categorizedResults).to.exist; + expect(result.auditResult[0].success).to.be.true; + expect(result.auditResult[0].periodIdentifier).to.match(/^w\d{2}-\d{4}$/); + expect(result.auditResult[0].totalErrors).to.equal(3); + expect(result.auditResult[0].categorizedResults).to.exist; expect(result.fullAuditRef).to.equal('https://example.com'); expect(context.log.info).to.have.been.calledWith('[LLM-ERROR-PAGES] Starting audit for https://example.com'); @@ -367,8 +368,8 @@ describe('LLM Error Pages Handler', function () { const result = await runAuditAndSendToMystique(context); - expect(result.auditResult.success).to.be.false; - expect(result.auditResult.error).to.equal('Database error'); + expect(result.auditResult[0].success).to.be.false; + expect(result.auditResult[0].error).to.equal('Database error'); expect(context.log.error).to.have.been.calledWith( sinon.match(/\[LLM-ERROR-PAGES\] Audit failed: Database error/), sinon.match.instanceOf(Error), @@ -381,7 +382,7 @@ describe('LLM Error Pages Handler', function () { const result = await runAuditAndSendToMystique(context); - expect(result.auditResult.success).to.be.true; + expect(result.auditResult[0].success).to.be.true; expect(context.log.warn).to.have.been.calledWith( '[LLM-ERROR-PAGES] SQS or Mystique queue not configured, skipping message', ); @@ -392,7 +393,7 @@ describe('LLM Error Pages Handler', function () { const result = await runAuditAndSendToMystique(context); - expect(result.auditResult.success).to.be.true; + expect(result.auditResult[0].success).to.be.true; expect(context.log.warn).to.have.been.calledWith( '[LLM-ERROR-PAGES] SQS or Mystique queue not configured, skipping message', ); @@ -407,7 +408,7 @@ describe('LLM Error Pages Handler', function () { const result = await runAuditAndSendToMystique(context); - expect(result.auditResult.success).to.be.true; + expect(result.auditResult[0].success).to.be.true; expect(context.log.warn).to.have.been.calledWith( '[LLM-ERROR-PAGES] No 404 errors found, skipping Mystique message', ); @@ -431,7 +432,7 @@ describe('LLM Error Pages Handler', function () { const result = await runAuditAndSendToMystique(context); - expect(result.auditResult.success).to.be.true; + expect(result.auditResult[0].success).to.be.true; expect(context.sqs.sendMessage).to.have.been.calledOnce; const [, message] = context.sqs.sendMessage.firstCall.args; expect(message.data.brokenLinks.length).to.be.at.most(50); @@ -485,7 +486,7 @@ describe('LLM Error Pages Handler', function () { const result = await runAuditAndSendToMystique(context); - expect(result.auditResult.success).to.be.true; + expect(result.auditResult[0].success).to.be.true; expect(mockSaveExcelReport).to.have.been.calledThrice; }); @@ -566,7 +567,7 @@ describe('LLM Error Pages Handler', function () { const result = await runAuditAndSendToMystique(context); - expect(result.auditResult.success).to.be.true; + expect(result.auditResult[0].success).to.be.true; expect(mockBuildSiteFilters).to.have.been.calledWith([], site); }); @@ -575,7 +576,7 @@ describe('LLM Error Pages Handler', function () { const result = await runAuditAndSendToMystique(context); - expect(result.auditResult.success).to.be.true; + expect(result.auditResult[0].success).to.be.true; }); it('should use fallback delivery type when not available', async () => { @@ -643,7 +644,7 @@ describe('LLM Error Pages Handler', function () { const result = await runAuditAndSendToMystique(context); - expect(result.auditResult.success).to.be.true; + expect(result.auditResult[0].success).to.be.true; expect(context.log.warn).to.have.been.calledWith( '[LLM-ERROR-PAGES] No 404 errors found, skipping Mystique message', ); @@ -867,7 +868,7 @@ describe('LLM Error Pages Handler - Athena/Ahrefs fallback', function () { bucket: 'test', customerName: 'test', databaseName: 'test_db', tableName: 'test_table', getAthenaTempLocation: () => 's3://test/temp/', }), - generateReportingPeriods: sandbox.stub().returns({ weeks: [{ weekNumber: 1, year: 2025, startDate: new Date(), endDate: new Date() }] }), + generateReportingPeriods: sandbox.stub().returns({ weeks: [{ weekNumber: 1, year: 2025, startDate: new Date(), endDate: new Date(), periodIdentifier: 'w01-2025' }] }), processErrorPagesResults: mockProcessResults, buildLlmErrorPagesQuery: sandbox.stub().resolves('SELECT'), getAllLlmProviders: sandbox.stub().returns([]), @@ -907,14 +908,12 @@ describe('LLM Error Pages Handler - Athena/Ahrefs fallback', function () { const result = await handler.runAuditAndSendToMystique(context); - expect(result.auditResult.success).to.be.true; - // Verify Athena URLs were used + expect(result.auditResult[0].success).to.be.true; const sentMessage = context.sqs.sendMessage.firstCall.args[1]; expect(sentMessage.data.alternativeUrls).to.deep.equal([ 'https://example.com/athena-alt1', 'https://example.com/athena-alt2', ]); - // Ahrefs was NOT called expect(context.dataAccess.SiteTopPage.allBySiteIdAndSourceAndGeo).to.not.have.been.called; }); @@ -946,7 +945,7 @@ describe('LLM Error Pages Handler - Athena/Ahrefs fallback', function () { bucket: 'test', customerName: 'test', databaseName: 'test_db', tableName: 'test_table', getAthenaTempLocation: () => 's3://test/temp/', }), - generateReportingPeriods: sandbox.stub().returns({ weeks: [{ weekNumber: 1, year: 2025, startDate: new Date(), endDate: new Date() }] }), + generateReportingPeriods: sandbox.stub().returns({ weeks: [{ weekNumber: 1, year: 2025, startDate: new Date(), endDate: new Date(), periodIdentifier: 'w01-2025' }] }), processErrorPagesResults: mockProcessResults, buildLlmErrorPagesQuery: sandbox.stub().resolves('SELECT'), getAllLlmProviders: sandbox.stub().returns([]), @@ -988,8 +987,7 @@ describe('LLM Error Pages Handler - Athena/Ahrefs fallback', function () { const result = await handler.runAuditAndSendToMystique(context); - expect(result.auditResult.success).to.be.true; - // Verify Ahrefs URLs were used as fallback + expect(result.auditResult[0].success).to.be.true; const sentMessage = context.sqs.sendMessage.firstCall.args[1]; expect(sentMessage.data.alternativeUrls).to.deep.equal(['https://example.com/ahrefs-alt1']); expect(context.log.info).to.have.been.calledWith( @@ -1044,6 +1042,7 @@ describe('LLM Error Pages Handler (isolated)', function () { year: 2025, startDate: new Date('2025-08-18T00:00:00Z'), endDate: new Date('2025-08-24T23:59:59Z'), + periodIdentifier: 'w34-2025', }], }); const mockBuildSiteFilters = sandbox.stub().returns(''); diff --git a/test/audits/llm-error-pages/utils.test.js b/test/audits/llm-error-pages/utils.test.js index f555b2b603..581bd57fd1 100644 --- a/test/audits/llm-error-pages/utils.test.js +++ b/test/audits/llm-error-pages/utils.test.js @@ -856,9 +856,31 @@ describe('LLM Error Pages Utils', () => { expect(result.weeks[0]).to.have.property('startDate'); expect(result.weeks[0]).to.have.property('endDate'); expect(result.weeks[0]).to.have.property('dateRange'); + expect(result.weeks[0]).to.have.property('periodIdentifier'); + expect(result.weeks[0].periodIdentifier).to.match(/^w\d{2}-\d{4}$/); expect(result.referenceDate).to.equal(referenceDate.toISOString()); expect(result.columns).to.be.an('array'); }); + + it('should generate multiple weeks with weekOffsets array', () => { + const referenceDate = new Date('2024-01-15'); + const result = generateReportingPeriods(referenceDate, [-2, -1, 0]); + + expect(result.weeks).to.have.length(3); + result.weeks.forEach((week) => { + expect(week).to.have.property('periodIdentifier'); + expect(week.periodIdentifier).to.match(/^w\d{2}-\d{4}$/); + }); + expect(result.columns).to.have.length(3); + }); + + it('should accept a single numeric weekOffset', () => { + const referenceDate = new Date('2024-01-15'); + const result = generateReportingPeriods(referenceDate, -2); + + expect(result.weeks).to.have.length(1); + expect(result.weeks[0]).to.have.property('periodIdentifier'); + }); }); // ============================================================================ From 5ed5060652132f59b67b72cb5f1534a398b4e11a Mon Sep 17 00:00:00 2001 From: Yaashi Madan Date: Tue, 10 Mar 2026 17:10:31 +0530 Subject: [PATCH 2/5] fix: lint issues --- src/llm-error-pages/utils.js | 1 - 1 file changed, 1 deletion(-) diff --git a/src/llm-error-pages/utils.js b/src/llm-error-pages/utils.js index 3bb7b61c09..acba8d0a51 100644 --- a/src/llm-error-pages/utils.js +++ b/src/llm-error-pages/utils.js @@ -268,7 +268,6 @@ export function formatDateString(date) { return date.toISOString().split('T')[0]; } - export function getWeekRange(offsetWeeks = 0, referenceDate = new Date()) { const refDate = new Date(referenceDate); const isSunday = refDate.getUTCDay() === TIME_CONSTANTS.ISO_SUNDAY; From 821fcccbe993e35cb604cba2b54fbfa003be51e8 Mon Sep 17 00:00:00 2001 From: Yaashi Madan Date: Tue, 10 Mar 2026 17:19:50 +0530 Subject: [PATCH 3/5] fix: added tests --- .../llm-error-pages/guidance-handler.test.js | 2 +- test/audits/llm-error-pages/handler.test.js | 129 ++++++++++++++++++ 2 files changed, 130 insertions(+), 1 deletion(-) diff --git a/test/audits/llm-error-pages/guidance-handler.test.js b/test/audits/llm-error-pages/guidance-handler.test.js index 0731a77050..d1add8b85e 100644 --- a/test/audits/llm-error-pages/guidance-handler.test.js +++ b/test/audits/llm-error-pages/guidance-handler.test.js @@ -66,6 +66,7 @@ describe('LLM Error Pages – guidance-handler (Excel upsert)', () => { urlTo: 'https://example.com/products/item', suggestedUrls: ['/products'], aiRationale: 'Closest match', + suggestionId: 'llm-404-suggestion-w34-2025-0', }], }, }; @@ -76,7 +77,6 @@ describe('LLM Error Pages – guidance-handler (Excel upsert)', () => { getId: () => 'test-site-id', getBaseURL: () => 'https://example.com', getConfig: () => ({ - // keep legacy for code paths that still read it getCdnLogsConfig: () => null, getLlmoDataFolder: () => 'test-customer', getLlmoCdnBucketConfig: () => ({ bucketName: 'test-bucket' }), diff --git a/test/audits/llm-error-pages/handler.test.js b/test/audits/llm-error-pages/handler.test.js index b92a11e85a..8270176f4d 100644 --- a/test/audits/llm-error-pages/handler.test.js +++ b/test/audits/llm-error-pages/handler.test.js @@ -363,6 +363,23 @@ describe('LLM Error Pages Handler', function () { ); }); + it('should produce two weeks when run on a Monday without weekOffset', async () => { + const monday = new Date('2025-08-18T12:00:00Z'); // Monday + const clock = sinon.useFakeTimers(monday.getTime()); + try { + mockGenerateReportingPeriods.returns({ + weeks: [ + { weekNumber: 33, year: 2025, startDate: new Date('2025-08-11'), endDate: new Date('2025-08-17'), periodIdentifier: 'w33-2025' }, + { weekNumber: 34, year: 2025, startDate: new Date('2025-08-18'), endDate: new Date('2025-08-24'), periodIdentifier: 'w34-2025' }, + ], + }); + const result = await runAuditAndSendToMystique(context); + expect(result.auditResult).to.have.length(2); + } finally { + clock.restore(); + } + }); + it('should handle audit failure gracefully', async () => { mockAthenaClient.query.rejects(new Error('Database error')); @@ -1121,3 +1138,115 @@ describe('LLM Error Pages Handler (isolated)', function () { sinon.assert.match(dataRows[1][2], 0); }); }); + +describe('LLM Error Pages Handler – default export routing', function () { + this.timeout(10000); + + it('routes to backfillAudit when weekOffset is present', async () => { + const sandbox = sinon.createSandbox(); + const mockBackfillRun = sandbox.stub().resolves({ status: 200 }); + const mockStepRun = sandbox.stub().resolves({ status: 200 }); + + const handler = await esmock('../../../src/llm-error-pages/handler.js', { + '../../../src/common/audit-builder.js': { + AuditBuilder: class AuditBuilder { + withUrlResolver() { return this; } + addStep() { return this; } + withRunner() { this._runner = true; return this; } + build() { return { run: this._runner ? mockBackfillRun : mockStepRun }; } + }, + }, + }); + + await handler.default.run({ auditContext: { weekOffset: -1 } }, {}); + expect(mockBackfillRun).to.have.been.calledOnce; + expect(mockStepRun).not.to.have.been.called; + + sandbox.restore(); + }); + + it('routes to stepAudit when weekOffset is absent', async () => { + const sandbox = sinon.createSandbox(); + const mockBackfillRun = sandbox.stub().resolves({ status: 200 }); + const mockStepRun = sandbox.stub().resolves({ status: 200 }); + + const handler = await esmock('../../../src/llm-error-pages/handler.js', { + '../../../src/common/audit-builder.js': { + AuditBuilder: class AuditBuilder { + withUrlResolver() { return this; } + addStep() { return this; } + withRunner() { this._runner = true; return this; } + build() { return { run: this._runner ? mockBackfillRun : mockStepRun }; } + }, + }, + }); + + await handler.default.run({}, {}); + expect(mockStepRun).to.have.been.calledOnce; + expect(mockBackfillRun).not.to.have.been.called; + + sandbox.restore(); + }); + + it('backfillAudit runner calls runAuditAndSendToMystique with enriched context', async () => { + const sandbox = sinon.createSandbox(); + let capturedRunner; + + const handler = await esmock('../../../src/llm-error-pages/handler.js', { + '../../../src/common/audit-builder.js': { + AuditBuilder: class AuditBuilder { + withUrlResolver() { return this; } + addStep() { return this; } + withRunner(runner) { capturedRunner = runner; return this; } + build() { return { run: sandbox.stub() }; } + }, + }, + '@adobe/spacecat-shared-athena-client': { + AWSAthenaClient: { fromContext: sandbox.stub().returns({ query: sandbox.stub().resolves([]) }) }, + }, + '../../../src/llm-error-pages/utils.js': { + getS3Config: sandbox.stub().resolves({ + databaseName: 'db', tableName: 'tbl', customerName: 'test', + getAthenaTempLocation: () => 's3://tmp/', + }), + generateReportingPeriods: sandbox.stub().returns({ + weeks: [{ startDate: new Date(), endDate: new Date(), periodIdentifier: 'w01-2025' }], + }), + processErrorPagesResults: sandbox.stub().returns({ totalErrors: 0, errorPages: [], summary: { uniqueUrls: 0 } }), + buildLlmErrorPagesQuery: sandbox.stub().resolves('SELECT'), + getAllLlmProviders: sandbox.stub().returns([]), + categorizeErrorsByStatusCode: sandbox.stub().returns({}), + consolidateErrorsByUrl: (e) => e, + sortErrorsByTrafficVolume: (e) => e, + toPathOnly: (u) => u, + SPREADSHEET_COLUMNS: [], + }, + '../../../src/utils/report-uploader.js': { + createLLMOSharepointClient: sandbox.stub().resolves({}), + saveExcelReport: sandbox.stub().resolves(), + }, + '../../../src/utils/cdn-utils.js': { + buildSiteFilters: sandbox.stub().returns(''), + }, + exceljs: { + default: { Workbook: function Workbook() { return { addWorksheet: () => ({ addRow: sandbox.stub() }) }; } }, + }, + }); + + expect(capturedRunner).to.be.a('function'); + + const mockSite = { + getBaseURL: () => 'https://example.com', + getId: () => 'site-1', + getConfig: () => ({ getLlmoCdnlogsFilter: () => [], getLlmoDataFolder: () => 'test' }), + }; + const mockContext = { + log: { info: sandbox.stub(), warn: sandbox.stub(), error: sandbox.stub() }, + }; + + const result = await capturedRunner('https://example.com', mockContext, mockSite, { weekOffset: -1 }); + expect(result).to.have.property('auditResult'); + + sandbox.restore(); + }); +}); From 7c3e44bc4b631b2635ebbca0c2616dc78e38a186 Mon Sep 17 00:00:00 2001 From: Yaashi Madan Date: Tue, 10 Mar 2026 17:38:11 +0530 Subject: [PATCH 4/5] updated tests --- src/llm-error-pages/handler.js | 39 +++++++++++++--------------------- 1 file changed, 15 insertions(+), 24 deletions(-) diff --git a/src/llm-error-pages/handler.js b/src/llm-error-pages/handler.js index 36d601dfb0..d053f78d8b 100644 --- a/src/llm-error-pages/handler.js +++ b/src/llm-error-pages/handler.js @@ -130,7 +130,7 @@ export async function submitForScraping(context) { */ /* eslint-disable no-await-in-loop */ export async function runAuditAndSendToMystique(context) { - const { log, site, auditContext = {} } = context; + const { log, site } = context; const s3Config = await getS3Config(site, context); const url = site.getBaseURL(); @@ -141,8 +141,8 @@ export async function runAuditAndSendToMystique(context) { const isMonday = new Date().getUTCDay() === 1; let weekOffsets; - if (auditContext.weekOffset !== undefined) { - weekOffsets = [auditContext.weekOffset]; + if (context.auditContext?.weekOffset !== undefined) { + weekOffsets = [context.auditContext.weekOffset]; } else if (isMonday) { weekOffsets = [-1, 0]; } else { @@ -152,13 +152,19 @@ export async function runAuditAndSendToMystique(context) { const { weeks } = generateReportingPeriods(new Date(), weekOffsets); const auditResults = []; + const filters = site.getConfig()?.getLlmoCdnlogsFilter?.() || []; + const siteFilters = buildSiteFilters(filters, site); + const sharepointClient = await createLLMOSharepointClient(context); + const llmoFolder = site.getConfig()?.getLlmoDataFolder?.() || s3Config.customerName; + const outputLocation = `${llmoFolder}/agentic-traffic`; + const { + dataAccess, sqs, env, audit, + } = context; + for (const week of weeks) { const { startDate, endDate, periodIdentifier } = week; log.info(`[LLM-ERROR-PAGES] Running weekly audit for ${periodIdentifier}`); - const filters = site.getConfig()?.getLlmoCdnlogsFilter?.() || []; - const siteFilters = buildSiteFilters(filters, site); - const query = await buildLlmErrorPagesQuery({ databaseName: s3Config.databaseName, tableName: s3Config.tableName, @@ -180,10 +186,6 @@ export async function runAuditAndSendToMystique(context) { const processedResults = processErrorPagesResults(results); const categorizedResults = categorizeErrorsByStatusCode(processedResults.errorPages); - const sharepointClient = await createLLMOSharepointClient(context); - const llmoFolder = site.getConfig()?.getLlmoDataFolder?.() || s3Config.customerName; - const outputLocation = `${llmoFolder}/agentic-traffic`; - const buildFilename = (code) => `agentictraffic-errors-${code}-${periodIdentifier}.xlsx`; const writeCategoryExcel = async (code, errors) => { @@ -233,15 +235,7 @@ export async function runAuditAndSendToMystique(context) { writeCategoryExcel('5xx', categorizedResults['5xx']), ]); - log.info( - '[LLM-ERROR-PAGES] Found %d total errors across %d unique URLs', - processedResults.totalErrors, - processedResults.summary.uniqueUrls, - ); - - const { - dataAccess, sqs, env, audit, - } = context; + log.info(`[LLM-ERROR-PAGES] Found ${processedResults.totalErrors} total errors across ${processedResults.summary.uniqueUrls} unique URLs`); if (sqs && env?.QUEUE_SPACECAT_TO_MYSTIQUE) { const errors404 = categorizedResults[404] || []; @@ -290,10 +284,7 @@ export async function runAuditAndSendToMystique(context) { }; await sqs.sendMessage(env.QUEUE_SPACECAT_TO_MYSTIQUE, mystiqueMessage); - log.info( - '[LLM-ERROR-PAGES] Sent %d consolidated 404 URLs to Mystique for AI processing', - urlToUserAgentsMap.size, - ); + log.info(`[LLM-ERROR-PAGES] Sent ${urlToUserAgentsMap.size} consolidated 404 URLs to Mystique for AI processing`); } else { log.warn('[LLM-ERROR-PAGES] No 404 errors found, skipping Mystique message'); } @@ -354,7 +345,7 @@ const stepAudit = new AuditBuilder() const backfillAudit = new AuditBuilder() .withUrlResolver(wwwUrlResolver) - .withRunner(async (finalUrl, context, site, auditContext) => { + .withRunner(async (_finalUrl, context, site, auditContext) => { const enrichedContext = { ...context, site, auditContext }; return runAuditAndSendToMystique(enrichedContext); }) From cc4d06f5b65ddda147b7455631255362ff1bb2ba Mon Sep 17 00:00:00 2001 From: Yaashi Madan Date: Wed, 11 Mar 2026 12:09:01 +0530 Subject: [PATCH 5/5] fix: added tests to resolve comments --- .../llm-error-pages/guidance-handler.test.js | 52 +++++++++++++++++++ test/audits/llm-error-pages/handler.test.js | 1 + 2 files changed, 53 insertions(+) diff --git a/test/audits/llm-error-pages/guidance-handler.test.js b/test/audits/llm-error-pages/guidance-handler.test.js index d1add8b85e..0ad47b0b3d 100644 --- a/test/audits/llm-error-pages/guidance-handler.test.js +++ b/test/audits/llm-error-pages/guidance-handler.test.js @@ -275,6 +275,58 @@ describe('LLM Error Pages – guidance-handler (Excel upsert)', () => { expect(resp.status).to.equal(200); }); + it('falls back to generateReportingPeriods when brokenLinks have no suggestionId', async () => { + const existingWorkbook = new ExcelJS.Workbook(); + const sheet = existingWorkbook.addWorksheet('data'); + sheet.addRow(['Agent Type', 'User Agent', 'Number of Hits', 'Avg TTFB (ms)', 'Country Code', 'URL', 'Product', 'Category', 'Suggested URLs', 'AI Rationale', 'Confidence score']); + sheet.addRow(['Chatbots', 'ChatGPT', 150, 245.5, 'US', '/products/item', 'Adobe Creative', 'Product Page', '', '', '']); + const existingBuffer = await existingWorkbook.xlsx.writeBuffer(); + readFromSharePointStub.resolves(existingBuffer); + + const message = { + auditId: 'audit-123', + siteId: 'site-1', + data: { + brokenLinks: [{ + urlFrom: 'ChatGPT', + urlTo: 'https://example.com/products/item', + suggestedUrls: ['/products'], + aiRationale: 'Closest match', + }], + }, + }; + + const dataAccess = { + Site: { + findById: sandbox.stub().resolves({ + getId: () => 'test-site-id', + getBaseURL: () => 'https://example.com', + getConfig: () => ({ + getCdnLogsConfig: () => null, + getLlmoDataFolder: () => 'test-customer', + getLlmoCdnBucketConfig: () => ({ bucketName: 'test-bucket' }), + }), + }), + }, + Audit: { + findById: sandbox.stub().resolves({ getId: () => 'audit-123' }), + }, + }; + + const context = { + log: { info: sandbox.stub(), error: sandbox.stub(), debug: sandbox.stub(), warn: sandbox.stub() }, + dataAccess, + s3Client: { send: sandbox.stub().resolves() }, + env: { AWS_ENV: 'test', AWS_REGION: 'us-east-1' }, + }; + + const resp = await guidanceHandler.default(message, context); + expect(resp.status).to.equal(200); + + const filenameArg = readFromSharePointStub.firstCall.args[0]; + expect(filenameArg).to.match(/^agentictraffic-errors-404-w\d{2}-\d{4}\.xlsx$/); + }); + it('handles brokenLinks with actual URL matching and updates', async () => { // Create existing Excel file with matching data const existingWorkbook = new ExcelJS.Workbook(); diff --git a/test/audits/llm-error-pages/handler.test.js b/test/audits/llm-error-pages/handler.test.js index 8270176f4d..5dd0c4201a 100644 --- a/test/audits/llm-error-pages/handler.test.js +++ b/test/audits/llm-error-pages/handler.test.js @@ -375,6 +375,7 @@ describe('LLM Error Pages Handler', function () { }); const result = await runAuditAndSendToMystique(context); expect(result.auditResult).to.have.length(2); + expect(mockGenerateReportingPeriods).to.have.been.calledWith(sinon.match.date, [-1, 0]); } finally { clock.restore(); }