diff --git a/src/__tests__/__snapshots__/index.test.ts.snap b/src/__tests__/__snapshots__/index.test.ts.snap index ad722635..d34aa281 100644 --- a/src/__tests__/__snapshots__/index.test.ts.snap +++ b/src/__tests__/__snapshots__/index.test.ts.snap @@ -70,6 +70,7 @@ exports[`test existence of exported functions 1`] = ` "xSum", "xUniqueSorted", "xVariance", + "xWhitakerSmoother", "xyAlign", "xyCheck", "xyCovariance", @@ -140,9 +141,11 @@ exports[`test existence of exported functions 1`] = ` "matrixBoxPlot", "matrixCenterZMean", "matrixCheck", + "matrixCholeskySolver", "matrixClone", "matrixColumnsCorrelation", "matrixCreateEmpty", + "matrixCuthillMckee", "matrixHistogram", "matrixMaxAbsoluteZ", "matrixMedian", @@ -167,5 +170,6 @@ exports[`test existence of exported functions 1`] = ` "nextPowerOfTwo", "recursiveResolve", "stringify", + "calculateAdaptiveWeights", ] `; diff --git a/src/matrix/__tests__/matrixCholeskySolver.test.ts b/src/matrix/__tests__/matrixCholeskySolver.test.ts new file mode 100644 index 00000000..dccd9b6d --- /dev/null +++ b/src/matrix/__tests__/matrixCholeskySolver.test.ts @@ -0,0 +1,51 @@ +import type { NumberArray } from 'cheminfo-types'; +import { expect, test } from 'vitest'; + +import { addWeights } from '../../utils/addWeights'; +import { createSystemMatrix } from '../../utils/createSystemMatrix'; +import { xSequentialFillFromTo } from '../../x'; +import { matrixCholeskySolver } from '../matrixCholeskySolver'; +import { matrixCuthillMckee } from '../matrixCuthillMckee'; + +type Func = (b: NumberArray) => NumberArray; +test('solve a least square system', () => { + const x = xSequentialFillFromTo({ from: 0, to: Math.PI, size: 101 }); + const noise = x.map(() => Math.random() * 0.1 - 0.05); + const y = x.map((value, index) => Math.cos(value) + noise[index]); + const weights = new Float64Array(y.length).fill(1); + y[50] = 0.9; // add outliers + + const lambda = 20; + const dimension = x.length; + const upperTriangularNonZeros = 
createSystemMatrix(dimension, lambda);
+
+  const weighted = addWeights(upperTriangularNonZeros, y, weights);
+
+  const cho = matrixCholeskySolver(weighted.leftHandSide, dimension) as Func;
+  expect(cho).not.toBeNull();
+  const smoothed = cho(weighted.rightHandSide);
+  expect(smoothed[50]).toBeLessThan(0.2);
+  expect(smoothed[50]).toBeGreaterThan(-0.2);
+
+  // ignore the outlier; this implies the smooth should pass closer to zero.
+  weights[50] = 0;
+  const weighted2 = addWeights(upperTriangularNonZeros, y, weights);
+  const cho2 = matrixCholeskySolver(weighted2.leftHandSide, dimension) as Func;
+  expect(cho2).not.toBeNull();
+  const smoothed2 = cho2(weighted2.rightHandSide);
+  expect(smoothed2[50]).toBeLessThan(smoothed[50]);
+
+  const permutationEncodedArray = matrixCuthillMckee(
+    upperTriangularNonZeros,
+    dimension,
+  );
+  const cho3 = matrixCholeskySolver(
+    weighted2.leftHandSide,
+    dimension,
+    permutationEncodedArray,
+  ) as Func;
+
+  expect(cho3).not.toBeNull();
+  const smoothed3 = cho3(weighted2.rightHandSide);
+  expect(smoothed3[50]).toBeCloseTo(smoothed2[50]);
+});
diff --git a/src/matrix/__tests__/matrixCuthillMckee.test.ts b/src/matrix/__tests__/matrixCuthillMckee.test.ts
new file mode 100644
index 00000000..1d806421
--- /dev/null
+++ b/src/matrix/__tests__/matrixCuthillMckee.test.ts
@@ -0,0 +1,67 @@
+import type { NumberArray } from 'cheminfo-types';
+import { expect, test } from 'vitest';
+
+import { matrixCuthillMckee } from '../matrixCuthillMckee';
+
+test('permutation to reduce bandwidth', () => {
+  const dimension = 10;
+  const matrix = [
+    [2, 2, 1.5],
+    [1, 1, 1],
+    [1, 4, 0.02],
+    [5, 5, 1.2],
+    [7, 7, 1.6],
+    [4, 4, 2.6],
+    [3, 3, 1.1],
+    [4, 7, 0.09],
+    [4, 6, 0.16],
+    [0, 0, 1.7],
+    [4, 8, 0.52],
+    [0, 8, 0.13],
+    [6, 6, 1.3],
+    [7, 8, 0.11],
+    [4, 9, 0.53],
+    [8, 8, 1.4],
+    [9, 9, 3.1],
+    [1, 9, 0.01],
+    [6, 9, 0.56],
+  ];
+
+  const permutationEncodedArray = matrixCuthillMckee(matrix, dimension);
+
+  const reducedBandwidthMatrix = permut(
matrix, + permutationEncodedArray, + dimension, + ); + expect(bandwidth(reducedBandwidthMatrix)).toBeLessThan(bandwidth(matrix)); +}); + +function permut( + nonZerosArray: NumberArray[], + permutationEncoded: NumberArray, + dimension: number, +) { + const pinv = new Array(dimension); + for (let k = 0; k < dimension; k++) { + pinv[permutationEncoded[k]] = k; + } + const mt: NumberArray[] = new Array(nonZerosArray.length); + for (let a = 0; a < nonZerosArray.length; ++a) { + const [r, c, value] = nonZerosArray[a]; + const [ar, ac] = [pinv[r], pinv[c]]; + mt[a] = ac < ar ? [ac, ar, value] : [ar, ac, value]; + } + return mt; +} + +function bandwidth(matrix: NumberArray[]) { + let bandwidth = 0; + for (const [i, j] of matrix) { + const distance = Math.abs(i - j); + if (distance > bandwidth) { + bandwidth = distance; + } + } + return bandwidth; +} diff --git a/src/matrix/index.ts b/src/matrix/index.ts index 91f5f306..d5410359 100644 --- a/src/matrix/index.ts +++ b/src/matrix/index.ts @@ -4,9 +4,11 @@ export * from './matrixAutoCorrelation'; export * from './matrixBoxPlot'; export * from './matrixCenterZMean'; export * from './matrixCheck'; +export * from './matrixCholeskySolver'; export * from './matrixClone'; export * from './matrixColumnsCorrelation'; export * from './matrixCreateEmpty'; +export * from './matrixCuthillMckee'; export * from './matrixHistogram'; export * from './matrixMaxAbsoluteZ'; export * from './matrixMedian'; diff --git a/src/matrix/matrixCholeskySolver.ts b/src/matrix/matrixCholeskySolver.ts new file mode 100644 index 00000000..4863a7ba --- /dev/null +++ b/src/matrix/matrixCholeskySolver.ts @@ -0,0 +1,302 @@ +/* +The MIT License (MIT) + +Copyright (c) 2013 Eric Arnebäck + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, 
distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. +*/ + +import type { NumberArray } from 'cheminfo-types'; + +/** + * Solves a system of linear equations using the Cholesky decomposition method. + * It is a direct conversion to TS from {@link https://github.com/scijs/cholesky-solve} + * @param nonZerosArray - The matrix in triplet form (array of arrays), where each sub-array contains three elements: row index, column index, and value. + * @param dimension - The order of the matrix (number of rows/columns). + * @param permutationEncoded - Optional permutation array. If provided, it will be used to permute the matrix. + * @returns A function that takes a right-hand side vector `b` and returns the solution vector `x`, or `null` if the decomposition fails. 
+ */ +export function matrixCholeskySolver( + nonZerosArray: NumberArray[], + dimension: number, + permutationEncoded?: NumberArray, +): ((b: NumberArray) => NumberArray) | null { + if (permutationEncoded) { + const pinv = new Array(dimension); + for (let k = 0; k < dimension; k++) { + pinv[permutationEncoded[k]] = k; + } + const mt: NumberArray[] = new Array(nonZerosArray.length); + for (let a = 0; a < nonZerosArray.length; ++a) { + const [r, c, value] = nonZerosArray[a]; + const [ar, ac] = [pinv[r], pinv[c]]; + mt[a] = ac < ar ? [ac, ar, value] : [ar, ac, value]; + } + nonZerosArray = mt; + } else { + permutationEncoded = []; + for (let i = 0; i < dimension; ++i) { + permutationEncoded[i] = i; + } + } + + const ap: NumberArray = new Array(dimension + 1); + const ai = new Array(nonZerosArray.length); + const ax = new Array(nonZerosArray.length); + + const lnz = []; + for (let i = 0; i < dimension; ++i) { + lnz[i] = 0; + } + for (const a of nonZerosArray) { + lnz[a[1]]++; + } + + ap[0] = 0; + for (let i = 0; i < dimension; ++i) { + ap[i + 1] = ap[i] + lnz[i]; + } + + const colOffset = []; + for (let a = 0; a < dimension; ++a) { + colOffset[a] = 0; + } + + for (const e of nonZerosArray) { + const col = e[1]; + const adr = ap[col] + colOffset[col]; + ai[adr] = e[0]; + ax[adr] = e[2]; + colOffset[col]++; + } + + const d = new Array(dimension); + const y = new Array(dimension); + const lp = new Array(dimension + 1); + const parent = new Array(dimension); + const lnzArray = new Array(dimension); + const flag = new Array(dimension); + const pattern = new Array(dimension); + const bp1 = new Array(dimension); + const x = new Array(dimension); + + ldlSymbolic(dimension, ap, ai, lp, parent, lnzArray, flag); + + const lx = new Array(lp[dimension]); + const li = new Array(lp[dimension]); + + const result = ldlNumeric( + dimension, + ap, + ai, + ax, + lp, + parent, + lnzArray, + li, + lx, + d, + y, + pattern, + flag, + ); + + if (result === dimension) { + return (b: 
NumberArray) => { + ldlPerm(dimension, bp1, b, permutationEncoded); + ldlLsolve(dimension, bp1, lp, li, lx); + ldlDsolve(dimension, bp1, d); + ldlLTsolve(dimension, bp1, lp, li, lx); + ldlPermt(dimension, x, bp1, permutationEncoded); + return x; + }; + } else { + return null; + } +} + +function ldlSymbolic( + dimension: number, + ap: NumberArray, + ai: NumberArray, + lp: NumberArray, + parent: NumberArray, + lnz: NumberArray, + flag: NumberArray, +): void { + for (let k = 0; k < dimension; k++) { + parent[k] = -1; + flag[k] = k; + lnz[k] = 0; + const kk = k; + const p2 = ap[kk + 1]; + for ( + let permutationEncoded = ap[kk]; + permutationEncoded < p2; + permutationEncoded++ + ) { + let i = ai[permutationEncoded]; + if (i < k) { + for (; flag[i] !== k; i = parent[i]) { + if (parent[i] === -1) parent[i] = k; + lnz[i]++; + flag[i] = k; + } + } + } + } + lp[0] = 0; + for (let k = 0; k < dimension; k++) { + lp[k + 1] = lp[k] + lnz[k]; + } +} + +function ldlNumeric( + dimension: number, + ap: NumberArray, + ai: NumberArray, + ax: NumberArray, + lp: NumberArray, + parent: NumberArray, + lnz: NumberArray, + li: NumberArray, + lx: NumberArray, + d: NumberArray, + y: NumberArray, + pattern: NumberArray, + flag: NumberArray, +): number { + let yi, lKi; + let i, k, permutationEncoded, kk, p2, len, top; + for (k = 0; k < dimension; k++) { + y[k] = 0; + top = dimension; + flag[k] = k; + lnz[k] = 0; + kk = k; + p2 = ap[kk + 1]; + for ( + permutationEncoded = ap[kk]; + permutationEncoded < p2; + permutationEncoded++ + ) { + i = ai[permutationEncoded]; + if (i <= k) { + y[i] += ax[permutationEncoded]; + for (len = 0; flag[i] !== k; i = parent[i]) { + pattern[len++] = i; + flag[i] = k; + } + while (len > 0) pattern[--top] = pattern[--len]; + } + } + d[k] = y[k]; + y[k] = 0; + for (; top < dimension; top++) { + i = pattern[top]; + yi = y[i]; + y[i] = 0; + p2 = lp[i] + lnz[i]; + for ( + permutationEncoded = lp[i]; + permutationEncoded < p2; + permutationEncoded++ + ) { + 
y[li[permutationEncoded]] -= lx[permutationEncoded] * yi; + } + lKi = yi / d[i]; + d[k] -= lKi * yi; + li[permutationEncoded] = k; + lx[permutationEncoded] = lKi; + lnz[i]++; + } + if (d[k] === 0) return k; + } + return dimension; +} + +function ldlLsolve( + dimension: number, + x: NumberArray, + lp: NumberArray, + li: NumberArray, + lx: NumberArray, +): void { + let j, permutationEncoded, p2; + for (j = 0; j < dimension; j++) { + p2 = lp[j + 1]; + for ( + permutationEncoded = lp[j]; + permutationEncoded < p2; + permutationEncoded++ + ) { + x[li[permutationEncoded]] -= lx[permutationEncoded] * x[j]; + } + } +} + +function ldlDsolve(dimension: number, x: NumberArray, d: NumberArray): void { + for (let j = 0; j < dimension; j++) { + x[j] /= d[j]; + } +} + +function ldlLTsolve( + dimension: number, + x: NumberArray, + lp: NumberArray, + li: NumberArray, + lx: NumberArray, +): void { + let j, permutationEncoded, p2; + for (j = dimension - 1; j >= 0; j--) { + p2 = lp[j + 1]; + for ( + permutationEncoded = lp[j]; + permutationEncoded < p2; + permutationEncoded++ + ) { + x[j] -= lx[permutationEncoded] * x[li[permutationEncoded]]; + } + } +} + +function ldlPerm( + dimension: number, + x: NumberArray, + b: NumberArray, + permutationEncoded: NumberArray, +): void { + let j; + for (j = 0; j < dimension; j++) { + x[j] = b[permutationEncoded[j]]; + } +} + +function ldlPermt( + dimension: number, + x: NumberArray, + b: NumberArray, + permutationEncoded: NumberArray, +): void { + let j; + for (j = 0; j < dimension; j++) { + x[permutationEncoded[j]] = b[j]; + } +} diff --git a/src/matrix/matrixCuthillMckee.ts b/src/matrix/matrixCuthillMckee.ts new file mode 100644 index 00000000..aa2c57ca --- /dev/null +++ b/src/matrix/matrixCuthillMckee.ts @@ -0,0 +1,76 @@ +/** + * The MIT License (MIT) + * + * Copyright (c) 2013 Mikola Lysenko + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the 
"Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. + */ + +/** + * The reverse Cuthill-Mckee method is a fast and effective preconditioner for reducing the bandwidth of sparse linear systems. + * When solving a positive semidefinite linear system using Cholesky factorization, it greatly reduces fill-in. + * It is a direct conversion to TS from {@link github.com/mikolalysenko/cuthill-mckee} + * @param list - lower triangular non zeros from a symmetric sparse matrix. + * @param dimension - matrix dimension + * @returns A Float64Array where the value at each index represents the new position of the node + * in the bandwidth-reduced ordering. 
+ */ +export function matrixCuthillMckee( + list: number[][], + dimension: number, +): Float64Array { + const adj: number[][] = new Array(dimension); + const visited: boolean[] = new Array(dimension).fill(false); + for (let i = 0; i < dimension; ++i) { + adj[i] = []; + } + + for (const l of list) { + adj[l[0]].push(l[1]); + } + + const toVisit = new Float64Array(dimension); + let eol = 0; + let ptr = 0; + for (let i = 0; i < dimension; ++i) { + if (visited[i]) { + continue; + } + toVisit[eol++] = i; + visited[i] = true; + while (ptr < eol) { + const v = toVisit[ptr++]; + const nbhd = Float64Array.from(adj[v]).sort(); + for (const u of nbhd) { + if (visited[u]) { + continue; + } + visited[u] = true; + toVisit[eol++] = u; + } + } + } + + const result = new Float64Array(dimension); + for (let i = 0; i < dimension; ++i) { + result[toVisit[i]] = i; + } + + return result; +} diff --git a/src/utils/addWeights.ts b/src/utils/addWeights.ts new file mode 100644 index 00000000..6edec4bf --- /dev/null +++ b/src/utils/addWeights.ts @@ -0,0 +1,39 @@ +import type { NumberArray } from 'cheminfo-types'; + +/** + * add the provided weights to a particular given system matrix (lD'D) in the triplet form and y data. This function is not general + * it assumes that diagonal coefficients are in the even indexes, it is the case of the matrix generated by createSystemMatrix function. + * It simulates the matrix operation W + lD'D and Wy. + * @param leftHandSide - The original system matrix to be updated, a lower triangular non-zeros of the system matrix (lambda D'D). + * @param rightHandSide - The original vector to be updated. + * @param weights - The weights to apply to the system matrix and vector. + * @returns An object that contains the news left and right hand-side of the system. 
+ */ +export function addWeights( + leftHandSide: number[][], + rightHandSide: NumberArray, + weights: NumberArray, +) { + const nbPoints = rightHandSide.length; + const l = nbPoints - 1; + const newLeftHandSide: number[][] = new Array(leftHandSide.length); + const newRightHandSide: Float64Array = new Float64Array(nbPoints); + for (let i = 0; i < l; i++) { + const w = weights[i]; + const diag = i * 2; + const next = diag + 1; + newLeftHandSide[diag] = leftHandSide[diag].slice(); + newLeftHandSide[next] = leftHandSide[next].slice(); + + newRightHandSide[i] = rightHandSide[i] * w; + newLeftHandSide[diag][2] += w; + } + newRightHandSide[l] = rightHandSide[l] * weights[l]; + newLeftHandSide[l * 2] = leftHandSide[l * 2].slice(); + newLeftHandSide[l * 2][2] += weights[l]; + + return { + leftHandSide: newLeftHandSide, + rightHandSide: newRightHandSide, + }; +} diff --git a/src/utils/calculateAdaptiveWeights.ts b/src/utils/calculateAdaptiveWeights.ts new file mode 100644 index 00000000..3b9ac435 --- /dev/null +++ b/src/utils/calculateAdaptiveWeights.ts @@ -0,0 +1,92 @@ +import type { NumberArray } from 'cheminfo-types'; + +import { xAbsolute } from '../x/xAbsolute'; +import { xMedian } from '../x/xMedian'; +import { xSubtract } from '../x/xSubtract'; + +export interface WeightsAndControlPoints { + /** + * Control points to determine which weights to update. + */ + controlPoints?: NumberArray; + /** + * Array of weights + * @default [1,1,...,1] + */ + weights?: NumberArray; +} + +export interface CalculateAdaptiveWeightsOptions + extends WeightsAndControlPoints { + /** + * Factor that determines how quickly the weights are updated in each iteration. + * A value between 0 and 1, where higher values mean faster updates. + * @default 0.5 + */ + learningRate?: number; + /** + * The minimum allowed weight value to prevent weights from becoming too small. 
+ * @default 0.01 + */ + minWeight?: number; + /** + * Factor used to calculate the threshold for determining outliers in the residuals. + * Higher values mean more tolerance for outliers. The default value is based on noise follow the normal distribution + * values over 3 times the standard-deviation could be marked as signals or outliers. + * @default 3 + */ + factorStd?: number; +} + +/** + * Calculate the weights based on the control points and the MAD between the original data and the new baseline. + * MAD (Median Absolute Deviation) is more robust to outliers and + * the factor (1.4826) makes MAD scaled to be equivalent to the standard deviation for + * normal distributions. {@link https://en.m.wikipedia.org/wiki/Median_absolute_deviation}. + * @param yData - The original data. + * @param baseline - The new baseline calculated. + * @param weights - The current weights to be updated. + * @param options - Options for updating weights. + * @returns new array of weights. + */ + +export function calculateAdaptiveWeights( + yData: NumberArray, + baseline: NumberArray, + weights: NumberArray, + options: CalculateAdaptiveWeightsOptions, +) { + const { + controlPoints, + factorStd = 3, + learningRate = 0.5, + minWeight = 0.01, + } = options; + const absResiduals = xAbsolute(xSubtract(yData, baseline)); + + const medAbsRes = xMedian(absResiduals); + const mad = 1.4826 * medAbsRes; + const threshold = factorStd * mad; + + const rawWeights = new Float64Array(absResiduals.length); + for (let i = 0; i < absResiduals.length; i++) { + rawWeights[i] = Math.exp(-((absResiduals[i] / threshold) ** 2)); + } + + let maxWeight = Number.MIN_SAFE_INTEGER; + const newWeights = Float64Array.from(weights); + const oneMinusLearningRate = 1 - learningRate; + for (let i = 0; i < newWeights.length; i++) { + if (controlPoints && controlPoints[i] > 0) continue; + const weight = Math.max( + minWeight, + oneMinusLearningRate * weights[i] + learningRate * rawWeights[i], + ); + newWeights[i] = 
weight;
+    maxWeight = Math.max(maxWeight, weight);
+  }
+  newWeights[0] = maxWeight;
+  newWeights[weights.length - 1] = maxWeight;
+
+  return newWeights;
+}
diff --git a/src/utils/createSystemMatrix.ts b/src/utils/createSystemMatrix.ts
new file mode 100644
index 00000000..902981e9
--- /dev/null
+++ b/src/utils/createSystemMatrix.ts
@@ -0,0 +1,24 @@
+/**
+ * Generates the lower triangular non-zeros of the first order smoother matrix (lambda D'D), where D is the derivative of the identity matrix.
+ * This function, in combination with the addWeights function, can obtain (Q = W + lambda D'D), the penalized least squares system of a Whitaker smoother.
+ * The matrix is returned in triplet (row, column, value) form.
+ * @param dimension - The number of points in the matrix.
+ * @param lambda - The factor of smoothness.
+ * @returns The lower triangular non-zero elements of the matrix in triplet form,
+ * suitable as input for matrixCholeskySolver and matrixCuthillMckee.
+ * @see matrixCuthillMckee - to generate a permutation encoded array for this matrix.
+ * @see matrixCholeskySolver - to solve the resulting linear system.
+ */ +export function createSystemMatrix( + dimension: number, + lambda: number, +): number[][] { + const upperTriangularNonZeros: number[][] = []; + const last = dimension - 1; + for (let i = 0; i < last; i++) { + upperTriangularNonZeros.push([i, i, lambda * 2], [i, i + 1, -1 * lambda]); + } + upperTriangularNonZeros[0][2] = lambda; + upperTriangularNonZeros.push([last, last, lambda]); + return upperTriangularNonZeros; +} diff --git a/src/utils/index.ts b/src/utils/index.ts index 36bbd254..cb6078da 100644 --- a/src/utils/index.ts +++ b/src/utils/index.ts @@ -7,3 +7,4 @@ export * from './isPowerOfTwo'; export * from './nextPowerOfTwo'; export * from './recursiveResolve'; export * from './stringify'; +export * from './calculateAdaptiveWeights'; diff --git a/src/x/__tests__/xWhitakerSmoother.test.ts b/src/x/__tests__/xWhitakerSmoother.test.ts new file mode 100644 index 00000000..9a0b2bbf --- /dev/null +++ b/src/x/__tests__/xWhitakerSmoother.test.ts @@ -0,0 +1,20 @@ +import { expect, test } from 'vitest'; + +import { xSequentialFillFromTo } from '../xSequentialFillFromTo'; +import { xWhitakerSmoother } from '../xWhitakerSmoother'; + +test('two peaks with sine baseline', () => { + const x = xSequentialFillFromTo({ from: 0, to: Math.PI, size: 101 }); + const noise = x.map(() => Math.random() * 0.1 - 0.05); + const y = x.map((value, index) => Math.cos(value) + 2 * noise[index]); + + y[50] = 0.9; // add outliers + + const smooth = xWhitakerSmoother(y, { + lambda: 20, + maxIterations: 100, + }); + + expect(smooth[50]).toBeLessThan(0.2); + expect(smooth[50]).toBeGreaterThan(-0.2); +}); diff --git a/src/x/index.ts b/src/x/index.ts index 67dd2d45..3c4a3c9d 100644 --- a/src/x/index.ts +++ b/src/x/index.ts @@ -62,3 +62,4 @@ export * from './xSubtract'; export * from './xSum'; export * from './xUniqueSorted'; export * from './xVariance'; +export * from './xWhitakerSmoother'; diff --git a/src/x/xWhitakerSmoother.ts b/src/x/xWhitakerSmoother.ts new file mode 100644 index 
00000000..798ab342 --- /dev/null +++ b/src/x/xWhitakerSmoother.ts @@ -0,0 +1,149 @@ +import type { NumberArray } from 'cheminfo-types'; + +import { matrixCholeskySolver } from '../matrix/matrixCholeskySolver'; +import { addWeights } from '../utils/addWeights'; +import type { + CalculateAdaptiveWeightsOptions, + WeightsAndControlPoints, +} from '../utils/calculateAdaptiveWeights'; +import { calculateAdaptiveWeights } from '../utils/calculateAdaptiveWeights'; +import { createSystemMatrix } from '../utils/createSystemMatrix'; + +import { xEnsureFloat64 } from './xEnsureFloat64'; +import { xMultiply } from './xMultiply'; + +interface XWhitakerSmootherOptions extends CalculateAdaptiveWeightsOptions { + /** + * Factor of weights matrix in -> [I + lambda D'D]z = x + * @default 100 + */ + lambda?: number; + /** + * Maximum number of iterations for the baseline refinement process. + * @default 100 + */ + maxIterations?: number; + + /** + * Tolerance for convergence. The process stops if the change in baseline is less than this value. + * @default 1e-6 + */ + tolerance?: number; + + /** + * Learning rate for weight updates. + * @default 0.5 + */ + learningRate?: number; + + /** + * Minimum weight value to avoid division by zero or extremely small weights. + * @default 0.01 + */ + minWeight?: number; +} + +/** + * Computes the baseline points for the given data using an iterative smoothing algorithm. + * @param yData - The input data array. + * @param options - The options for baseline computation. + * @returns - The computed baseline points. 
+ */ +export function xWhitakerSmoother( + yData: NumberArray, + options: XWhitakerSmootherOptions = {}, +) { + const { + lambda = 100, + maxIterations = 100, + tolerance = 1e-6, + factorStd = 3, + learningRate = 0.5, + minWeight = 0.01, + } = options; + + const size = yData.length; + + // eslint-disable-next-line prefer-const + let { controlPoints, weights } = getWeightsAndControlPoints(yData, options); + const prevBaseline: Float64Array = new Float64Array(size); + + let iteration = 0; + let delta = Infinity; + let baseline = yData.slice(); + const upperTriangularNonZeros = createSystemMatrix(size, lambda); + while (iteration < maxIterations && delta > tolerance) { + const { leftHandSide, rightHandSide } = addWeights( + upperTriangularNonZeros, + yData, + weights, + ); + + const cho = matrixCholeskySolver(leftHandSide, size); + + if (!cho) { + return baseline; + } + + const newBaseline = cho(rightHandSide); + + weights = calculateAdaptiveWeights(yData, newBaseline, weights, { + controlPoints, + minWeight, + learningRate, + factorStd, + }); + + delta = calculateDelta(newBaseline, prevBaseline, size); + prevBaseline.set(newBaseline); + baseline = xEnsureFloat64(newBaseline); + iteration++; + } + + return baseline; +} + +/** + * Calculates the delta between the current and previous baseline. + * @param baseline - The current baseline array. + * @param prevBaseline - The previous baseline array. + * @param n - The length of the arrays. + * @returns - The calculated delta value. + */ +function calculateDelta( + baseline: NumberArray, + prevBaseline: NumberArray, + n: number, +): number { + let sum = 0; + for (let i = 0; i < n; i++) { + sum += (baseline[i] - prevBaseline[i]) ** 2; + } + return Math.sqrt(sum / n); +} + +/** + * Retrieves the control points and weights for the given data, the weights are modified multiplication of controlPoints if it exists. + * @param y - The input data array. + * @param options - The options for control points and weights. 
+ * @returns - The control points and modified weights. + */ +function getWeightsAndControlPoints( + y: NumberArray, + options: WeightsAndControlPoints = {}, +): { weights: NumberArray; controlPoints?: NumberArray } { + const { length } = y; + const { controlPoints } = options; + const { weights = Float64Array.from({ length }).fill(1) } = options; + + if (controlPoints && controlPoints.length !== y.length) { + throw new RangeError('controlPoints should match the length with X'); + } else if (weights.length !== y.length) { + throw new RangeError('weights should match the length with X'); + } + + return { + weights: controlPoints ? xMultiply(weights, controlPoints) : weights, + controlPoints, + }; +}