Skip to content
7 changes: 7 additions & 0 deletions packages/transformers/src/env.js
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,11 @@ export const LogLevel = Object.freeze({
* This can improve performance by avoiding repeated downloads of WASM files. Note: Only the WASM binary is cached.
* The MJS loader file still requires network access unless you use a Service Worker.
* @property {string} cacheKey The cache key to use for storing models and WASM binaries. Defaults to 'transformers-cache'.
* @property {boolean} experimental_useCrossOriginStorage Whether to use the Cross-Origin Storage API to cache model files
* across origins, allowing different sites to share the same cached model weights. Defaults to `false`.
* Requires the Cross-Origin Storage Chrome extension: {@link https://chromewebstore.google.com/detail/cross-origin-storage/denpnpcgjgikjpoglpjefakmdcbmlgih}.
* The `experimental_` prefix indicates that the underlying browser API is not yet standardised and may change or be
* removed without a major version bump. For more information, see {@link https://github.com/WICG/cross-origin-storage}.
* @property {(input: string | URL, init?: any) => Promise<any>} fetch The fetch function to use. Defaults to `fetch`.
*/

Expand Down Expand Up @@ -247,6 +252,8 @@ export const env = {
useWasmCache: IS_WEB_CACHE_AVAILABLE || IS_FS_AVAILABLE,
cacheKey: 'transformers-cache',

experimental_useCrossOriginStorage: false,

/////////////////// Custom fetch /////////////////////
fetch: DEFAULT_FETCH,

Expand Down
5 changes: 5 additions & 0 deletions packages/transformers/src/utils/cache.js
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import { apis, env } from '../env.js';
import { FileCache } from './hub/files.js';
import { logger } from './logger.js';
import { CrossOriginStorage } from './cache/CrossOriginStorageCache.js';

/**
* @typedef {Object} CacheInterface
Expand Down Expand Up @@ -38,6 +39,10 @@ export async function getCache(file_cache_dir = null) {
cache = env.customCache;
}

if (!cache && env.experimental_useCrossOriginStorage && CrossOriginStorage.isAvailable()) {
cache = new CrossOriginStorage();
}

if (!cache && env.useBrowserCache) {
if (typeof caches === 'undefined') {
throw Error('Browser cache is not available in this environment.');
Expand Down
247 changes: 247 additions & 0 deletions packages/transformers/src/utils/cache/CrossOriginStorageCache.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,247 @@
/// <reference path="./cross-origin-storage.d.ts" />

/**
 * Digest algorithm used for every content hash handled by this module.
 */
const HASH_ALGORITHM = 'SHA-256';

/**
 * Cache API bucket in which the url→hash mapping is persisted.
 */
const HASH_CACHE_NAME = 'experimental_transformers-hash-cache';

/**
 * Wraps a hex digest in the `{ algorithm, value }` descriptor that is passed to
 * `navigator.crossOriginStorage.requestFileHandles`.
 *
 * @param {string} value Hex-encoded SHA-256 hash.
 * @returns {{ algorithm: string, value: string }} Descriptor pairing `HASH_ALGORITHM` with `value`.
 */
function makeHashDescriptor(value) {
    return { algorithm: HASH_ALGORITHM, value };
}

/**
* A cache implementation backed by the experimental `navigator.crossOriginStorage` API,
* which allows sharing cached files (identified by content hash) across origins.
*
* Implements {@link import('../cache.js').CacheInterface}.
*
* @see https://github.com/explainers-by-googlers/cross-origin-storage
*/
export class CrossOriginStorage {
/** @type {Promise<Cache> | null} */
#hashCache = null;

/**
* Returns (and lazily opens) the hash cache, reusing the same promise across concurrent callers.
* @returns {Promise<Cache>}
*/
_getHashCache = () => {
this.#hashCache ??= caches.open(HASH_CACHE_NAME);
return this.#hashCache;
};

/**
* Returns whether the `navigator.crossOriginStorage` API is available in the current environment.
* @returns {boolean}
*/
static isAvailable = () => typeof navigator !== 'undefined' && 'crossOriginStorage' in navigator;

/**
* Looks up a cached response for the given URL by resolving its SHA-256 hash and requesting
* the corresponding file handle from cross-origin storage.
*
* Implements `CacheInterface.match`.
*
* @param {string} request The URL of the resource to look up.
* @returns {Promise<Response|undefined>} The cached `Response`, or `undefined` if not found.
*/
match = async (request) => {
const hashValue = await this._getFileHash(request);
if (!hashValue) {
return undefined;
}
try {
const [handle] = await navigator.crossOriginStorage.requestFileHandles([makeHashDescriptor(hashValue)]);
const blob = await handle.getFile();
return new Response(blob);
} catch {
return undefined;
}
};

/**
* Stores a response in cross-origin storage, keyed by its SHA-256 hash.
*
* For LFS-backed URLs the hash is resolved cheaply via `_getFileHash` (which checks
* `HASH_CACHE_NAME` first, then falls back to fetching the Git LFS pointer file)
* without reading the response body a second time.
*
* For non-LFS resources the hash is unknown upfront. In that case the body is consumed
* in the background: the stream is read to compute the content hash, the file is written
* into cross-origin storage, and the computed hash is persisted to `HASH_CACHE_NAME`
* so that future `match` calls can resolve the file without a network round-trip.
*
* Implements `CacheInterface.put`.
*
* @param {string} request The URL of the resource (used as the hash-cache key).
* @param {Response} response The response whose body will be written to the cache.
* @returns {Promise<void>}
*/
put = async (request, response) => {
const hashValue = await this._getFileHash(request);

if (hashValue) {
// Fast path: LFS hash already known. Consume the body and store directly.
const blob = await response.blob();
await this._storeBlobInCOS(blob, hashValue);
} else {
// Slow path: hash unknown. Process in the background so put() returns promptly.
// The caller already holds a reference to the original response; we receive it
// here only to buffer and hash its body.
this._processAndStore(request, response.body);
}
};

/**
* Writes a blob into cross-origin storage using the given pre-computed hex hash string.
*
* @param {Blob} blob
* @param {string} hashHex Hex-encoded SHA-256 hash of `blob`.
* @returns {Promise<void>}
*/
_storeBlobInCOS = async (blob, hashHex) => {
const [handle] = await navigator.crossOriginStorage.requestFileHandles([makeHashDescriptor(hashHex)], {
create: true,
});
const writableStream = await handle.createWritable();
await writableStream.write(blob);
await writableStream.close();
};

/**
* Background task for non-LFS resources: consumes `stream`, computes the SHA-256 hash
* of the resulting blob, stores it in cross-origin storage, and persists the computed
* hash to `HASH_CACHE_NAME` keyed by `request` so future `match` calls can resolve the
* file without a network round-trip.
*
* Called fire-and-forget from `put` — errors are swallowed so failures never surface to
* the caller.
*
* @param {string} request The original resource URL.
* @param {ReadableStream} stream The response body stream to consume.
* @returns {Promise<void>}
*/
_processAndStore = async (request, stream) => {
try {
const chunks = [];
for await (const chunk of stream) {
chunks.push(chunk);
}
const blob = new Blob(chunks);
const hashHex = await this._getBlobHash(blob);

await this._storeBlobInCOS(blob, hashHex);

// Persist the computed hash so future match() calls resolve without the network.
try {
const hashCache = await this._getHashCache();
await hashCache.put(request, new Response(hashHex));
} catch {
// Cache API unavailable (e.g. non-secure context): COS entry still written.
}
} catch {
// Non-fatal: background store failure must not affect the caller.
}
};

/**
* Deletes the cache entry for the given request.
*
* Removes the hash entry from `HASH_CACHE_NAME`. Note: cross-origin storage itself does not
* expose a delete API, so only the local hash mapping is removed. For non-LFS URLs this
* permanently prevents `match` from resolving the file. For LFS-backed URLs, `match` will
* re-fetch the LFS pointer file on the next call and repopulate the hash cache automatically.
*
* Implements `CacheInterface.delete`.
*
* @param {string} request
* @returns {Promise<boolean>} Resolves to `true` if the hash entry was deleted, `false` otherwise.
*/
delete = async (request) => {
try {
const hashCache = await this._getHashCache();
return await hashCache.delete(request);
} catch {
return false;
}
};

/**
* Resolves the SHA-256 hash for a given URL.
*
* Returns the cached hash immediately if one has been persisted to `HASH_CACHE_NAME`.
* Otherwise falls back to `_getLfsFileHash` to retrieve the hash from the Hugging Face
* LFS pointer file, persisting the result to `HASH_CACHE_NAME` for future lookups.
*
* Returns `null` if the hash cannot be determined (e.g. non-LFS URL with no cached entry).
*
* @param {string} url The resource URL to resolve a hash for.
* @returns {Promise<string|null>} The hex-encoded SHA-256 hash, or `null` if unavailable.
*/
_getFileHash = async (url) => {
try {
const hashCache = await this._getHashCache();
const cached = await hashCache.match(url);
if (cached) {
return cached.text();
}

const hash = await this._getLfsFileHash(url);
if (hash) {
await hashCache.put(url, new Response(hash));
return hash;
}

return null;
} catch {
return null;
}
};

/**
* Attempts to retrieve the SHA-256 hash for a Hugging Face resource URL from its raw
* Git LFS pointer file.
*
* Only applicable to URLs containing `/resolve/` (i.e. Hugging Face resolved file URLs).
* The `/resolve/` segment is rewritten to `/raw/` to fetch the LFS pointer directly.
* Returns `null` for non-LFS URLs or when the network request fails.
*
* @see https://huggingface.co/docs/hub/en/storage-backends#xet
* @param {string} url The resolved Hugging Face URL of the resource.
* @returns {Promise<string|null>} The hex-encoded SHA-256 hash, or `null` if unavailable.
*/
_getLfsFileHash = async (url) => {
if (!url.includes('/resolve/')) {
return null;
}

const rawUrl = url.replace('/resolve/', '/raw/');

try {
const text = await fetch(rawUrl).then((r) => r.text());
const match = text.match(/^oid sha256:([0-9a-f]+)$/m);
return match ? match[1] : null;
} catch {
return null;
}
};

/**
* Computes the SHA-256 hash of a `Blob`'s contents.
*
* @param {Blob} blob The blob to hash.
* @returns {Promise<string>} The lowercase hex-encoded SHA-256 hash.
*/
_getBlobHash = async (blob) => {
const arrayBuffer = await blob.arrayBuffer();
const hashBuffer = await crypto.subtle.digest(HASH_ALGORITHM, arrayBuffer);
const hashArray = Array.from(new Uint8Array(hashBuffer));
return hashArray.map((byte) => byte.toString(16).padStart(2, '0')).join('');
};
}
38 changes: 38 additions & 0 deletions packages/transformers/src/utils/cache/cross-origin-storage.d.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/**
* Type definitions for the Cross-Origin Storage API
* Source: https://github.com/WICG/cross-origin-storage/blob/main/cross-origin-storage.d.ts
* @see https://github.com/WICG/cross-origin-storage
*/

/**
 * Dictionary that identifies one file by content hash when requesting
 * handles from cross-origin storage.
 */
interface CrossOriginStorageRequestFileHandleHash {
/**
 * Digest of the file contents, as a string.
 * NOTE(review): presumably hex-encoded — confirm the expected encoding against the WICG explainer.
 */
value: string;
/** Name of the digest algorithm that produced `value`. */
algorithm: string;
}

/**
 * Optional settings accepted as the second argument of
 * `CrossOriginStorageManager.requestFileHandles`.
 */
interface CrossOriginStorageRequestFileHandleOptions {
/**
 * NOTE(review): presumably asks for handles to be created for hashes that are not
 * stored yet, so they can be written — confirm against the WICG explainer.
 */
create?: boolean;
}

/**
 * The CrossOriginStorageManager interface: the object type declared for
 * `navigator.crossOriginStorage`.
 * [SecureContext]
 */
interface CrossOriginStorageManager {
/**
 * Requests file handles for the files identified by `hashes`.
 *
 * @param hashes Hash descriptors of the requested files.
 * @param options Optional request settings.
 * @returns A promise for an array of file handles.
 * NOTE(review): presumably one handle per requested hash, in request order — TODO confirm.
 */
requestFileHandles(
hashes: CrossOriginStorageRequestFileHandleHash[],
options?: CrossOriginStorageRequestFileHandleOptions,
): Promise<FileSystemFileHandle[]>;
}

/**
 * Augment the standard Navigator interface (via interface declaration merging)
 * with the `crossOriginStorage` entry point.
 */
interface Navigator {
/**
 * Declared as always present for typing purposes.
 * NOTE(review): the API is experimental, so the property may be missing at runtime —
 * feature-detect (e.g. `'crossOriginStorage' in navigator`) before use.
 */
readonly crossOriginStorage: CrossOriginStorageManager;
}
Loading