diff --git a/packages/transformers/docs/plugins/preprocess.js b/packages/transformers/docs/plugins/preprocess.js index 9cdf3c753..2097465f7 100644 --- a/packages/transformers/docs/plugins/preprocess.js +++ b/packages/transformers/docs/plugins/preprocess.js @@ -39,7 +39,10 @@ function transformType(expr) { .replace(/\[\w+\]/g, "Array") // [T] single-element tuple -> Array .replace(/\[[^\[\]]*,[^\[\]]*\]/g, "Array") // tuples with commas -> Array .replace(/\w+\s+extends\s+[^?]+\?\s*[^:]+\s*:\s*[^,}>)]+/g, "any") // conditionals -> any - .replace(/\bnew\s+([A-Z]\w*)\b/g, "$1"); // new Type -> Type + .replace(/\bnew\s+([A-Z]\w*)\b/g, "$1") // new Type -> Type + .replace(/,?\s*\[\s*\w+\s*:\s*\w+\s*\]\s*:\s*\w+/g, "") // [key: string]: any -> (removed) + .replace(/\(\s*(\w+)\s*&\s*\{\s*\}\s*\)/g, "$1") // (string & {}) -> string + .replace(/\s*&\s*\{\s*\}/g, ""); // string & {} -> string if (!result.includes("=>")) result = result.replace(/\s*&\s*/g, "|"); // A & B -> A|B } return result; diff --git a/packages/transformers/src/pipelines/text-generation.js b/packages/transformers/src/pipelines/text-generation.js index d9e04dc5f..0dd1beece 100644 --- a/packages/transformers/src/pipelines/text-generation.js +++ b/packages/transformers/src/pipelines/text-generation.js @@ -100,6 +100,10 @@ function isChat(x) { export class TextGenerationPipeline extends /** @type {new (options: TextPipelineConstructorArgs) => TextGenerationPipelineType} */ (Pipeline) { + /** + * @param {string | string[] | import('../tokenization_utils.js').Message[] | import('../tokenization_utils.js').Message[][]} texts + * @param {Partial<import('../generation/configuration_utils.js').GenerationConfig>} generate_kwargs + */ async _call(texts, generate_kwargs = {}) { let isBatched = false; let isChatInput = false; diff --git a/packages/transformers/src/tokenization_utils.js b/packages/transformers/src/tokenization_utils.js index dd2642b04..7e3c5baed 100644 --- a/packages/transformers/src/tokenization_utils.js +++ b/packages/transformers/src/tokenization_utils.js @@ 
-64,10 +64,30 @@ const SPECIAL_TOKEN_ATTRIBUTES = [ // additional_special_tokens (TODO) ]; +/** + * @typedef {{ type: 'text', text: string, [key: string]: any }} TextContent + * @property {'text'} type The type of content (must be 'text'). + * @property {string} text The text content. + */ + +/** + * @typedef {{ type: 'image', image?: string | import('./utils/image.js').RawImage, [key: string]: any }} ImageContent + * @property {'image'} type The type of content (must be 'image'). + * @property {string | import('./utils/image.js').RawImage} [image] Optional URL or instance of the image. + * + * Note: This works for SmolVLM. Qwen2VL and Idefics3 have different implementations. + */ + +/** + * @typedef {TextContent | ImageContent | { type: string & {}, [key: string]: any }} MessageContent + * Base type for message content. This is a discriminated union that can be extended with additional content types. + * Example: `@typedef {TextContent | ImageContent | AudioContent} MessageContent` + */ + /** * @typedef {Object} Message - * @property {string} role The role of the message (e.g., "user" or "assistant" or "system"). - * @property {string} content The content of the message. + * @property {'user' | 'assistant' | 'system' | (string & {})} role The role of the message. + * @property {string | MessageContent[]} content The content of the message. Can be a simple string or an array of content objects. */ /** @@ -276,10 +296,10 @@ export class PreTrainedTokenizer extends Callable { * @param {string|string[]} [options.text_pair=null] Optional second sequence to be encoded. If set, must be the same type as text. * @param {boolean|'max_length'} [options.padding=false] Whether to pad the input sequences. * @param {boolean} [options.add_special_tokens=true] Whether or not to add the special tokens associated with the corresponding model. - * @param {boolean} [options.truncation=null] Whether to truncate the input sequences. 
- * @param {number} [options.max_length=null] Maximum length of the returned list and optionally padding length. + * @param {boolean|null} [options.truncation=null] Whether to truncate the input sequences. + * @param {number|null} [options.max_length=null] Maximum length of the returned list and optionally padding length. * @param {boolean} [options.return_tensor=true] Whether to return the results as Tensors or arrays. - * @param {boolean} [options.return_token_type_ids=null] Whether to return the token type ids. + * @param {boolean|null} [options.return_token_type_ids=null] Whether to return the token type ids. * @returns {BatchEncoding} Object to be passed to the model. */ _call( @@ -455,9 +475,9 @@ export class PreTrainedTokenizer extends Callable { * * @param {string} text The text to encode. * @param {Object} options An optional object containing the following properties: - * @param {string} [options.text_pair=null] The optional second text to encode. + * @param {string|null} [options.text_pair=null] The optional second text to encode. * @param {boolean} [options.add_special_tokens=true] Whether or not to add the special tokens associated with the corresponding model. - * @param {boolean} [options.return_token_type_ids=null] Whether to return token_type_ids. + * @param {boolean|null} [options.return_token_type_ids=null] Whether to return token_type_ids. * @returns {{input_ids: number[], attention_mask: number[], token_type_ids?: number[]}} An object containing the encoded text. * @private */ @@ -478,7 +498,7 @@ export class PreTrainedTokenizer extends Callable { * Converts a string into a sequence of tokens. * @param {string} text The sequence to be encoded. * @param {Object} options An optional object containing the following properties: - * @param {string} [options.pair] A second sequence to be encoded with the first. + * @param {string|null} [options.pair] A second sequence to be encoded with the first. 
* @param {boolean} [options.add_special_tokens=false] Whether or not to add the special tokens associated with the corresponding model. * @returns {string[]} The list of tokens. */ @@ -491,9 +511,9 @@ export class PreTrainedTokenizer extends Callable { * * @param {string} text The text to encode. * @param {Object} options An optional object containing the following properties: - * @param {string} [options.text_pair=null] The optional second text to encode. + * @param {string|null} [options.text_pair=null] The optional second text to encode. * @param {boolean} [options.add_special_tokens=true] Whether or not to add the special tokens associated with the corresponding model. - * @param {boolean} [options.return_token_type_ids=null] Whether to return token_type_ids. + * @param {boolean|null} [options.return_token_type_ids=null] Whether to return token_type_ids. * @returns {number[]} An array of token IDs representing the encoded text(s). */ encode(text, { text_pair = null, add_special_tokens = true, return_token_type_ids = null } = {}) { @@ -545,7 +565,7 @@ export class PreTrainedTokenizer extends Callable { * @param {number[]|bigint[]} token_ids List of token ids to decode * @param {Object} decode_args Optional arguments for decoding * @param {boolean} [decode_args.skip_special_tokens=false] Whether to skip special tokens during decoding - * @param {boolean} [decode_args.clean_up_tokenization_spaces=null] Whether to clean up tokenization spaces during decoding. + * @param {boolean|null} [decode_args.clean_up_tokenization_spaces=null] Whether to clean up tokenization spaces during decoding. * If null, the value is set to `this.decoder.cleanup` if it exists, falling back to `this.clean_up_tokenization_spaces` if it exists, falling back to `true`. * @returns {string} The decoded string */ @@ -562,7 +582,7 @@ export class PreTrainedTokenizer extends Callable { * template for better generation tracking. 
* * @param {Object} options An optional object containing the following properties: - * @param {string} [options.chat_template=null] + * @param {string|null} [options.chat_template=null] * A Jinja template or the name of a template to use for this conversion. * It is usually not necessary to pass anything to this argument, * as the model's template will be used by default. @@ -642,7 +662,7 @@ export class PreTrainedTokenizer extends Callable { * @param {Message[]} conversation A list of message objects with `"role"` and `"content"` keys, * representing the chat history so far. * @param {Object} options An optional object containing the following properties: - * @param {string} [options.chat_template=null] A Jinja template to use for this conversion. If + * @param {string|null} [options.chat_template=null] A Jinja template to use for this conversion. If * this is not passed, the model's chat template will be used instead. * @param {Object[]} [options.tools=null] * A list of tools (callable functions) that will be accessible to the model. If the template does not @@ -663,7 +683,7 @@ export class PreTrainedTokenizer extends Callable { * @param {boolean} [options.tokenize=true] Whether to tokenize the output. If false, the output will be a string. * @param {boolean} [options.padding=false] Whether to pad sequences to the maximum length. Has no effect if tokenize is false. * @param {boolean} [options.truncation=false] Whether to truncate sequences to the maximum length. Has no effect if tokenize is false. - * @param {number} [options.max_length=null] Maximum length (in tokens) to use for padding or truncation. Has no effect if tokenize is false. + * @param {number|null} [options.max_length=null] Maximum length (in tokens) to use for padding or truncation. Has no effect if tokenize is false. * If not specified, the tokenizer's `max_length` attribute will be used as a default. 
* @param {boolean} [options.return_tensor=true] Whether to return the output as a Tensor or an Array. Has no effect if tokenize is false. * @param {boolean} [options.return_dict=true] Whether to return a dictionary with named outputs. Has no effect if tokenize is false. diff --git a/packages/transformers/src/transformers.js b/packages/transformers/src/transformers.js index 66a3b5385..539079658 100644 --- a/packages/transformers/src/transformers.js +++ b/packages/transformers/src/transformers.js @@ -56,6 +56,7 @@ export { softmax, log_softmax, dot, cos_sim } from './utils/maths.js'; /** * @typedef {import('./utils/hub.js').PretrainedModelOptions} PretrainedModelOptions * @typedef {import('./processing_utils.js').PretrainedProcessorOptions} PretrainedProcessorOptions + * @typedef {import('./tokenization_utils.js').Message} Message * @typedef {import('./tokenization_utils.js').PretrainedTokenizerOptions} PretrainedTokenizerOptions * @typedef {import('./utils/dtypes.js').DataType} DataType * @typedef {import('./utils/devices.js').DeviceType} DeviceType