diff --git a/packages/transformers/docs/plugins/preprocess.js b/packages/transformers/docs/plugins/preprocess.js index 9cdf3c753..2097465f7 100644 --- a/packages/transformers/docs/plugins/preprocess.js +++ b/packages/transformers/docs/plugins/preprocess.js @@ -39,7 +39,10 @@ function transformType(expr) { .replace(/\[\w+\]/g, "Array") // [T] single-element tuple -> Array .replace(/\[[^\[\]]*,[^\[\]]*\]/g, "Array") // tuples with commas -> Array .replace(/\w+\s+extends\s+[^?]+\?\s*[^:]+\s*:\s*[^,}>)]+/g, "any") // conditionals -> any - .replace(/\bnew\s+([A-Z]\w*)\b/g, "$1"); // new Type -> Type + .replace(/\bnew\s+([A-Z]\w*)\b/g, "$1") // new Type -> Type + .replace(/,?\s*\[\s*\w+\s*:\s*\w+\s*\]\s*:\s*\w+/g, "") // [key: string]: any -> (removed) + .replace(/\(\s*(\w+)\s*&\s*\{\s*\}\s*\)/g, "$1") // (string & {}) -> string + .replace(/\s*&\s*\{\s*\}/g, ""); // string & {} -> string if (!result.includes("=>")) result = result.replace(/\s*&\s*/g, "|"); // A & B -> A|B } return result; diff --git a/packages/transformers/src/pipelines/text-generation.js b/packages/transformers/src/pipelines/text-generation.js index d9e04dc5f..0dd1beece 100644 --- a/packages/transformers/src/pipelines/text-generation.js +++ b/packages/transformers/src/pipelines/text-generation.js @@ -100,6 +100,10 @@ function isChat(x) { export class TextGenerationPipeline extends /** @type {new (options: TextPipelineConstructorArgs) => TextGenerationPipelineType} */ (Pipeline) { + /** + * @param {string | string[] | import('../tokenization_utils.js').Message[] | import('../tokenization_utils.js').Message[][]} texts + * @param {Partial<import('../generation/configuration_utils.js').GenerationConfig>} generate_kwargs + */ async _call(texts, generate_kwargs = {}) { let isBatched = false; let isChatInput = false; diff --git a/packages/transformers/src/tokenization_utils.js b/packages/transformers/src/tokenization_utils.js index dd2642b04..7e3c5baed 100644 --- a/packages/transformers/src/tokenization_utils.js +++ b/packages/transformers/src/tokenization_utils.js @@ 
-64,10 +64,30 @@ const SPECIAL_TOKEN_ATTRIBUTES = [ // additional_special_tokens (TODO) ]; +/** + * @typedef {{ type: 'text', text: string, [key: string]: any }} TextContent + * @property {'text'} type The type of content (must be 'text'). + * @property {string} text The text content. + */ + +/** + * @typedef {{ type: 'image', image?: string | import('./utils/image.js').RawImage, [key: string]: any }} ImageContent + * @property {'image'} type The type of content (must be 'image'). + * @property {string | import('./utils/image.js').RawImage} [image] Optional URL or instance of the image. + * + * Note: This works for SmolVLM. Qwen2VL and Idefics3 have different implementations. + */ + +/** + * @typedef {TextContent | ImageContent | { type: string & {}, [key: string]: any }} MessageContent + * Base type for message content. This is a discriminated union that can be extended with additional content types. + * Example: `@typedef {TextContent | ImageContent | AudioContent} MessageContent` + */ + /** * @typedef {Object} Message - * @property {string} role The role of the message (e.g., "user" or "assistant" or "system"). - * @property {string} content The content of the message. + * @property {'user' | 'assistant' | 'system' | (string & {})} role The role of the message. + * @property {string | MessageContent[]} content The content of the message. Can be a simple string or an array of content objects. */ /** @@ -276,10 +296,10 @@ export class PreTrainedTokenizer extends Callable { * @param {string|string[]} [options.text_pair=null] Optional second sequence to be encoded. If set, must be the same type as text. * @param {boolean|'max_length'} [options.padding=false] Whether to pad the input sequences. * @param {boolean} [options.add_special_tokens=true] Whether or not to add the special tokens associated with the corresponding model. - * @param {boolean} [options.truncation=null] Whether to truncate the input sequences. 
- * @param {number} [options.max_length=null] Maximum length of the returned list and optionally padding length. + * @param {boolean|null} [options.truncation=null] Whether to truncate the input sequences. + * @param {number|null} [options.max_length=null] Maximum length of the returned list and optionally padding length. * @param {boolean} [options.return_tensor=true] Whether to return the results as Tensors or arrays. - * @param {boolean} [options.return_token_type_ids=null] Whether to return the token type ids. + * @param {boolean|null} [options.return_token_type_ids=null] Whether to return the token type ids. * @returns {BatchEncoding} Object to be passed to the model. */ _call( @@ -455,9 +475,9 @@ export class PreTrainedTokenizer extends Callable { * * @param {string} text The text to encode. * @param {Object} options An optional object containing the following properties: - * @param {string} [options.text_pair=null] The optional second text to encode. + * @param {string|null} [options.text_pair=null] The optional second text to encode. * @param {boolean} [options.add_special_tokens=true] Whether or not to add the special tokens associated with the corresponding model. - * @param {boolean} [options.return_token_type_ids=null] Whether to return token_type_ids. + * @param {boolean|null} [options.return_token_type_ids=null] Whether to return token_type_ids. * @returns {{input_ids: number[], attention_mask: number[], token_type_ids?: number[]}} An object containing the encoded text. * @private */ @@ -478,7 +498,7 @@ export class PreTrainedTokenizer extends Callable { * Converts a string into a sequence of tokens. * @param {string} text The sequence to be encoded. * @param {Object} options An optional object containing the following properties: - * @param {string} [options.pair] A second sequence to be encoded with the first. + * @param {string|null} [options.pair] A second sequence to be encoded with the first. 
* @param {boolean} [options.add_special_tokens=false] Whether or not to add the special tokens associated with the corresponding model. * @returns {string[]} The list of tokens. */ @@ -491,9 +511,9 @@ export class PreTrainedTokenizer extends Callable { * * @param {string} text The text to encode. * @param {Object} options An optional object containing the following properties: - * @param {string} [options.text_pair=null] The optional second text to encode. + * @param {string|null} [options.text_pair=null] The optional second text to encode. * @param {boolean} [options.add_special_tokens=true] Whether or not to add the special tokens associated with the corresponding model. - * @param {boolean} [options.return_token_type_ids=null] Whether to return token_type_ids. + * @param {boolean|null} [options.return_token_type_ids=null] Whether to return token_type_ids. * @returns {number[]} An array of token IDs representing the encoded text(s). */ encode(text, { text_pair = null, add_special_tokens = true, return_token_type_ids = null } = {}) { @@ -545,7 +565,7 @@ export class PreTrainedTokenizer extends Callable { * @param {number[]|bigint[]} token_ids List of token ids to decode * @param {Object} decode_args Optional arguments for decoding * @param {boolean} [decode_args.skip_special_tokens=false] Whether to skip special tokens during decoding - * @param {boolean} [decode_args.clean_up_tokenization_spaces=null] Whether to clean up tokenization spaces during decoding. + * @param {boolean|null} [decode_args.clean_up_tokenization_spaces=null] Whether to clean up tokenization spaces during decoding. * If null, the value is set to `this.decoder.cleanup` if it exists, falling back to `this.clean_up_tokenization_spaces` if it exists, falling back to `true`. * @returns {string} The decoded string */ @@ -562,7 +582,7 @@ export class PreTrainedTokenizer extends Callable { * template for better generation tracking. 
* * @param {Object} options An optional object containing the following properties: - * @param {string} [options.chat_template=null] + * @param {string|null} [options.chat_template=null] * A Jinja template or the name of a template to use for this conversion. * It is usually not necessary to pass anything to this argument, * as the model's template will be used by default. @@ -642,7 +662,7 @@ export class PreTrainedTokenizer extends Callable { * @param {Message[]} conversation A list of message objects with `"role"` and `"content"` keys, * representing the chat history so far. * @param {Object} options An optional object containing the following properties: - * @param {string} [options.chat_template=null] A Jinja template to use for this conversion. If + * @param {string|null} [options.chat_template=null] A Jinja template to use for this conversion. If * this is not passed, the model's chat template will be used instead. * @param {Object[]} [options.tools=null] * A list of tools (callable functions) that will be accessible to the model. If the template does not @@ -663,7 +683,7 @@ export class PreTrainedTokenizer extends Callable { * @param {boolean} [options.tokenize=true] Whether to tokenize the output. If false, the output will be a string. * @param {boolean} [options.padding=false] Whether to pad sequences to the maximum length. Has no effect if tokenize is false. * @param {boolean} [options.truncation=false] Whether to truncate sequences to the maximum length. Has no effect if tokenize is false. - * @param {number} [options.max_length=null] Maximum length (in tokens) to use for padding or truncation. Has no effect if tokenize is false. + * @param {number|null} [options.max_length=null] Maximum length (in tokens) to use for padding or truncation. Has no effect if tokenize is false. * If not specified, the tokenizer's `max_length` attribute will be used as a default. 
* @param {boolean} [options.return_tensor=true] Whether to return the output as a Tensor or an Array. Has no effect if tokenize is false. * @param {boolean} [options.return_dict=true] Whether to return a dictionary with named outputs. Has no effect if tokenize is false. diff --git a/packages/transformers/src/transformers.js b/packages/transformers/src/transformers.js index 66a3b5385..539079658 100644 --- a/packages/transformers/src/transformers.js +++ b/packages/transformers/src/transformers.js @@ -56,6 +56,7 @@ export { softmax, log_softmax, dot, cos_sim } from './utils/maths.js'; /** * @typedef {import('./utils/hub.js').PretrainedModelOptions} PretrainedModelOptions * @typedef {import('./processing_utils.js').PretrainedProcessorOptions} PretrainedProcessorOptions + * @typedef {import('./tokenization_utils.js').Message} Message * @typedef {import('./tokenization_utils.js').PretrainedTokenizerOptions} PretrainedTokenizerOptions * @typedef {import('./utils/dtypes.js').DataType} DataType * @typedef {import('./utils/devices.js').DeviceType} DeviceType