import torch
from transformers import WhisperForConditionalGeneration, WhisperProcessor
from transformers.trainer_utils import align_special_tokens
def main() -> None:
    """Reproduce a Whisper `generate()` crash after `align_special_tokens`.

    Loads `openai/whisper-tiny`, configures suppression so that EOS is the
    only non-suppressed token, then generates on 10 s of silence twice:
    once before `align_special_tokens` (works, empty transcription) and once
    after (raises IndexError — the bug being reported).

    Note: downloads the model on first run; requires network access.
    """
    model_name = "openai/whisper-tiny"
    processor = WhisperProcessor.from_pretrained(model_name)
    model = WhisperForConditionalGeneration.from_pretrained(model_name)

    # clear suppress tokens so the model can freely produce EOS when there is
    # nothing to transcribe.
    model.generation_config.begin_suppress_tokens = None
    # Suppress every token except EOS, forcing an empty transcription.
    model.generation_config.suppress_tokens = [
        i for i in range(model.config.vocab_size) if i != model.config.eos_token_id
    ]

    # --- Without align_special_tokens: works fine ---
    # 10 seconds of silence at Whisper's expected 16 kHz sampling rate.
    features = processor(
        torch.zeros(16_000 * 10).numpy(), sampling_rate=16_000, return_tensors="pt"
    ).input_features
    out = model.generate(features)
    decoded = processor.batch_decode(out, skip_special_tokens=True)
    print(f"Before align_special_tokens: generate() on silence → {decoded!r} (empty transcription, as expected)")

    # --- After align_special_tokens (called e.g. by Trainer.train()): crashes ---
    align_special_tokens(model, processor)
    print("\nAfter align_special_tokens: generate() on silence →", end=" ")
    try:
        model.generate(features)
        print("no crash")
    except IndexError as e:
        print(f"IndexError: {e}")
# Script entry point: run the reproduction only when executed directly.
if __name__ == "__main__":
    main()
System Info

`transformers` version: 5.6.0.dev0

Who can help?
@eustlb
Information
Tasks
The `examples` folder (such as GLUE/SQuAD, ...)

Reproduction
Basic reproduction script
Expected behavior
The generation should not fail (and should return an empty transcription).