From c904208aff8eaf904ef96b29b1ab80e52c4d45f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Thu, 26 Feb 2026 02:51:38 +0000 Subject: [PATCH 1/2] Fix title consistency from "Transformers Reinforcement Learning" to "Transformer Reinforcement Learning" --- CITATION.cff | 4 ++-- README.md | 2 +- examples/notebooks/grpo_ministral3_vl.ipynb | 2 +- examples/notebooks/grpo_qwen3_vl.ipynb | 2 +- examples/notebooks/grpo_rnj_1_instruct.ipynb | 2 +- examples/notebooks/grpo_trl_lora_qlora.ipynb | 2 +- examples/notebooks/openenv_sudoku_grpo.ipynb | 2 +- examples/notebooks/openenv_wordle_grpo.ipynb | 2 +- examples/notebooks/sft_ministral3_vl.ipynb | 2 +- examples/notebooks/sft_qwen_vl.ipynb | 2 +- examples/notebooks/sft_trl_lora_qlora.ipynb | 2 +- trl/skills/trl-training/SKILL.md | 4 ++-- trl/templates/lm_model_card.md | 2 +- trl/templates/rm_model_card.md | 2 +- 14 files changed, 16 insertions(+), 16 deletions(-) diff --git a/CITATION.cff b/CITATION.cff index 9169ae57e5e..eea3e5d82f2 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -1,5 +1,5 @@ cff-version: 1.2.0 -title: 'TRL: Transformers Reinforcement Learning' +title: 'TRL: Transformer Reinforcement Learning' message: >- If you use this software, please cite it using the metadata from this file. @@ -25,7 +25,7 @@ authors: family-names: Gallouédec repository-code: 'https://github.com/huggingface/trl' abstract: >- - TRL (Transformers Reinforcement Learning) is an + TRL (Transformer Reinforcement Learning) is an open-source toolkit for aligning transformer models via post-training. 
It provides practical, scalable implementations of SFT, reward modeling, DPO, and GRPO diff --git a/README.md b/README.md index 88c6db10fc9..b2df1b59701 100644 --- a/README.md +++ b/README.md @@ -189,7 +189,7 @@ Read more in the [Experimental docs](https://huggingface.co/docs/trl/experimenta ```bibtex @software{vonwerra2020trl, - title = {{TRL: Transformers Reinforcement Learning}}, + title = {{TRL: Transformer Reinforcement Learning}}, author = {von Werra, Leandro and Belkada, Younes and Tunstall, Lewis and Beeching, Edward and Thrush, Tristan and Lambert, Nathan and Huang, Shengyi and Rasul, Kashif and Gallouédec, Quentin}, license = {Apache-2.0}, url = {https://github.com/huggingface/trl}, diff --git a/examples/notebooks/grpo_ministral3_vl.ipynb b/examples/notebooks/grpo_ministral3_vl.ipynb index 1c5fc220b05..593c415cc85 100644 --- a/examples/notebooks/grpo_ministral3_vl.ipynb +++ b/examples/notebooks/grpo_ministral3_vl.ipynb @@ -13,7 +13,7 @@ "![trl banner](https://huggingface.co/datasets/trl-lib/documentation-images/resolve/main/trl_banner_dark.png)\n", "\n", "\n", - "With [**Transformers Reinforcement Learning (TRL)**](https://github.com/huggingface/trl), you can fine-tune cutting edge vision language models. It comes with support for quantized parameter efficient fine-tuning technique **QLoRA**, so we can use free Colab (T4 GPU) to fine-tune models like [Ministral-3](https://huggingface.co/collections/mistralai/ministral-3).\n", + "With [**Transformer Reinforcement Learning (TRL)**](https://github.com/huggingface/trl), you can fine-tune cutting edge vision language models. It comes with support for quantized parameter efficient fine-tuning technique **QLoRA**, so we can use free Colab (T4 GPU) to fine-tune models like [Ministral-3](https://huggingface.co/collections/mistralai/ministral-3).\n", "\n", "\n", "- [TRL GitHub Repository](https://github.com/huggingface/trl) — star us to support the project! 
\n", diff --git a/examples/notebooks/grpo_qwen3_vl.ipynb b/examples/notebooks/grpo_qwen3_vl.ipynb index f9c0f3d6066..df8c851f916 100644 --- a/examples/notebooks/grpo_qwen3_vl.ipynb +++ b/examples/notebooks/grpo_qwen3_vl.ipynb @@ -13,7 +13,7 @@ "![trl banner](https://huggingface.co/datasets/trl-lib/documentation-images/resolve/main/trl_banner_dark.png)\n", "\n", "\n", - "With [**Transformers Reinforcement Learning (TRL)**](https://github.com/huggingface/trl), you can fine-tune cutting edge vision language models. It comes with support for quantized parameter efficient fine-tuning technique **QLoRA**, so we can use free Colab (T4 GPU) to fine-tune models like [Qwen3-VL](https://huggingface.co/collections/Qwen/qwen3-vl-68d2a7c1b8a8afce4ebd2dbe).\n", + "With [**Transformer Reinforcement Learning (TRL)**](https://github.com/huggingface/trl), you can fine-tune cutting edge vision language models. It comes with support for quantized parameter efficient fine-tuning technique **QLoRA**, so we can use free Colab (T4 GPU) to fine-tune models like [Qwen3-VL](https://huggingface.co/collections/Qwen/qwen3-vl-68d2a7c1b8a8afce4ebd2dbe).\n", "\n", "\n", "- [TRL GitHub Repository](https://github.com/huggingface/trl) — star us to support the project! \n", diff --git a/examples/notebooks/grpo_rnj_1_instruct.ipynb b/examples/notebooks/grpo_rnj_1_instruct.ipynb index 4911aceed9f..6e4e702115f 100644 --- a/examples/notebooks/grpo_rnj_1_instruct.ipynb +++ b/examples/notebooks/grpo_rnj_1_instruct.ipynb @@ -13,7 +13,7 @@ "![trl banner](https://huggingface.co/datasets/trl-lib/documentation-images/resolve/main/trl_banner_dark.png)\n", "\n", "\n", - "With [**Transformers Reinforcement Learning (TRL)**](https://github.com/huggingface/trl), you can fine-tune cutting edge large language models. 
It comes with support for quantized parameter efficient fine-tuning technique **QLoRA**, so we can use Colab to fine-tune models like [EssentialAI/rnj-1-instruct](https://huggingface.co/collections/EssentialAI/rnj-1).\n", + "With [**Transformer Reinforcement Learning (TRL)**](https://github.com/huggingface/trl), you can fine-tune cutting edge large language models. It comes with support for quantized parameter efficient fine-tuning technique **QLoRA**, so we can use Colab to fine-tune models like [EssentialAI/rnj-1-instruct](https://huggingface.co/collections/EssentialAI/rnj-1).\n", "\n", "\n", "- [TRL GitHub Repository](https://github.com/huggingface/trl) — star us to support the project! \n", diff --git a/examples/notebooks/grpo_trl_lora_qlora.ipynb b/examples/notebooks/grpo_trl_lora_qlora.ipynb index 80375e2b4e3..3215eb01acd 100644 --- a/examples/notebooks/grpo_trl_lora_qlora.ipynb +++ b/examples/notebooks/grpo_trl_lora_qlora.ipynb @@ -19,7 +19,7 @@ "source": [ "![trl banner](https://huggingface.co/datasets/trl-lib/documentation-images/resolve/main/trl_banner_dark.png)\n", "\n", - "Easily fine-tune **Large Language Models (LLMs)** or **Vision-Language Models (VLMs)** with **LoRA** or **QLoRA** using the [**Transformers Reinforcement Learning (TRL)**](https://github.com/huggingface/trl) library by Hugging Face and Group Relative Policy Optimization (GRPO) — all within a **free Google Colab notebook** powered by a **T4 GPU**.\n", + "Easily fine-tune **Large Language Models (LLMs)** or **Vision-Language Models (VLMs)** with **LoRA** or **QLoRA** using the [**Transformer Reinforcement Learning (TRL)**](https://github.com/huggingface/trl) library by Hugging Face and Group Relative Policy Optimization (GRPO) — all within a **free Google Colab notebook** powered by a **T4 GPU**.\n", "\n", "Thanks to the **built-in memory and training optimizations in TRL**, including LoRA, quantization, gradient checkpointing, and optimized attention kernels, it is possible to 
**fine-tune a 7B model on a free T4** with a **~7× reduction in memory consumption** compared to naive FP16 training.\n", "\n", diff --git a/examples/notebooks/openenv_sudoku_grpo.ipynb b/examples/notebooks/openenv_sudoku_grpo.ipynb index 7c2120a8d23..615217eb0a9 100644 --- a/examples/notebooks/openenv_sudoku_grpo.ipynb +++ b/examples/notebooks/openenv_sudoku_grpo.ipynb @@ -12,7 +12,7 @@ "\n", "![trl banner](https://huggingface.co/datasets/trl-lib/documentation-images/resolve/main/trl_banner_dark.png)\n", "\n", - "With [**Transformers Reinforcement Learning (TRL)**](https://github.com/huggingface/trl), you can train a model that learns to **play Sudoku**, through interaction and reinforcement.\n", + "With [**Transformer Reinforcement Learning (TRL)**](https://github.com/huggingface/trl), you can train a model that learns to **play Sudoku**, through interaction and reinforcement.\n", "\n", "- [TRL GitHub Repository](https://github.com/huggingface/trl) — star us to support the project! \n", "- [Official TRL Examples](https://huggingface.co/docs/trl/example_overview) \n", diff --git a/examples/notebooks/openenv_wordle_grpo.ipynb b/examples/notebooks/openenv_wordle_grpo.ipynb index 66b5b9959c5..ea19ce660a3 100644 --- a/examples/notebooks/openenv_wordle_grpo.ipynb +++ b/examples/notebooks/openenv_wordle_grpo.ipynb @@ -14,7 +14,7 @@ "![trl banner](https://huggingface.co/datasets/trl-lib/documentation-images/resolve/main/trl_banner_dark.png)\n", "\n", "\n", - "With [**Transformers Reinforcement Learning (TRL)**](https://github.com/huggingface/trl), you can train a model that learns to **play Wordle**, a word-guessing game, through interaction and reinforcement.\n", + "With [**Transformer Reinforcement Learning (TRL)**](https://github.com/huggingface/trl), you can train a model that learns to **play Wordle**, a word-guessing game, through interaction and reinforcement.\n", "\n", "- [TRL GitHub Repository](https://github.com/huggingface/trl) — star us to support the 
project! \n", "- [Official TRL Examples](https://huggingface.co/docs/trl/example_overview) \n", diff --git a/examples/notebooks/sft_ministral3_vl.ipynb b/examples/notebooks/sft_ministral3_vl.ipynb index e802be71225..eb0d60bf458 100644 --- a/examples/notebooks/sft_ministral3_vl.ipynb +++ b/examples/notebooks/sft_ministral3_vl.ipynb @@ -19,7 +19,7 @@ "id": "4f0hzSo4kKEc" }, "source": [ - "With [**Transformers Reinforcement Learning (TRL)**](https://github.com/huggingface/trl), you can fine-tune cutting edge vision language models. It comes with support for quantized parameter efficient fine-tuning technique **QLoRA**, so we can use free Colab (T4 GPU) to fine-tune models like [Ministral-3](https://huggingface.co/collections/mistralai/ministral-3).\n", + "With [**Transformer Reinforcement Learning (TRL)**](https://github.com/huggingface/trl), you can fine-tune cutting edge vision language models. It comes with support for quantized parameter efficient fine-tuning technique **QLoRA**, so we can use free Colab (T4 GPU) to fine-tune models like [Ministral-3](https://huggingface.co/collections/mistralai/ministral-3).\n", "\n", "\n", "- [TRL GitHub Repository](https://github.com/huggingface/trl) — star us to support the project! \n", diff --git a/examples/notebooks/sft_qwen_vl.ipynb b/examples/notebooks/sft_qwen_vl.ipynb index 2cd50fbf5b6..3163a821247 100644 --- a/examples/notebooks/sft_qwen_vl.ipynb +++ b/examples/notebooks/sft_qwen_vl.ipynb @@ -19,7 +19,7 @@ "id": "4f0hzSo4kKEc" }, "source": [ - "With [**Transformers Reinforcement Learning (TRL)**](https://github.com/huggingface/trl), you can fine-tune cutting edge vision language models. 
It comes with support for quantized parameter efficient fine-tuning technique **QLoRA**, so we can use free Colab (T4 GPU) to fine-tune models like [Qwen3-VL](https://huggingface.co/collections/Qwen/qwen3-vl-68d2a7c1b8a8afce4ebd2dbe).\n", + "With [**Transformer Reinforcement Learning (TRL)**](https://github.com/huggingface/trl), you can fine-tune cutting edge vision language models. It comes with support for quantized parameter efficient fine-tuning technique **QLoRA**, so we can use free Colab (T4 GPU) to fine-tune models like [Qwen3-VL](https://huggingface.co/collections/Qwen/qwen3-vl-68d2a7c1b8a8afce4ebd2dbe).\n", "\n", "\n", "- [TRL GitHub Repository](https://github.com/huggingface/trl) — star us to support the project! \n", diff --git a/examples/notebooks/sft_trl_lora_qlora.ipynb b/examples/notebooks/sft_trl_lora_qlora.ipynb index f6aeb420f31..3104d820a0d 100644 --- a/examples/notebooks/sft_trl_lora_qlora.ipynb +++ b/examples/notebooks/sft_trl_lora_qlora.ipynb @@ -26,7 +26,7 @@ "id": "cQ6bxQaMLWAS" }, "source": [ - "Easily fine-tune Large Language Models (LLMs) or Vision-Language Models (VLMs) with **LoRA** or **QLoRA** using the [**Transformers Reinforcement Learning (TRL)**](https://github.com/huggingface/trl) library built by Hugging Face — all within a **free Google Colab notebook** (powered by a **T4 GPU**.). \n", + "Easily fine-tune Large Language Models (LLMs) or Vision-Language Models (VLMs) with **LoRA** or **QLoRA** using the [**Transformer Reinforcement Learning (TRL)**](https://github.com/huggingface/trl) library built by Hugging Face — all within a **free Google Colab notebook** (powered by a **T4 GPU**.). \n", "\n", "- [TRL GitHub Repository](https://github.com/huggingface/trl) — star us to support the project! 
\n", "- [Official TRL Examples](https://huggingface.co/docs/trl/example_overview) \n", diff --git a/trl/skills/trl-training/SKILL.md b/trl/skills/trl-training/SKILL.md index 1941385812d..65f9c0a1628 100644 --- a/trl/skills/trl-training/SKILL.md +++ b/trl/skills/trl-training/SKILL.md @@ -1,6 +1,6 @@ --- name: trl-training -description: Train and fine-tune transformer language models using TRL (Transformers Reinforcement Learning). Supports SFT, DPO, GRPO, KTO, RLOO and Reward Model training via CLI commands. +description: Train and fine-tune transformer language models using TRL (Transformer Reinforcement Learning). Supports SFT, DPO, GRPO, KTO, RLOO and Reward Model training via CLI commands. license: Apache-2.0 metadata: version: "1.0.0" @@ -28,7 +28,7 @@ metadata: # TRL Training Skill -You are an expert at using the TRL (Transformers Reinforcement Learning) library to train and fine-tune large language models. +You are an expert at using the TRL (Transformer Reinforcement Learning) library to train and fine-tune large language models. 
## Overview diff --git a/trl/templates/lm_model_card.md b/trl/templates/lm_model_card.md index ae7e33bfb5d..8c4d2925b74 100644 --- a/trl/templates/lm_model_card.md +++ b/trl/templates/lm_model_card.md @@ -46,7 +46,7 @@ Cite TRL as: ```bibtex {% raw %}@software{vonwerra2020trl, - title = {{TRL: Transformers Reinforcement Learning}}, + title = {{TRL: Transformer Reinforcement Learning}}, author = {von Werra, Leandro and Belkada, Younes and Tunstall, Lewis and Beeching, Edward and Thrush, Tristan and Lambert, Nathan and Huang, Shengyi and Rasul, Kashif and Gallouédec, Quentin}, license = {Apache-2.0}, url = {https://github.com/huggingface/trl}, diff --git a/trl/templates/rm_model_card.md b/trl/templates/rm_model_card.md index 0e47e9724a6..19f7acd2a7e 100644 --- a/trl/templates/rm_model_card.md +++ b/trl/templates/rm_model_card.md @@ -46,7 +46,7 @@ Cite TRL as: ```bibtex {% raw %}@software{vonwerra2020trl, - title = {{TRL: Transformers Reinforcement Learning}}, + title = {{TRL: Transformer Reinforcement Learning}}, author = {von Werra, Leandro and Belkada, Younes and Tunstall, Lewis and Beeching, Edward and Thrush, Tristan and Lambert, Nathan and Huang, Shengyi and Rasul, Kashif and Gallouédec, Quentin}, license = {Apache-2.0}, url = {https://github.com/huggingface/trl}, From 95f1ee96fbb4f6e9037c78e9b8685d981bd47a21 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Quentin=20Gallou=C3=A9dec?= Date: Thu, 26 Feb 2026 02:55:33 +0000 Subject: [PATCH 2/2] Fix remaining "Transformers Reinforcement Learning" title in grpo_agent notebook --- examples/notebooks/grpo_agent.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/notebooks/grpo_agent.ipynb b/examples/notebooks/grpo_agent.ipynb index 9fec579c293..cb48ac2d10c 100644 --- a/examples/notebooks/grpo_agent.ipynb +++ b/examples/notebooks/grpo_agent.ipynb @@ -10,7 +10,7 @@ "![trl banner](https://huggingface.co/datasets/trl-lib/documentation-images/resolve/main/trl_banner_dark.png)\n", "\n", "\n", - "With [**Transformers Reinforcement Learning 
(TRL)**](https://github.com/huggingface/trl), you can train a language model to act as an **agent**. One that learns to reason, interact with external tools, and improve through reinforcement.\n", + "With [**Transformer Reinforcement Learning (TRL)**](https://github.com/huggingface/trl), you can train a language model to act as an **agent**. One that learns to reason, interact with external tools, and improve through reinforcement.\n", "\n", "- [TRL GitHub Repository](https://github.com/huggingface/trl) — star us to support the project! \n", "- [Official TRL Examples](https://huggingface.co/docs/trl/example_overview) \n",