diff --git a/src/diffusers/quantizers/__init__.py b/src/diffusers/quantizers/__init__.py
index 3ca867c..baf7598 100644
--- a/src/diffusers/quantizers/__init__.py
+++ b/src/diffusers/quantizers/__init__.py
@@ -16,3 +16,4 @@
from .auto import DiffusersAutoQuantizer
from .base import DiffusersQuantizer
from .pipe_quant_config import PipelineQuantizationConfig
+from .quant_recipes import available_recipes, build_pipeline_quant_config, ideogram4_quant_recipe
diff --git a/src/diffusers/quantizers/pipe_quant_config.py b/src/diffusers/quantizers/pipe_quant_config.py
index 90be7d7..1abbb9e 100644
--- a/src/diffusers/quantizers/pipe_quant_config.py
+++ b/src/diffusers/quantizers/pipe_quant_config.py
@@ -63,6 +63,17 @@ class PipelineQuantizationConfig:
self.config_mapping = {} # book-keeping Example: `{module_name: quant_config}`
self.post_init()
+ @classmethod
+ def from_recipe(cls, recipe: str = "ideogram4-int8", **overrides) -> "PipelineQuantizationConfig":
+ """Build a config from a named, paper-validated quantization recipe.
+
+ See [`~quantizers.quant_recipes.available_recipes`] for the supported names and
+ [`~quantizers.quant_recipes.build_pipeline_quant_config`] for the accepted overrides.
+ """
+ from .quant_recipes import build_pipeline_quant_config
+
+ return build_pipeline_quant_config(recipe, **overrides)
+
def post_init(self):
quant_mapping = self.quant_mapping
self.is_granular = True if quant_mapping is not None else False
diff --git a/src/diffusers/quantizers/quant_recipes.py b/src/diffusers/quantizers/quant_recipes.py
new file mode 100644
index 0000000..7bb947c
--- /dev/null
+++ b/src/diffusers/quantizers/quant_recipes.py
@@ -0,0 +1,208 @@
+# Copyright 2026 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Validated, model-specific post-training quantization recipes.
+
+The recipes here translate published quantization studies into ready-to-use
+[`PipelineQuantizationConfig`] objects, so a user does not have to rediscover
+which layers are safe to quantize and which must be protected.
+
+The Ideogram 4.0 INT8 W8A8 recipe is adapted from "Holding the FP8 Quality
+Ceiling at 8-Bit Weights and Activations: INT8 and GGUF Post-Training
+Quantization of Ideogram 4.0 for Consumer GPUs" (arXiv:2606.12280). That work
+quantizes the 34-layer flow-matching backbone with per-channel INT8 weights and
+per-token dynamic INT8 activations, and shows that leaving the FFN
+down-projections (``feed_forward.w2``) in higher precision is the dominant lever
+for holding the FP8 quality ceiling on Ampere GPUs that lack FP8 tensor cores.
+The model ships the backbone as two separate weight copies for classifier-free
+guidance, exposed by the pipeline as ``transformer`` and
+``unconditional_transformer``, so the recipe protects the fragile layers in both.
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+
+from ..utils import logging
+from .pipe_quant_config import PipelineQuantizationConfig
+
+
+logger = logging.get_logger(__name__)
+
+
+# Ideogram 4.0 ships the backbone as two separate-weight copies for CFG; both are quantized.
+IDEOGRAM4_CFG_COMPONENTS = ("transformer", "unconditional_transformer")
+# Default depth of the single-stream backbone (`Ideogram4Transformer2DModel(num_layers=...)`).
+IDEOGRAM4_NUM_LAYERS = 34
+
+
+@dataclass(frozen=True)
+class QuantRecipe:
+ """A named, paper-validated quantization recipe for a specific model.
+
+ Args:
+ name (`str`): Identifier used to look the recipe up via [`PipelineQuantizationConfig.from_recipe`].
+ precision (`str`): Human-readable precision tag, e.g. ``"int8-w8a8"``.
+ quant_backend (`str`): Backend the recipe builds against (e.g. ``"torchao"``).
+ components (`tuple[str, ...]`): Pipeline components the recipe quantizes.
+ modules_to_not_convert (`tuple[str, ...]`): Per-component module name suffixes kept in higher
+ precision (the "fragility-protected" set).
+ description (`str`): Short note on what the recipe does and where it comes from.
+ """
+
+ name: str
+ precision: str
+ quant_backend: str
+ components: tuple[str, ...]
+ modules_to_not_convert: tuple[str, ...]
+ description: str = field(default="")
+
+
+def ideogram4_protected_layers(
+ num_layers: int = IDEOGRAM4_NUM_LAYERS,
+ protect_ffn_down: bool = True,
+ protect_final_layer: bool = True,
+ extra: list[str] | None = None,
+) -> list[str]:
+ """Return the module names that should stay in higher precision for Ideogram 4.0.
+
+ The list mirrors the high-fragility set isolated by arXiv:2606.12280: the FFN down-projections
+ (``layers.{i}.feed_forward.w2``) are the dominant quality lever, with the output projection
+ (``final_layer.linear``) kept in higher precision as well. Names are component-relative, so they
+ apply identically to ``transformer`` and ``unconditional_transformer``.
+
+ Args:
+ num_layers (`int`): Number of transformer blocks in the backbone.
+ protect_ffn_down (`bool`): Protect every block's FFN down-projection (the paper's main lever).
+ protect_final_layer (`bool`): Protect the final output projection.
+ extra (`list[str]`, *optional*): Additional module names to protect.
+
+ Returns:
+ `list[str]`: Sorted, de-duplicated module names to leave unquantized.
+ """
+ if num_layers <= 0:
+ raise ValueError(f"`num_layers` must be a positive integer, got {num_layers}.")
+
+ protected: set[str] = set()
+ if protect_ffn_down:
+ protected.update(f"layers.{i}.feed_forward.w2" for i in range(num_layers))
+ if protect_final_layer:
+ protected.add("final_layer.linear")
+ if extra:
+ protected.update(extra)
+
+ return sorted(protected)
+
+
+def ideogram4_quant_recipe(
+ precision: str = "int8",
+ num_layers: int = IDEOGRAM4_NUM_LAYERS,
+ components: tuple[str, ...] = IDEOGRAM4_CFG_COMPONENTS,
+ extra_protected_layers: list[str] | None = None,
+) -> QuantRecipe:
+ """Build the [`QuantRecipe`] describing the Ideogram 4.0 INT8 W8A8 PTQ configuration.
+
+ This is the pure-Python description of the recipe (which components, which backend, which layers to
+ protect); use [`build_pipeline_quant_config`] to materialize it into a runnable config.
+
+ Args:
+ precision (`str`): Currently only ``"int8"`` (W8A8) is supported for on-the-fly quantization.
+ num_layers (`int`): Number of transformer blocks; controls how many FFN down-projections are protected.
+ components (`tuple[str, ...]`): Pipeline components to quantize.
+ extra_protected_layers (`list[str]`, *optional*): Extra module names to keep in higher precision.
+ """
+ if precision != "int8":
+ raise ValueError(
+ f"Unsupported `precision={precision!r}` for on-the-fly Ideogram 4.0 quantization. "
+ "Only 'int8' (W8A8) is supported; GGUF Q4_K / Q8_0 require pre-quantized checkpoints."
+ )
+
+ protected = ideogram4_protected_layers(num_layers=num_layers, extra=extra_protected_layers)
+ return QuantRecipe(
+ name="ideogram4-int8",
+ precision="int8-w8a8",
+ quant_backend="torchao",
+ components=tuple(components),
+ modules_to_not_convert=tuple(protected),
+ description=(
+ "INT8 W8A8 (per-channel weights, per-token dynamic activations) for Ideogram 4.0, protecting "
+ "the FFN down-projections to hold the FP8 quality ceiling (arXiv:2606.12280)."
+ ),
+ )
+
+
+# Registry of recipe builders, keyed by the name passed to `PipelineQuantizationConfig.from_recipe`.
+_RECIPE_BUILDERS = {
+ "ideogram4-int8": ideogram4_quant_recipe,
+}
+
+
+def available_recipes() -> list[str]:
+ """Return the names of the quantization recipes that can be built."""
+ return sorted(_RECIPE_BUILDERS)
+
+
+def _build_torchao_quant_config(modules_to_not_convert: list[str]):
+ """Construct a torchao INT8 W8A8 config that protects the given modules."""
+ from .quantization_config import TorchAoConfig
+
+ try:
+ from torchao.quantization import Int8DynamicActivationInt8WeightConfig
+ except ImportError as exc:
+ raise ImportError(
+ "Building the Ideogram 4.0 INT8 W8A8 recipe requires `torchao` (>= 0.15.0). "
+ "Install it with `pip install -U torchao`."
+ ) from exc
+
+ # Per-channel INT8 weights + per-token dynamic INT8 activations == the paper's W8A8 recipe.
+ return TorchAoConfig(
+ quant_type=Int8DynamicActivationInt8WeightConfig(),
+ modules_to_not_convert=list(modules_to_not_convert),
+ )
+
+
+def build_pipeline_quant_config(recipe: str | QuantRecipe = "ideogram4-int8", **overrides) -> PipelineQuantizationConfig:
+ """Materialize a named recipe into a [`PipelineQuantizationConfig`].
+
+ Args:
+ recipe (`str` or [`QuantRecipe`]): Either a recipe name from [`available_recipes`] or a
+ pre-built [`QuantRecipe`].
+ **overrides: Forwarded to the recipe builder (e.g. ``num_layers``, ``components``,
+ ``extra_protected_layers``) when ``recipe`` is a name.
+
+ Returns:
+ [`PipelineQuantizationConfig`]: A granular config whose ``quant_mapping`` quantizes every targeted
+ component while leaving the fragility-protected modules in higher precision.
+ """
+ if isinstance(recipe, str):
+ if recipe not in _RECIPE_BUILDERS:
+ raise ValueError(
+ f"Unknown quantization recipe {recipe!r}. Available recipes: {available_recipes()}."
+ )
+ recipe = _RECIPE_BUILDERS[recipe](**overrides)
+ elif overrides:
+ raise ValueError("`overrides` are only supported when `recipe` is passed as a name.")
+
+ if recipe.quant_backend != "torchao":
+ raise ValueError(f"Unsupported quant_backend={recipe.quant_backend!r} for recipe {recipe.name!r}.")
+
+ modules_to_not_convert = list(recipe.modules_to_not_convert)
+ # Each CFG copy is a full backbone, so the same protected-module set applies to every component.
+ quant_mapping = {component: _build_torchao_quant_config(modules_to_not_convert) for component in recipe.components}
+ logger.info(
+ "Built '%s' recipe: quantizing %s, protecting %d module(s) per component.",
+ recipe.name,
+ ", ".join(recipe.components),
+ len(modules_to_not_convert),
+ )
+ return PipelineQuantizationConfig(quant_mapping=quant_mapping)
diff --git a/tests/quantization/test_quant_recipes.py b/tests/quantization/test_quant_recipes.py
new file mode 100644
index 0000000..548837c
--- /dev/null
+++ b/tests/quantization/test_quant_recipes.py
@@ -0,0 +1,98 @@
+# coding=utf-8
+# Copyright 2026 The HuggingFace Team Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import unittest
+
+# Import through the public, NON-NEW call-site module to exercise the `from_recipe` wiring.
+from diffusers.quantizers import PipelineQuantizationConfig
+from diffusers.quantizers.quant_recipes import (
+ IDEOGRAM4_CFG_COMPONENTS,
+ IDEOGRAM4_NUM_LAYERS,
+ available_recipes,
+ ideogram4_protected_layers,
+ ideogram4_quant_recipe,
+)
+
+from ..testing_utils import require_torchao_version_greater_or_equal
+
+
+class Ideogram4ProtectedLayersTest(unittest.TestCase):
+ def test_protects_every_ffn_down_projection(self):
+ protected = ideogram4_protected_layers(num_layers=IDEOGRAM4_NUM_LAYERS)
+ for i in range(IDEOGRAM4_NUM_LAYERS):
+ self.assertIn(f"layers.{i}.feed_forward.w2", protected)
+ # The output projection is part of the small high-fragility set as well.
+ self.assertIn("final_layer.linear", protected)
+
+ def test_toggles_and_extra_layers(self):
+ only_extra = ideogram4_protected_layers(
+ num_layers=4, protect_ffn_down=False, protect_final_layer=False, extra=["llm_cond_proj"]
+ )
+ self.assertEqual(only_extra, ["llm_cond_proj"])
+
+ # No double counting and deterministic ordering.
+ protected = ideogram4_protected_layers(num_layers=2)
+ self.assertEqual(len(protected), len(set(protected)))
+ self.assertEqual(protected, sorted(protected))
+
+ def test_rejects_non_positive_layers(self):
+ with self.assertRaises(ValueError):
+ ideogram4_protected_layers(num_layers=0)
+
+
+class Ideogram4RecipeSpecTest(unittest.TestCase):
+ def test_recipe_targets_both_cfg_copies(self):
+ recipe = ideogram4_quant_recipe()
+ # Ideogram 4.0 ships two separate-weight copies for classifier-free guidance.
+ self.assertEqual(recipe.components, IDEOGRAM4_CFG_COMPONENTS)
+ self.assertEqual(recipe.quant_backend, "torchao")
+ self.assertEqual(recipe.precision, "int8-w8a8")
+
+ def test_recipe_protects_ffn_down_projections(self):
+ recipe = ideogram4_quant_recipe(num_layers=IDEOGRAM4_NUM_LAYERS)
+ protected = set(recipe.modules_to_not_convert)
+ expected = set(ideogram4_protected_layers(num_layers=IDEOGRAM4_NUM_LAYERS))
+ self.assertEqual(protected, expected)
+
+ def test_unsupported_precision_raises(self):
+ with self.assertRaises(ValueError):
+ ideogram4_quant_recipe(precision="gguf-q4_k")
+
+
+class PipelineQuantizationConfigFromRecipeTest(unittest.TestCase):
+ """Exercises the `PipelineQuantizationConfig.from_recipe` call-site wiring."""
+
+ def test_from_recipe_is_wired_to_registry(self):
+ self.assertIn("ideogram4-int8", available_recipes())
+
+ def test_unknown_recipe_raises_through_call_site(self):
+ # Reaching this error proves `from_recipe` delegates into `quant_recipes` (no backend needed).
+ with self.assertRaises(ValueError) as ctx:
+ PipelineQuantizationConfig.from_recipe("does-not-exist")
+ self.assertIn("Unknown quantization recipe", str(ctx.exception))
+
+ @require_torchao_version_greater_or_equal("0.15.0")
+ def test_from_recipe_builds_granular_torchao_config(self):
+ config = PipelineQuantizationConfig.from_recipe("ideogram4-int8")
+ self.assertIsInstance(config, PipelineQuantizationConfig)
+ self.assertTrue(config.is_granular)
+ self.assertEqual(set(config.quant_mapping), set(IDEOGRAM4_CFG_COMPONENTS))
+
+ expected_protected = set(ideogram4_protected_layers())
+ for component, component_config in config.quant_mapping.items():
+ self.assertEqual(set(component_config.modules_to_not_convert), expected_protected, component)
+
+
+if __name__ == "__main__":
+ unittest.main()
Recommended paper: Holding the FP8 Quality Ceiling at 8-Bit Weights and Activations: INT8 and GGUF Post-Training Quantization of Ideogram 4.0 for Consumer GPUs
Confidence: high (Remyx relevance 0.98)
Research interest: diffusers
License & code availability
🟡 No code repository surfaced — couldn't fetch a LICENSE to evaluate. Worth confirming the paper has an open release before investing in adoption.
(none detected)(class:no-code-link, compat: 0.30)Why this candidate (selected from the lookback pool)
Candidate [4]'s contribution — a validated INT8 W8A8 + GGUF Q4_K PTQ recipe specific to Ideogram 4.0's dual-transformer (CFG) layout — maps directly onto an existing call site: the Ideogram4Pipeline already loads
transformerandunconditional_transformer(the exact 'two separate-weight copies' the abstract names) throughfrom_pretrained(quantization_config=...), dispatching to the torchao and gguf backends the recipe targets. It anchors on the repo's most active investment thread (git log shows TorchAO safetensors huggingface#13719, AutoRound huggingface#13552, quant-config logging huggingface#13906/huggingface#13850), making it a clean model-specific quant-recipe addition rather than net-new infrastructure. Preferred over [2] because [2]'s fused INT8 GEMM kernel has no call site in diffusers — the GEMM lives in torchao, not this repo.Why this paper is interesting for the team
Continuing the iteration thread from 'Add Ideogram 4 Text-to-Image Model' and 'Add Structured Prompt Upsampling to Ideogram 4', the
diffusersteam would logically focus on broadening deployment and efficiency for this powerful model. This paper is a direct and crucial next step, providing a validated INT8 W8A8 post-training quantization recipe specifically for Ideogram 4.0 that preserves FP8 quality, alongside effective GGUF Q4_K quantization. This aligns with the team's 'Deepening Focus on Performance and Quantization,' enabling more efficient inference on consumer GPUs and informing the next set of optimizations for Ideogram 4 within the library.Suggested experiment
Apply the INT8 W8A8 recipe (per-channel weights, dynamic activations, SmoothQuant) and GGUF Q4_K quantization to the Ideogram 4.0 model in
diffusers. Evaluate generation quality on a 200-prompt benchmark using PickScore and CLIPScore, comparing it to FP8 and NF4 baselines to confirm quality preservation and memory footprint improvements.Proposed implementation
The coding agent wrote a working draft before the downgrade gate fired. Apply locally with
git applyafter saving the block below.Diff (349 lines)
Why the orchestrator opened an Issue instead of a PR
Diff Risk Score 0.99 exceeds the auto-land threshold (0.80)
A calibrated static-diff risk score placed this change in the high band, where RADAR mandates human review over auto-landing. The implementation is attached for a maintainer to land manually.
Diff Risk Score: 0.99 / high band (auto-land threshold 0.80)
Diff features scored
What else Outrider considered this run
4 other candidate(s) considered and rejected
2606.13501v1— GF-DiT: Scheduling Parallelism for Diffusion Transformer Serving2606.14317v1— CausalMotion: Structured Physical Reasoning as Keyframe and Trajectory Guidance for Training-Free Video Generation2606.14598v1— Realizing Native INT8 Compute for Diffusion Transformers on Consumer GPUs: A Fused INT8 GEMM Kernel for Ideogram 4.02606.13679v1— InterleaveThinker: Reinforcing Agentic Interleaved GenerationOpened by the Remyx Recommendation orchestrator. The diff's calibrated risk crossed the auto-land threshold — routed to Issue for human review per RADAR's risk-aware policy.
Reopen this Issue if you want Outrider to revisit this paper later. While it stays closed, the orchestrator will not re-recommend the same paper.