Coverage for transformer_lens/tools/model_registry/__init__.py: 100%
6 statements
« prev ^ index » next coverage.py v7.10.1, created at 2026-07-01 15:58 +0000
« prev ^ index » next coverage.py v7.10.1, created at 2026-07-01 15:58 +0000
1"""Model Registry tools for TransformerLens.
3This package provides tools for discovering and documenting HuggingFace models
4that are compatible with TransformerLens.
6Main modules:
7 - api: Public API for programmatic access to model registry data
8 - schemas: Data classes for model entries, architecture gaps, etc.
9 - verification: Verification tracking for model compatibility
10 - exceptions: Custom exceptions for the model registry
12Example usage:
13 >>> from transformer_lens.tools.model_registry import api # doctest: +SKIP
14 >>> api.is_model_supported("openai-community/gpt2") # doctest: +SKIP
15 True
16 >>> models = api.get_architecture_models("GPT2LMHeadModel") # doctest: +SKIP
17"""
19from .exceptions import (
20 ArchitectureNotSupportedError,
21 DataNotLoadedError,
22 DataValidationError,
23 ModelNotFoundError,
24 ModelRegistryError,
25)
26from .schemas import (
27 ArchitectureAnalysis,
28 ArchitectureGap,
29 ArchitectureGapsReport,
30 ArchitectureStats,
31 ModelEntry,
32 ModelMetadata,
33 ScanInfo,
34 SupportedModelsReport,
35)
36from .verification import VerificationHistory, VerificationRecord
# Canonical set of HuggingFace architecture class names supported by TransformerBridge.
# Each entry must match, character for character, a string found in an HF model's
# config.architectures[] and correspond to an adapter registered in
# architecture_adapter_factory.py.
#
# Deliberately excluded:
#   - Internal-only architectures (NanoGPT, MinGPT, NeelSoluOld, GPT2LMHeadCustomModel,
#     TransformerLensNative), since they never appear on HuggingFace Hub.
#   - Factory-internal alias casings (Gemma1, Neo, NeoX): they route to canonical
#     adapters, but HF reports the canonical names (Gemma, GPTNeo, GPTNeoX) in
#     config.architectures instead.
#
# NOTE(review): both "BaiChuanForCausalLM" and "BaichuanForCausalLM" are listed;
# presumably Hub configs use both casings -- confirm before deduplicating.
HF_SUPPORTED_ARCHITECTURES: set[str] = {
    "ApertusForCausalLM",
    "BaiChuanForCausalLM",
    "BaichuanForCausalLM",
    "BartForConditionalGeneration",
    "BertForMaskedLM",
    "BloomForCausalLM",
    "CodeGenForCausalLM",
    "CohereForCausalLM",
    "DeepseekV2ForCausalLM",
    "DeepseekV3ForCausalLM",
    "FalconForCausalLM",
    "GemmaForCausalLM",
    "Gemma2ForCausalLM",
    "Gemma3ForCausalLM",
    "Gemma3ForConditionalGeneration",
    "Gemma3nForConditionalGeneration",
    "Gemma4ForConditionalGeneration",
    "Gemma4UnifiedForConditionalGeneration",
    "Glm4MoeForCausalLM",
    "GlmMoeDsaForCausalLM",
    "GraniteForCausalLM",
    "GraniteMoeForCausalLM",
    "GraniteMoeHybridForCausalLM",
    "GPT2LMHeadModel",
    "GPTBigCodeForCausalLM",
    "GptOssForCausalLM",
    "GPTJForCausalLM",
    "GPTNeoForCausalLM",
    "OpenELMForCausalLM",
    "GPTNeoXForCausalLM",
    "HubertForCTC",
    "HubertModel",
    "HunYuanDenseV1ForCausalLM",
    "InternLM2ForCausalLM",
    "LlamaForCausalLM",
    "LlavaForConditionalGeneration",
    "LlavaNextForConditionalGeneration",
    "LlavaOnevisionForConditionalGeneration",
    "Lfm2MoeForCausalLM",
    "MambaForCausalLM",
    "Mamba2ForCausalLM",
    "NemotronHForCausalLM",
    "MPTForCausalLM",
    "MistralForCausalLM",
    "MixtralForCausalLM",
    "Olmo2ForCausalLM",
    "Olmo3ForCausalLM",
    "OlmoForCausalLM",
    "OlmoeForCausalLM",
    "OPTForCausalLM",
    "PhiForCausalLM",
    "Phi3ForCausalLM",
    "PhiMoEForCausalLM",
    "QwenForCausalLM",
    "Qwen2ForCausalLM",
    "Qwen3ForCausalLM",
    "Qwen3MoeForCausalLM",
    "Qwen3NextForCausalLM",
    "Qwen3_5ForCausalLM",
    "Qwen3_5ForConditionalGeneration",
    "SmolLM3ForCausalLM",
    "StableLmForCausalLM",
    "T5ForConditionalGeneration",
    "MT5ForConditionalGeneration",
    "T5GemmaForConditionalGeneration",
    "XGLMForCausalLM",
}
# Foundation-trained orgs per architecture, keyed by HF architecture class name.
# Source of truth for the scraper's download-threshold bypass and the docs
# table's "Canonical only" toggle.
#
# Keys mirror HF_SUPPORTED_ARCHITECTURES one-to-one; when adding an architecture
# there, add its canonical author list here as well.
CANONICAL_AUTHORS_BY_ARCH: dict[str, list[str]] = {
    "ApertusForCausalLM": ["swiss-ai"],
    "BaiChuanForCausalLM": ["baichuan-inc"],
    "BaichuanForCausalLM": ["baichuan-inc"],
    "BartForConditionalGeneration": ["facebook"],
    "BertForMaskedLM": ["google-bert"],
    "BloomForCausalLM": ["bigscience"],
    "CodeGenForCausalLM": ["Salesforce"],
    "CohereForCausalLM": ["CohereLabs"],
    "DeepseekV2ForCausalLM": ["deepseek-ai"],
    "DeepseekV3ForCausalLM": ["deepseek-ai"],
    "FalconForCausalLM": ["tiiuae"],
    "Gemma2ForCausalLM": ["google"],
    "Gemma3ForCausalLM": ["google"],
    "Gemma3ForConditionalGeneration": ["google"],
    "Gemma3nForConditionalGeneration": ["google"],
    "Gemma4ForConditionalGeneration": ["google"],
    "Gemma4UnifiedForConditionalGeneration": ["google"],
    "GemmaForCausalLM": ["google"],
    "Glm4MoeForCausalLM": ["zai-org"],
    "GlmMoeDsaForCausalLM": ["zai-org"],
    "GPT2LMHeadModel": ["openai-community", "stanford-crfm", "Writer"],
    "GPTBigCodeForCausalLM": ["bigcode"],
    "GptOssForCausalLM": ["openai"],
    "GPTJForCausalLM": ["EleutherAI", "togethercomputer"],
    "GPTNeoForCausalLM": ["EleutherAI", "roneneldan"],
    "GPTNeoXForCausalLM": ["EleutherAI", "cyberagent", "stabilityai", "togethercomputer"],
    "GraniteForCausalLM": ["ibm-granite"],
    "GraniteMoeForCausalLM": ["ibm-granite"],
    "GraniteMoeHybridForCausalLM": ["ibm-granite"],
    "HubertForCTC": ["facebook"],
    "HubertModel": ["facebook"],
    "HunYuanDenseV1ForCausalLM": ["tencent"],
    "InternLM2ForCausalLM": ["internlm"],
    "LlamaForCausalLM": ["meta-llama", "huggyllama", "codellama", "SimpleStories"],
    "LlavaForConditionalGeneration": ["llava-hf"],
    "LlavaNextForConditionalGeneration": ["llava-hf"],
    "LlavaOnevisionForConditionalGeneration": ["llava-hf"],
    "Lfm2MoeForCausalLM": ["LiquidAI"],
    "Mamba2ForCausalLM": ["state-spaces"],
    "MambaForCausalLM": ["state-spaces"],
    "NemotronHForCausalLM": ["nvidia"],
    "MistralForCausalLM": ["mistralai"],
    "MixtralForCausalLM": ["mistralai"],
    "MPTForCausalLM": ["mosaicml"],
    "MT5ForConditionalGeneration": ["google", "bigscience", "csebuetnlp"],
    "Olmo2ForCausalLM": ["allenai", "HPLT"],
    "Olmo3ForCausalLM": ["allenai"],
    "OlmoeForCausalLM": ["allenai"],
    "OlmoForCausalLM": ["allenai"],
    "OpenELMForCausalLM": ["apple"],
    "OPTForCausalLM": ["facebook"],
    "Phi3ForCausalLM": ["microsoft"],
    "PhiMoEForCausalLM": ["microsoft"],
    "PhiForCausalLM": ["microsoft"],
    "Qwen2ForCausalLM": ["Qwen", "nvidia"],
    "Qwen3ForCausalLM": ["Qwen", "nvidia"],
    "Qwen3MoeForCausalLM": ["Qwen"],
    "Qwen3NextForCausalLM": ["Qwen"],
    "Qwen3_5ForCausalLM": ["Qwen"],
    "Qwen3_5ForConditionalGeneration": ["Qwen"],
    "QwenForCausalLM": ["Qwen"],
    "SmolLM3ForCausalLM": ["HuggingFaceTB"],
    "StableLmForCausalLM": ["stabilityai"],
    "T5ForConditionalGeneration": ["google-t5", "google", "Salesforce", "MBZUAI"],
    "T5GemmaForConditionalGeneration": ["google"],
    "XGLMForCausalLM": ["facebook"],
}
# Public API of the model_registry package: the names bound by
# `from transformer_lens.tools.model_registry import *`.
# Every entry must be a name defined or imported at module level in this file.
__all__ = [
    # Constants
    "HF_SUPPORTED_ARCHITECTURES",
    "CANONICAL_AUTHORS_BY_ARCH",
    # Exceptions
    "ModelRegistryError",
    "ModelNotFoundError",
    "ArchitectureNotSupportedError",
    "DataNotLoadedError",
    "DataValidationError",
    # Schemas
    "ModelEntry",
    "ModelMetadata",
    "ScanInfo",
    "ArchitectureGap",
    "ArchitectureStats",
    "ArchitectureAnalysis",
    "SupportedModelsReport",
    "ArchitectureGapsReport",
    # Verification
    "VerificationRecord",
    "VerificationHistory",
]