Coverage for transformer_lens/tools/model_registry/__init__.py: 100%
6 statements
« prev ^ index » next coverage.py v7.10.1, created at 2026-06-09 00:32 +0000
"""Model Registry tools for TransformerLens.

This package provides tools for discovering and documenting HuggingFace models
that are compatible with TransformerLens.

Main modules:
    - api: Public API for programmatic access to model registry data
    - schemas: Data classes for model entries, architecture gaps, etc.
    - verification: Verification tracking for model compatibility
    - exceptions: Custom exceptions for the model registry

Example usage:
    >>> from transformer_lens.tools.model_registry import api  # doctest: +SKIP
    >>> api.is_model_supported("openai-community/gpt2")  # doctest: +SKIP
    True
    >>> models = api.get_architecture_models("GPT2LMHeadModel")  # doctest: +SKIP
"""
19from .exceptions import (
20 ArchitectureNotSupportedError,
21 DataNotLoadedError,
22 DataValidationError,
23 ModelNotFoundError,
24 ModelRegistryError,
25)
26from .schemas import (
27 ArchitectureAnalysis,
28 ArchitectureGap,
29 ArchitectureGapsReport,
30 ArchitectureStats,
31 ModelEntry,
32 ModelMetadata,
33 ScanInfo,
34 SupportedModelsReport,
35)
36from .verification import VerificationHistory, VerificationRecord
# Canonical set of HuggingFace architecture class names supported by TransformerBridge.
# These must match the exact strings found in HF model config.architectures[]
# and correspond to adapters registered in architecture_adapter_factory.py.
#
# Internal-only architectures (NanoGPT, MinGPT, NeelSoluOld, GPT2LMHeadCustomModel,
# TransformerLensNative) are excluded since they never appear on HuggingFace Hub.
# Factory-internal alias casings (Gemma1, Neo, NeoX) are also excluded since they
# route to canonical adapters but HF reports the canonical names (Gemma, GPTNeo,
# GPTNeoX) in config.architectures instead.
#
# Entries are listed in the same order as the keys of CANONICAL_AUTHORS_BY_ARCH
# so the two tables can be compared line by line. Membership is what matters
# here; a set has no meaningful iteration order.
HF_SUPPORTED_ARCHITECTURES: set[str] = {
    "ApertusForCausalLM",
    "BaiChuanForCausalLM",
    "BaichuanForCausalLM",
    "BertForMaskedLM",
    "BloomForCausalLM",
    "CodeGenForCausalLM",
    "CohereForCausalLM",
    "DeepseekV3ForCausalLM",
    "FalconForCausalLM",
    "Gemma2ForCausalLM",
    "Gemma3ForCausalLM",
    "Gemma3ForConditionalGeneration",
    "Gemma3nForConditionalGeneration",
    "GemmaForCausalLM",
    "GPT2LMHeadModel",
    "GPTBigCodeForCausalLM",
    "GptOssForCausalLM",
    "GPTJForCausalLM",
    "GPTNeoForCausalLM",
    "GPTNeoXForCausalLM",
    "GraniteForCausalLM",
    "GraniteMoeForCausalLM",
    "GraniteMoeHybridForCausalLM",
    "HubertForCTC",
    "HubertModel",
    "InternLM2ForCausalLM",
    "LlamaForCausalLM",
    "LlavaForConditionalGeneration",
    "LlavaNextForConditionalGeneration",
    "LlavaOnevisionForConditionalGeneration",
    "Mamba2ForCausalLM",
    "MambaForCausalLM",
    "MistralForCausalLM",
    "MixtralForCausalLM",
    "MPTForCausalLM",
    "MT5ForConditionalGeneration",
    "Olmo2ForCausalLM",
    "Olmo3ForCausalLM",
    "OlmoeForCausalLM",
    "OlmoForCausalLM",
    "OpenELMForCausalLM",
    "OPTForCausalLM",
    "Phi3ForCausalLM",
    "PhiForCausalLM",
    "Qwen2ForCausalLM",
    "Qwen3ForCausalLM",
    "Qwen3MoeForCausalLM",
    "Qwen3NextForCausalLM",
    "Qwen3_5ForCausalLM",
    "Qwen3_5ForConditionalGeneration",
    "QwenForCausalLM",
    "SmolLM3ForCausalLM",
    "StableLmForCausalLM",
    "T5ForConditionalGeneration",
    "XGLMForCausalLM",
}
# Foundation-trained orgs per architecture. Source of truth for the scraper's
# download-threshold bypass and the docs table's "Canonical only" toggle.
#
# Maps each HF architecture class name to the HuggingFace org/user names listed
# for it. Every key below is also a member of HF_SUPPORTED_ARCHITECTURES.
CANONICAL_AUTHORS_BY_ARCH: dict[str, list[str]] = {
    "ApertusForCausalLM": ["swiss-ai"],
    "BaiChuanForCausalLM": ["baichuan-inc"],
    "BaichuanForCausalLM": ["baichuan-inc"],
    "BertForMaskedLM": ["google-bert"],
    "BloomForCausalLM": ["bigscience"],
    "CodeGenForCausalLM": ["Salesforce"],
    "CohereForCausalLM": ["CohereLabs"],
    "DeepseekV3ForCausalLM": ["deepseek-ai"],
    "FalconForCausalLM": ["tiiuae"],
    "Gemma2ForCausalLM": ["google"],
    "Gemma3ForCausalLM": ["google"],
    "Gemma3ForConditionalGeneration": ["google"],
    "Gemma3nForConditionalGeneration": ["google"],
    "GemmaForCausalLM": ["google"],
    "GPT2LMHeadModel": ["openai-community", "stanford-crfm", "Writer"],
    "GPTBigCodeForCausalLM": ["bigcode"],
    "GptOssForCausalLM": ["openai"],
    "GPTJForCausalLM": ["EleutherAI", "togethercomputer"],
    "GPTNeoForCausalLM": ["EleutherAI", "roneneldan"],
    "GPTNeoXForCausalLM": ["EleutherAI", "cyberagent", "stabilityai", "togethercomputer"],
    "GraniteForCausalLM": ["ibm-granite"],
    "GraniteMoeForCausalLM": ["ibm-granite"],
    "GraniteMoeHybridForCausalLM": ["ibm-granite"],
    "HubertForCTC": ["facebook"],
    "HubertModel": ["facebook"],
    "InternLM2ForCausalLM": ["internlm"],
    "LlamaForCausalLM": ["meta-llama", "huggyllama", "codellama", "SimpleStories"],
    "LlavaForConditionalGeneration": ["llava-hf"],
    "LlavaNextForConditionalGeneration": ["llava-hf"],
    "LlavaOnevisionForConditionalGeneration": ["llava-hf"],
    "Mamba2ForCausalLM": ["state-spaces"],
    "MambaForCausalLM": ["state-spaces"],
    "MistralForCausalLM": ["mistralai"],
    "MixtralForCausalLM": ["mistralai"],
    "MPTForCausalLM": ["mosaicml"],
    "MT5ForConditionalGeneration": ["google", "bigscience", "csebuetnlp"],
    "Olmo2ForCausalLM": ["allenai", "HPLT"],
    "Olmo3ForCausalLM": ["allenai"],
    "OlmoeForCausalLM": ["allenai"],
    "OlmoForCausalLM": ["allenai"],
    "OpenELMForCausalLM": ["apple"],
    "OPTForCausalLM": ["facebook"],
    "Phi3ForCausalLM": ["microsoft"],
    "PhiForCausalLM": ["microsoft"],
    "Qwen2ForCausalLM": ["Qwen", "nvidia"],
    "Qwen3ForCausalLM": ["Qwen", "nvidia"],
    "Qwen3MoeForCausalLM": ["Qwen"],
    "Qwen3NextForCausalLM": ["Qwen"],
    "Qwen3_5ForCausalLM": ["Qwen"],
    "Qwen3_5ForConditionalGeneration": ["Qwen"],
    "QwenForCausalLM": ["Qwen"],
    "SmolLM3ForCausalLM": ["HuggingFaceTB"],
    "StableLmForCausalLM": ["stabilityai"],
    "T5ForConditionalGeneration": ["google-t5", "google", "Salesforce", "MBZUAI"],
    "XGLMForCausalLM": ["facebook"],
}
# Public API of the package: the names exported by
# ``from transformer_lens.tools.model_registry import *``.
__all__ = [
    # Constants
    "HF_SUPPORTED_ARCHITECTURES",
    "CANONICAL_AUTHORS_BY_ARCH",
    # Exceptions
    "ModelRegistryError",
    "ModelNotFoundError",
    "ArchitectureNotSupportedError",
    "DataNotLoadedError",
    "DataValidationError",
    # Schemas
    "ModelEntry",
    "ModelMetadata",
    "ScanInfo",
    "ArchitectureGap",
    "ArchitectureStats",
    "ArchitectureAnalysis",
    "SupportedModelsReport",
    "ArchitectureGapsReport",
    # Verification
    "VerificationRecord",
    "VerificationHistory",
]