Coverage for transformer_lens/supported_models.py: 100%
5 statements
« prev ^ index » next coverage.py v7.10.1, created at 2026-07-01 15:58 +0000
# HookedTransformer's model registry. If you are looking for TransformerBridge's supported models
# see transformer_lens/tools/model_registry/data/supported_models.json or the docs page at
# https://transformerlensorg.github.io/TransformerLens/generated/transformer_bridge_models.html
#
# NOTE: entry order is significant. DEFAULT_MODEL_ALIASES is built by iterating this list, so
# position i there corresponds to position i here. Append/insert with that in mind.
OFFICIAL_MODEL_NAMES: list[str] = [
    "01-ai/Yi-34B",
    "01-ai/Yi-34B-Chat",
    "01-ai/Yi-6B",
    "01-ai/Yi-6B-Chat",
    "ai-forever/mGPT",
    "allenai/OLMo-1B-hf",
    "allenai/OLMo-2-0425-1B",
    "allenai/OLMo-2-1124-7B",
    "allenai/Olmo-3-32B-Think",
    "allenai/Olmo-3-7B-Instruct",
    "allenai/Olmo-3-7B-Think",
    "allenai/Olmo-3.1-32B-Instruct",
    "allenai/Olmo-3.1-32B-Think",
    "allenai/OLMo-7B-hf",
    "allenai/OLMoE-1B-7B-0924",
    "ArthurConmy/redwood_attn_2l",
    "Baidicoot/Othello-GPT-Transformer-Lens",
    "bigcode/santacoder",
    "bigscience/bloom-1b1",
    "bigscience/bloom-1b7",
    "bigscience/bloom-3b",
    "bigscience/bloom-560m",
    "bigscience/bloom-7b1",
    "codellama/CodeLlama-7b-hf",
    "codellama/CodeLlama-7b-Instruct-hf",
    "codellama/CodeLlama-7b-Python-hf",
    "distilgpt2",
    "EleutherAI/gpt-j-6B",
    "EleutherAI/gpt-neo-1.3B",
    "EleutherAI/gpt-neo-125M",
    "EleutherAI/gpt-neo-2.7B",
    "EleutherAI/gpt-neox-20b",
    "EleutherAI/pythia-1.4b",
    "EleutherAI/pythia-1.4b-deduped",
    "EleutherAI/pythia-1.4b-deduped-v0",
    "EleutherAI/pythia-1.4b-v0",
    "EleutherAI/pythia-12b",
    "EleutherAI/pythia-12b-deduped",
    "EleutherAI/pythia-12b-deduped-v0",
    "EleutherAI/pythia-12b-v0",
    "EleutherAI/pythia-14m",
    "EleutherAI/pythia-160m",
    "EleutherAI/pythia-160m-deduped",
    "EleutherAI/pythia-160m-deduped-v0",
    "EleutherAI/pythia-160m-seed1",
    "EleutherAI/pythia-160m-seed2",
    "EleutherAI/pythia-160m-seed3",
    "EleutherAI/pythia-160m-v0",
    "EleutherAI/pythia-1b",
    "EleutherAI/pythia-1b-deduped",
    "EleutherAI/pythia-1b-deduped-v0",
    "EleutherAI/pythia-1b-v0",
    "EleutherAI/pythia-2.8b",
    "EleutherAI/pythia-2.8b-deduped",
    "EleutherAI/pythia-2.8b-deduped-v0",
    "EleutherAI/pythia-2.8b-v0",
    "EleutherAI/pythia-31m",
    "EleutherAI/pythia-410m",
    "EleutherAI/pythia-410m-deduped",
    "EleutherAI/pythia-410m-deduped-v0",
    "EleutherAI/pythia-410m-v0",
    "EleutherAI/pythia-6.9b",
    "EleutherAI/pythia-6.9b-deduped",
    "EleutherAI/pythia-6.9b-deduped-v0",
    "EleutherAI/pythia-6.9b-v0",
    "EleutherAI/pythia-70m",
    "EleutherAI/pythia-70m-deduped",
    "EleutherAI/pythia-70m-deduped-v0",
    "EleutherAI/pythia-70m-v0",
    "facebook/hubert-base-ls960",
    "facebook/opt-1.3b",
    "facebook/opt-125m",
    "facebook/opt-13b",
    "facebook/opt-2.7b",
    "facebook/opt-30b",
    "facebook/opt-6.7b",
    "facebook/opt-66b",
    "facebook/wav2vec2-base",
    "facebook/wav2vec2-large",
    "google-bert/bert-base-cased",
    "google-bert/bert-base-uncased",
    "google-bert/bert-large-cased",
    "google-bert/bert-large-uncased",
    "google-t5/t5-base",
    "google-t5/t5-large",
    "google-t5/t5-small",
    "google/gemma-2-27b",
    "google/gemma-2-27b-it",
    "google/gemma-2-2b",
    "google/gemma-2-2b-it",
    "google/gemma-2-9b",
    "google/gemma-2-9b-it",
    "google/gemma-2b",
    "google/gemma-2b-it",
    "google/gemma-3-12b-it",
    "google/gemma-3-12b-pt",
    "google/gemma-3-1b-it",
    "google/gemma-3-1b-pt",
    "google/gemma-3-270m",
    "google/gemma-3-270m-it",
    "google/gemma-3-27b-it",
    "google/gemma-3-27b-pt",
    "google/gemma-3-4b-it",
    "google/gemma-3-4b-pt",
    "google/gemma-7b",
    "google/gemma-7b-it",
    "google/medgemma-27b-it",
    "google/medgemma-27b-text-it",
    "google/medgemma-4b-it",
    "google/medgemma-4b-pt",
    "gpt2",
    "gpt2-large",
    "gpt2-medium",
    "gpt2-xl",
    "llama-13b-hf",
    "llama-30b-hf",
    "llama-65b-hf",
    "llama-7b-hf",
    "meta-llama/Llama-2-13b-chat-hf",
    "meta-llama/Llama-2-13b-hf",
    "meta-llama/Llama-2-70b-chat-hf",
    "meta-llama/Llama-2-7b-chat-hf",
    "meta-llama/Llama-2-7b-hf",
    "meta-llama/Llama-3.1-70B",
    "meta-llama/Llama-3.1-70B-Instruct",
    "meta-llama/Llama-3.1-8B",
    "meta-llama/Llama-3.1-8B-Instruct",
    "meta-llama/Llama-3.2-1B",
    "meta-llama/Llama-3.2-1B-Instruct",
    "meta-llama/Llama-3.2-3B",
    "meta-llama/Llama-3.2-3B-Instruct",
    "meta-llama/Llama-3.3-70B-Instruct",
    "meta-llama/Meta-Llama-3-70B",
    "meta-llama/Meta-Llama-3-70B-Instruct",
    "meta-llama/Meta-Llama-3-8B",
    "meta-llama/Meta-Llama-3-8B-Instruct",
    "microsoft/phi-1",
    "microsoft/phi-1_5",
    "microsoft/phi-2",
    "microsoft/Phi-3-mini-4k-instruct",
    "microsoft/phi-4",
    "mistralai/Mistral-7B-Instruct-v0.1",
    "mistralai/Mistral-7B-v0.1",
    "mistralai/Mistral-Nemo-Base-2407",
    "mistralai/Mistral-Small-24B-Base-2501",
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "mistralai/Mixtral-8x7B-v0.1",
    "NeelNanda/Attn-Only-2L512W-Shortformer-6B-big-lr",
    "NeelNanda/Attn_Only_1L512W_C4_Code",
    "NeelNanda/Attn_Only_2L512W_C4_Code",
    "NeelNanda/Attn_Only_3L512W_C4_Code",
    "NeelNanda/Attn_Only_4L512W_C4_Code",
    "NeelNanda/GELU_1L512W_C4_Code",
    "NeelNanda/GELU_2L512W_C4_Code",
    "NeelNanda/GELU_3L512W_C4_Code",
    "NeelNanda/GELU_4L512W_C4_Code",
    "NeelNanda/SoLU_10L1280W_C4_Code",
    "NeelNanda/SoLU_10L_v22_old",
    "NeelNanda/SoLU_12L1536W_C4_Code",
    "NeelNanda/SoLU_12L_v23_old",
    "NeelNanda/SoLU_1L512W_C4_Code",
    "NeelNanda/SoLU_1L512W_Wiki_Finetune",
    "NeelNanda/SoLU_1L_v9_old",
    "NeelNanda/SoLU_2L512W_C4_Code",
    "NeelNanda/SoLU_2L_v10_old",
    "NeelNanda/SoLU_3L512W_C4_Code",
    "NeelNanda/SoLU_4L512W_C4_Code",
    "NeelNanda/SoLU_4L512W_Wiki_Finetune",
    "NeelNanda/SoLU_4L_v11_old",
    "NeelNanda/SoLU_6L768W_C4_Code",
    "NeelNanda/SoLU_6L_v13_old",
    "NeelNanda/SoLU_8L1024W_C4_Code",
    "NeelNanda/SoLU_8L_v21_old",
    "openai/gpt-oss-20b",
    "Qwen/Qwen-14B",
    "Qwen/Qwen-14B-Chat",
    "Qwen/Qwen-1_8B",
    "Qwen/Qwen-1_8B-Chat",
    "Qwen/Qwen-7B",
    "Qwen/Qwen-7B-Chat",
    "Qwen/Qwen1.5-0.5B",
    "Qwen/Qwen1.5-0.5B-Chat",
    "Qwen/Qwen1.5-1.8B",
    "Qwen/Qwen1.5-1.8B-Chat",
    "Qwen/Qwen1.5-14B",
    "Qwen/Qwen1.5-14B-Chat",
    "Qwen/Qwen1.5-4B",
    "Qwen/Qwen1.5-4B-Chat",
    "Qwen/Qwen1.5-7B",
    "Qwen/Qwen1.5-7B-Chat",
    "Qwen/Qwen2-0.5B",
    "Qwen/Qwen2-0.5B-Instruct",
    "Qwen/Qwen2-1.5B",
    "Qwen/Qwen2-1.5B-Instruct",
    "Qwen/Qwen2-7B",
    "Qwen/Qwen2-7B-Instruct",
    "Qwen/Qwen2.5-0.5B",
    "Qwen/Qwen2.5-0.5B-Instruct",
    "Qwen/Qwen2.5-1.5B",
    "Qwen/Qwen2.5-1.5B-Instruct",
    "Qwen/Qwen2.5-14B",
    "Qwen/Qwen2.5-14B-Instruct",
    "Qwen/Qwen2.5-32B",
    "Qwen/Qwen2.5-32B-Instruct",
    "Qwen/Qwen2.5-3B",
    "Qwen/Qwen2.5-3B-Instruct",
    "Qwen/Qwen2.5-72B",
    "Qwen/Qwen2.5-72B-Instruct",
    "Qwen/Qwen2.5-7B",
    "Qwen/Qwen2.5-7B-Instruct",
    "Qwen/Qwen3-0.6B",
    "Qwen/Qwen3-0.6B-Base",
    "Qwen/Qwen3-1.7B",
    "Qwen/Qwen3-14B",
    "Qwen/Qwen3-4B",
    "Qwen/Qwen3-8B",
    "Qwen/QwQ-32B-Preview",
    "roneneldan/TinyStories-1Layer-21M",
    "roneneldan/TinyStories-1M",
    "roneneldan/TinyStories-28M",
    "roneneldan/TinyStories-2Layers-33M",
    "roneneldan/TinyStories-33M",
    "roneneldan/TinyStories-3M",
    "roneneldan/TinyStories-8M",
    "roneneldan/TinyStories-Instruct-1M",
    "roneneldan/TinyStories-Instruct-28M",
    "roneneldan/TinyStories-Instruct-2Layers-33M",
    "roneneldan/TinyStories-Instruct-33M",
    "roneneldan/TinyStories-Instruct-3M",
    "roneneldan/TinyStories-Instruct-8M",
    "roneneldan/TinyStories-Instuct-1Layer-21M",
    "stabilityai/stablelm-base-alpha-3b",
    "stabilityai/stablelm-base-alpha-7b",
    "stabilityai/stablelm-tuned-alpha-3b",
    "stabilityai/stablelm-tuned-alpha-7b",
    "stanford-crfm/alias-gpt2-small-x21",
    "stanford-crfm/arwen-gpt2-medium-x21",
    "stanford-crfm/battlestar-gpt2-small-x49",
    "stanford-crfm/beren-gpt2-medium-x49",
    "stanford-crfm/caprica-gpt2-small-x81",
    "stanford-crfm/celebrimbor-gpt2-medium-x81",
    "stanford-crfm/darkmatter-gpt2-small-x343",
    "stanford-crfm/durin-gpt2-medium-x343",
    "stanford-crfm/eowyn-gpt2-medium-x777",
    "stanford-crfm/expanse-gpt2-small-x777",
    "swiss-ai/Apertus-8B-2509",
    "swiss-ai/Apertus-8B-Instruct-2509",
]
"""Official model names for models on HuggingFace."""
# Model Aliases:
# Maps an official model name to the short names it may also be referred to by.
# The FIRST alias in each list is the one DEFAULT_MODEL_ALIASES picks as the model's default
# alias, so never reorder or replace index 0; add new aliases at the end of a list.
# Official names without an entry here simply have no aliases.
MODEL_ALIASES: dict[str, list[str]] = {
    "01-ai/Yi-34B": ["yi-34b", "Yi-34B"],
    "01-ai/Yi-34B-Chat": ["yi-34b-chat", "Yi-34B-Chat"],
    "01-ai/Yi-6B": ["yi-6b", "Yi-6B"],
    "01-ai/Yi-6B-Chat": ["yi-6b-chat", "Yi-6B-Chat"],
    "ai-forever/mGPT": ["mGPT"],
    "allenai/OLMo-1B-hf": ["olmo-1b"],
    "allenai/OLMo-2-0425-1B": ["olmo-2-1b"],
    "allenai/OLMo-2-1124-7B": ["olmo-2-7b"],
    "allenai/Olmo-3-32B-Think": ["olmo-3-32b-think"],
    "allenai/Olmo-3-7B-Instruct": ["olmo-3-7b-instruct"],
    "allenai/Olmo-3-7B-Think": ["olmo-3-7b-think"],
    "allenai/Olmo-3.1-32B-Instruct": ["olmo-3.1-32b-instruct"],
    "allenai/Olmo-3.1-32B-Think": ["olmo-3.1-32b-think"],
    "allenai/OLMo-7B-hf": ["olmo-7b"],
    "allenai/OLMoE-1B-7B-0924": ["olmoe"],
    "ArthurConmy/redwood_attn_2l": ["redwood_attn_2l"],
    "Baidicoot/Othello-GPT-Transformer-Lens": ["othello-gpt"],
    "bigcode/santacoder": ["santacoder"],
    "bigscience/bloom-1b1": ["bloom-1b1"],
    "bigscience/bloom-1b7": ["bloom-1b7"],
    "bigscience/bloom-3b": ["bloom-3b"],
    "bigscience/bloom-560m": ["bloom-560m"],
    "bigscience/bloom-7b1": ["bloom-7b1"],
    # NOTE(review): "CodeLlamallama-2-7b" looks like a typo, but it is this model's default
    # (first) alias, so it is kept unchanged for backward compatibility.
    "codellama/CodeLlama-7b-hf": ["CodeLlamallama-2-7b"],
    "codellama/CodeLlama-7b-Instruct-hf": ["CodeLlama-7b-instruct"],
    "codellama/CodeLlama-7b-Python-hf": ["CodeLlama-7b-python"],
    "distilgpt2": ["distillgpt2", "distill-gpt2", "distil-gpt2", "gpt2-xs"],
    "EleutherAI/gpt-j-6B": ["gpt-j-6B", "gpt-j", "gptj"],
    "EleutherAI/gpt-neo-1.3B": ["gpt-neo-1.3B", "gpt-neo-medium", "neo-medium"],
    "EleutherAI/gpt-neo-125M": ["gpt-neo-125M", "gpt-neo-small", "neo-small", "neo"],
    "EleutherAI/gpt-neo-2.7B": ["gpt-neo-2.7B", "gpt-neo-large", "neo-large"],
    "EleutherAI/gpt-neox-20b": ["gpt-neox-20b", "gpt-neox", "neox"],
    "EleutherAI/pythia-1.4b": ["pythia-1.4b", "EleutherAI/pythia-1.3b", "pythia-1.3b"],
    "EleutherAI/pythia-1.4b-deduped": [
        "pythia-1.4b-deduped",
        "EleutherAI/pythia-1.3b-deduped",
        "pythia-1.3b-deduped",
    ],
    "EleutherAI/pythia-1.4b-deduped-v0": [
        "pythia-1.4b-deduped-v0",
        "EleutherAI/pythia-1.3b-deduped-v0",
        "pythia-1.3b-deduped-v0",
    ],
    "EleutherAI/pythia-1.4b-v0": ["pythia-1.4b-v0", "EleutherAI/pythia-1.3b-v0", "pythia-1.3b-v0"],
    "EleutherAI/pythia-12b": ["pythia-12b", "EleutherAI/pythia-13b", "pythia-13b"],
    "EleutherAI/pythia-12b-deduped": [
        "pythia-12b-deduped",
        "EleutherAI/pythia-13b-deduped",
        "pythia-13b-deduped",
    ],
    "EleutherAI/pythia-12b-deduped-v0": [
        "pythia-12b-deduped-v0",
        "EleutherAI/pythia-13b-deduped-v0",
        "pythia-13b-deduped-v0",
    ],
    "EleutherAI/pythia-12b-v0": ["pythia-12b-v0", "EleutherAI/pythia-13b-v0", "pythia-13b-v0"],
    "EleutherAI/pythia-14m": ["pythia-14m"],
    "EleutherAI/pythia-160m": ["pythia-160m", "EleutherAI/pythia-125m", "pythia-125m"],
    "EleutherAI/pythia-160m-deduped": [
        "pythia-160m-deduped",
        "EleutherAI/pythia-125m-deduped",
        "pythia-125m-deduped",
    ],
    "EleutherAI/pythia-160m-deduped-v0": [
        "pythia-160m-deduped-v0",
        "EleutherAI/pythia-125m-deduped-v0",
        "pythia-125m-deduped-v0",
    ],
    "EleutherAI/pythia-160m-seed1": [
        "pythia-160m-seed1",
        "EleutherAI/pythia-125m-seed1",
        "pythia-125m-seed1",
    ],
    "EleutherAI/pythia-160m-seed2": [
        "pythia-160m-seed2",
        "EleutherAI/pythia-125m-seed2",
        "pythia-125m-seed2",
    ],
    "EleutherAI/pythia-160m-seed3": [
        "pythia-160m-seed3",
        "EleutherAI/pythia-125m-seed3",
        "pythia-125m-seed3",
    ],
    "EleutherAI/pythia-160m-v0": ["pythia-160m-v0", "EleutherAI/pythia-125m-v0", "pythia-125m-v0"],
    "EleutherAI/pythia-1b": ["pythia-1b", "EleutherAI/pythia-800m", "pythia-800m"],
    "EleutherAI/pythia-1b-deduped": [
        "pythia-1b-deduped",
        "EleutherAI/pythia-800m-deduped",
        "pythia-800m-deduped",
    ],
    "EleutherAI/pythia-1b-deduped-v0": [
        "pythia-1b-deduped-v0",
        "EleutherAI/pythia-800m-deduped-v0",
        "pythia-800m-deduped-v0",
    ],
    "EleutherAI/pythia-1b-v0": ["pythia-1b-v0", "EleutherAI/pythia-800m-v0", "pythia-800m-v0"],
    "EleutherAI/pythia-2.8b": ["pythia-2.8b", "EleutherAI/pythia-2.7b", "pythia-2.7b"],
    "EleutherAI/pythia-2.8b-deduped": [
        "pythia-2.8b-deduped",
        "EleutherAI/pythia-2.7b-deduped",
        "pythia-2.7b-deduped",
    ],
    "EleutherAI/pythia-2.8b-deduped-v0": [
        "pythia-2.8b-deduped-v0",
        "EleutherAI/pythia-2.7b-deduped-v0",
        "pythia-2.7b-deduped-v0",
    ],
    "EleutherAI/pythia-2.8b-v0": ["pythia-2.8b-v0", "EleutherAI/pythia-2.7b-v0", "pythia-2.7b-v0"],
    "EleutherAI/pythia-31m": ["pythia-31m"],
    "EleutherAI/pythia-410m": ["pythia-410m", "EleutherAI/pythia-350m", "pythia-350m"],
    "EleutherAI/pythia-410m-deduped": [
        "pythia-410m-deduped",
        "EleutherAI/pythia-350m-deduped",
        "pythia-350m-deduped",
    ],
    "EleutherAI/pythia-410m-deduped-v0": [
        "pythia-410m-deduped-v0",
        "EleutherAI/pythia-350m-deduped-v0",
        "pythia-350m-deduped-v0",
    ],
    "EleutherAI/pythia-410m-v0": ["pythia-410m-v0", "EleutherAI/pythia-350m-v0", "pythia-350m-v0"],
    "EleutherAI/pythia-6.9b": ["pythia-6.9b", "EleutherAI/pythia-6.7b", "pythia-6.7b"],
    "EleutherAI/pythia-6.9b-deduped": [
        "pythia-6.9b-deduped",
        "EleutherAI/pythia-6.7b-deduped",
        "pythia-6.7b-deduped",
    ],
    "EleutherAI/pythia-6.9b-deduped-v0": [
        "pythia-6.9b-deduped-v0",
        "EleutherAI/pythia-6.7b-deduped-v0",
        "pythia-6.7b-deduped-v0",
    ],
    "EleutherAI/pythia-6.9b-v0": ["pythia-6.9b-v0", "EleutherAI/pythia-6.7b-v0", "pythia-6.7b-v0"],
    "EleutherAI/pythia-70m": ["pythia-70m", "pythia", "EleutherAI/pythia-19m", "pythia-19m"],
    "EleutherAI/pythia-70m-deduped": [
        "pythia-70m-deduped",
        "EleutherAI/pythia-19m-deduped",
        "pythia-19m-deduped",
    ],
    "EleutherAI/pythia-70m-deduped-v0": [
        "pythia-70m-deduped-v0",
        "EleutherAI/pythia-19m-deduped-v0",
        "pythia-19m-deduped-v0",
    ],
    "EleutherAI/pythia-70m-v0": [
        "pythia-70m-v0",
        "pythia-v0",
        "EleutherAI/pythia-19m-v0",
        "pythia-19m-v0",
    ],
    "facebook/hubert-base-ls960": ["hubert-base-ls960"],
    "facebook/opt-1.3b": ["opt-1.3b", "opt-medium"],
    "facebook/opt-125m": ["opt-125m", "opt-small", "opt"],
    "facebook/opt-13b": ["opt-13b", "opt-xxl"],
    "facebook/opt-2.7b": ["opt-2.7b", "opt-large"],
    "facebook/opt-30b": ["opt-30b", "opt-xxxl"],
    "facebook/opt-6.7b": ["opt-6.7b", "opt-xl"],
    "facebook/opt-66b": ["opt-66b", "opt-xxxxl"],
    "facebook/wav2vec2-base": ["wav2vec2-base", "w2v2-base"],
    "facebook/wav2vec2-large": ["wav2vec2-large", "w2v2-large"],
    "google-bert/bert-base-cased": ["bert-base-cased"],
    "google-bert/bert-base-uncased": ["bert-base-uncased"],
    "google-bert/bert-large-cased": ["bert-large-cased"],
    "google-bert/bert-large-uncased": ["bert-large-uncased"],
    "google-t5/t5-base": ["t5-base"],
    "google-t5/t5-large": ["t5-large"],
    "google-t5/t5-small": ["t5-small"],
    "google/gemma-2-27b": ["gemma-2-27b"],
    "google/gemma-2-27b-it": ["gemma-2-27b-it"],
    "google/gemma-2-2b": ["gemma-2-2b"],
    "google/gemma-2-2b-it": ["gemma-2-2b-it"],
    "google/gemma-2-9b": ["gemma-2-9b"],
    "google/gemma-2-9b-it": ["gemma-2-9b-it"],
    "google/gemma-2b": ["gemma-2b"],
    "google/gemma-2b-it": ["gemma-2b-it"],
    "google/gemma-3-12b-it": ["gemma-3-12b-it"],
    "google/gemma-3-12b-pt": ["gemma-3-12b-pt"],
    "google/gemma-3-1b-it": ["gemma-3-1b-it"],
    "google/gemma-3-1b-pt": ["gemma-3-1b-pt"],
    "google/gemma-3-270m": ["gemma-3-270m"],
    "google/gemma-3-270m-it": ["gemma-3-270m-it"],
    "google/gemma-3-27b-it": ["gemma-3-27b-it"],
    "google/gemma-3-27b-pt": ["gemma-3-27b-pt"],
    "google/gemma-3-4b-it": ["gemma-3-4b-it"],
    "google/gemma-3-4b-pt": ["gemma-3-4b-pt"],
    "google/gemma-7b": ["gemma-7b"],
    "google/gemma-7b-it": ["gemma-7b-it"],
    "google/medgemma-27b-it": ["medgemma-27b-it"],
    "google/medgemma-27b-text-it": ["medgemma-27b-text-it"],
    "google/medgemma-4b-it": ["medgemma-4b-it"],
    "google/medgemma-4b-pt": ["medgemma-4b-pt"],
    "gpt2": ["gpt2-small"],
    "llama-13b-hf": ["llama-13b"],
    "llama-30b-hf": ["llama-30b"],
    "llama-65b-hf": ["llama-65b"],
    "llama-7b-hf": ["llama-7b"],
    "meta-llama/Llama-2-13b-chat-hf": ["Llama-2-13b-chat"],
    "meta-llama/Llama-2-13b-hf": ["Llama-2-13b"],
    "meta-llama/Llama-2-70b-chat-hf": ["Llama-2-70b-chat", "meta-llama-2-70b-chat-hf"],
    "meta-llama/Llama-2-7b-chat-hf": ["Llama-2-7b-chat"],
    "meta-llama/Llama-2-7b-hf": ["Llama-2-7b"],
    "microsoft/phi-1": ["phi-1"],
    "microsoft/phi-1_5": ["phi-1_5"],
    "microsoft/phi-2": ["phi-2"],
    "microsoft/Phi-3-mini-4k-instruct": ["phi-3"],
    "microsoft/phi-4": ["phi-4"],
    "mistralai/Mistral-7B-Instruct-v0.1": ["mistral-7b-instruct"],
    "mistralai/Mistral-7B-v0.1": ["mistral-7b"],
    "mistralai/Mistral-Nemo-Base-2407": ["mistral-nemo-base-2407"],
    "mistralai/Mixtral-8x7B-Instruct-v0.1": ["mixtral-instruct", "mixtral-8x7b-instruct"],
    "mistralai/Mixtral-8x7B-v0.1": ["mixtral", "mixtral-8x7b"],
    "NeelNanda/Attn-Only-2L512W-Shortformer-6B-big-lr": [
        "attn-only-2l-demo",
        "attn-only-2l-shortformer-6b-big-lr",
        "attn-only-2l-induction-demo",
        "attn-only-demo",
    ],
    "NeelNanda/Attn_Only_1L512W_C4_Code": [
        "attn-only-1l",
        "attn-only-1l-new",
        "attn-only-1l-c4-code",
    ],
    "NeelNanda/Attn_Only_2L512W_C4_Code": [
        "attn-only-2l",
        "attn-only-2l-new",
        "attn-only-2l-c4-code",
    ],
    "NeelNanda/Attn_Only_3L512W_C4_Code": [
        "attn-only-3l",
        "attn-only-3l-new",
        "attn-only-3l-c4-code",
    ],
    "NeelNanda/Attn_Only_4L512W_C4_Code": [
        "attn-only-4l",
        "attn-only-4l-new",
        "attn-only-4l-c4-code",
    ],
    "NeelNanda/GELU_1L512W_C4_Code": ["gelu-1l", "gelu-1l-new", "gelu-1l-c4-code"],
    "NeelNanda/GELU_2L512W_C4_Code": ["gelu-2l", "gelu-2l-new", "gelu-2l-c4-code"],
    "NeelNanda/GELU_3L512W_C4_Code": ["gelu-3l", "gelu-3l-new", "gelu-3l-c4-code"],
    "NeelNanda/GELU_4L512W_C4_Code": ["gelu-4l", "gelu-4l-new", "gelu-4l-c4-code"],
    "NeelNanda/SoLU_10L1280W_C4_Code": ["solu-10l", "solu-10l-new", "solu-10l-c4-code"],
    "NeelNanda/SoLU_10L_v22_old": ["solu-10l-pile", "solu-10l-old"],
    "NeelNanda/SoLU_12L1536W_C4_Code": ["solu-12l", "solu-12l-new", "solu-12l-c4-code"],
    "NeelNanda/SoLU_12L_v23_old": ["solu-12l-pile", "solu-12l-old"],
    "NeelNanda/SoLU_1L512W_C4_Code": ["solu-1l", "solu-1l-new", "solu-1l-c4-code"],
    "NeelNanda/SoLU_1L512W_Wiki_Finetune": [
        "solu-1l-wiki",
        "solu-1l-wiki-finetune",
        "solu-1l-finetune",
    ],
    "NeelNanda/SoLU_1L_v9_old": ["solu-1l-pile", "solu-1l-old"],
    "NeelNanda/SoLU_2L512W_C4_Code": ["solu-2l", "solu-2l-new", "solu-2l-c4-code"],
    "NeelNanda/SoLU_2L_v10_old": ["solu-2l-pile", "solu-2l-old"],
    "NeelNanda/SoLU_3L512W_C4_Code": ["solu-3l", "solu-3l-new", "solu-3l-c4-code"],
    "NeelNanda/SoLU_4L512W_C4_Code": ["solu-4l", "solu-4l-new", "solu-4l-c4-code"],
    "NeelNanda/SoLU_4L512W_Wiki_Finetune": [
        "solu-4l-wiki",
        "solu-4l-wiki-finetune",
        "solu-4l-finetune",
    ],
    "NeelNanda/SoLU_4L_v11_old": ["solu-4l-pile", "solu-4l-old"],
    "NeelNanda/SoLU_6L768W_C4_Code": ["solu-6l", "solu-6l-new", "solu-6l-c4-code"],
    "NeelNanda/SoLU_6L_v13_old": ["solu-6l-pile", "solu-6l-old"],
    "NeelNanda/SoLU_8L1024W_C4_Code": ["solu-8l", "solu-8l-new", "solu-8l-c4-code"],
    "NeelNanda/SoLU_8L_v21_old": ["solu-8l-pile", "solu-8l-old"],
    "openai/gpt-oss-20b": ["gpt-oss-20b", "gpt-oss"],
    "Qwen/Qwen-14B": ["qwen-14b"],
    "Qwen/Qwen-14B-Chat": ["qwen-14b-chat"],
    "Qwen/Qwen-1_8B": ["qwen-1.8b"],
    "Qwen/Qwen-1_8B-Chat": ["qwen-1.8b-chat"],
    "Qwen/Qwen-7B": ["qwen-7b"],
    "Qwen/Qwen-7B-Chat": ["qwen-7b-chat"],
    "Qwen/Qwen1.5-0.5B": ["qwen1.5-0.5b"],
    "Qwen/Qwen1.5-0.5B-Chat": ["qwen1.5-0.5b-chat"],
    "Qwen/Qwen1.5-1.8B": ["qwen1.5-1.8b"],
    "Qwen/Qwen1.5-1.8B-Chat": ["qwen1.5-1.8b-chat"],
    "Qwen/Qwen1.5-14B": ["qwen1.5-14b"],
    "Qwen/Qwen1.5-14B-Chat": ["qwen1.5-14b-chat"],
    "Qwen/Qwen1.5-4B": ["qwen1.5-4b"],
    "Qwen/Qwen1.5-4B-Chat": ["qwen1.5-4b-chat"],
    "Qwen/Qwen1.5-7B": ["qwen1.5-7b"],
    "Qwen/Qwen1.5-7B-Chat": ["qwen1.5-7b-chat"],
    "Qwen/Qwen2-0.5B": ["qwen2-0.5b"],
    "Qwen/Qwen2-0.5B-Instruct": ["qwen2-0.5b-instruct"],
    "Qwen/Qwen2-1.5B": ["qwen2-1.5b"],
    "Qwen/Qwen2-1.5B-Instruct": ["qwen2-1.5b-instruct"],
    "Qwen/Qwen2-7B": ["qwen2-7b"],
    "Qwen/Qwen2-7B-Instruct": ["qwen2-7b-instruct"],
    "Qwen/Qwen2.5-0.5B": ["qwen2.5-0.5b"],
    "Qwen/Qwen2.5-0.5B-Instruct": ["qwen2.5-0.5b-instruct"],
    "Qwen/Qwen2.5-1.5B": ["qwen2.5-1.5b"],
    "Qwen/Qwen2.5-1.5B-Instruct": ["qwen2.5-1.5b-instruct"],
    "Qwen/Qwen2.5-14B": ["qwen2.5-14b"],
    "Qwen/Qwen2.5-14B-Instruct": ["qwen2.5-14b-instruct"],
    "Qwen/Qwen2.5-32B": ["qwen2.5-32b"],
    "Qwen/Qwen2.5-32B-Instruct": ["qwen2.5-32b-instruct"],
    "Qwen/Qwen2.5-3B": ["qwen2.5-3b"],
    "Qwen/Qwen2.5-3B-Instruct": ["qwen2.5-3b-instruct"],
    "Qwen/Qwen2.5-72B": ["qwen2.5-72b"],
    "Qwen/Qwen2.5-72B-Instruct": ["qwen2.5-72b-instruct"],
    "Qwen/Qwen2.5-7B": ["qwen2.5-7b"],
    "Qwen/Qwen2.5-7B-Instruct": ["qwen2.5-7b-instruct"],
    "Qwen/Qwen3-0.6B": ["qwen3-0.6b"],
    "Qwen/Qwen3-0.6B-Base": ["qwen3-0.6b-base"],
    "Qwen/Qwen3-1.7B": ["qwen3-1.7b"],
    "Qwen/Qwen3-14B": ["qwen3-14b"],
    "Qwen/Qwen3-4B": ["qwen3-4b"],
    "Qwen/Qwen3-8B": ["qwen3-8b"],
    "Qwen/QwQ-32B-Preview": ["qwen-32b-preview"],
    "roneneldan/TinyStories-1Layer-21M": ["tiny-stories-1L-21M"],
    "roneneldan/TinyStories-1M": ["tiny-stories-1M"],
    "roneneldan/TinyStories-28M": ["tiny-stories-28M"],
    "roneneldan/TinyStories-2Layers-33M": ["tiny-stories-2L-33M"],
    "roneneldan/TinyStories-33M": ["tiny-stories-33M"],
    "roneneldan/TinyStories-3M": ["tiny-stories-3M"],
    "roneneldan/TinyStories-8M": ["tiny-stories-8M"],
    "roneneldan/TinyStories-Instruct-1M": ["tiny-stories-instruct-1M"],
    "roneneldan/TinyStories-Instruct-28M": ["tiny-stories-instruct-28M"],
    "roneneldan/TinyStories-Instruct-2Layers-33M": ["tiny-stories-instruct-2L-33M"],
    "roneneldan/TinyStories-Instruct-33M": ["tiny-stories-instruct-33M"],
    "roneneldan/TinyStories-Instruct-3M": ["tiny-stories-instruct-3M"],
    "roneneldan/TinyStories-Instruct-8M": ["tiny-stories-instruct-8M"],
    "roneneldan/TinyStories-Instuct-1Layer-21M": ["tiny-stories-instruct-1L-21M"],
    "stabilityai/stablelm-base-alpha-3b": ["stablelm-base-alpha-3b", "stablelm-base-3b"],
    "stabilityai/stablelm-base-alpha-7b": ["stablelm-base-alpha-7b", "stablelm-base-7b"],
    "stabilityai/stablelm-tuned-alpha-3b": ["stablelm-tuned-alpha-3b", "stablelm-tuned-3b"],
    "stabilityai/stablelm-tuned-alpha-7b": ["stablelm-tuned-alpha-7b", "stablelm-tuned-7b"],
    # Stanford CRFM GPT-2 checkpoints. Where the fourth alias used to repeat the third, it now
    # follows the "gpt2-stanford-<size>-<letter>" pattern the sibling entries already use.
    "stanford-crfm/alias-gpt2-small-x21": [
        "stanford-gpt2-small-a",
        "alias-gpt2-small-x21",
        "gpt2-mistral-small-a",
        "gpt2-stanford-small-a",
    ],
    "stanford-crfm/arwen-gpt2-medium-x21": [
        "stanford-gpt2-medium-a",
        "arwen-gpt2-medium-x21",
        "gpt2-medium-small-a",
        "gpt2-stanford-medium-a",
    ],
    "stanford-crfm/battlestar-gpt2-small-x49": [
        "stanford-gpt2-small-b",
        "battlestar-gpt2-small-x49",
        "gpt2-mistral-small-b",
        "gpt2-stanford-small-b",
    ],
    "stanford-crfm/beren-gpt2-medium-x49": [
        "stanford-gpt2-medium-b",
        "beren-gpt2-medium-x49",
        "gpt2-medium-small-b",
        "gpt2-stanford-medium-b",
    ],
    "stanford-crfm/caprica-gpt2-small-x81": [
        "stanford-gpt2-small-c",
        "caprica-gpt2-small-x81",
        "gpt2-mistral-small-c",
        "gpt2-stanford-small-c",
    ],
    "stanford-crfm/celebrimbor-gpt2-medium-x81": [
        "stanford-gpt2-medium-c",
        "celebrimbor-gpt2-medium-x81",
        "gpt2-medium-small-c",
        "gpt2-stanford-medium-c",
    ],
    "stanford-crfm/darkmatter-gpt2-small-x343": [
        "stanford-gpt2-small-d",
        "darkmatter-gpt2-small-x343",
        "gpt2-mistral-small-d",
        "gpt2-stanford-small-d",
    ],
    "stanford-crfm/durin-gpt2-medium-x343": [
        "stanford-gpt2-medium-d",
        "durin-gpt2-medium-x343",
        "gpt2-medium-small-d",
        "gpt2-stanford-medium-d",
    ],
    "stanford-crfm/eowyn-gpt2-medium-x777": [
        "stanford-gpt2-medium-e",
        "eowyn-gpt2-medium-x777",
        "gpt2-medium-small-e",
        "gpt2-stanford-medium-e",
    ],
    "stanford-crfm/expanse-gpt2-small-x777": [
        "stanford-gpt2-small-e",
        "expanse-gpt2-small-x777",
        "gpt2-mistral-small-e",
        "gpt2-stanford-small-e",
    ],
    "swiss-ai/Apertus-8B-2509": ["apertus-8b", "apertus"],
    "swiss-ai/Apertus-8B-Instruct-2509": ["apertus-8b-instruct", "apertus-instruct"],
}
"""Model aliases for models on HuggingFace."""
# Sets a default model alias for every official model name, in OFFICIAL_MODEL_NAMES order:
# by convention the first entry in the model's MODEL_ALIASES list, else the official name
# itself when the model has no aliases. A missing entry and an empty alias list are treated
# the same way, so an empty list can never raise IndexError at import time.
DEFAULT_MODEL_ALIASES: list[str] = [
    (MODEL_ALIASES.get(name) or [name])[0] for name in OFFICIAL_MODEL_NAMES
]