Coverage for transformer_lens/supported_models.py: 100%
5 statements
« prev ^ index » next coverage.py v7.10.1, created at 2026-04-30 01:33 +0000
# Canonical HuggingFace model identifiers that TransformerLens supports.
# The list is kept in case-insensitive alphabetical order -- preserve that
# ordering when adding new entries.
OFFICIAL_MODEL_NAMES: list[str] = [
    "01-ai/Yi-34B",
    "01-ai/Yi-34B-Chat",
    "01-ai/Yi-6B",
    "01-ai/Yi-6B-Chat",
    "ai-forever/mGPT",
    "allenai/OLMo-1B-hf",
    "allenai/OLMo-2-0425-1B",
    "allenai/OLMo-2-1124-7B",
    "allenai/Olmo-3-32B-Think",
    "allenai/Olmo-3-7B-Instruct",
    "allenai/Olmo-3-7B-Think",
    "allenai/Olmo-3.1-32B-Instruct",
    "allenai/Olmo-3.1-32B-Think",
    "allenai/OLMo-7B-hf",
    "allenai/OLMoE-1B-7B-0924",
    "ArthurConmy/redwood_attn_2l",
    "Baidicoot/Othello-GPT-Transformer-Lens",
    "bigcode/santacoder",
    "bigscience/bloom-1b1",
    "bigscience/bloom-1b7",
    "bigscience/bloom-3b",
    "bigscience/bloom-560m",
    "bigscience/bloom-7b1",
    "codellama/CodeLlama-7b-hf",
    "codellama/CodeLlama-7b-Instruct-hf",
    "codellama/CodeLlama-7b-Python-hf",
    "distilgpt2",
    "EleutherAI/gpt-j-6B",
    "EleutherAI/gpt-neo-1.3B",
    "EleutherAI/gpt-neo-125M",
    "EleutherAI/gpt-neo-2.7B",
    "EleutherAI/gpt-neox-20b",
    "EleutherAI/pythia-1.4b",
    "EleutherAI/pythia-1.4b-deduped",
    "EleutherAI/pythia-1.4b-deduped-v0",
    "EleutherAI/pythia-1.4b-v0",
    "EleutherAI/pythia-12b",
    "EleutherAI/pythia-12b-deduped",
    "EleutherAI/pythia-12b-deduped-v0",
    "EleutherAI/pythia-12b-v0",
    "EleutherAI/pythia-14m",
    "EleutherAI/pythia-160m",
    "EleutherAI/pythia-160m-deduped",
    "EleutherAI/pythia-160m-deduped-v0",
    "EleutherAI/pythia-160m-seed1",
    "EleutherAI/pythia-160m-seed2",
    "EleutherAI/pythia-160m-seed3",
    "EleutherAI/pythia-160m-v0",
    "EleutherAI/pythia-1b",
    "EleutherAI/pythia-1b-deduped",
    "EleutherAI/pythia-1b-deduped-v0",
    "EleutherAI/pythia-1b-v0",
    "EleutherAI/pythia-2.8b",
    "EleutherAI/pythia-2.8b-deduped",
    "EleutherAI/pythia-2.8b-deduped-v0",
    "EleutherAI/pythia-2.8b-v0",
    "EleutherAI/pythia-31m",
    "EleutherAI/pythia-410m",
    "EleutherAI/pythia-410m-deduped",
    "EleutherAI/pythia-410m-deduped-v0",
    "EleutherAI/pythia-410m-v0",
    "EleutherAI/pythia-6.9b",
    "EleutherAI/pythia-6.9b-deduped",
    "EleutherAI/pythia-6.9b-deduped-v0",
    "EleutherAI/pythia-6.9b-v0",
    "EleutherAI/pythia-70m",
    "EleutherAI/pythia-70m-deduped",
    "EleutherAI/pythia-70m-deduped-v0",
    "EleutherAI/pythia-70m-v0",
    "facebook/hubert-base-ls960",
    "facebook/opt-1.3b",
    "facebook/opt-125m",
    "facebook/opt-13b",
    "facebook/opt-2.7b",
    "facebook/opt-30b",
    "facebook/opt-6.7b",
    "facebook/opt-66b",
    "facebook/wav2vec2-base",
    "facebook/wav2vec2-large",
    "google-bert/bert-base-cased",
    "google-bert/bert-base-uncased",
    "google-bert/bert-large-cased",
    "google-bert/bert-large-uncased",
    "google-t5/t5-base",
    "google-t5/t5-large",
    "google-t5/t5-small",
    "google/gemma-2-27b",
    "google/gemma-2-27b-it",
    "google/gemma-2-2b",
    "google/gemma-2-2b-it",
    "google/gemma-2-9b",
    "google/gemma-2-9b-it",
    "google/gemma-2b",
    "google/gemma-2b-it",
    "google/gemma-3-12b-it",
    "google/gemma-3-12b-pt",
    "google/gemma-3-1b-it",
    "google/gemma-3-1b-pt",
    "google/gemma-3-270m",
    "google/gemma-3-270m-it",
    "google/gemma-3-27b-it",
    "google/gemma-3-27b-pt",
    "google/gemma-3-4b-it",
    "google/gemma-3-4b-pt",
    "google/gemma-7b",
    "google/gemma-7b-it",
    "google/medgemma-27b-it",
    "google/medgemma-27b-text-it",
    "google/medgemma-4b-it",
    "google/medgemma-4b-pt",
    "gpt2",
    "gpt2-large",
    "gpt2-medium",
    "gpt2-xl",
    "llama-13b-hf",
    "llama-30b-hf",
    "llama-65b-hf",
    "llama-7b-hf",
    "meta-llama/Llama-2-13b-chat-hf",
    "meta-llama/Llama-2-13b-hf",
    "meta-llama/Llama-2-70b-chat-hf",
    "meta-llama/Llama-2-7b-chat-hf",
    "meta-llama/Llama-2-7b-hf",
    "meta-llama/Llama-3.1-70B",
    "meta-llama/Llama-3.1-70B-Instruct",
    "meta-llama/Llama-3.1-8B",
    "meta-llama/Llama-3.1-8B-Instruct",
    "meta-llama/Llama-3.2-1B",
    "meta-llama/Llama-3.2-1B-Instruct",
    "meta-llama/Llama-3.2-3B",
    "meta-llama/Llama-3.2-3B-Instruct",
    "meta-llama/Llama-3.3-70B-Instruct",
    "meta-llama/Meta-Llama-3-70B",
    "meta-llama/Meta-Llama-3-70B-Instruct",
    "meta-llama/Meta-Llama-3-8B",
    "meta-llama/Meta-Llama-3-8B-Instruct",
    "microsoft/phi-1",
    "microsoft/phi-1_5",
    "microsoft/phi-2",
    "microsoft/Phi-3-mini-4k-instruct",
    "microsoft/phi-4",
    "mistralai/Mistral-7B-Instruct-v0.1",
    "mistralai/Mistral-7B-v0.1",
    "mistralai/Mistral-Nemo-Base-2407",
    "mistralai/Mistral-Small-24B-Base-2501",
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "mistralai/Mixtral-8x7B-v0.1",
    "NeelNanda/Attn-Only-2L512W-Shortformer-6B-big-lr",
    "NeelNanda/Attn_Only_1L512W_C4_Code",
    "NeelNanda/Attn_Only_2L512W_C4_Code",
    "NeelNanda/Attn_Only_3L512W_C4_Code",
    "NeelNanda/Attn_Only_4L512W_C4_Code",
    "NeelNanda/GELU_1L512W_C4_Code",
    "NeelNanda/GELU_2L512W_C4_Code",
    "NeelNanda/GELU_3L512W_C4_Code",
    "NeelNanda/GELU_4L512W_C4_Code",
    "NeelNanda/SoLU_10L1280W_C4_Code",
    "NeelNanda/SoLU_10L_v22_old",
    "NeelNanda/SoLU_12L1536W_C4_Code",
    "NeelNanda/SoLU_12L_v23_old",
    "NeelNanda/SoLU_1L512W_C4_Code",
    "NeelNanda/SoLU_1L512W_Wiki_Finetune",
    "NeelNanda/SoLU_1L_v9_old",
    "NeelNanda/SoLU_2L512W_C4_Code",
    "NeelNanda/SoLU_2L_v10_old",
    "NeelNanda/SoLU_3L512W_C4_Code",
    "NeelNanda/SoLU_4L512W_C4_Code",
    "NeelNanda/SoLU_4L512W_Wiki_Finetune",
    "NeelNanda/SoLU_4L_v11_old",
    "NeelNanda/SoLU_6L768W_C4_Code",
    "NeelNanda/SoLU_6L_v13_old",
    "NeelNanda/SoLU_8L1024W_C4_Code",
    "NeelNanda/SoLU_8L_v21_old",
    "openai/gpt-oss-20b",
    "Qwen/Qwen-14B",
    "Qwen/Qwen-14B-Chat",
    "Qwen/Qwen-1_8B",
    "Qwen/Qwen-1_8B-Chat",
    "Qwen/Qwen-7B",
    "Qwen/Qwen-7B-Chat",
    "Qwen/Qwen1.5-0.5B",
    "Qwen/Qwen1.5-0.5B-Chat",
    "Qwen/Qwen1.5-1.8B",
    "Qwen/Qwen1.5-1.8B-Chat",
    "Qwen/Qwen1.5-14B",
    "Qwen/Qwen1.5-14B-Chat",
    "Qwen/Qwen1.5-4B",
    "Qwen/Qwen1.5-4B-Chat",
    "Qwen/Qwen1.5-7B",
    "Qwen/Qwen1.5-7B-Chat",
    "Qwen/Qwen2-0.5B",
    "Qwen/Qwen2-0.5B-Instruct",
    "Qwen/Qwen2-1.5B",
    "Qwen/Qwen2-1.5B-Instruct",
    "Qwen/Qwen2-7B",
    "Qwen/Qwen2-7B-Instruct",
    "Qwen/Qwen2.5-0.5B",
    "Qwen/Qwen2.5-0.5B-Instruct",
    "Qwen/Qwen2.5-1.5B",
    "Qwen/Qwen2.5-1.5B-Instruct",
    "Qwen/Qwen2.5-14B",
    "Qwen/Qwen2.5-14B-Instruct",
    "Qwen/Qwen2.5-32B",
    "Qwen/Qwen2.5-32B-Instruct",
    "Qwen/Qwen2.5-3B",
    "Qwen/Qwen2.5-3B-Instruct",
    "Qwen/Qwen2.5-72B",
    "Qwen/Qwen2.5-72B-Instruct",
    "Qwen/Qwen2.5-7B",
    "Qwen/Qwen2.5-7B-Instruct",
    "Qwen/Qwen3-0.6B",
    "Qwen/Qwen3-0.6B-Base",
    "Qwen/Qwen3-1.7B",
    "Qwen/Qwen3-14B",
    "Qwen/Qwen3-4B",
    "Qwen/Qwen3-8B",
    "Qwen/QwQ-32B-Preview",
    "roneneldan/TinyStories-1Layer-21M",
    "roneneldan/TinyStories-1M",
    "roneneldan/TinyStories-28M",
    "roneneldan/TinyStories-2Layers-33M",
    "roneneldan/TinyStories-33M",
    "roneneldan/TinyStories-3M",
    "roneneldan/TinyStories-8M",
    "roneneldan/TinyStories-Instruct-1M",
    "roneneldan/TinyStories-Instruct-28M",
    "roneneldan/TinyStories-Instruct-2Layers-33M",
    "roneneldan/TinyStories-Instruct-33M",
    "roneneldan/TinyStories-Instruct-3M",
    "roneneldan/TinyStories-Instruct-8M",
    # NOTE(review): "Instuct" below looks like a typo, but it presumably
    # mirrors the actual upstream HuggingFace repo id -- confirm before "fixing".
    "roneneldan/TinyStories-Instuct-1Layer-21M",
    "stabilityai/stablelm-base-alpha-3b",
    "stabilityai/stablelm-base-alpha-7b",
    "stabilityai/stablelm-tuned-alpha-3b",
    "stabilityai/stablelm-tuned-alpha-7b",
    "stanford-crfm/alias-gpt2-small-x21",
    "stanford-crfm/arwen-gpt2-medium-x21",
    "stanford-crfm/battlestar-gpt2-small-x49",
    "stanford-crfm/beren-gpt2-medium-x49",
    "stanford-crfm/caprica-gpt2-small-x81",
    "stanford-crfm/celebrimbor-gpt2-medium-x81",
    "stanford-crfm/darkmatter-gpt2-small-x343",
    "stanford-crfm/durin-gpt2-medium-x343",
    "stanford-crfm/eowyn-gpt2-medium-x777",
    "stanford-crfm/expanse-gpt2-small-x777",
    "swiss-ai/Apertus-8B-2509",
    "swiss-ai/Apertus-8B-Instruct-2509",
]
"""Official model names for models on HuggingFace."""
# Model Aliases:
# Maps each official HuggingFace model name to the shorthand names accepted in
# its place. The FIRST alias in each list is the model's default alias (it is
# what DEFAULT_MODEL_ALIASES picks up), so keep existing first entries stable;
# new aliases should be appended.
MODEL_ALIASES: dict[str, list[str]] = {
    "01-ai/Yi-34B": ["yi-34b", "Yi-34B"],
    "01-ai/Yi-34B-Chat": ["yi-34b-chat", "Yi-34B-Chat"],
    "01-ai/Yi-6B": ["yi-6b", "Yi-6B"],
    "01-ai/Yi-6B-Chat": ["yi-6b-chat", "Yi-6B-Chat"],
    "ai-forever/mGPT": ["mGPT"],
    "allenai/OLMo-1B-hf": ["olmo-1b"],
    "allenai/OLMo-2-0425-1B": ["olmo-2-1b"],
    "allenai/OLMo-2-1124-7B": ["olmo-2-7b"],
    "allenai/Olmo-3-32B-Think": ["olmo-3-32b-think"],
    "allenai/Olmo-3-7B-Instruct": ["olmo-3-7b-instruct"],
    "allenai/Olmo-3-7B-Think": ["olmo-3-7b-think"],
    "allenai/Olmo-3.1-32B-Instruct": ["olmo-3.1-32b-instruct"],
    "allenai/Olmo-3.1-32B-Think": ["olmo-3.1-32b-think"],
    "allenai/OLMo-7B-hf": ["olmo-7b"],
    "allenai/OLMoE-1B-7B-0924": ["olmoe"],
    "ArthurConmy/redwood_attn_2l": ["redwood_attn_2l"],
    "Baidicoot/Othello-GPT-Transformer-Lens": ["othello-gpt"],
    "bigcode/santacoder": ["santacoder"],
    "bigscience/bloom-1b1": ["bloom-1b1"],
    "bigscience/bloom-1b7": ["bloom-1b7"],
    "bigscience/bloom-3b": ["bloom-3b"],
    "bigscience/bloom-560m": ["bloom-560m"],
    "bigscience/bloom-7b1": ["bloom-7b1"],
    # "CodeLlamallama-2-7b" looks like a historical typo, but it is kept (and
    # kept first, so the default alias is unchanged) for backward
    # compatibility; the clean "CodeLlama-7b" form is appended to match the
    # -Instruct/-Python sibling entries below.
    "codellama/CodeLlama-7b-hf": ["CodeLlamallama-2-7b", "CodeLlama-7b"],
    "codellama/CodeLlama-7b-Instruct-hf": ["CodeLlama-7b-instruct"],
    "codellama/CodeLlama-7b-Python-hf": ["CodeLlama-7b-python"],
    "distilgpt2": ["distillgpt2", "distill-gpt2", "distil-gpt2", "gpt2-xs"],
    "EleutherAI/gpt-j-6B": ["gpt-j-6B", "gpt-j", "gptj"],
    "EleutherAI/gpt-neo-1.3B": ["gpt-neo-1.3B", "gpt-neo-medium", "neo-medium"],
    "EleutherAI/gpt-neo-125M": ["gpt-neo-125M", "gpt-neo-small", "neo-small", "neo"],
    "EleutherAI/gpt-neo-2.7B": ["gpt-neo-2.7B", "gpt-neo-large", "neo-large"],
    "EleutherAI/gpt-neox-20b": ["gpt-neox-20b", "gpt-neox", "neox"],
    # Pythia aliases also accept the pre-rename parameter counts (e.g. the
    # model now called 1.4b was originally released as 1.3b).
    "EleutherAI/pythia-1.4b": ["pythia-1.4b", "EleutherAI/pythia-1.3b", "pythia-1.3b"],
    "EleutherAI/pythia-1.4b-deduped": [
        "pythia-1.4b-deduped",
        "EleutherAI/pythia-1.3b-deduped",
        "pythia-1.3b-deduped",
    ],
    "EleutherAI/pythia-1.4b-deduped-v0": [
        "pythia-1.4b-deduped-v0",
        "EleutherAI/pythia-1.3b-deduped-v0",
        "pythia-1.3b-deduped-v0",
    ],
    "EleutherAI/pythia-1.4b-v0": ["pythia-1.4b-v0", "EleutherAI/pythia-1.3b-v0", "pythia-1.3b-v0"],
    "EleutherAI/pythia-12b": ["pythia-12b", "EleutherAI/pythia-13b", "pythia-13b"],
    "EleutherAI/pythia-12b-deduped": [
        "pythia-12b-deduped",
        "EleutherAI/pythia-13b-deduped",
        "pythia-13b-deduped",
    ],
    "EleutherAI/pythia-12b-deduped-v0": [
        "pythia-12b-deduped-v0",
        "EleutherAI/pythia-13b-deduped-v0",
        "pythia-13b-deduped-v0",
    ],
    "EleutherAI/pythia-12b-v0": ["pythia-12b-v0", "EleutherAI/pythia-13b-v0", "pythia-13b-v0"],
    "EleutherAI/pythia-14m": ["pythia-14m"],
    "EleutherAI/pythia-160m": ["pythia-160m", "EleutherAI/pythia-125m", "pythia-125m"],
    "EleutherAI/pythia-160m-deduped": [
        "pythia-160m-deduped",
        "EleutherAI/pythia-125m-deduped",
        "pythia-125m-deduped",
    ],
    "EleutherAI/pythia-160m-deduped-v0": [
        "pythia-160m-deduped-v0",
        "EleutherAI/pythia-125m-deduped-v0",
        "pythia-125m-deduped-v0",
    ],
    "EleutherAI/pythia-160m-seed1": [
        "pythia-160m-seed1",
        "EleutherAI/pythia-125m-seed1",
        "pythia-125m-seed1",
    ],
    "EleutherAI/pythia-160m-seed2": [
        "pythia-160m-seed2",
        "EleutherAI/pythia-125m-seed2",
        "pythia-125m-seed2",
    ],
    "EleutherAI/pythia-160m-seed3": [
        "pythia-160m-seed3",
        "EleutherAI/pythia-125m-seed3",
        "pythia-125m-seed3",
    ],
    "EleutherAI/pythia-160m-v0": ["pythia-160m-v0", "EleutherAI/pythia-125m-v0", "pythia-125m-v0"],
    "EleutherAI/pythia-1b": ["pythia-1b", "EleutherAI/pythia-800m", "pythia-800m"],
    "EleutherAI/pythia-1b-deduped": [
        "pythia-1b-deduped",
        "EleutherAI/pythia-800m-deduped",
        "pythia-800m-deduped",
    ],
    "EleutherAI/pythia-1b-deduped-v0": [
        "pythia-1b-deduped-v0",
        "EleutherAI/pythia-800m-deduped-v0",
        "pythia-800m-deduped-v0",
    ],
    "EleutherAI/pythia-1b-v0": ["pythia-1b-v0", "EleutherAI/pythia-800m-v0", "pythia-800m-v0"],
    "EleutherAI/pythia-2.8b": ["pythia-2.8b", "EleutherAI/pythia-2.7b", "pythia-2.7b"],
    "EleutherAI/pythia-2.8b-deduped": [
        "pythia-2.8b-deduped",
        "EleutherAI/pythia-2.7b-deduped",
        "pythia-2.7b-deduped",
    ],
    "EleutherAI/pythia-2.8b-deduped-v0": [
        "pythia-2.8b-deduped-v0",
        "EleutherAI/pythia-2.7b-deduped-v0",
        "pythia-2.7b-deduped-v0",
    ],
    "EleutherAI/pythia-2.8b-v0": ["pythia-2.8b-v0", "EleutherAI/pythia-2.7b-v0", "pythia-2.7b-v0"],
    "EleutherAI/pythia-31m": ["pythia-31m"],
    "EleutherAI/pythia-410m": ["pythia-410m", "EleutherAI/pythia-350m", "pythia-350m"],
    "EleutherAI/pythia-410m-deduped": [
        "pythia-410m-deduped",
        "EleutherAI/pythia-350m-deduped",
        "pythia-350m-deduped",
    ],
    "EleutherAI/pythia-410m-deduped-v0": [
        "pythia-410m-deduped-v0",
        "EleutherAI/pythia-350m-deduped-v0",
        "pythia-350m-deduped-v0",
    ],
    "EleutherAI/pythia-410m-v0": ["pythia-410m-v0", "EleutherAI/pythia-350m-v0", "pythia-350m-v0"],
    "EleutherAI/pythia-6.9b": ["pythia-6.9b", "EleutherAI/pythia-6.7b", "pythia-6.7b"],
    "EleutherAI/pythia-6.9b-deduped": [
        "pythia-6.9b-deduped",
        "EleutherAI/pythia-6.7b-deduped",
        "pythia-6.7b-deduped",
    ],
    "EleutherAI/pythia-6.9b-deduped-v0": [
        "pythia-6.9b-deduped-v0",
        "EleutherAI/pythia-6.7b-deduped-v0",
        "pythia-6.7b-deduped-v0",
    ],
    "EleutherAI/pythia-6.9b-v0": ["pythia-6.9b-v0", "EleutherAI/pythia-6.7b-v0", "pythia-6.7b-v0"],
    "EleutherAI/pythia-70m": ["pythia-70m", "pythia", "EleutherAI/pythia-19m", "pythia-19m"],
    "EleutherAI/pythia-70m-deduped": [
        "pythia-70m-deduped",
        "EleutherAI/pythia-19m-deduped",
        "pythia-19m-deduped",
    ],
    "EleutherAI/pythia-70m-deduped-v0": [
        "pythia-70m-deduped-v0",
        "EleutherAI/pythia-19m-deduped-v0",
        "pythia-19m-deduped-v0",
    ],
    "EleutherAI/pythia-70m-v0": [
        "pythia-70m-v0",
        "pythia-v0",
        "EleutherAI/pythia-19m-v0",
        "pythia-19m-v0",
    ],
    "facebook/hubert-base-ls960": ["hubert-base-ls960"],
    "facebook/opt-1.3b": ["opt-1.3b", "opt-medium"],
    "facebook/opt-125m": ["opt-125m", "opt-small", "opt"],
    "facebook/opt-13b": ["opt-13b", "opt-xxl"],
    "facebook/opt-2.7b": ["opt-2.7b", "opt-large"],
    "facebook/opt-30b": ["opt-30b", "opt-xxxl"],
    "facebook/opt-6.7b": ["opt-6.7b", "opt-xl"],
    "facebook/opt-66b": ["opt-66b", "opt-xxxxl"],
    "facebook/wav2vec2-base": ["wav2vec2-base", "w2v2-base"],
    "facebook/wav2vec2-large": ["wav2vec2-large", "w2v2-large"],
    "google-bert/bert-base-cased": ["bert-base-cased"],
    "google-bert/bert-base-uncased": ["bert-base-uncased"],
    "google-bert/bert-large-cased": ["bert-large-cased"],
    "google-bert/bert-large-uncased": ["bert-large-uncased"],
    "google-t5/t5-base": ["t5-base"],
    "google-t5/t5-large": ["t5-large"],
    "google-t5/t5-small": ["t5-small"],
    "google/gemma-2-27b": ["gemma-2-27b"],
    "google/gemma-2-27b-it": ["gemma-2-27b-it"],
    "google/gemma-2-2b": ["gemma-2-2b"],
    "google/gemma-2-2b-it": ["gemma-2-2b-it"],
    "google/gemma-2-9b": ["gemma-2-9b"],
    "google/gemma-2-9b-it": ["gemma-2-9b-it"],
    "google/gemma-2b": ["gemma-2b"],
    "google/gemma-2b-it": ["gemma-2b-it"],
    "google/gemma-3-12b-it": ["gemma-3-12b-it"],
    "google/gemma-3-12b-pt": ["gemma-3-12b-pt"],
    "google/gemma-3-1b-it": ["gemma-3-1b-it"],
    "google/gemma-3-1b-pt": ["gemma-3-1b-pt"],
    "google/gemma-3-270m": ["gemma-3-270m"],
    "google/gemma-3-270m-it": ["gemma-3-270m-it"],
    "google/gemma-3-27b-it": ["gemma-3-27b-it"],
    "google/gemma-3-27b-pt": ["gemma-3-27b-pt"],
    "google/gemma-3-4b-it": ["gemma-3-4b-it"],
    "google/gemma-3-4b-pt": ["gemma-3-4b-pt"],
    "google/gemma-7b": ["gemma-7b"],
    "google/gemma-7b-it": ["gemma-7b-it"],
    "google/medgemma-27b-it": ["medgemma-27b-it"],
    "google/medgemma-27b-text-it": ["medgemma-27b-text-it"],
    "google/medgemma-4b-it": ["medgemma-4b-it"],
    "google/medgemma-4b-pt": ["medgemma-4b-pt"],
    "gpt2": ["gpt2-small"],
    "llama-13b-hf": ["llama-13b"],
    "llama-30b-hf": ["llama-30b"],
    "llama-65b-hf": ["llama-65b"],
    "llama-7b-hf": ["llama-7b"],
    "meta-llama/Llama-2-13b-chat-hf": ["Llama-2-13b-chat"],
    "meta-llama/Llama-2-13b-hf": ["Llama-2-13b"],
    "meta-llama/Llama-2-70b-chat-hf": ["Llama-2-70b-chat", "meta-llama-2-70b-chat-hf"],
    "meta-llama/Llama-2-7b-chat-hf": ["Llama-2-7b-chat"],
    "meta-llama/Llama-2-7b-hf": ["Llama-2-7b"],
    "microsoft/phi-1": ["phi-1"],
    "microsoft/phi-1_5": ["phi-1_5"],
    "microsoft/phi-2": ["phi-2"],
    "microsoft/Phi-3-mini-4k-instruct": ["phi-3"],
    "microsoft/phi-4": ["phi-4"],
    "mistralai/Mistral-7B-Instruct-v0.1": ["mistral-7b-instruct"],
    "mistralai/Mistral-7B-v0.1": ["mistral-7b"],
    "mistralai/Mistral-Nemo-Base-2407": ["mistral-nemo-base-2407"],
    "mistralai/Mixtral-8x7B-Instruct-v0.1": ["mixtral-instruct", "mixtral-8x7b-instruct"],
    "mistralai/Mixtral-8x7B-v0.1": ["mixtral", "mixtral-8x7b"],
    "NeelNanda/Attn-Only-2L512W-Shortformer-6B-big-lr": [
        "attn-only-2l-demo",
        "attn-only-2l-shortformer-6b-big-lr",
        "attn-only-2l-induction-demo",
        "attn-only-demo",
    ],
    "NeelNanda/Attn_Only_1L512W_C4_Code": [
        "attn-only-1l",
        "attn-only-1l-new",
        "attn-only-1l-c4-code",
    ],
    "NeelNanda/Attn_Only_2L512W_C4_Code": [
        "attn-only-2l",
        "attn-only-2l-new",
        "attn-only-2l-c4-code",
    ],
    "NeelNanda/Attn_Only_3L512W_C4_Code": [
        "attn-only-3l",
        "attn-only-3l-new",
        "attn-only-3l-c4-code",
    ],
    "NeelNanda/Attn_Only_4L512W_C4_Code": [
        "attn-only-4l",
        "attn-only-4l-new",
        "attn-only-4l-c4-code",
    ],
    "NeelNanda/GELU_1L512W_C4_Code": ["gelu-1l", "gelu-1l-new", "gelu-1l-c4-code"],
    "NeelNanda/GELU_2L512W_C4_Code": ["gelu-2l", "gelu-2l-new", "gelu-2l-c4-code"],
    "NeelNanda/GELU_3L512W_C4_Code": ["gelu-3l", "gelu-3l-new", "gelu-3l-c4-code"],
    "NeelNanda/GELU_4L512W_C4_Code": ["gelu-4l", "gelu-4l-new", "gelu-4l-c4-code"],
    "NeelNanda/SoLU_10L1280W_C4_Code": ["solu-10l", "solu-10l-new", "solu-10l-c4-code"],
    "NeelNanda/SoLU_10L_v22_old": ["solu-10l-pile", "solu-10l-old"],
    "NeelNanda/SoLU_12L1536W_C4_Code": ["solu-12l", "solu-12l-new", "solu-12l-c4-code"],
    "NeelNanda/SoLU_12L_v23_old": ["solu-12l-pile", "solu-12l-old"],
    "NeelNanda/SoLU_1L512W_C4_Code": ["solu-1l", "solu-1l-new", "solu-1l-c4-code"],
    "NeelNanda/SoLU_1L512W_Wiki_Finetune": [
        "solu-1l-wiki",
        "solu-1l-wiki-finetune",
        "solu-1l-finetune",
    ],
    "NeelNanda/SoLU_1L_v9_old": ["solu-1l-pile", "solu-1l-old"],
    "NeelNanda/SoLU_2L512W_C4_Code": ["solu-2l", "solu-2l-new", "solu-2l-c4-code"],
    "NeelNanda/SoLU_2L_v10_old": ["solu-2l-pile", "solu-2l-old"],
    "NeelNanda/SoLU_3L512W_C4_Code": ["solu-3l", "solu-3l-new", "solu-3l-c4-code"],
    "NeelNanda/SoLU_4L512W_C4_Code": ["solu-4l", "solu-4l-new", "solu-4l-c4-code"],
    "NeelNanda/SoLU_4L512W_Wiki_Finetune": [
        "solu-4l-wiki",
        "solu-4l-wiki-finetune",
        "solu-4l-finetune",
    ],
    "NeelNanda/SoLU_4L_v11_old": ["solu-4l-pile", "solu-4l-old"],
    "NeelNanda/SoLU_6L768W_C4_Code": ["solu-6l", "solu-6l-new", "solu-6l-c4-code"],
    "NeelNanda/SoLU_6L_v13_old": ["solu-6l-pile", "solu-6l-old"],
    "NeelNanda/SoLU_8L1024W_C4_Code": ["solu-8l", "solu-8l-new", "solu-8l-c4-code"],
    "NeelNanda/SoLU_8L_v21_old": ["solu-8l-pile", "solu-8l-old"],
    "openai/gpt-oss-20b": ["gpt-oss-20b", "gpt-oss"],
    "Qwen/Qwen-14B": ["qwen-14b"],
    "Qwen/Qwen-14B-Chat": ["qwen-14b-chat"],
    "Qwen/Qwen-1_8B": ["qwen-1.8b"],
    "Qwen/Qwen-1_8B-Chat": ["qwen-1.8b-chat"],
    "Qwen/Qwen-7B": ["qwen-7b"],
    "Qwen/Qwen-7B-Chat": ["qwen-7b-chat"],
    "Qwen/Qwen1.5-0.5B": ["qwen1.5-0.5b"],
    "Qwen/Qwen1.5-0.5B-Chat": ["qwen1.5-0.5b-chat"],
    "Qwen/Qwen1.5-1.8B": ["qwen1.5-1.8b"],
    "Qwen/Qwen1.5-1.8B-Chat": ["qwen1.5-1.8b-chat"],
    "Qwen/Qwen1.5-14B": ["qwen1.5-14b"],
    "Qwen/Qwen1.5-14B-Chat": ["qwen1.5-14b-chat"],
    "Qwen/Qwen1.5-4B": ["qwen1.5-4b"],
    "Qwen/Qwen1.5-4B-Chat": ["qwen1.5-4b-chat"],
    "Qwen/Qwen1.5-7B": ["qwen1.5-7b"],
    "Qwen/Qwen1.5-7B-Chat": ["qwen1.5-7b-chat"],
    "Qwen/Qwen2-0.5B": ["qwen2-0.5b"],
    "Qwen/Qwen2-0.5B-Instruct": ["qwen2-0.5b-instruct"],
    "Qwen/Qwen2-1.5B": ["qwen2-1.5b"],
    "Qwen/Qwen2-1.5B-Instruct": ["qwen2-1.5b-instruct"],
    "Qwen/Qwen2-7B": ["qwen2-7b"],
    "Qwen/Qwen2-7B-Instruct": ["qwen2-7b-instruct"],
    "Qwen/Qwen2.5-0.5B": ["qwen2.5-0.5b"],
    "Qwen/Qwen2.5-0.5B-Instruct": ["qwen2.5-0.5b-instruct"],
    "Qwen/Qwen2.5-1.5B": ["qwen2.5-1.5b"],
    "Qwen/Qwen2.5-1.5B-Instruct": ["qwen2.5-1.5b-instruct"],
    "Qwen/Qwen2.5-14B": ["qwen2.5-14b"],
    "Qwen/Qwen2.5-14B-Instruct": ["qwen2.5-14b-instruct"],
    "Qwen/Qwen2.5-32B": ["qwen2.5-32b"],
    "Qwen/Qwen2.5-32B-Instruct": ["qwen2.5-32b-instruct"],
    "Qwen/Qwen2.5-3B": ["qwen2.5-3b"],
    "Qwen/Qwen2.5-3B-Instruct": ["qwen2.5-3b-instruct"],
    "Qwen/Qwen2.5-72B": ["qwen2.5-72b"],
    "Qwen/Qwen2.5-72B-Instruct": ["qwen2.5-72b-instruct"],
    "Qwen/Qwen2.5-7B": ["qwen2.5-7b"],
    "Qwen/Qwen2.5-7B-Instruct": ["qwen2.5-7b-instruct"],
    "Qwen/Qwen3-0.6B": ["qwen3-0.6b"],
    "Qwen/Qwen3-0.6B-Base": ["qwen3-0.6b-base"],
    "Qwen/Qwen3-1.7B": ["qwen3-1.7b"],
    "Qwen/Qwen3-14B": ["qwen3-14b"],
    "Qwen/Qwen3-4B": ["qwen3-4b"],
    "Qwen/Qwen3-8B": ["qwen3-8b"],
    "Qwen/QwQ-32B-Preview": ["qwen-32b-preview"],
    "roneneldan/TinyStories-1Layer-21M": ["tiny-stories-1L-21M"],
    "roneneldan/TinyStories-1M": ["tiny-stories-1M"],
    "roneneldan/TinyStories-28M": ["tiny-stories-28M"],
    "roneneldan/TinyStories-2Layers-33M": ["tiny-stories-2L-33M"],
    "roneneldan/TinyStories-33M": ["tiny-stories-33M"],
    "roneneldan/TinyStories-3M": ["tiny-stories-3M"],
    "roneneldan/TinyStories-8M": ["tiny-stories-8M"],
    "roneneldan/TinyStories-Instruct-1M": ["tiny-stories-instruct-1M"],
    "roneneldan/TinyStories-Instruct-28M": ["tiny-stories-instruct-28M"],
    "roneneldan/TinyStories-Instruct-2Layers-33M": ["tiny-stories-instruct-2L-33M"],
    "roneneldan/TinyStories-Instruct-33M": ["tiny-stories-instruct-33M"],
    "roneneldan/TinyStories-Instruct-3M": ["tiny-stories-instruct-3M"],
    "roneneldan/TinyStories-Instruct-8M": ["tiny-stories-instruct-8M"],
    # Key spelled "Instuct" on purpose: it presumably matches the upstream
    # repo id (which contains the typo); the alias uses the correct spelling.
    "roneneldan/TinyStories-Instuct-1Layer-21M": ["tiny-stories-instruct-1L-21M"],
    "stabilityai/stablelm-base-alpha-3b": ["stablelm-base-alpha-3b", "stablelm-base-3b"],
    "stabilityai/stablelm-base-alpha-7b": ["stablelm-base-alpha-7b", "stablelm-base-7b"],
    "stabilityai/stablelm-tuned-alpha-3b": ["stablelm-tuned-alpha-3b", "stablelm-tuned-3b"],
    "stabilityai/stablelm-tuned-alpha-7b": ["stablelm-tuned-alpha-7b", "stablelm-tuned-7b"],
    # Stanford CRFM GPT-2 replications: each entry follows the pattern
    # [stanford-gpt2-SIZE-LETTER, REPO-NAME, gpt2-mistral/medium-SIZE-LETTER,
    #  gpt2-stanford-SIZE-LETTER]. Four entries previously listed their third
    # alias twice instead of the gpt2-stanford-* form; fixed below (the
    # duplicates mapped to the same model, so no existing alias is removed).
    "stanford-crfm/alias-gpt2-small-x21": [
        "stanford-gpt2-small-a",
        "alias-gpt2-small-x21",
        "gpt2-mistral-small-a",
        "gpt2-stanford-small-a",
    ],
    "stanford-crfm/arwen-gpt2-medium-x21": [
        "stanford-gpt2-medium-a",
        "arwen-gpt2-medium-x21",
        "gpt2-medium-small-a",
        "gpt2-stanford-medium-a",
    ],
    "stanford-crfm/battlestar-gpt2-small-x49": [
        "stanford-gpt2-small-b",
        "battlestar-gpt2-small-x49",
        "gpt2-mistral-small-b",
        "gpt2-stanford-small-b",  # was a duplicate of "gpt2-mistral-small-b"
    ],
    "stanford-crfm/beren-gpt2-medium-x49": [
        "stanford-gpt2-medium-b",
        "beren-gpt2-medium-x49",
        "gpt2-medium-small-b",
        "gpt2-stanford-medium-b",
    ],
    "stanford-crfm/caprica-gpt2-small-x81": [
        "stanford-gpt2-small-c",
        "caprica-gpt2-small-x81",
        "gpt2-mistral-small-c",
        "gpt2-stanford-small-c",
    ],
    "stanford-crfm/celebrimbor-gpt2-medium-x81": [
        "stanford-gpt2-medium-c",
        "celebrimbor-gpt2-medium-x81",
        "gpt2-medium-small-c",
        "gpt2-stanford-medium-c",  # was a duplicate of "gpt2-medium-small-c"
    ],
    "stanford-crfm/darkmatter-gpt2-small-x343": [
        "stanford-gpt2-small-d",
        "darkmatter-gpt2-small-x343",
        "gpt2-mistral-small-d",
        "gpt2-stanford-small-d",  # was a duplicate of "gpt2-mistral-small-d"
    ],
    "stanford-crfm/durin-gpt2-medium-x343": [
        "stanford-gpt2-medium-d",
        "durin-gpt2-medium-x343",
        "gpt2-medium-small-d",
        "gpt2-stanford-medium-d",
    ],
    "stanford-crfm/eowyn-gpt2-medium-x777": [
        "stanford-gpt2-medium-e",
        "eowyn-gpt2-medium-x777",
        "gpt2-medium-small-e",
        "gpt2-stanford-medium-e",
    ],
    "stanford-crfm/expanse-gpt2-small-x777": [
        "stanford-gpt2-small-e",
        "expanse-gpt2-small-x777",
        "gpt2-mistral-small-e",
        "gpt2-stanford-small-e",  # was a duplicate of "gpt2-mistral-small-e"
    ],
    "swiss-ai/Apertus-8B-2509": ["apertus-8b", "apertus"],
    "swiss-ai/Apertus-8B-Instruct-2509": ["apertus-8b-instruct", "apertus-instruct"],
}
"""Model aliases for models on HuggingFace."""
# Default alias per official model: by convention the first entry of its alias
# list when one exists, otherwise the official HuggingFace name itself.
DEFAULT_MODEL_ALIASES: list[str] = [
    MODEL_ALIASES.get(official_name, [official_name])[0]
    for official_name in OFFICIAL_MODEL_NAMES
]