Coverage for transformer_lens/supported_models.py: 100%

5 statements  

« prev     ^ index     » next       coverage.py v7.10.1, created at 2026-07-01 15:58 +0000

1# HookedTransformers model registry. If you are looking for TransformerBridge's supported models 

2# see transformer_lens/tools/model_registry/data/supported_models.json or the docs page at 

3# https://transformerlensorg.github.io/TransformerLens/generated/transformer_bridge_models.html 

# Canonical identifiers of every model in this registry.
# Order is significant: DEFAULT_MODEL_ALIASES (bottom of this module) is built by
# iterating this list, so its entries line up positionally with the names here.
# NOTE(review): entries look like they are kept in case-insensitive alphabetical
# order -- confirm before relying on it when inserting new names.
OFFICIAL_MODEL_NAMES: list[str] = [
    "01-ai/Yi-34B",
    "01-ai/Yi-34B-Chat",
    "01-ai/Yi-6B",
    "01-ai/Yi-6B-Chat",
    "ai-forever/mGPT",
    "allenai/OLMo-1B-hf",
    "allenai/OLMo-2-0425-1B",
    "allenai/OLMo-2-1124-7B",
    "allenai/Olmo-3-32B-Think",
    "allenai/Olmo-3-7B-Instruct",
    "allenai/Olmo-3-7B-Think",
    "allenai/Olmo-3.1-32B-Instruct",
    "allenai/Olmo-3.1-32B-Think",
    "allenai/OLMo-7B-hf",
    "allenai/OLMoE-1B-7B-0924",
    "ArthurConmy/redwood_attn_2l",
    "Baidicoot/Othello-GPT-Transformer-Lens",
    "bigcode/santacoder",
    "bigscience/bloom-1b1",
    "bigscience/bloom-1b7",
    "bigscience/bloom-3b",
    "bigscience/bloom-560m",
    "bigscience/bloom-7b1",
    "codellama/CodeLlama-7b-hf",
    "codellama/CodeLlama-7b-Instruct-hf",
    "codellama/CodeLlama-7b-Python-hf",
    "distilgpt2",
    "EleutherAI/gpt-j-6B",
    "EleutherAI/gpt-neo-1.3B",
    "EleutherAI/gpt-neo-125M",
    "EleutherAI/gpt-neo-2.7B",
    "EleutherAI/gpt-neox-20b",
    "EleutherAI/pythia-1.4b",
    "EleutherAI/pythia-1.4b-deduped",
    "EleutherAI/pythia-1.4b-deduped-v0",
    "EleutherAI/pythia-1.4b-v0",
    "EleutherAI/pythia-12b",
    "EleutherAI/pythia-12b-deduped",
    "EleutherAI/pythia-12b-deduped-v0",
    "EleutherAI/pythia-12b-v0",
    "EleutherAI/pythia-14m",
    "EleutherAI/pythia-160m",
    "EleutherAI/pythia-160m-deduped",
    "EleutherAI/pythia-160m-deduped-v0",
    "EleutherAI/pythia-160m-seed1",
    "EleutherAI/pythia-160m-seed2",
    "EleutherAI/pythia-160m-seed3",
    "EleutherAI/pythia-160m-v0",
    "EleutherAI/pythia-1b",
    "EleutherAI/pythia-1b-deduped",
    "EleutherAI/pythia-1b-deduped-v0",
    "EleutherAI/pythia-1b-v0",
    "EleutherAI/pythia-2.8b",
    "EleutherAI/pythia-2.8b-deduped",
    "EleutherAI/pythia-2.8b-deduped-v0",
    "EleutherAI/pythia-2.8b-v0",
    "EleutherAI/pythia-31m",
    "EleutherAI/pythia-410m",
    "EleutherAI/pythia-410m-deduped",
    "EleutherAI/pythia-410m-deduped-v0",
    "EleutherAI/pythia-410m-v0",
    "EleutherAI/pythia-6.9b",
    "EleutherAI/pythia-6.9b-deduped",
    "EleutherAI/pythia-6.9b-deduped-v0",
    "EleutherAI/pythia-6.9b-v0",
    "EleutherAI/pythia-70m",
    "EleutherAI/pythia-70m-deduped",
    "EleutherAI/pythia-70m-deduped-v0",
    "EleutherAI/pythia-70m-v0",
    "facebook/hubert-base-ls960",
    "facebook/opt-1.3b",
    "facebook/opt-125m",
    "facebook/opt-13b",
    "facebook/opt-2.7b",
    "facebook/opt-30b",
    "facebook/opt-6.7b",
    "facebook/opt-66b",
    "facebook/wav2vec2-base",
    "facebook/wav2vec2-large",
    "google-bert/bert-base-cased",
    "google-bert/bert-base-uncased",
    "google-bert/bert-large-cased",
    "google-bert/bert-large-uncased",
    "google-t5/t5-base",
    "google-t5/t5-large",
    "google-t5/t5-small",
    "google/gemma-2-27b",
    "google/gemma-2-27b-it",
    "google/gemma-2-2b",
    "google/gemma-2-2b-it",
    "google/gemma-2-9b",
    "google/gemma-2-9b-it",
    "google/gemma-2b",
    "google/gemma-2b-it",
    "google/gemma-3-12b-it",
    "google/gemma-3-12b-pt",
    "google/gemma-3-1b-it",
    "google/gemma-3-1b-pt",
    "google/gemma-3-270m",
    "google/gemma-3-270m-it",
    "google/gemma-3-27b-it",
    "google/gemma-3-27b-pt",
    "google/gemma-3-4b-it",
    "google/gemma-3-4b-pt",
    "google/gemma-7b",
    "google/gemma-7b-it",
    "google/medgemma-27b-it",
    "google/medgemma-27b-text-it",
    "google/medgemma-4b-it",
    "google/medgemma-4b-pt",
    "gpt2",
    "gpt2-large",
    "gpt2-medium",
    "gpt2-xl",
    "llama-13b-hf",
    "llama-30b-hf",
    "llama-65b-hf",
    "llama-7b-hf",
    "meta-llama/Llama-2-13b-chat-hf",
    "meta-llama/Llama-2-13b-hf",
    "meta-llama/Llama-2-70b-chat-hf",
    "meta-llama/Llama-2-7b-chat-hf",
    "meta-llama/Llama-2-7b-hf",
    "meta-llama/Llama-3.1-70B",
    "meta-llama/Llama-3.1-70B-Instruct",
    "meta-llama/Llama-3.1-8B",
    "meta-llama/Llama-3.1-8B-Instruct",
    "meta-llama/Llama-3.2-1B",
    "meta-llama/Llama-3.2-1B-Instruct",
    "meta-llama/Llama-3.2-3B",
    "meta-llama/Llama-3.2-3B-Instruct",
    "meta-llama/Llama-3.3-70B-Instruct",
    "meta-llama/Meta-Llama-3-70B",
    "meta-llama/Meta-Llama-3-70B-Instruct",
    "meta-llama/Meta-Llama-3-8B",
    "meta-llama/Meta-Llama-3-8B-Instruct",
    "microsoft/phi-1",
    "microsoft/phi-1_5",
    "microsoft/phi-2",
    "microsoft/Phi-3-mini-4k-instruct",
    "microsoft/phi-4",
    "mistralai/Mistral-7B-Instruct-v0.1",
    "mistralai/Mistral-7B-v0.1",
    "mistralai/Mistral-Nemo-Base-2407",
    "mistralai/Mistral-Small-24B-Base-2501",
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "mistralai/Mixtral-8x7B-v0.1",
    "NeelNanda/Attn-Only-2L512W-Shortformer-6B-big-lr",
    "NeelNanda/Attn_Only_1L512W_C4_Code",
    "NeelNanda/Attn_Only_2L512W_C4_Code",
    "NeelNanda/Attn_Only_3L512W_C4_Code",
    "NeelNanda/Attn_Only_4L512W_C4_Code",
    "NeelNanda/GELU_1L512W_C4_Code",
    "NeelNanda/GELU_2L512W_C4_Code",
    "NeelNanda/GELU_3L512W_C4_Code",
    "NeelNanda/GELU_4L512W_C4_Code",
    "NeelNanda/SoLU_10L1280W_C4_Code",
    "NeelNanda/SoLU_10L_v22_old",
    "NeelNanda/SoLU_12L1536W_C4_Code",
    "NeelNanda/SoLU_12L_v23_old",
    "NeelNanda/SoLU_1L512W_C4_Code",
    "NeelNanda/SoLU_1L512W_Wiki_Finetune",
    "NeelNanda/SoLU_1L_v9_old",
    "NeelNanda/SoLU_2L512W_C4_Code",
    "NeelNanda/SoLU_2L_v10_old",
    "NeelNanda/SoLU_3L512W_C4_Code",
    "NeelNanda/SoLU_4L512W_C4_Code",
    "NeelNanda/SoLU_4L512W_Wiki_Finetune",
    "NeelNanda/SoLU_4L_v11_old",
    "NeelNanda/SoLU_6L768W_C4_Code",
    "NeelNanda/SoLU_6L_v13_old",
    "NeelNanda/SoLU_8L1024W_C4_Code",
    "NeelNanda/SoLU_8L_v21_old",
    "openai/gpt-oss-20b",
    "Qwen/Qwen-14B",
    "Qwen/Qwen-14B-Chat",
    "Qwen/Qwen-1_8B",
    "Qwen/Qwen-1_8B-Chat",
    "Qwen/Qwen-7B",
    "Qwen/Qwen-7B-Chat",
    "Qwen/Qwen1.5-0.5B",
    "Qwen/Qwen1.5-0.5B-Chat",
    "Qwen/Qwen1.5-1.8B",
    "Qwen/Qwen1.5-1.8B-Chat",
    "Qwen/Qwen1.5-14B",
    "Qwen/Qwen1.5-14B-Chat",
    "Qwen/Qwen1.5-4B",
    "Qwen/Qwen1.5-4B-Chat",
    "Qwen/Qwen1.5-7B",
    "Qwen/Qwen1.5-7B-Chat",
    "Qwen/Qwen2-0.5B",
    "Qwen/Qwen2-0.5B-Instruct",
    "Qwen/Qwen2-1.5B",
    "Qwen/Qwen2-1.5B-Instruct",
    "Qwen/Qwen2-7B",
    "Qwen/Qwen2-7B-Instruct",
    "Qwen/Qwen2.5-0.5B",
    "Qwen/Qwen2.5-0.5B-Instruct",
    "Qwen/Qwen2.5-1.5B",
    "Qwen/Qwen2.5-1.5B-Instruct",
    "Qwen/Qwen2.5-14B",
    "Qwen/Qwen2.5-14B-Instruct",
    "Qwen/Qwen2.5-32B",
    "Qwen/Qwen2.5-32B-Instruct",
    "Qwen/Qwen2.5-3B",
    "Qwen/Qwen2.5-3B-Instruct",
    "Qwen/Qwen2.5-72B",
    "Qwen/Qwen2.5-72B-Instruct",
    "Qwen/Qwen2.5-7B",
    "Qwen/Qwen2.5-7B-Instruct",
    "Qwen/Qwen3-0.6B",
    "Qwen/Qwen3-0.6B-Base",
    "Qwen/Qwen3-1.7B",
    "Qwen/Qwen3-14B",
    "Qwen/Qwen3-4B",
    "Qwen/Qwen3-8B",
    "Qwen/QwQ-32B-Preview",
    "roneneldan/TinyStories-1Layer-21M",
    "roneneldan/TinyStories-1M",
    "roneneldan/TinyStories-28M",
    "roneneldan/TinyStories-2Layers-33M",
    "roneneldan/TinyStories-33M",
    "roneneldan/TinyStories-3M",
    "roneneldan/TinyStories-8M",
    "roneneldan/TinyStories-Instruct-1M",
    "roneneldan/TinyStories-Instruct-28M",
    "roneneldan/TinyStories-Instruct-2Layers-33M",
    "roneneldan/TinyStories-Instruct-33M",
    "roneneldan/TinyStories-Instruct-3M",
    "roneneldan/TinyStories-Instruct-8M",
    "roneneldan/TinyStories-Instuct-1Layer-21M",
    "stabilityai/stablelm-base-alpha-3b",
    "stabilityai/stablelm-base-alpha-7b",
    "stabilityai/stablelm-tuned-alpha-3b",
    "stabilityai/stablelm-tuned-alpha-7b",
    "stanford-crfm/alias-gpt2-small-x21",
    "stanford-crfm/arwen-gpt2-medium-x21",
    "stanford-crfm/battlestar-gpt2-small-x49",
    "stanford-crfm/beren-gpt2-medium-x49",
    "stanford-crfm/caprica-gpt2-small-x81",
    "stanford-crfm/celebrimbor-gpt2-medium-x81",
    "stanford-crfm/darkmatter-gpt2-small-x343",
    "stanford-crfm/durin-gpt2-medium-x343",
    "stanford-crfm/eowyn-gpt2-medium-x777",
    "stanford-crfm/expanse-gpt2-small-x777",
    "swiss-ai/Apertus-8B-2509",
    "swiss-ai/Apertus-8B-Instruct-2509",
]
"""Official model names for models on HuggingFace."""

254 

# Model aliases: maps an official model name to the alternative names accepted for it.
# The first alias in each list is the model's default alias (DEFAULT_MODEL_ALIASES
# takes index 0), so reordering a list changes that default.
# Official names without an entry here fall back to the official name itself.
MODEL_ALIASES: dict[str, list[str]] = {
    "01-ai/Yi-34B": ["yi-34b", "Yi-34B"],
    "01-ai/Yi-34B-Chat": ["yi-34b-chat", "Yi-34B-Chat"],
    "01-ai/Yi-6B": ["yi-6b", "Yi-6B"],
    "01-ai/Yi-6B-Chat": ["yi-6b-chat", "Yi-6B-Chat"],
    "ai-forever/mGPT": ["mGPT"],
    "allenai/OLMo-1B-hf": ["olmo-1b"],
    "allenai/OLMo-2-0425-1B": ["olmo-2-1b"],
    "allenai/OLMo-2-1124-7B": ["olmo-2-7b"],
    "allenai/Olmo-3-32B-Think": ["olmo-3-32b-think"],
    "allenai/Olmo-3-7B-Instruct": ["olmo-3-7b-instruct"],
    "allenai/Olmo-3-7B-Think": ["olmo-3-7b-think"],
    "allenai/Olmo-3.1-32B-Instruct": ["olmo-3.1-32b-instruct"],
    "allenai/Olmo-3.1-32B-Think": ["olmo-3.1-32b-think"],
    "allenai/OLMo-7B-hf": ["olmo-7b"],
    "allenai/OLMoE-1B-7B-0924": ["olmoe"],
    "ArthurConmy/redwood_attn_2l": ["redwood_attn_2l"],
    "Baidicoot/Othello-GPT-Transformer-Lens": ["othello-gpt"],
    "bigcode/santacoder": ["santacoder"],
    "bigscience/bloom-1b1": ["bloom-1b1"],
    "bigscience/bloom-1b7": ["bloom-1b7"],
    "bigscience/bloom-3b": ["bloom-3b"],
    "bigscience/bloom-560m": ["bloom-560m"],
    "bigscience/bloom-7b1": ["bloom-7b1"],
    # NOTE(review): "CodeLlamallama-2-7b" looks like a find/replace artifact, but it is
    # the first (default) alias for this model, so it is kept for backward compatibility.
    "codellama/CodeLlama-7b-hf": ["CodeLlamallama-2-7b"],
    "codellama/CodeLlama-7b-Instruct-hf": ["CodeLlama-7b-instruct"],
    "codellama/CodeLlama-7b-Python-hf": ["CodeLlama-7b-python"],
    "distilgpt2": ["distillgpt2", "distill-gpt2", "distil-gpt2", "gpt2-xs"],
    "EleutherAI/gpt-j-6B": ["gpt-j-6B", "gpt-j", "gptj"],
    "EleutherAI/gpt-neo-1.3B": ["gpt-neo-1.3B", "gpt-neo-medium", "neo-medium"],
    "EleutherAI/gpt-neo-125M": ["gpt-neo-125M", "gpt-neo-small", "neo-small", "neo"],
    "EleutherAI/gpt-neo-2.7B": ["gpt-neo-2.7B", "gpt-neo-large", "neo-large"],
    "EleutherAI/gpt-neox-20b": ["gpt-neox-20b", "gpt-neox", "neox"],
    "EleutherAI/pythia-1.4b": ["pythia-1.4b", "EleutherAI/pythia-1.3b", "pythia-1.3b"],
    "EleutherAI/pythia-1.4b-deduped": [
        "pythia-1.4b-deduped",
        "EleutherAI/pythia-1.3b-deduped",
        "pythia-1.3b-deduped",
    ],
    "EleutherAI/pythia-1.4b-deduped-v0": [
        "pythia-1.4b-deduped-v0",
        "EleutherAI/pythia-1.3b-deduped-v0",
        "pythia-1.3b-deduped-v0",
    ],
    "EleutherAI/pythia-1.4b-v0": ["pythia-1.4b-v0", "EleutherAI/pythia-1.3b-v0", "pythia-1.3b-v0"],
    "EleutherAI/pythia-12b": ["pythia-12b", "EleutherAI/pythia-13b", "pythia-13b"],
    "EleutherAI/pythia-12b-deduped": [
        "pythia-12b-deduped",
        "EleutherAI/pythia-13b-deduped",
        "pythia-13b-deduped",
    ],
    "EleutherAI/pythia-12b-deduped-v0": [
        "pythia-12b-deduped-v0",
        "EleutherAI/pythia-13b-deduped-v0",
        "pythia-13b-deduped-v0",
    ],
    "EleutherAI/pythia-12b-v0": ["pythia-12b-v0", "EleutherAI/pythia-13b-v0", "pythia-13b-v0"],
    "EleutherAI/pythia-14m": ["pythia-14m"],
    "EleutherAI/pythia-160m": ["pythia-160m", "EleutherAI/pythia-125m", "pythia-125m"],
    "EleutherAI/pythia-160m-deduped": [
        "pythia-160m-deduped",
        "EleutherAI/pythia-125m-deduped",
        "pythia-125m-deduped",
    ],
    "EleutherAI/pythia-160m-deduped-v0": [
        "pythia-160m-deduped-v0",
        "EleutherAI/pythia-125m-deduped-v0",
        "pythia-125m-deduped-v0",
    ],
    "EleutherAI/pythia-160m-seed1": [
        "pythia-160m-seed1",
        "EleutherAI/pythia-125m-seed1",
        "pythia-125m-seed1",
    ],
    "EleutherAI/pythia-160m-seed2": [
        "pythia-160m-seed2",
        "EleutherAI/pythia-125m-seed2",
        "pythia-125m-seed2",
    ],
    "EleutherAI/pythia-160m-seed3": [
        "pythia-160m-seed3",
        "EleutherAI/pythia-125m-seed3",
        "pythia-125m-seed3",
    ],
    "EleutherAI/pythia-160m-v0": ["pythia-160m-v0", "EleutherAI/pythia-125m-v0", "pythia-125m-v0"],
    "EleutherAI/pythia-1b": ["pythia-1b", "EleutherAI/pythia-800m", "pythia-800m"],
    "EleutherAI/pythia-1b-deduped": [
        "pythia-1b-deduped",
        "EleutherAI/pythia-800m-deduped",
        "pythia-800m-deduped",
    ],
    "EleutherAI/pythia-1b-deduped-v0": [
        "pythia-1b-deduped-v0",
        "EleutherAI/pythia-800m-deduped-v0",
        "pythia-800m-deduped-v0",
    ],
    "EleutherAI/pythia-1b-v0": ["pythia-1b-v0", "EleutherAI/pythia-800m-v0", "pythia-800m-v0"],
    "EleutherAI/pythia-2.8b": ["pythia-2.8b", "EleutherAI/pythia-2.7b", "pythia-2.7b"],
    "EleutherAI/pythia-2.8b-deduped": [
        "pythia-2.8b-deduped",
        "EleutherAI/pythia-2.7b-deduped",
        "pythia-2.7b-deduped",
    ],
    "EleutherAI/pythia-2.8b-deduped-v0": [
        "pythia-2.8b-deduped-v0",
        "EleutherAI/pythia-2.7b-deduped-v0",
        "pythia-2.7b-deduped-v0",
    ],
    "EleutherAI/pythia-2.8b-v0": ["pythia-2.8b-v0", "EleutherAI/pythia-2.7b-v0", "pythia-2.7b-v0"],
    "EleutherAI/pythia-31m": ["pythia-31m"],
    "EleutherAI/pythia-410m": ["pythia-410m", "EleutherAI/pythia-350m", "pythia-350m"],
    "EleutherAI/pythia-410m-deduped": [
        "pythia-410m-deduped",
        "EleutherAI/pythia-350m-deduped",
        "pythia-350m-deduped",
    ],
    "EleutherAI/pythia-410m-deduped-v0": [
        "pythia-410m-deduped-v0",
        "EleutherAI/pythia-350m-deduped-v0",
        "pythia-350m-deduped-v0",
    ],
    "EleutherAI/pythia-410m-v0": ["pythia-410m-v0", "EleutherAI/pythia-350m-v0", "pythia-350m-v0"],
    "EleutherAI/pythia-6.9b": ["pythia-6.9b", "EleutherAI/pythia-6.7b", "pythia-6.7b"],
    "EleutherAI/pythia-6.9b-deduped": [
        "pythia-6.9b-deduped",
        "EleutherAI/pythia-6.7b-deduped",
        "pythia-6.7b-deduped",
    ],
    "EleutherAI/pythia-6.9b-deduped-v0": [
        "pythia-6.9b-deduped-v0",
        "EleutherAI/pythia-6.7b-deduped-v0",
        "pythia-6.7b-deduped-v0",
    ],
    "EleutherAI/pythia-6.9b-v0": ["pythia-6.9b-v0", "EleutherAI/pythia-6.7b-v0", "pythia-6.7b-v0"],
    "EleutherAI/pythia-70m": ["pythia-70m", "pythia", "EleutherAI/pythia-19m", "pythia-19m"],
    "EleutherAI/pythia-70m-deduped": [
        "pythia-70m-deduped",
        "EleutherAI/pythia-19m-deduped",
        "pythia-19m-deduped",
    ],
    "EleutherAI/pythia-70m-deduped-v0": [
        "pythia-70m-deduped-v0",
        "EleutherAI/pythia-19m-deduped-v0",
        "pythia-19m-deduped-v0",
    ],
    "EleutherAI/pythia-70m-v0": [
        "pythia-70m-v0",
        "pythia-v0",
        "EleutherAI/pythia-19m-v0",
        "pythia-19m-v0",
    ],
    "facebook/hubert-base-ls960": ["hubert-base-ls960"],
    "facebook/opt-1.3b": ["opt-1.3b", "opt-medium"],
    "facebook/opt-125m": ["opt-125m", "opt-small", "opt"],
    "facebook/opt-13b": ["opt-13b", "opt-xxl"],
    "facebook/opt-2.7b": ["opt-2.7b", "opt-large"],
    "facebook/opt-30b": ["opt-30b", "opt-xxxl"],
    "facebook/opt-6.7b": ["opt-6.7b", "opt-xl"],
    "facebook/opt-66b": ["opt-66b", "opt-xxxxl"],
    "facebook/wav2vec2-base": ["wav2vec2-base", "w2v2-base"],
    "facebook/wav2vec2-large": ["wav2vec2-large", "w2v2-large"],
    "google-bert/bert-base-cased": ["bert-base-cased"],
    "google-bert/bert-base-uncased": ["bert-base-uncased"],
    "google-bert/bert-large-cased": ["bert-large-cased"],
    "google-bert/bert-large-uncased": ["bert-large-uncased"],
    "google-t5/t5-base": ["t5-base"],
    "google-t5/t5-large": ["t5-large"],
    "google-t5/t5-small": ["t5-small"],
    "google/gemma-2-27b": ["gemma-2-27b"],
    "google/gemma-2-27b-it": ["gemma-2-27b-it"],
    "google/gemma-2-2b": ["gemma-2-2b"],
    "google/gemma-2-2b-it": ["gemma-2-2b-it"],
    "google/gemma-2-9b": ["gemma-2-9b"],
    "google/gemma-2-9b-it": ["gemma-2-9b-it"],
    "google/gemma-2b": ["gemma-2b"],
    "google/gemma-2b-it": ["gemma-2b-it"],
    "google/gemma-3-12b-it": ["gemma-3-12b-it"],
    "google/gemma-3-12b-pt": ["gemma-3-12b-pt"],
    "google/gemma-3-1b-it": ["gemma-3-1b-it"],
    "google/gemma-3-1b-pt": ["gemma-3-1b-pt"],
    "google/gemma-3-270m": ["gemma-3-270m"],
    "google/gemma-3-270m-it": ["gemma-3-270m-it"],
    "google/gemma-3-27b-it": ["gemma-3-27b-it"],
    "google/gemma-3-27b-pt": ["gemma-3-27b-pt"],
    "google/gemma-3-4b-it": ["gemma-3-4b-it"],
    "google/gemma-3-4b-pt": ["gemma-3-4b-pt"],
    "google/gemma-7b": ["gemma-7b"],
    "google/gemma-7b-it": ["gemma-7b-it"],
    "google/medgemma-27b-it": ["medgemma-27b-it"],
    "google/medgemma-27b-text-it": ["medgemma-27b-text-it"],
    "google/medgemma-4b-it": ["medgemma-4b-it"],
    "google/medgemma-4b-pt": ["medgemma-4b-pt"],
    "gpt2": ["gpt2-small"],
    "llama-13b-hf": ["llama-13b"],
    "llama-30b-hf": ["llama-30b"],
    "llama-65b-hf": ["llama-65b"],
    "llama-7b-hf": ["llama-7b"],
    "meta-llama/Llama-2-13b-chat-hf": ["Llama-2-13b-chat"],
    "meta-llama/Llama-2-13b-hf": ["Llama-2-13b"],
    "meta-llama/Llama-2-70b-chat-hf": ["Llama-2-70b-chat", "meta-llama-2-70b-chat-hf"],
    "meta-llama/Llama-2-7b-chat-hf": ["Llama-2-7b-chat"],
    "meta-llama/Llama-2-7b-hf": ["Llama-2-7b"],
    "microsoft/phi-1": ["phi-1"],
    "microsoft/phi-1_5": ["phi-1_5"],
    "microsoft/phi-2": ["phi-2"],
    "microsoft/Phi-3-mini-4k-instruct": ["phi-3"],
    "microsoft/phi-4": ["phi-4"],
    "mistralai/Mistral-7B-Instruct-v0.1": ["mistral-7b-instruct"],
    "mistralai/Mistral-7B-v0.1": ["mistral-7b"],
    "mistralai/Mistral-Nemo-Base-2407": ["mistral-nemo-base-2407"],
    "mistralai/Mixtral-8x7B-Instruct-v0.1": ["mixtral-instruct", "mixtral-8x7b-instruct"],
    "mistralai/Mixtral-8x7B-v0.1": ["mixtral", "mixtral-8x7b"],
    "NeelNanda/Attn-Only-2L512W-Shortformer-6B-big-lr": [
        "attn-only-2l-demo",
        "attn-only-2l-shortformer-6b-big-lr",
        "attn-only-2l-induction-demo",
        "attn-only-demo",
    ],
    "NeelNanda/Attn_Only_1L512W_C4_Code": [
        "attn-only-1l",
        "attn-only-1l-new",
        "attn-only-1l-c4-code",
    ],
    "NeelNanda/Attn_Only_2L512W_C4_Code": [
        "attn-only-2l",
        "attn-only-2l-new",
        "attn-only-2l-c4-code",
    ],
    "NeelNanda/Attn_Only_3L512W_C4_Code": [
        "attn-only-3l",
        "attn-only-3l-new",
        "attn-only-3l-c4-code",
    ],
    "NeelNanda/Attn_Only_4L512W_C4_Code": [
        "attn-only-4l",
        "attn-only-4l-new",
        "attn-only-4l-c4-code",
    ],
    "NeelNanda/GELU_1L512W_C4_Code": ["gelu-1l", "gelu-1l-new", "gelu-1l-c4-code"],
    "NeelNanda/GELU_2L512W_C4_Code": ["gelu-2l", "gelu-2l-new", "gelu-2l-c4-code"],
    "NeelNanda/GELU_3L512W_C4_Code": ["gelu-3l", "gelu-3l-new", "gelu-3l-c4-code"],
    "NeelNanda/GELU_4L512W_C4_Code": ["gelu-4l", "gelu-4l-new", "gelu-4l-c4-code"],
    "NeelNanda/SoLU_10L1280W_C4_Code": ["solu-10l", "solu-10l-new", "solu-10l-c4-code"],
    "NeelNanda/SoLU_10L_v22_old": ["solu-10l-pile", "solu-10l-old"],
    "NeelNanda/SoLU_12L1536W_C4_Code": ["solu-12l", "solu-12l-new", "solu-12l-c4-code"],
    "NeelNanda/SoLU_12L_v23_old": ["solu-12l-pile", "solu-12l-old"],
    "NeelNanda/SoLU_1L512W_C4_Code": ["solu-1l", "solu-1l-new", "solu-1l-c4-code"],
    "NeelNanda/SoLU_1L512W_Wiki_Finetune": [
        "solu-1l-wiki",
        "solu-1l-wiki-finetune",
        "solu-1l-finetune",
    ],
    "NeelNanda/SoLU_1L_v9_old": ["solu-1l-pile", "solu-1l-old"],
    "NeelNanda/SoLU_2L512W_C4_Code": ["solu-2l", "solu-2l-new", "solu-2l-c4-code"],
    "NeelNanda/SoLU_2L_v10_old": ["solu-2l-pile", "solu-2l-old"],
    "NeelNanda/SoLU_3L512W_C4_Code": ["solu-3l", "solu-3l-new", "solu-3l-c4-code"],
    "NeelNanda/SoLU_4L512W_C4_Code": ["solu-4l", "solu-4l-new", "solu-4l-c4-code"],
    "NeelNanda/SoLU_4L512W_Wiki_Finetune": [
        "solu-4l-wiki",
        "solu-4l-wiki-finetune",
        "solu-4l-finetune",
    ],
    "NeelNanda/SoLU_4L_v11_old": ["solu-4l-pile", "solu-4l-old"],
    "NeelNanda/SoLU_6L768W_C4_Code": ["solu-6l", "solu-6l-new", "solu-6l-c4-code"],
    "NeelNanda/SoLU_6L_v13_old": ["solu-6l-pile", "solu-6l-old"],
    "NeelNanda/SoLU_8L1024W_C4_Code": ["solu-8l", "solu-8l-new", "solu-8l-c4-code"],
    "NeelNanda/SoLU_8L_v21_old": ["solu-8l-pile", "solu-8l-old"],
    "openai/gpt-oss-20b": ["gpt-oss-20b", "gpt-oss"],
    "Qwen/Qwen-14B": ["qwen-14b"],
    "Qwen/Qwen-14B-Chat": ["qwen-14b-chat"],
    "Qwen/Qwen-1_8B": ["qwen-1.8b"],
    "Qwen/Qwen-1_8B-Chat": ["qwen-1.8b-chat"],
    "Qwen/Qwen-7B": ["qwen-7b"],
    "Qwen/Qwen-7B-Chat": ["qwen-7b-chat"],
    "Qwen/Qwen1.5-0.5B": ["qwen1.5-0.5b"],
    "Qwen/Qwen1.5-0.5B-Chat": ["qwen1.5-0.5b-chat"],
    "Qwen/Qwen1.5-1.8B": ["qwen1.5-1.8b"],
    "Qwen/Qwen1.5-1.8B-Chat": ["qwen1.5-1.8b-chat"],
    "Qwen/Qwen1.5-14B": ["qwen1.5-14b"],
    "Qwen/Qwen1.5-14B-Chat": ["qwen1.5-14b-chat"],
    "Qwen/Qwen1.5-4B": ["qwen1.5-4b"],
    "Qwen/Qwen1.5-4B-Chat": ["qwen1.5-4b-chat"],
    "Qwen/Qwen1.5-7B": ["qwen1.5-7b"],
    "Qwen/Qwen1.5-7B-Chat": ["qwen1.5-7b-chat"],
    "Qwen/Qwen2-0.5B": ["qwen2-0.5b"],
    "Qwen/Qwen2-0.5B-Instruct": ["qwen2-0.5b-instruct"],
    "Qwen/Qwen2-1.5B": ["qwen2-1.5b"],
    "Qwen/Qwen2-1.5B-Instruct": ["qwen2-1.5b-instruct"],
    "Qwen/Qwen2-7B": ["qwen2-7b"],
    "Qwen/Qwen2-7B-Instruct": ["qwen2-7b-instruct"],
    "Qwen/Qwen2.5-0.5B": ["qwen2.5-0.5b"],
    "Qwen/Qwen2.5-0.5B-Instruct": ["qwen2.5-0.5b-instruct"],
    "Qwen/Qwen2.5-1.5B": ["qwen2.5-1.5b"],
    "Qwen/Qwen2.5-1.5B-Instruct": ["qwen2.5-1.5b-instruct"],
    "Qwen/Qwen2.5-14B": ["qwen2.5-14b"],
    "Qwen/Qwen2.5-14B-Instruct": ["qwen2.5-14b-instruct"],
    "Qwen/Qwen2.5-32B": ["qwen2.5-32b"],
    "Qwen/Qwen2.5-32B-Instruct": ["qwen2.5-32b-instruct"],
    "Qwen/Qwen2.5-3B": ["qwen2.5-3b"],
    "Qwen/Qwen2.5-3B-Instruct": ["qwen2.5-3b-instruct"],
    "Qwen/Qwen2.5-72B": ["qwen2.5-72b"],
    "Qwen/Qwen2.5-72B-Instruct": ["qwen2.5-72b-instruct"],
    "Qwen/Qwen2.5-7B": ["qwen2.5-7b"],
    "Qwen/Qwen2.5-7B-Instruct": ["qwen2.5-7b-instruct"],
    "Qwen/Qwen3-0.6B": ["qwen3-0.6b"],
    "Qwen/Qwen3-0.6B-Base": ["qwen3-0.6b-base"],
    "Qwen/Qwen3-1.7B": ["qwen3-1.7b"],
    "Qwen/Qwen3-14B": ["qwen3-14b"],
    "Qwen/Qwen3-4B": ["qwen3-4b"],
    "Qwen/Qwen3-8B": ["qwen3-8b"],
    "Qwen/QwQ-32B-Preview": ["qwen-32b-preview"],
    "roneneldan/TinyStories-1Layer-21M": ["tiny-stories-1L-21M"],
    "roneneldan/TinyStories-1M": ["tiny-stories-1M"],
    "roneneldan/TinyStories-28M": ["tiny-stories-28M"],
    "roneneldan/TinyStories-2Layers-33M": ["tiny-stories-2L-33M"],
    "roneneldan/TinyStories-33M": ["tiny-stories-33M"],
    "roneneldan/TinyStories-3M": ["tiny-stories-3M"],
    "roneneldan/TinyStories-8M": ["tiny-stories-8M"],
    "roneneldan/TinyStories-Instruct-1M": ["tiny-stories-instruct-1M"],
    "roneneldan/TinyStories-Instruct-28M": ["tiny-stories-instruct-28M"],
    "roneneldan/TinyStories-Instruct-2Layers-33M": ["tiny-stories-instruct-2L-33M"],
    "roneneldan/TinyStories-Instruct-33M": ["tiny-stories-instruct-33M"],
    "roneneldan/TinyStories-Instruct-3M": ["tiny-stories-instruct-3M"],
    "roneneldan/TinyStories-Instruct-8M": ["tiny-stories-instruct-8M"],
    "roneneldan/TinyStories-Instuct-1Layer-21M": ["tiny-stories-instruct-1L-21M"],
    "stabilityai/stablelm-base-alpha-3b": ["stablelm-base-alpha-3b", "stablelm-base-3b"],
    "stabilityai/stablelm-base-alpha-7b": ["stablelm-base-alpha-7b", "stablelm-base-7b"],
    "stabilityai/stablelm-tuned-alpha-3b": ["stablelm-tuned-alpha-3b", "stablelm-tuned-3b"],
    "stabilityai/stablelm-tuned-alpha-7b": ["stablelm-tuned-alpha-7b", "stablelm-tuned-7b"],
    # Every stanford-crfm entry ends with a "gpt2-stanford-<size>-<letter>" alias.
    # Four of them previously repeated their third alias in that slot instead
    # (small-b, medium-c, small-d, small-e); the repeated entry is replaced with the
    # alias the sibling entries define, so no previously accepted alias is lost.
    "stanford-crfm/alias-gpt2-small-x21": [
        "stanford-gpt2-small-a",
        "alias-gpt2-small-x21",
        "gpt2-mistral-small-a",
        "gpt2-stanford-small-a",
    ],
    "stanford-crfm/arwen-gpt2-medium-x21": [
        "stanford-gpt2-medium-a",
        "arwen-gpt2-medium-x21",
        "gpt2-medium-small-a",
        "gpt2-stanford-medium-a",
    ],
    "stanford-crfm/battlestar-gpt2-small-x49": [
        "stanford-gpt2-small-b",
        "battlestar-gpt2-small-x49",
        "gpt2-mistral-small-b",
        "gpt2-stanford-small-b",
    ],
    "stanford-crfm/beren-gpt2-medium-x49": [
        "stanford-gpt2-medium-b",
        "beren-gpt2-medium-x49",
        "gpt2-medium-small-b",
        "gpt2-stanford-medium-b",
    ],
    "stanford-crfm/caprica-gpt2-small-x81": [
        "stanford-gpt2-small-c",
        "caprica-gpt2-small-x81",
        "gpt2-mistral-small-c",
        "gpt2-stanford-small-c",
    ],
    "stanford-crfm/celebrimbor-gpt2-medium-x81": [
        "stanford-gpt2-medium-c",
        "celebrimbor-gpt2-medium-x81",
        "gpt2-medium-small-c",
        "gpt2-stanford-medium-c",
    ],
    "stanford-crfm/darkmatter-gpt2-small-x343": [
        "stanford-gpt2-small-d",
        "darkmatter-gpt2-small-x343",
        "gpt2-mistral-small-d",
        "gpt2-stanford-small-d",
    ],
    "stanford-crfm/durin-gpt2-medium-x343": [
        "stanford-gpt2-medium-d",
        "durin-gpt2-medium-x343",
        "gpt2-medium-small-d",
        "gpt2-stanford-medium-d",
    ],
    "stanford-crfm/eowyn-gpt2-medium-x777": [
        "stanford-gpt2-medium-e",
        "eowyn-gpt2-medium-x777",
        "gpt2-medium-small-e",
        "gpt2-stanford-medium-e",
    ],
    "stanford-crfm/expanse-gpt2-small-x777": [
        "stanford-gpt2-small-e",
        "expanse-gpt2-small-x777",
        "gpt2-mistral-small-e",
        "gpt2-stanford-small-e",
    ],
    "swiss-ai/Apertus-8B-2509": ["apertus-8b", "apertus"],
    "swiss-ai/Apertus-8B-Instruct-2509": ["apertus-8b-instruct", "apertus-instruct"],
}
"""Model aliases for models on HuggingFace."""

649 

650 

# Default alias for every official model, in the same order as OFFICIAL_MODEL_NAMES.
# By convention this is the first alias listed in MODEL_ALIASES; a model with no
# aliases (absent from MODEL_ALIASES, or mapped to an empty list) falls back to its
# official name instead of raising IndexError while the module is being imported.
DEFAULT_MODEL_ALIASES: list[str] = [
    (MODEL_ALIASES.get(name) or [name])[0] for name in OFFICIAL_MODEL_NAMES
]