Coverage for transformer_lens/supported_models.py: 100%

5 statements  

« prev     ^ index     » next       coverage.py v7.10.1, created at 2026-04-30 01:33 +0000

# Canonical model identifiers accepted by this library. Each entry is either a
# HuggingFace "org/repo" id or a bare top-level repo name (e.g. "gpt2").
# The list is kept sorted case-insensitively; keep it that way when adding models.
# NOTE(review): DEFAULT_MODEL_ALIASES below is built positionally from this list,
# so reordering entries changes that derived list's order.
OFFICIAL_MODEL_NAMES: list[str] = [
    "01-ai/Yi-34B",
    "01-ai/Yi-34B-Chat",
    "01-ai/Yi-6B",
    "01-ai/Yi-6B-Chat",
    "ai-forever/mGPT",
    "allenai/OLMo-1B-hf",
    "allenai/OLMo-2-0425-1B",
    "allenai/OLMo-2-1124-7B",
    "allenai/Olmo-3-32B-Think",
    "allenai/Olmo-3-7B-Instruct",
    "allenai/Olmo-3-7B-Think",
    "allenai/Olmo-3.1-32B-Instruct",
    "allenai/Olmo-3.1-32B-Think",
    "allenai/OLMo-7B-hf",
    "allenai/OLMoE-1B-7B-0924",
    "ArthurConmy/redwood_attn_2l",
    "Baidicoot/Othello-GPT-Transformer-Lens",
    "bigcode/santacoder",
    "bigscience/bloom-1b1",
    "bigscience/bloom-1b7",
    "bigscience/bloom-3b",
    "bigscience/bloom-560m",
    "bigscience/bloom-7b1",
    "codellama/CodeLlama-7b-hf",
    "codellama/CodeLlama-7b-Instruct-hf",
    "codellama/CodeLlama-7b-Python-hf",
    "distilgpt2",
    "EleutherAI/gpt-j-6B",
    "EleutherAI/gpt-neo-1.3B",
    "EleutherAI/gpt-neo-125M",
    "EleutherAI/gpt-neo-2.7B",
    "EleutherAI/gpt-neox-20b",
    "EleutherAI/pythia-1.4b",
    "EleutherAI/pythia-1.4b-deduped",
    "EleutherAI/pythia-1.4b-deduped-v0",
    "EleutherAI/pythia-1.4b-v0",
    "EleutherAI/pythia-12b",
    "EleutherAI/pythia-12b-deduped",
    "EleutherAI/pythia-12b-deduped-v0",
    "EleutherAI/pythia-12b-v0",
    "EleutherAI/pythia-14m",
    "EleutherAI/pythia-160m",
    "EleutherAI/pythia-160m-deduped",
    "EleutherAI/pythia-160m-deduped-v0",
    "EleutherAI/pythia-160m-seed1",
    "EleutherAI/pythia-160m-seed2",
    "EleutherAI/pythia-160m-seed3",
    "EleutherAI/pythia-160m-v0",
    "EleutherAI/pythia-1b",
    "EleutherAI/pythia-1b-deduped",
    "EleutherAI/pythia-1b-deduped-v0",
    "EleutherAI/pythia-1b-v0",
    "EleutherAI/pythia-2.8b",
    "EleutherAI/pythia-2.8b-deduped",
    "EleutherAI/pythia-2.8b-deduped-v0",
    "EleutherAI/pythia-2.8b-v0",
    "EleutherAI/pythia-31m",
    "EleutherAI/pythia-410m",
    "EleutherAI/pythia-410m-deduped",
    "EleutherAI/pythia-410m-deduped-v0",
    "EleutherAI/pythia-410m-v0",
    "EleutherAI/pythia-6.9b",
    "EleutherAI/pythia-6.9b-deduped",
    "EleutherAI/pythia-6.9b-deduped-v0",
    "EleutherAI/pythia-6.9b-v0",
    "EleutherAI/pythia-70m",
    "EleutherAI/pythia-70m-deduped",
    "EleutherAI/pythia-70m-deduped-v0",
    "EleutherAI/pythia-70m-v0",
    "facebook/hubert-base-ls960",
    "facebook/opt-1.3b",
    "facebook/opt-125m",
    "facebook/opt-13b",
    "facebook/opt-2.7b",
    "facebook/opt-30b",
    "facebook/opt-6.7b",
    "facebook/opt-66b",
    "facebook/wav2vec2-base",
    "facebook/wav2vec2-large",
    "google-bert/bert-base-cased",
    "google-bert/bert-base-uncased",
    "google-bert/bert-large-cased",
    "google-bert/bert-large-uncased",
    "google-t5/t5-base",
    "google-t5/t5-large",
    "google-t5/t5-small",
    "google/gemma-2-27b",
    "google/gemma-2-27b-it",
    "google/gemma-2-2b",
    "google/gemma-2-2b-it",
    "google/gemma-2-9b",
    "google/gemma-2-9b-it",
    "google/gemma-2b",
    "google/gemma-2b-it",
    "google/gemma-3-12b-it",
    "google/gemma-3-12b-pt",
    "google/gemma-3-1b-it",
    "google/gemma-3-1b-pt",
    "google/gemma-3-270m",
    "google/gemma-3-270m-it",
    "google/gemma-3-27b-it",
    "google/gemma-3-27b-pt",
    "google/gemma-3-4b-it",
    "google/gemma-3-4b-pt",
    "google/gemma-7b",
    "google/gemma-7b-it",
    "google/medgemma-27b-it",
    "google/medgemma-27b-text-it",
    "google/medgemma-4b-it",
    "google/medgemma-4b-pt",
    "gpt2",
    "gpt2-large",
    "gpt2-medium",
    "gpt2-xl",
    "llama-13b-hf",
    "llama-30b-hf",
    "llama-65b-hf",
    "llama-7b-hf",
    "meta-llama/Llama-2-13b-chat-hf",
    "meta-llama/Llama-2-13b-hf",
    "meta-llama/Llama-2-70b-chat-hf",
    "meta-llama/Llama-2-7b-chat-hf",
    "meta-llama/Llama-2-7b-hf",
    "meta-llama/Llama-3.1-70B",
    "meta-llama/Llama-3.1-70B-Instruct",
    "meta-llama/Llama-3.1-8B",
    "meta-llama/Llama-3.1-8B-Instruct",
    "meta-llama/Llama-3.2-1B",
    "meta-llama/Llama-3.2-1B-Instruct",
    "meta-llama/Llama-3.2-3B",
    "meta-llama/Llama-3.2-3B-Instruct",
    "meta-llama/Llama-3.3-70B-Instruct",
    "meta-llama/Meta-Llama-3-70B",
    "meta-llama/Meta-Llama-3-70B-Instruct",
    "meta-llama/Meta-Llama-3-8B",
    "meta-llama/Meta-Llama-3-8B-Instruct",
    "microsoft/phi-1",
    "microsoft/phi-1_5",
    "microsoft/phi-2",
    "microsoft/Phi-3-mini-4k-instruct",
    "microsoft/phi-4",
    "mistralai/Mistral-7B-Instruct-v0.1",
    "mistralai/Mistral-7B-v0.1",
    "mistralai/Mistral-Nemo-Base-2407",
    "mistralai/Mistral-Small-24B-Base-2501",
    "mistralai/Mixtral-8x7B-Instruct-v0.1",
    "mistralai/Mixtral-8x7B-v0.1",
    "NeelNanda/Attn-Only-2L512W-Shortformer-6B-big-lr",
    "NeelNanda/Attn_Only_1L512W_C4_Code",
    "NeelNanda/Attn_Only_2L512W_C4_Code",
    "NeelNanda/Attn_Only_3L512W_C4_Code",
    "NeelNanda/Attn_Only_4L512W_C4_Code",
    "NeelNanda/GELU_1L512W_C4_Code",
    "NeelNanda/GELU_2L512W_C4_Code",
    "NeelNanda/GELU_3L512W_C4_Code",
    "NeelNanda/GELU_4L512W_C4_Code",
    "NeelNanda/SoLU_10L1280W_C4_Code",
    "NeelNanda/SoLU_10L_v22_old",
    "NeelNanda/SoLU_12L1536W_C4_Code",
    "NeelNanda/SoLU_12L_v23_old",
    "NeelNanda/SoLU_1L512W_C4_Code",
    "NeelNanda/SoLU_1L512W_Wiki_Finetune",
    "NeelNanda/SoLU_1L_v9_old",
    "NeelNanda/SoLU_2L512W_C4_Code",
    "NeelNanda/SoLU_2L_v10_old",
    "NeelNanda/SoLU_3L512W_C4_Code",
    "NeelNanda/SoLU_4L512W_C4_Code",
    "NeelNanda/SoLU_4L512W_Wiki_Finetune",
    "NeelNanda/SoLU_4L_v11_old",
    "NeelNanda/SoLU_6L768W_C4_Code",
    "NeelNanda/SoLU_6L_v13_old",
    "NeelNanda/SoLU_8L1024W_C4_Code",
    "NeelNanda/SoLU_8L_v21_old",
    "openai/gpt-oss-20b",
    "Qwen/Qwen-14B",
    "Qwen/Qwen-14B-Chat",
    "Qwen/Qwen-1_8B",
    "Qwen/Qwen-1_8B-Chat",
    "Qwen/Qwen-7B",
    "Qwen/Qwen-7B-Chat",
    "Qwen/Qwen1.5-0.5B",
    "Qwen/Qwen1.5-0.5B-Chat",
    "Qwen/Qwen1.5-1.8B",
    "Qwen/Qwen1.5-1.8B-Chat",
    "Qwen/Qwen1.5-14B",
    "Qwen/Qwen1.5-14B-Chat",
    "Qwen/Qwen1.5-4B",
    "Qwen/Qwen1.5-4B-Chat",
    "Qwen/Qwen1.5-7B",
    "Qwen/Qwen1.5-7B-Chat",
    "Qwen/Qwen2-0.5B",
    "Qwen/Qwen2-0.5B-Instruct",
    "Qwen/Qwen2-1.5B",
    "Qwen/Qwen2-1.5B-Instruct",
    "Qwen/Qwen2-7B",
    "Qwen/Qwen2-7B-Instruct",
    "Qwen/Qwen2.5-0.5B",
    "Qwen/Qwen2.5-0.5B-Instruct",
    "Qwen/Qwen2.5-1.5B",
    "Qwen/Qwen2.5-1.5B-Instruct",
    "Qwen/Qwen2.5-14B",
    "Qwen/Qwen2.5-14B-Instruct",
    "Qwen/Qwen2.5-32B",
    "Qwen/Qwen2.5-32B-Instruct",
    "Qwen/Qwen2.5-3B",
    "Qwen/Qwen2.5-3B-Instruct",
    "Qwen/Qwen2.5-72B",
    "Qwen/Qwen2.5-72B-Instruct",
    "Qwen/Qwen2.5-7B",
    "Qwen/Qwen2.5-7B-Instruct",
    "Qwen/Qwen3-0.6B",
    "Qwen/Qwen3-0.6B-Base",
    "Qwen/Qwen3-1.7B",
    "Qwen/Qwen3-14B",
    "Qwen/Qwen3-4B",
    "Qwen/Qwen3-8B",
    "Qwen/QwQ-32B-Preview",
    "roneneldan/TinyStories-1Layer-21M",
    "roneneldan/TinyStories-1M",
    "roneneldan/TinyStories-28M",
    "roneneldan/TinyStories-2Layers-33M",
    "roneneldan/TinyStories-33M",
    "roneneldan/TinyStories-3M",
    "roneneldan/TinyStories-8M",
    "roneneldan/TinyStories-Instruct-1M",
    "roneneldan/TinyStories-Instruct-28M",
    "roneneldan/TinyStories-Instruct-2Layers-33M",
    "roneneldan/TinyStories-Instruct-33M",
    "roneneldan/TinyStories-Instruct-3M",
    "roneneldan/TinyStories-Instruct-8M",
    # NOTE(review): "Instuct" (sic) — presumably mirrors a typo in the actual
    # upstream HuggingFace repo id, so it must NOT be "corrected" here; verify
    # against huggingface.co before touching it.
    "roneneldan/TinyStories-Instuct-1Layer-21M",
    "stabilityai/stablelm-base-alpha-3b",
    "stabilityai/stablelm-base-alpha-7b",
    "stabilityai/stablelm-tuned-alpha-3b",
    "stabilityai/stablelm-tuned-alpha-7b",
    "stanford-crfm/alias-gpt2-small-x21",
    "stanford-crfm/arwen-gpt2-medium-x21",
    "stanford-crfm/battlestar-gpt2-small-x49",
    "stanford-crfm/beren-gpt2-medium-x49",
    "stanford-crfm/caprica-gpt2-small-x81",
    "stanford-crfm/celebrimbor-gpt2-medium-x81",
    "stanford-crfm/darkmatter-gpt2-small-x343",
    "stanford-crfm/durin-gpt2-medium-x343",
    "stanford-crfm/eowyn-gpt2-medium-x777",
    "stanford-crfm/expanse-gpt2-small-x777",
    "swiss-ai/Apertus-8B-2509",
    "swiss-ai/Apertus-8B-Instruct-2509",
]
"""Official model names for models on HuggingFace."""

251 

# Model Aliases: maps each official model name to the short names users may pass
# instead. By convention the FIRST alias in each list is the preferred/default one
# (see DEFAULT_MODEL_ALIASES below). Official names without an entry here simply
# have no aliases.
MODEL_ALIASES: dict[str, list[str]] = {
    "01-ai/Yi-34B": ["yi-34b", "Yi-34B"],
    "01-ai/Yi-34B-Chat": ["yi-34b-chat", "Yi-34B-Chat"],
    "01-ai/Yi-6B": ["yi-6b", "Yi-6B"],
    "01-ai/Yi-6B-Chat": ["yi-6b-chat", "Yi-6B-Chat"],
    "ai-forever/mGPT": ["mGPT"],
    "allenai/OLMo-1B-hf": ["olmo-1b"],
    "allenai/OLMo-2-0425-1B": ["olmo-2-1b"],
    "allenai/OLMo-2-1124-7B": ["olmo-2-7b"],
    "allenai/Olmo-3-32B-Think": ["olmo-3-32b-think"],
    "allenai/Olmo-3-7B-Instruct": ["olmo-3-7b-instruct"],
    "allenai/Olmo-3-7B-Think": ["olmo-3-7b-think"],
    "allenai/Olmo-3.1-32B-Instruct": ["olmo-3.1-32b-instruct"],
    "allenai/Olmo-3.1-32B-Think": ["olmo-3.1-32b-think"],
    "allenai/OLMo-7B-hf": ["olmo-7b"],
    "allenai/OLMoE-1B-7B-0924": ["olmoe"],
    "ArthurConmy/redwood_attn_2l": ["redwood_attn_2l"],
    "Baidicoot/Othello-GPT-Transformer-Lens": ["othello-gpt"],
    "bigcode/santacoder": ["santacoder"],
    "bigscience/bloom-1b1": ["bloom-1b1"],
    "bigscience/bloom-1b7": ["bloom-1b7"],
    "bigscience/bloom-3b": ["bloom-3b"],
    "bigscience/bloom-560m": ["bloom-560m"],
    "bigscience/bloom-7b1": ["bloom-7b1"],
    # NOTE(review): "CodeLlamallama-2-7b" looks like a fused typo (CodeLlama +
    # llama-2-7b), but it is a published alias — removing or renaming it would
    # break callers, so it is kept verbatim.
    "codellama/CodeLlama-7b-hf": ["CodeLlamallama-2-7b"],
    "codellama/CodeLlama-7b-Instruct-hf": ["CodeLlama-7b-instruct"],
    "codellama/CodeLlama-7b-Python-hf": ["CodeLlama-7b-python"],
    "distilgpt2": ["distillgpt2", "distill-gpt2", "distil-gpt2", "gpt2-xs"],
    "EleutherAI/gpt-j-6B": ["gpt-j-6B", "gpt-j", "gptj"],
    "EleutherAI/gpt-neo-1.3B": ["gpt-neo-1.3B", "gpt-neo-medium", "neo-medium"],
    "EleutherAI/gpt-neo-125M": ["gpt-neo-125M", "gpt-neo-small", "neo-small", "neo"],
    "EleutherAI/gpt-neo-2.7B": ["gpt-neo-2.7B", "gpt-neo-large", "neo-large"],
    "EleutherAI/gpt-neox-20b": ["gpt-neox-20b", "gpt-neox", "neox"],
    # Pythia models were renamed partway through the suite's release; the old
    # size-based names (1.3b, 13b, 125m, ...) are kept as aliases of the new ones.
    "EleutherAI/pythia-1.4b": ["pythia-1.4b", "EleutherAI/pythia-1.3b", "pythia-1.3b"],
    "EleutherAI/pythia-1.4b-deduped": [
        "pythia-1.4b-deduped",
        "EleutherAI/pythia-1.3b-deduped",
        "pythia-1.3b-deduped",
    ],
    "EleutherAI/pythia-1.4b-deduped-v0": [
        "pythia-1.4b-deduped-v0",
        "EleutherAI/pythia-1.3b-deduped-v0",
        "pythia-1.3b-deduped-v0",
    ],
    "EleutherAI/pythia-1.4b-v0": ["pythia-1.4b-v0", "EleutherAI/pythia-1.3b-v0", "pythia-1.3b-v0"],
    "EleutherAI/pythia-12b": ["pythia-12b", "EleutherAI/pythia-13b", "pythia-13b"],
    "EleutherAI/pythia-12b-deduped": [
        "pythia-12b-deduped",
        "EleutherAI/pythia-13b-deduped",
        "pythia-13b-deduped",
    ],
    "EleutherAI/pythia-12b-deduped-v0": [
        "pythia-12b-deduped-v0",
        "EleutherAI/pythia-13b-deduped-v0",
        "pythia-13b-deduped-v0",
    ],
    "EleutherAI/pythia-12b-v0": ["pythia-12b-v0", "EleutherAI/pythia-13b-v0", "pythia-13b-v0"],
    "EleutherAI/pythia-14m": ["pythia-14m"],
    "EleutherAI/pythia-160m": ["pythia-160m", "EleutherAI/pythia-125m", "pythia-125m"],
    "EleutherAI/pythia-160m-deduped": [
        "pythia-160m-deduped",
        "EleutherAI/pythia-125m-deduped",
        "pythia-125m-deduped",
    ],
    "EleutherAI/pythia-160m-deduped-v0": [
        "pythia-160m-deduped-v0",
        "EleutherAI/pythia-125m-deduped-v0",
        "pythia-125m-deduped-v0",
    ],
    "EleutherAI/pythia-160m-seed1": [
        "pythia-160m-seed1",
        "EleutherAI/pythia-125m-seed1",
        "pythia-125m-seed1",
    ],
    "EleutherAI/pythia-160m-seed2": [
        "pythia-160m-seed2",
        "EleutherAI/pythia-125m-seed2",
        "pythia-125m-seed2",
    ],
    "EleutherAI/pythia-160m-seed3": [
        "pythia-160m-seed3",
        "EleutherAI/pythia-125m-seed3",
        "pythia-125m-seed3",
    ],
    "EleutherAI/pythia-160m-v0": ["pythia-160m-v0", "EleutherAI/pythia-125m-v0", "pythia-125m-v0"],
    "EleutherAI/pythia-1b": ["pythia-1b", "EleutherAI/pythia-800m", "pythia-800m"],
    "EleutherAI/pythia-1b-deduped": [
        "pythia-1b-deduped",
        "EleutherAI/pythia-800m-deduped",
        "pythia-800m-deduped",
    ],
    "EleutherAI/pythia-1b-deduped-v0": [
        "pythia-1b-deduped-v0",
        "EleutherAI/pythia-800m-deduped-v0",
        "pythia-800m-deduped-v0",
    ],
    "EleutherAI/pythia-1b-v0": ["pythia-1b-v0", "EleutherAI/pythia-800m-v0", "pythia-800m-v0"],
    "EleutherAI/pythia-2.8b": ["pythia-2.8b", "EleutherAI/pythia-2.7b", "pythia-2.7b"],
    "EleutherAI/pythia-2.8b-deduped": [
        "pythia-2.8b-deduped",
        "EleutherAI/pythia-2.7b-deduped",
        "pythia-2.7b-deduped",
    ],
    "EleutherAI/pythia-2.8b-deduped-v0": [
        "pythia-2.8b-deduped-v0",
        "EleutherAI/pythia-2.7b-deduped-v0",
        "pythia-2.7b-deduped-v0",
    ],
    "EleutherAI/pythia-2.8b-v0": ["pythia-2.8b-v0", "EleutherAI/pythia-2.7b-v0", "pythia-2.7b-v0"],
    "EleutherAI/pythia-31m": ["pythia-31m"],
    "EleutherAI/pythia-410m": ["pythia-410m", "EleutherAI/pythia-350m", "pythia-350m"],
    "EleutherAI/pythia-410m-deduped": [
        "pythia-410m-deduped",
        "EleutherAI/pythia-350m-deduped",
        "pythia-350m-deduped",
    ],
    "EleutherAI/pythia-410m-deduped-v0": [
        "pythia-410m-deduped-v0",
        "EleutherAI/pythia-350m-deduped-v0",
        "pythia-350m-deduped-v0",
    ],
    "EleutherAI/pythia-410m-v0": ["pythia-410m-v0", "EleutherAI/pythia-350m-v0", "pythia-350m-v0"],
    "EleutherAI/pythia-6.9b": ["pythia-6.9b", "EleutherAI/pythia-6.7b", "pythia-6.7b"],
    "EleutherAI/pythia-6.9b-deduped": [
        "pythia-6.9b-deduped",
        "EleutherAI/pythia-6.7b-deduped",
        "pythia-6.7b-deduped",
    ],
    "EleutherAI/pythia-6.9b-deduped-v0": [
        "pythia-6.9b-deduped-v0",
        "EleutherAI/pythia-6.7b-deduped-v0",
        "pythia-6.7b-deduped-v0",
    ],
    "EleutherAI/pythia-6.9b-v0": ["pythia-6.9b-v0", "EleutherAI/pythia-6.7b-v0", "pythia-6.7b-v0"],
    "EleutherAI/pythia-70m": ["pythia-70m", "pythia", "EleutherAI/pythia-19m", "pythia-19m"],
    "EleutherAI/pythia-70m-deduped": [
        "pythia-70m-deduped",
        "EleutherAI/pythia-19m-deduped",
        "pythia-19m-deduped",
    ],
    "EleutherAI/pythia-70m-deduped-v0": [
        "pythia-70m-deduped-v0",
        "EleutherAI/pythia-19m-deduped-v0",
        "pythia-19m-deduped-v0",
    ],
    "EleutherAI/pythia-70m-v0": [
        "pythia-70m-v0",
        "pythia-v0",
        "EleutherAI/pythia-19m-v0",
        "pythia-19m-v0",
    ],
    "facebook/hubert-base-ls960": ["hubert-base-ls960"],
    "facebook/opt-1.3b": ["opt-1.3b", "opt-medium"],
    "facebook/opt-125m": ["opt-125m", "opt-small", "opt"],
    "facebook/opt-13b": ["opt-13b", "opt-xxl"],
    "facebook/opt-2.7b": ["opt-2.7b", "opt-large"],
    "facebook/opt-30b": ["opt-30b", "opt-xxxl"],
    "facebook/opt-6.7b": ["opt-6.7b", "opt-xl"],
    "facebook/opt-66b": ["opt-66b", "opt-xxxxl"],
    "facebook/wav2vec2-base": ["wav2vec2-base", "w2v2-base"],
    "facebook/wav2vec2-large": ["wav2vec2-large", "w2v2-large"],
    "google-bert/bert-base-cased": ["bert-base-cased"],
    "google-bert/bert-base-uncased": ["bert-base-uncased"],
    "google-bert/bert-large-cased": ["bert-large-cased"],
    "google-bert/bert-large-uncased": ["bert-large-uncased"],
    "google-t5/t5-base": ["t5-base"],
    "google-t5/t5-large": ["t5-large"],
    "google-t5/t5-small": ["t5-small"],
    "google/gemma-2-27b": ["gemma-2-27b"],
    "google/gemma-2-27b-it": ["gemma-2-27b-it"],
    "google/gemma-2-2b": ["gemma-2-2b"],
    "google/gemma-2-2b-it": ["gemma-2-2b-it"],
    "google/gemma-2-9b": ["gemma-2-9b"],
    "google/gemma-2-9b-it": ["gemma-2-9b-it"],
    "google/gemma-2b": ["gemma-2b"],
    "google/gemma-2b-it": ["gemma-2b-it"],
    "google/gemma-3-12b-it": ["gemma-3-12b-it"],
    "google/gemma-3-12b-pt": ["gemma-3-12b-pt"],
    "google/gemma-3-1b-it": ["gemma-3-1b-it"],
    "google/gemma-3-1b-pt": ["gemma-3-1b-pt"],
    "google/gemma-3-270m": ["gemma-3-270m"],
    "google/gemma-3-270m-it": ["gemma-3-270m-it"],
    "google/gemma-3-27b-it": ["gemma-3-27b-it"],
    "google/gemma-3-27b-pt": ["gemma-3-27b-pt"],
    "google/gemma-3-4b-it": ["gemma-3-4b-it"],
    "google/gemma-3-4b-pt": ["gemma-3-4b-pt"],
    "google/gemma-7b": ["gemma-7b"],
    "google/gemma-7b-it": ["gemma-7b-it"],
    "google/medgemma-27b-it": ["medgemma-27b-it"],
    "google/medgemma-27b-text-it": ["medgemma-27b-text-it"],
    "google/medgemma-4b-it": ["medgemma-4b-it"],
    "google/medgemma-4b-pt": ["medgemma-4b-pt"],
    "gpt2": ["gpt2-small"],
    "llama-13b-hf": ["llama-13b"],
    "llama-30b-hf": ["llama-30b"],
    "llama-65b-hf": ["llama-65b"],
    "llama-7b-hf": ["llama-7b"],
    "meta-llama/Llama-2-13b-chat-hf": ["Llama-2-13b-chat"],
    "meta-llama/Llama-2-13b-hf": ["Llama-2-13b"],
    "meta-llama/Llama-2-70b-chat-hf": ["Llama-2-70b-chat", "meta-llama-2-70b-chat-hf"],
    "meta-llama/Llama-2-7b-chat-hf": ["Llama-2-7b-chat"],
    "meta-llama/Llama-2-7b-hf": ["Llama-2-7b"],
    "microsoft/phi-1": ["phi-1"],
    "microsoft/phi-1_5": ["phi-1_5"],
    "microsoft/phi-2": ["phi-2"],
    "microsoft/Phi-3-mini-4k-instruct": ["phi-3"],
    "microsoft/phi-4": ["phi-4"],
    "mistralai/Mistral-7B-Instruct-v0.1": ["mistral-7b-instruct"],
    "mistralai/Mistral-7B-v0.1": ["mistral-7b"],
    "mistralai/Mistral-Nemo-Base-2407": ["mistral-nemo-base-2407"],
    "mistralai/Mixtral-8x7B-Instruct-v0.1": ["mixtral-instruct", "mixtral-8x7b-instruct"],
    "mistralai/Mixtral-8x7B-v0.1": ["mixtral", "mixtral-8x7b"],
    "NeelNanda/Attn-Only-2L512W-Shortformer-6B-big-lr": [
        "attn-only-2l-demo",
        "attn-only-2l-shortformer-6b-big-lr",
        "attn-only-2l-induction-demo",
        "attn-only-demo",
    ],
    "NeelNanda/Attn_Only_1L512W_C4_Code": [
        "attn-only-1l",
        "attn-only-1l-new",
        "attn-only-1l-c4-code",
    ],
    "NeelNanda/Attn_Only_2L512W_C4_Code": [
        "attn-only-2l",
        "attn-only-2l-new",
        "attn-only-2l-c4-code",
    ],
    "NeelNanda/Attn_Only_3L512W_C4_Code": [
        "attn-only-3l",
        "attn-only-3l-new",
        "attn-only-3l-c4-code",
    ],
    "NeelNanda/Attn_Only_4L512W_C4_Code": [
        "attn-only-4l",
        "attn-only-4l-new",
        "attn-only-4l-c4-code",
    ],
    "NeelNanda/GELU_1L512W_C4_Code": ["gelu-1l", "gelu-1l-new", "gelu-1l-c4-code"],
    "NeelNanda/GELU_2L512W_C4_Code": ["gelu-2l", "gelu-2l-new", "gelu-2l-c4-code"],
    "NeelNanda/GELU_3L512W_C4_Code": ["gelu-3l", "gelu-3l-new", "gelu-3l-c4-code"],
    "NeelNanda/GELU_4L512W_C4_Code": ["gelu-4l", "gelu-4l-new", "gelu-4l-c4-code"],
    "NeelNanda/SoLU_10L1280W_C4_Code": ["solu-10l", "solu-10l-new", "solu-10l-c4-code"],
    "NeelNanda/SoLU_10L_v22_old": ["solu-10l-pile", "solu-10l-old"],
    "NeelNanda/SoLU_12L1536W_C4_Code": ["solu-12l", "solu-12l-new", "solu-12l-c4-code"],
    "NeelNanda/SoLU_12L_v23_old": ["solu-12l-pile", "solu-12l-old"],
    "NeelNanda/SoLU_1L512W_C4_Code": ["solu-1l", "solu-1l-new", "solu-1l-c4-code"],
    "NeelNanda/SoLU_1L512W_Wiki_Finetune": [
        "solu-1l-wiki",
        "solu-1l-wiki-finetune",
        "solu-1l-finetune",
    ],
    "NeelNanda/SoLU_1L_v9_old": ["solu-1l-pile", "solu-1l-old"],
    "NeelNanda/SoLU_2L512W_C4_Code": ["solu-2l", "solu-2l-new", "solu-2l-c4-code"],
    "NeelNanda/SoLU_2L_v10_old": ["solu-2l-pile", "solu-2l-old"],
    "NeelNanda/SoLU_3L512W_C4_Code": ["solu-3l", "solu-3l-new", "solu-3l-c4-code"],
    "NeelNanda/SoLU_4L512W_C4_Code": ["solu-4l", "solu-4l-new", "solu-4l-c4-code"],
    "NeelNanda/SoLU_4L512W_Wiki_Finetune": [
        "solu-4l-wiki",
        "solu-4l-wiki-finetune",
        "solu-4l-finetune",
    ],
    "NeelNanda/SoLU_4L_v11_old": ["solu-4l-pile", "solu-4l-old"],
    "NeelNanda/SoLU_6L768W_C4_Code": ["solu-6l", "solu-6l-new", "solu-6l-c4-code"],
    "NeelNanda/SoLU_6L_v13_old": ["solu-6l-pile", "solu-6l-old"],
    "NeelNanda/SoLU_8L1024W_C4_Code": ["solu-8l", "solu-8l-new", "solu-8l-c4-code"],
    "NeelNanda/SoLU_8L_v21_old": ["solu-8l-pile", "solu-8l-old"],
    "openai/gpt-oss-20b": ["gpt-oss-20b", "gpt-oss"],
    "Qwen/Qwen-14B": ["qwen-14b"],
    "Qwen/Qwen-14B-Chat": ["qwen-14b-chat"],
    "Qwen/Qwen-1_8B": ["qwen-1.8b"],
    "Qwen/Qwen-1_8B-Chat": ["qwen-1.8b-chat"],
    "Qwen/Qwen-7B": ["qwen-7b"],
    "Qwen/Qwen-7B-Chat": ["qwen-7b-chat"],
    "Qwen/Qwen1.5-0.5B": ["qwen1.5-0.5b"],
    "Qwen/Qwen1.5-0.5B-Chat": ["qwen1.5-0.5b-chat"],
    "Qwen/Qwen1.5-1.8B": ["qwen1.5-1.8b"],
    "Qwen/Qwen1.5-1.8B-Chat": ["qwen1.5-1.8b-chat"],
    "Qwen/Qwen1.5-14B": ["qwen1.5-14b"],
    "Qwen/Qwen1.5-14B-Chat": ["qwen1.5-14b-chat"],
    "Qwen/Qwen1.5-4B": ["qwen1.5-4b"],
    "Qwen/Qwen1.5-4B-Chat": ["qwen1.5-4b-chat"],
    "Qwen/Qwen1.5-7B": ["qwen1.5-7b"],
    "Qwen/Qwen1.5-7B-Chat": ["qwen1.5-7b-chat"],
    "Qwen/Qwen2-0.5B": ["qwen2-0.5b"],
    "Qwen/Qwen2-0.5B-Instruct": ["qwen2-0.5b-instruct"],
    "Qwen/Qwen2-1.5B": ["qwen2-1.5b"],
    "Qwen/Qwen2-1.5B-Instruct": ["qwen2-1.5b-instruct"],
    "Qwen/Qwen2-7B": ["qwen2-7b"],
    "Qwen/Qwen2-7B-Instruct": ["qwen2-7b-instruct"],
    "Qwen/Qwen2.5-0.5B": ["qwen2.5-0.5b"],
    "Qwen/Qwen2.5-0.5B-Instruct": ["qwen2.5-0.5b-instruct"],
    "Qwen/Qwen2.5-1.5B": ["qwen2.5-1.5b"],
    "Qwen/Qwen2.5-1.5B-Instruct": ["qwen2.5-1.5b-instruct"],
    "Qwen/Qwen2.5-14B": ["qwen2.5-14b"],
    "Qwen/Qwen2.5-14B-Instruct": ["qwen2.5-14b-instruct"],
    "Qwen/Qwen2.5-32B": ["qwen2.5-32b"],
    "Qwen/Qwen2.5-32B-Instruct": ["qwen2.5-32b-instruct"],
    "Qwen/Qwen2.5-3B": ["qwen2.5-3b"],
    "Qwen/Qwen2.5-3B-Instruct": ["qwen2.5-3b-instruct"],
    "Qwen/Qwen2.5-72B": ["qwen2.5-72b"],
    "Qwen/Qwen2.5-72B-Instruct": ["qwen2.5-72b-instruct"],
    "Qwen/Qwen2.5-7B": ["qwen2.5-7b"],
    "Qwen/Qwen2.5-7B-Instruct": ["qwen2.5-7b-instruct"],
    "Qwen/Qwen3-0.6B": ["qwen3-0.6b"],
    "Qwen/Qwen3-0.6B-Base": ["qwen3-0.6b-base"],
    "Qwen/Qwen3-1.7B": ["qwen3-1.7b"],
    "Qwen/Qwen3-14B": ["qwen3-14b"],
    "Qwen/Qwen3-4B": ["qwen3-4b"],
    "Qwen/Qwen3-8B": ["qwen3-8b"],
    # NOTE(review): alias says "qwen-32b-preview", not "qwq-..."; possibly
    # intentional, possibly a typo — kept verbatim since renaming would break callers.
    "Qwen/QwQ-32B-Preview": ["qwen-32b-preview"],
    "roneneldan/TinyStories-1Layer-21M": ["tiny-stories-1L-21M"],
    "roneneldan/TinyStories-1M": ["tiny-stories-1M"],
    "roneneldan/TinyStories-28M": ["tiny-stories-28M"],
    "roneneldan/TinyStories-2Layers-33M": ["tiny-stories-2L-33M"],
    "roneneldan/TinyStories-33M": ["tiny-stories-33M"],
    "roneneldan/TinyStories-3M": ["tiny-stories-3M"],
    "roneneldan/TinyStories-8M": ["tiny-stories-8M"],
    "roneneldan/TinyStories-Instruct-1M": ["tiny-stories-instruct-1M"],
    "roneneldan/TinyStories-Instruct-28M": ["tiny-stories-instruct-28M"],
    "roneneldan/TinyStories-Instruct-2Layers-33M": ["tiny-stories-instruct-2L-33M"],
    "roneneldan/TinyStories-Instruct-33M": ["tiny-stories-instruct-33M"],
    "roneneldan/TinyStories-Instruct-3M": ["tiny-stories-instruct-3M"],
    "roneneldan/TinyStories-Instruct-8M": ["tiny-stories-instruct-8M"],
    "roneneldan/TinyStories-Instuct-1Layer-21M": ["tiny-stories-instruct-1L-21M"],
    "stabilityai/stablelm-base-alpha-3b": ["stablelm-base-alpha-3b", "stablelm-base-3b"],
    "stabilityai/stablelm-base-alpha-7b": ["stablelm-base-alpha-7b", "stablelm-base-7b"],
    "stabilityai/stablelm-tuned-alpha-3b": ["stablelm-tuned-alpha-3b", "stablelm-tuned-3b"],
    "stabilityai/stablelm-tuned-alpha-7b": ["stablelm-tuned-alpha-7b", "stablelm-tuned-7b"],
    # Stanford CRFM seeds: each entry carries the letter-coded name, the repo name,
    # the historical "mistral"/"medium" alias, and the "stanford" alias. Four of
    # these previously listed the third alias twice instead of the "stanford" one;
    # the duplicates are fixed to follow the pattern of the a/c seed entries
    # (the historical aliases are all still present, so this is backward compatible).
    "stanford-crfm/alias-gpt2-small-x21": [
        "stanford-gpt2-small-a",
        "alias-gpt2-small-x21",
        "gpt2-mistral-small-a",
        "gpt2-stanford-small-a",
    ],
    "stanford-crfm/arwen-gpt2-medium-x21": [
        "stanford-gpt2-medium-a",
        "arwen-gpt2-medium-x21",
        "gpt2-medium-small-a",
        "gpt2-stanford-medium-a",
    ],
    "stanford-crfm/battlestar-gpt2-small-x49": [
        "stanford-gpt2-small-b",
        "battlestar-gpt2-small-x49",
        "gpt2-mistral-small-b",
        "gpt2-stanford-small-b",
    ],
    "stanford-crfm/beren-gpt2-medium-x49": [
        "stanford-gpt2-medium-b",
        "beren-gpt2-medium-x49",
        "gpt2-medium-small-b",
        "gpt2-stanford-medium-b",
    ],
    "stanford-crfm/caprica-gpt2-small-x81": [
        "stanford-gpt2-small-c",
        "caprica-gpt2-small-x81",
        "gpt2-mistral-small-c",
        "gpt2-stanford-small-c",
    ],
    "stanford-crfm/celebrimbor-gpt2-medium-x81": [
        "stanford-gpt2-medium-c",
        "celebrimbor-gpt2-medium-x81",
        "gpt2-medium-small-c",
        "gpt2-stanford-medium-c",
    ],
    "stanford-crfm/darkmatter-gpt2-small-x343": [
        "stanford-gpt2-small-d",
        "darkmatter-gpt2-small-x343",
        "gpt2-mistral-small-d",
        "gpt2-stanford-small-d",
    ],
    "stanford-crfm/durin-gpt2-medium-x343": [
        "stanford-gpt2-medium-d",
        "durin-gpt2-medium-x343",
        "gpt2-medium-small-d",
        "gpt2-stanford-medium-d",
    ],
    "stanford-crfm/eowyn-gpt2-medium-x777": [
        "stanford-gpt2-medium-e",
        "eowyn-gpt2-medium-x777",
        "gpt2-medium-small-e",
        "gpt2-stanford-medium-e",
    ],
    "stanford-crfm/expanse-gpt2-small-x777": [
        "stanford-gpt2-small-e",
        "expanse-gpt2-small-x777",
        "gpt2-mistral-small-e",
        "gpt2-stanford-small-e",
    ],
    "swiss-ai/Apertus-8B-2509": ["apertus-8b", "apertus"],
    "swiss-ai/Apertus-8B-Instruct-2509": ["apertus-8b-instruct", "apertus-instruct"],
}
"""Model aliases for models on HuggingFace."""

646 

647 

# Default alias per official model: by convention the first entry in MODEL_ALIASES,
# falling back to the official name when a model has no aliases. Built with a single
# dict.get() lookup instead of the membership-test-then-index double lookup.
DEFAULT_MODEL_ALIASES: list[str] = [
    MODEL_ALIASES.get(name, [name])[0] for name in OFFICIAL_MODEL_NAMES
]
"""Default alias for each entry of OFFICIAL_MODEL_NAMES, in the same order."""