Coverage for transformer_lens/tools/model_registry/__init__.py: 100%

6 statements  

« prev     ^ index     » next       coverage.py v7.10.1, created at 2026-07-01 15:58 +0000

1"""Model Registry tools for TransformerLens. 

2 

3This package provides tools for discovering and documenting HuggingFace models 

4that are compatible with TransformerLens. 

5 

6Main modules: 

7 - api: Public API for programmatic access to model registry data 

8 - schemas: Data classes for model entries, architecture gaps, etc. 

9 - verification: Verification tracking for model compatibility 

10 - exceptions: Custom exceptions for the model registry 

11 

12Example usage: 

13 >>> from transformer_lens.tools.model_registry import api # doctest: +SKIP 

14 >>> api.is_model_supported("openai-community/gpt2") # doctest: +SKIP 

15 True 

16 >>> models = api.get_architecture_models("GPT2LMHeadModel") # doctest: +SKIP 

17""" 

18 

19from .exceptions import ( 

20 ArchitectureNotSupportedError, 

21 DataNotLoadedError, 

22 DataValidationError, 

23 ModelNotFoundError, 

24 ModelRegistryError, 

25) 

26from .schemas import ( 

27 ArchitectureAnalysis, 

28 ArchitectureGap, 

29 ArchitectureGapsReport, 

30 ArchitectureStats, 

31 ModelEntry, 

32 ModelMetadata, 

33 ScanInfo, 

34 SupportedModelsReport, 

35) 

36from .verification import VerificationHistory, VerificationRecord 

37 

# Canonical set of HuggingFace architecture class names supported by
# TransformerBridge.
#
# Each entry must match, character for character, a string found in a HF model's
# config.architectures[] and must correspond to an adapter registered in
# architecture_adapter_factory.py.
#
# Deliberately excluded:
#   * Internal-only architectures (NanoGPT, MinGPT, NeelSoluOld,
#     GPT2LMHeadCustomModel, TransformerLensNative): they never appear on the
#     HuggingFace Hub.
#   * Factory-internal alias casings (Gemma1, Neo, NeoX): they route to canonical
#     adapters, but HF reports the canonical names (Gemma, GPTNeo, GPTNeoX) in
#     config.architectures instead.
#
# Both "BaiChuanForCausalLM" and "BaichuanForCausalLM" are listed on purpose;
# matching is exact, so each casing needs its own entry.
HF_SUPPORTED_ARCHITECTURES: set[str] = {
    "ApertusForCausalLM",
    "BaiChuanForCausalLM",
    "BaichuanForCausalLM",
    "BartForConditionalGeneration",
    "BertForMaskedLM",
    "BloomForCausalLM",
    "CodeGenForCausalLM",
    "CohereForCausalLM",
    "DeepseekV2ForCausalLM",
    "DeepseekV3ForCausalLM",
    "FalconForCausalLM",
    "GemmaForCausalLM",
    "Gemma2ForCausalLM",
    "Gemma3ForCausalLM",
    "Gemma3ForConditionalGeneration",
    "Gemma3nForConditionalGeneration",
    "Gemma4ForConditionalGeneration",
    "Gemma4UnifiedForConditionalGeneration",
    "Glm4MoeForCausalLM",
    "GlmMoeDsaForCausalLM",
    "GraniteForCausalLM",
    "GraniteMoeForCausalLM",
    "GraniteMoeHybridForCausalLM",
    "GPT2LMHeadModel",
    "GPTBigCodeForCausalLM",
    "GptOssForCausalLM",
    "GPTJForCausalLM",
    "GPTNeoForCausalLM",
    "OpenELMForCausalLM",
    "GPTNeoXForCausalLM",
    "HubertForCTC",
    "HubertModel",
    "HunYuanDenseV1ForCausalLM",
    "InternLM2ForCausalLM",
    "LlamaForCausalLM",
    "LlavaForConditionalGeneration",
    "LlavaNextForConditionalGeneration",
    "LlavaOnevisionForConditionalGeneration",
    "Lfm2MoeForCausalLM",
    "MambaForCausalLM",
    "Mamba2ForCausalLM",
    "NemotronHForCausalLM",
    "MPTForCausalLM",
    "MistralForCausalLM",
    "MixtralForCausalLM",
    "Olmo2ForCausalLM",
    "Olmo3ForCausalLM",
    "OlmoForCausalLM",
    "OlmoeForCausalLM",
    "OPTForCausalLM",
    "PhiForCausalLM",
    "Phi3ForCausalLM",
    "PhiMoEForCausalLM",
    "QwenForCausalLM",
    "Qwen2ForCausalLM",
    "Qwen3ForCausalLM",
    "Qwen3MoeForCausalLM",
    "Qwen3NextForCausalLM",
    "Qwen3_5ForCausalLM",
    "Qwen3_5ForConditionalGeneration",
    "SmolLM3ForCausalLM",
    "StableLmForCausalLM",
    "T5ForConditionalGeneration",
    "MT5ForConditionalGeneration",
    "T5GemmaForConditionalGeneration",
    "XGLMForCausalLM",
}

115 

# Foundation-trained orgs per architecture. Source of truth for the scraper's
# download-threshold bypass and the docs table's "Canonical only" toggle.
#
# Keys are HuggingFace architecture class names; values are the HF Hub
# organisation/user names treated as canonical publishers for that architecture.
# Every key here is also a member of HF_SUPPORTED_ARCHITECTURES; keep the two in
# step when adding or removing an architecture.
CANONICAL_AUTHORS_BY_ARCH: dict[str, list[str]] = {
    "ApertusForCausalLM": ["swiss-ai"],
    "BaiChuanForCausalLM": ["baichuan-inc"],
    "BaichuanForCausalLM": ["baichuan-inc"],
    "BartForConditionalGeneration": ["facebook"],
    "BertForMaskedLM": ["google-bert"],
    "BloomForCausalLM": ["bigscience"],
    "CodeGenForCausalLM": ["Salesforce"],
    "CohereForCausalLM": ["CohereLabs"],
    "DeepseekV2ForCausalLM": ["deepseek-ai"],
    "DeepseekV3ForCausalLM": ["deepseek-ai"],
    "FalconForCausalLM": ["tiiuae"],
    "Gemma2ForCausalLM": ["google"],
    "Gemma3ForCausalLM": ["google"],
    "Gemma3ForConditionalGeneration": ["google"],
    "Gemma3nForConditionalGeneration": ["google"],
    "Gemma4ForConditionalGeneration": ["google"],
    "Gemma4UnifiedForConditionalGeneration": ["google"],
    "GemmaForCausalLM": ["google"],
    "Glm4MoeForCausalLM": ["zai-org"],
    "GlmMoeDsaForCausalLM": ["zai-org"],
    "GPT2LMHeadModel": ["openai-community", "stanford-crfm", "Writer"],
    "GPTBigCodeForCausalLM": ["bigcode"],
    "GptOssForCausalLM": ["openai"],
    "GPTJForCausalLM": ["EleutherAI", "togethercomputer"],
    "GPTNeoForCausalLM": ["EleutherAI", "roneneldan"],
    "GPTNeoXForCausalLM": ["EleutherAI", "cyberagent", "stabilityai", "togethercomputer"],
    "GraniteForCausalLM": ["ibm-granite"],
    "GraniteMoeForCausalLM": ["ibm-granite"],
    "GraniteMoeHybridForCausalLM": ["ibm-granite"],
    "HubertForCTC": ["facebook"],
    "HubertModel": ["facebook"],
    "HunYuanDenseV1ForCausalLM": ["tencent"],
    "InternLM2ForCausalLM": ["internlm"],
    "LlamaForCausalLM": ["meta-llama", "huggyllama", "codellama", "SimpleStories"],
    "LlavaForConditionalGeneration": ["llava-hf"],
    "LlavaNextForConditionalGeneration": ["llava-hf"],
    "LlavaOnevisionForConditionalGeneration": ["llava-hf"],
    "Lfm2MoeForCausalLM": ["LiquidAI"],
    "Mamba2ForCausalLM": ["state-spaces"],
    "MambaForCausalLM": ["state-spaces"],
    "NemotronHForCausalLM": ["nvidia"],
    "MistralForCausalLM": ["mistralai"],
    "MixtralForCausalLM": ["mistralai"],
    "MPTForCausalLM": ["mosaicml"],
    "MT5ForConditionalGeneration": ["google", "bigscience", "csebuetnlp"],
    "Olmo2ForCausalLM": ["allenai", "HPLT"],
    "Olmo3ForCausalLM": ["allenai"],
    "OlmoeForCausalLM": ["allenai"],
    "OlmoForCausalLM": ["allenai"],
    "OpenELMForCausalLM": ["apple"],
    "OPTForCausalLM": ["facebook"],
    "Phi3ForCausalLM": ["microsoft"],
    "PhiMoEForCausalLM": ["microsoft"],
    "PhiForCausalLM": ["microsoft"],
    "Qwen2ForCausalLM": ["Qwen", "nvidia"],
    "Qwen3ForCausalLM": ["Qwen", "nvidia"],
    "Qwen3MoeForCausalLM": ["Qwen"],
    "Qwen3NextForCausalLM": ["Qwen"],
    "Qwen3_5ForCausalLM": ["Qwen"],
    "Qwen3_5ForConditionalGeneration": ["Qwen"],
    "QwenForCausalLM": ["Qwen"],
    "SmolLM3ForCausalLM": ["HuggingFaceTB"],
    "StableLmForCausalLM": ["stabilityai"],
    "T5ForConditionalGeneration": ["google-t5", "google", "Salesforce", "MBZUAI"],
    "T5GemmaForConditionalGeneration": ["google"],
    "XGLMForCausalLM": ["facebook"],
}

186 

# Public API of the model_registry package: the names exported by
# `from transformer_lens.tools.model_registry import *`. Grouped by the
# submodule (or this module, for constants) each name comes from.
__all__ = [
    # Constants
    "HF_SUPPORTED_ARCHITECTURES",
    "CANONICAL_AUTHORS_BY_ARCH",
    # Exceptions
    "ModelRegistryError",
    "ModelNotFoundError",
    "ArchitectureNotSupportedError",
    "DataNotLoadedError",
    "DataValidationError",
    # Schemas
    "ModelEntry",
    "ModelMetadata",
    "ScanInfo",
    "ArchitectureGap",
    "ArchitectureStats",
    "ArchitectureAnalysis",
    "SupportedModelsReport",
    "ArchitectureGapsReport",
    # Verification
    "VerificationRecord",
    "VerificationHistory",
]