Coverage for transformer_lens/tools/model_registry/__init__.py: 100%

6 statements  

« prev     ^ index     » next       coverage.py v7.10.1, created at 2026-06-09 00:32 +0000

"""Model Registry tools for TransformerLens.

This package provides tools for discovering and documenting HuggingFace models
that are compatible with TransformerLens.

Main modules:
    - api: Public API for programmatic access to model registry data
    - schemas: Data classes for model entries, architecture gaps, etc.
    - verification: Verification tracking for model compatibility
    - exceptions: Custom exceptions for the model registry

Example usage:
    >>> from transformer_lens.tools.model_registry import api  # doctest: +SKIP
    >>> api.is_model_supported("openai-community/gpt2")  # doctest: +SKIP
    True
    >>> models = api.get_architecture_models("GPT2LMHeadModel")  # doctest: +SKIP
"""

18 

19from .exceptions import ( 

20 ArchitectureNotSupportedError, 

21 DataNotLoadedError, 

22 DataValidationError, 

23 ModelNotFoundError, 

24 ModelRegistryError, 

25) 

26from .schemas import ( 

27 ArchitectureAnalysis, 

28 ArchitectureGap, 

29 ArchitectureGapsReport, 

30 ArchitectureStats, 

31 ModelEntry, 

32 ModelMetadata, 

33 ScanInfo, 

34 SupportedModelsReport, 

35) 

36from .verification import VerificationHistory, VerificationRecord 

37 

# Canonical set of HuggingFace architecture class names supported by TransformerBridge.
# These must match the exact strings found in HF model config.architectures[]
# and correspond to adapters registered in architecture_adapter_factory.py.
#
# Internal-only architectures (NanoGPT, MinGPT, NeelSoluOld, GPT2LMHeadCustomModel,
# TransformerLensNative) are excluded since they never appear on HuggingFace Hub.
# Factory-internal alias casings (Gemma1, Neo, NeoX) are also excluded since they
# route to canonical adapters but HF reports the canonical names (Gemma, GPTNeo,
# GPTNeoX) in config.architectures instead.
#
# Entries are listed in the same order as the keys of CANONICAL_AUTHORS_BY_ARCH so
# the two tables can be compared side by side; a set is unordered, so the listing
# order has no runtime effect.
HF_SUPPORTED_ARCHITECTURES: set[str] = {
    "ApertusForCausalLM",
    # Both casings are listed on purpose: the strings are matched exactly.
    "BaiChuanForCausalLM",
    "BaichuanForCausalLM",
    "BertForMaskedLM",
    "BloomForCausalLM",
    "CodeGenForCausalLM",
    "CohereForCausalLM",
    "DeepseekV3ForCausalLM",
    "FalconForCausalLM",
    "Gemma2ForCausalLM",
    "Gemma3ForCausalLM",
    "Gemma3ForConditionalGeneration",
    "Gemma3nForConditionalGeneration",
    "GemmaForCausalLM",
    "GPT2LMHeadModel",
    "GPTBigCodeForCausalLM",
    "GptOssForCausalLM",
    "GPTJForCausalLM",
    "GPTNeoForCausalLM",
    "GPTNeoXForCausalLM",
    "GraniteForCausalLM",
    "GraniteMoeForCausalLM",
    "GraniteMoeHybridForCausalLM",
    "HubertForCTC",
    "HubertModel",
    "InternLM2ForCausalLM",
    "LlamaForCausalLM",
    "LlavaForConditionalGeneration",
    "LlavaNextForConditionalGeneration",
    "LlavaOnevisionForConditionalGeneration",
    "Mamba2ForCausalLM",
    "MambaForCausalLM",
    "MistralForCausalLM",
    "MixtralForCausalLM",
    "MPTForCausalLM",
    "MT5ForConditionalGeneration",
    "Olmo2ForCausalLM",
    "Olmo3ForCausalLM",
    "OlmoeForCausalLM",
    "OlmoForCausalLM",
    "OpenELMForCausalLM",
    "OPTForCausalLM",
    "Phi3ForCausalLM",
    "PhiForCausalLM",
    "Qwen2ForCausalLM",
    "Qwen3ForCausalLM",
    "Qwen3MoeForCausalLM",
    "Qwen3NextForCausalLM",
    "Qwen3_5ForCausalLM",
    "Qwen3_5ForConditionalGeneration",
    "QwenForCausalLM",
    "SmolLM3ForCausalLM",
    "StableLmForCausalLM",
    "T5ForConditionalGeneration",
    "XGLMForCausalLM",
}

104 

# Foundation-trained orgs per architecture. Source of truth for the scraper's
# download-threshold bypass and the docs table's "Canonical only" toggle.
#
# Maps each HuggingFace architecture class name to the list of Hub organisation
# names treated as canonical authors for that architecture.
CANONICAL_AUTHORS_BY_ARCH: dict[str, list[str]] = {
    "ApertusForCausalLM": ["swiss-ai"],
    "BaiChuanForCausalLM": ["baichuan-inc"],
    "BaichuanForCausalLM": ["baichuan-inc"],
    "BertForMaskedLM": ["google-bert"],
    "BloomForCausalLM": ["bigscience"],
    "CodeGenForCausalLM": ["Salesforce"],
    "CohereForCausalLM": ["CohereLabs"],
    "DeepseekV3ForCausalLM": ["deepseek-ai"],
    "FalconForCausalLM": ["tiiuae"],
    "Gemma2ForCausalLM": ["google"],
    "Gemma3ForCausalLM": ["google"],
    "Gemma3ForConditionalGeneration": ["google"],
    "Gemma3nForConditionalGeneration": ["google"],
    "GemmaForCausalLM": ["google"],
    "GPT2LMHeadModel": ["openai-community", "stanford-crfm", "Writer"],
    "GPTBigCodeForCausalLM": ["bigcode"],
    "GptOssForCausalLM": ["openai"],
    "GPTJForCausalLM": ["EleutherAI", "togethercomputer"],
    "GPTNeoForCausalLM": ["EleutherAI", "roneneldan"],
    "GPTNeoXForCausalLM": ["EleutherAI", "cyberagent", "stabilityai", "togethercomputer"],
    "GraniteForCausalLM": ["ibm-granite"],
    "GraniteMoeForCausalLM": ["ibm-granite"],
    "GraniteMoeHybridForCausalLM": ["ibm-granite"],
    "HubertForCTC": ["facebook"],
    "HubertModel": ["facebook"],
    "InternLM2ForCausalLM": ["internlm"],
    "LlamaForCausalLM": ["meta-llama", "huggyllama", "codellama", "SimpleStories"],
    "LlavaForConditionalGeneration": ["llava-hf"],
    "LlavaNextForConditionalGeneration": ["llava-hf"],
    "LlavaOnevisionForConditionalGeneration": ["llava-hf"],
    "Mamba2ForCausalLM": ["state-spaces"],
    "MambaForCausalLM": ["state-spaces"],
    "MistralForCausalLM": ["mistralai"],
    "MixtralForCausalLM": ["mistralai"],
    "MPTForCausalLM": ["mosaicml"],
    "MT5ForConditionalGeneration": ["google", "bigscience", "csebuetnlp"],
    "Olmo2ForCausalLM": ["allenai", "HPLT"],
    "Olmo3ForCausalLM": ["allenai"],
    "OlmoeForCausalLM": ["allenai"],
    "OlmoForCausalLM": ["allenai"],
    "OpenELMForCausalLM": ["apple"],
    "OPTForCausalLM": ["facebook"],
    "Phi3ForCausalLM": ["microsoft"],
    "PhiForCausalLM": ["microsoft"],
    "Qwen2ForCausalLM": ["Qwen", "nvidia"],
    "Qwen3ForCausalLM": ["Qwen", "nvidia"],
    "Qwen3MoeForCausalLM": ["Qwen"],
    "Qwen3NextForCausalLM": ["Qwen"],
    "Qwen3_5ForCausalLM": ["Qwen"],
    "Qwen3_5ForConditionalGeneration": ["Qwen"],
    "QwenForCausalLM": ["Qwen"],
    "SmolLM3ForCausalLM": ["HuggingFaceTB"],
    "StableLmForCausalLM": ["stabilityai"],
    "T5ForConditionalGeneration": ["google-t5", "google", "Salesforce", "MBZUAI"],
    "XGLMForCausalLM": ["facebook"],
}

164 

# Public API of the package: the names exported by
# `from transformer_lens.tools.model_registry import *`.
__all__ = [
    # Constants
    "HF_SUPPORTED_ARCHITECTURES",
    "CANONICAL_AUTHORS_BY_ARCH",
    # Exceptions
    "ModelRegistryError",
    "ModelNotFoundError",
    "ArchitectureNotSupportedError",
    "DataNotLoadedError",
    "DataValidationError",
    # Schemas
    "ModelEntry",
    "ModelMetadata",
    "ScanInfo",
    "ArchitectureGap",
    "ArchitectureStats",
    "ArchitectureAnalysis",
    "SupportedModelsReport",
    "ArchitectureGapsReport",
    # Verification
    "VerificationRecord",
    "VerificationHistory",
]