Coverage for transformer_lens/tools/model_registry/generate_report.py: 0%

107 statements  

« prev     ^ index     » next       coverage.py v7.10.1, created at 2026-04-30 01:33 +0000

1#!/usr/bin/env python3 

2"""Generate a markdown report of supported and unsupported models. 

3 

4This script generates a comprehensive report showing: 

5- All supported model IDs grouped by architecture 

6- Total count of supported models 

7- Unsupported architectures with model counts and descriptions 

8 

9Usage: 

10 python -m transformer_lens.tools.model_registry.generate_report 

11 python -m transformer_lens.tools.model_registry.generate_report --output custom_report.md 

12 python -m transformer_lens.tools.model_registry.generate_report --help 

13""" 

14 

15import argparse 

16from datetime import datetime 

17from pathlib import Path 

18 

19from .api import ( 

20 get_registry_stats, 

21 get_supported_architectures, 

22 get_supported_models, 

23 get_unsupported_architectures, 

24) 

25 

# Human-readable descriptions of common HuggingFace architecture class names,
# both supported and unsupported by TransformerLens. Keys are the exact
# `architectures` entries from a model's config.json (e.g. "GPT2LMHeadModel").
# Used by get_architecture_description(); architectures missing from this map
# fall back to a description generated from the class-name suffix.
ARCHITECTURE_DESCRIPTIONS: dict[str, str] = {
    # Supported architectures
    "GPT2LMHeadModel": "OpenAI's GPT-2 decoder-only transformer for causal language modeling",
    "GPTNeoForCausalLM": "EleutherAI's GPT-Neo, an open-source GPT-3-like model",
    "GPTNeoXForCausalLM": "EleutherAI's GPT-NeoX architecture used in Pythia models",
    "GPTJForCausalLM": "EleutherAI's GPT-J 6B parameter model",
    "LlamaForCausalLM": "Meta's LLaMA architecture, basis for many open models",
    "MistralForCausalLM": "Mistral AI's efficient 7B parameter model with sliding window attention",
    "MixtralForCausalLM": "Mistral AI's Mixture of Experts model",
    "GemmaForCausalLM": "Google's Gemma lightweight open model family",
    "Gemma2ForCausalLM": "Google's Gemma 2 with improved architecture",
    "Gemma3ForCausalLM": "Google's Gemma 3 latest generation",
    "Qwen2ForCausalLM": "Alibaba's Qwen2 multilingual model",
    "Qwen3ForCausalLM": "Alibaba's Qwen3 latest generation",
    "BloomForCausalLM": "BigScience's BLOOM multilingual model",
    "OPTForCausalLM": "Meta's Open Pre-trained Transformer",
    "PhiForCausalLM": "Microsoft's Phi small language model",
    "Phi3ForCausalLM": "Microsoft's Phi-3 improved small model",
    "FalconForCausalLM": "TII's Falcon model series",
    "OlmoForCausalLM": "Allen AI's OLMo open language model",
    "Olmo2ForCausalLM": "Allen AI's OLMo 2 with improved training",
    "Olmo3ForCausalLM": "Allen AI's OLMo 3 latest generation",
    "OlmoeForCausalLM": "Allen AI's OLMoE Mixture of Experts model",
    "StableLmForCausalLM": "Stability AI's StableLM model",
    "T5ForConditionalGeneration": "Google's T5 encoder-decoder model (partial support)",
    # Unsupported architectures
    "BertModel": "Google's BERT bidirectional encoder for understanding tasks",
    "BertForMaskedLM": "BERT with masked language modeling head",
    "BertForSequenceClassification": "BERT fine-tuned for classification",
    "RobertaModel": "Facebook's RoBERTa, optimized BERT training",
    "RobertaForMaskedLM": "RoBERTa with masked language modeling head",
    "DistilBertModel": "Distilled version of BERT, 40% smaller",
    "AlbertModel": "A Lite BERT with parameter sharing",
    "XLNetLMHeadModel": "Google/CMU's XLNet with permutation language modeling",
    "ElectraModel": "Google's ELECTRA with replaced token detection",
    "DebertaModel": "Microsoft's DeBERTa with disentangled attention",
    "DebertaV2Model": "DeBERTa version 2 with improved architecture",
    "MPNetModel": "Microsoft's MPNet combining MLM and PLM",
    "LongformerModel": "Allen AI's Longformer for long documents",
    "BigBirdModel": "Google's BigBird with sparse attention",
    "ReformerModel": "Google's Reformer with locality-sensitive hashing",
    "BartForConditionalGeneration": "Facebook's BART encoder-decoder model",
    "MBartForConditionalGeneration": "Multilingual BART",
    "PegasusForConditionalGeneration": "Google's PEGASUS for summarization",
    "MT5ForConditionalGeneration": "Multilingual T5",
    "WhisperForConditionalGeneration": "OpenAI's Whisper speech recognition",
    "CLIPModel": "OpenAI's CLIP vision-language model",
    "ViTModel": "Google's Vision Transformer",
    "SwinModel": "Microsoft's Swin Transformer for vision",
    "DeiTModel": "Facebook's Data-efficient Image Transformer",
    "BeitModel": "Microsoft's BERT pre-training for images",
    "ConvNextModel": "Facebook's ConvNeXt modernized ConvNet",
    "SegformerModel": "NVIDIA's SegFormer for segmentation",
    "Wav2Vec2Model": "Facebook's Wav2Vec 2.0 for speech",
    "HubertModel": "Facebook's HuBERT for speech",
    "SpeechT5Model": "Microsoft's SpeechT5 for speech tasks",
    "BlipModel": "Salesforce's BLIP vision-language model",
    "Blip2Model": "Salesforce's BLIP-2 with frozen LLM",
    "LlavaForConditionalGeneration": "Visual instruction-tuned LLaMA",
    "GitModel": "Microsoft's GIT for vision-language",
    "PaliGemmaForConditionalGeneration": "Google's PaliGemma vision-language",
    "CohereForCausalLM": "Cohere's Command models",
    "DeepseekForCausalLM": "DeepSeek's open models",
    "InternLMForCausalLM": "Shanghai AI Lab's InternLM",
    "BaichuanForCausalLM": "Baichuan's Chinese-focused models",
    "YiForCausalLM": "01.AI's Yi model series",
    "OrionForCausalLM": "OrionStar's Orion models",
    "StarcoderForCausalLM": "BigCode's StarCoder for code",
    "CodeLlamaForCausalLM": "Meta's Code Llama for programming",
    "CodeGenForCausalLM": "Salesforce's CodeGen models",
    "SantacoderForCausalLM": "BigCode's SantaCoder",
}

99 

100 

def get_architecture_description(arch_id: str) -> str:
    """Get a description for an architecture, with fallback.

    Known architectures come from ARCHITECTURE_DESCRIPTIONS; anything else
    gets a generic description derived from its class-name suffix.
    """
    known = ARCHITECTURE_DESCRIPTIONS.get(arch_id)
    if known is not None:
        return known

    # Derive a generic description from well-known class-name suffixes.
    # Order matters: "Model" is a substring of none of the earlier suffixes,
    # but it is the most generic, so it is checked last.
    suffix_templates = (
        ("ForCausalLM", "{} architecture for causal language modeling"),
        ("ForConditionalGeneration", "{} encoder-decoder for conditional generation"),
        ("ForMaskedLM", "{} with masked language modeling head"),
        ("ForSequenceClassification", "{} fine-tuned for sequence classification"),
        ("Model", "{} base model architecture"),
    )
    for suffix, template in suffix_templates:
        if suffix in arch_id:
            return template.format(arch_id.replace(suffix, ""))

    return "Transformer architecture"

124 

125 

def generate_report(output_path: Path | None = None) -> str:
    """Generate the markdown report.

    Args:
        output_path: Optional path to write the report. If None, only returns the string.

    Returns:
        The generated markdown report as a string.
    """
    # Gather data from the registry API.
    models = get_supported_models()
    architectures = get_supported_architectures()
    gaps = get_unsupported_architectures()
    stats = get_registry_stats()

    # Group the full model entries by architecture. Keeping the entry (not
    # just its id) lets the verified-badge check below be O(1) per model,
    # instead of the previous O(n) scan of `models` for every listed row.
    models_by_arch: dict[str, list] = {}
    for model in models:
        models_by_arch.setdefault(model.architecture_id, []).append(model)

    # Sort entries within each architecture by model id.
    for entries in models_by_arch.values():
        entries.sort(key=lambda m: m.model_id)

    # Calculate totals.
    total_supported = len(models)
    total_unsupported = sum(g.total_models for g in gaps)
    total_all = total_supported + total_unsupported

    # Build report.
    lines: list[str] = []
    lines.append("# TransformerLens Model Compatibility Report")
    lines.append("")
    lines.append(f"*Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}*")
    lines.append("")

    # Summary table.
    lines.append("## Summary")
    lines.append("")
    lines.append("| Metric | Count |")
    lines.append("|--------|-------|")
    lines.append(f"| Supported Models | {total_supported:,} |")
    lines.append(f"| Supported Architectures | {len(architectures)} |")
    lines.append(f"| Verified Models | {stats['total_verified']} |")
    lines.append(f"| Unsupported Architectures | {len(gaps)} |")
    lines.append(f"| Models in Unsupported Architectures | {total_unsupported:,} |")
    lines.append(f"| **Total Potential Models** | **{total_all:,}** |")
    lines.append("")

    # Supported models section.
    lines.append("## Supported Models")
    lines.append("")
    lines.append(
        f"TransformerLens supports **{total_supported:,} models** across **{len(architectures)} architectures**."
    )
    lines.append("")

    for arch in sorted(models_by_arch):
        entries = models_by_arch[arch]
        desc = get_architecture_description(arch)
        lines.append(f"### {arch}")
        lines.append("")
        lines.append(f"*{desc}*")
        lines.append("")
        lines.append(f"**{len(entries)} models:**")
        lines.append("")
        for entry in entries:
            # status == 1 appears to mark a model as verified to work with
            # TransformerLens -- NOTE(review): confirm against .api's status enum.
            verified_badge = " ✓" if entry.status == 1 else ""
            lines.append(f"- `{entry.model_id}`{verified_badge}")
        lines.append("")

    # Unsupported architectures section.
    lines.append("## Unsupported Architectures")
    lines.append("")
    lines.append(
        f"The following **{len(gaps)} architectures** are not yet supported by TransformerLens,"
    )
    lines.append(f"representing **{total_unsupported:,} models** on HuggingFace.")
    lines.append("")
    lines.append("| Architecture | Models | Description |")
    lines.append("|--------------|--------|-------------|")

    for gap in gaps:
        desc = get_architecture_description(gap.architecture_id)
        lines.append(f"| `{gap.architecture_id}` | {gap.total_models:,} | {desc} |")

    lines.append("")

    # Footer.
    lines.append("---")
    lines.append("")
    lines.append(
        "*Report generated by `python -m transformer_lens.tools.model_registry.generate_report`*"
    )
    lines.append("")
    lines.append("✓ = Verified to work with TransformerLens")

    report = "\n".join(lines)

    # Write to file if a path was provided.
    if output_path:
        output_path.write_text(report)
        print(f"Report written to: {output_path}")

    return report

236 

237 

def main():
    """CLI entry point."""
    epilog = """
Examples:
  # Generate report to default location (MODEL_COMPATIBILITY_REPORT.md)
  python -m transformer_lens.tools.model_registry.generate_report

  # Generate report to custom location
  python -m transformer_lens.tools.model_registry.generate_report -o my_report.md

  # Print report to stdout only
  python -m transformer_lens.tools.model_registry.generate_report --stdout
"""
    parser = argparse.ArgumentParser(
        description="Generate a markdown report of TransformerLens model compatibility.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=epilog,
    )
    parser.add_argument(
        "-o",
        "--output",
        type=Path,
        default=None,
        help="Output file path (default: MODEL_COMPATIBILITY_REPORT.md in current directory)",
    )
    parser.add_argument(
        "--stdout",
        action="store_true",
        help="Print report to stdout instead of writing to file",
    )
    args = parser.parse_args()

    # --stdout prints only; otherwise write to the requested (or default) file.
    if args.stdout:
        print(generate_report())
        return

    destination = args.output or Path("MODEL_COMPATIBILITY_REPORT.md")
    generate_report(destination)


if __name__ == "__main__":
    main()