Coverage for transformer_lens/model_bridge/supported_architectures/qwen3_5.py: 96%
21 statements
« prev ^ index » next coverage.py v7.10.1, created at 2026-06-09 00:32 +0000
« prev ^ index » next coverage.py v7.10.1, created at 2026-06-09 00:32 +0000
"""Qwen3.5 architecture adapter.

Hybrid linear-attention (GatedDeltaNet) + full-attention with dense gated MLP.
3 linear-attn layers per 1 full-attn layer. Extends Qwen3 base with
optional attention mapping and fold_ln disabled.
"""
8from typing import Any
10import torch
12from transformer_lens.model_bridge.supported_architectures.qwen3 import (
13 Qwen3ArchitectureAdapter,
14)
class Qwen3_5ArchitectureAdapter(Qwen3ArchitectureAdapter):
    """Text-only adapter for the hybrid Qwen3.5 architecture.

    Builds on the Qwen3 adapter (config / attention / MLP structure) and
    differs from it in two ways:

    - Attention and linear_attn submodules are optional, depending on the
      per-layer type.
    - The gated q_proj (2x wide) is sliced by ``preprocess_weights`` so that
      weight-space analysis sees only the query half.
    """

    def __init__(self, cfg: Any) -> None:
        """Flag the config as using a gated q_proj, then defer to the Qwen3 base.

        Args:
            cfg: Model configuration object. It is mutated in place: the
                ``gated_q_proj`` attribute is set to ``True`` before the
                base-class initializer runs with ``hybrid=True``.
        """
        cfg.gated_q_proj = True
        super().__init__(cfg, hybrid=True)

    def prepare_loading(self, model_name: str, model_kwargs: dict) -> None:
        """Replace a multimodal Qwen3_5Config with its text-only text_config.

        Published checkpoints carry
        architectures=['Qwen3_5ForConditionalGeneration']. Substituting the
        nested text_config lets AutoModelForCausalLM load the text-only
        Qwen3_5ForCausalLM.

        Args:
            model_name: Model identifier (not used by this method).
            model_kwargs: Loading keyword arguments. Mutated in place: when a
                ``"config"`` entry exists and exposes ``text_config``, the
                entry is replaced by that ``text_config``.
        """
        loaded_config = model_kwargs.get("config")
        # Nothing to swap when no config was supplied or it is already text-only.
        if loaded_config is None or not hasattr(loaded_config, "text_config"):
            return
        model_kwargs["config"] = loaded_config.text_config

    def prepare_model(self, hf_model: Any) -> None:
        """Refuse full multimodal Qwen3.5 models; this adapter is text-only.

        A model is rejected when its class name or any declared architecture
        is ``Qwen3_5ForConditionalGeneration``, or when its config still
        exposes a top-level ``text_config`` attribute.

        Args:
            hf_model: The loaded Hugging Face model to validate.

        Raises:
            ValueError: If the model looks like the multimodal variant.
        """
        multimodal_class = "Qwen3_5ForConditionalGeneration"
        hf_config = getattr(hf_model, "config", None)
        # ``architectures`` may be missing or None; normalise to an empty list.
        declared_architectures = getattr(hf_config, "architectures", None) or []

        looks_multimodal = (
            type(hf_model).__name__ == multimodal_class
            or multimodal_class in declared_architectures
            or hasattr(hf_config, "text_config")
        )
        if not looks_multimodal:
            return

        raise ValueError(
            "Qwen3.5 support in TransformerLens is text-only. Pass a "
            "Qwen3_5ForCausalLM / Qwen3_5TextConfig model, or load by model id "
            "with TransformerBridge.boot_transformers(...) so the text_config is "
            "selected automatically. Qwen3_5ForConditionalGeneration, image/video "
            "inputs, and Qwen3.5 MoE are not supported by this adapter."
        )

    def preprocess_weights(self, state_dict: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:
        """Extract the query half of the gated q_proj.weight for weight analysis.

        In processed mode W_Q is the pure query projection (useful for
        composition scores and logit lens). The gate signal remains available
        in unprocessed mode on full-attention layers via
        blocks.N.attn.hook_q_gate.

        Args:
            state_dict: Mapping from parameter name to tensor.

        Returns:
            The state dict produced by the inherited
            ``_preprocess_gated_q_proj`` helper.
        """
        head_count = self.cfg.n_heads
        head_dim = self.cfg.d_head
        return self._preprocess_gated_q_proj(state_dict, head_count, head_dim)