Coverage for transformer_lens/model_bridge/supported_architectures/qwen3_5.py: 96%

21 statements  

« prev     ^ index     » next       coverage.py v7.10.1, created at 2026-06-09 00:32 +0000

1"""Qwen3.5 architecture adapter. 

2 

3Hybrid linear-attention (GatedDeltaNet) + full-attention with dense gated MLP. 

43 linear-attn layers per 1 full-attn layer. Extends Qwen3 base with 

5optional attention mapping and fold_ln disabled. 

6""" 

7 

8from typing import Any 

9 

10import torch 

11 

12from transformer_lens.model_bridge.supported_architectures.qwen3 import ( 

13 Qwen3ArchitectureAdapter, 

14) 

15 

16 

class Qwen3_5ArchitectureAdapter(Qwen3ArchitectureAdapter):
    """Text-only adapter for the hybrid Qwen3.5 architecture.

    Reuses the Qwen3 adapter (constructed with ``hybrid=True``) for config,
    attention and MLP structure. What this subclass adds:
    - Per-layer optional attention / linear_attn components
    - A gated, double-width q_proj that ``preprocess_weights`` slices down
      to the query half for weight analysis
    """

    def __init__(self, cfg: Any) -> None:
        """Flag ``cfg`` as using a gated q_proj, then build the hybrid Qwen3 adapter."""
        # The flag is set before the parent constructor runs.
        cfg.gated_q_proj = True
        super().__init__(cfg, hybrid=True)

    def prepare_loading(self, model_name: str, model_kwargs: dict) -> None:
        """Replace a multimodal config in ``model_kwargs`` with its text-only part.

        Published checkpoints declare
        architectures=['Qwen3_5ForConditionalGeneration']. Substituting the
        nested ``text_config`` lets AutoModelForCausalLM load the text-only
        Qwen3_5ForCausalLM instead.

        ``model_kwargs`` is modified in place; nothing happens when it has no
        ``"config"`` entry or that config has no ``text_config`` attribute.
        """
        loaded_config = model_kwargs.get("config")
        if loaded_config is None or not hasattr(loaded_config, "text_config"):
            return
        model_kwargs["config"] = loaded_config.text_config

    def prepare_model(self, hf_model: Any) -> None:
        """Refuse full multimodal Qwen3.5 models; this adapter is text-only.

        Raises:
            ValueError: if the model's class name or its config's
                ``architectures`` names Qwen3_5ForConditionalGeneration, or if
                the model's config still exposes a ``text_config`` attribute.
        """
        multimodal_arch = "Qwen3_5ForConditionalGeneration"
        model_config = getattr(hf_model, "config", None)
        # ``architectures`` may be absent or None; treat both as an empty list.
        declared_archs = getattr(model_config, "architectures", []) or []

        names_multimodal_arch = (
            type(hf_model).__name__ == multimodal_arch or multimodal_arch in declared_archs
        )
        # A lingering text_config means the top-level multimodal config was never swapped out.
        config_is_multimodal = hasattr(model_config, "text_config")
        if not (names_multimodal_arch or config_is_multimodal):
            return

        raise ValueError(
            "Qwen3.5 support in TransformerLens is text-only. Pass a "
            "Qwen3_5ForCausalLM / Qwen3_5TextConfig model, or load by model id "
            "with TransformerBridge.boot_transformers(...) so the text_config is "
            "selected automatically. Qwen3_5ForConditionalGeneration, image/video "
            "inputs, and Qwen3.5 MoE are not supported by this adapter."
        )

    def preprocess_weights(self, state_dict: dict[str, torch.Tensor]) -> dict[str, torch.Tensor]:
        """Keep only the query half of the gated q_proj.weight for weight-space analysis.

        In processed mode W_Q is therefore the pure query projection (usable
        for composition scores and logit lens). The gate signal remains
        available in unprocessed mode on full-attention layers through
        blocks.N.attn.hook_q_gate.
        """
        adapter_cfg = self.cfg
        return self._preprocess_gated_q_proj(state_dict, adapter_cfg.n_heads, adapter_cfg.d_head)