Index _ | A | B | C | D | E | F | G | H | I | K | L | M | N | O | P | Q | R | S | T | U | V | W _ __init__() (transformer_lens.components.abstract_attention.AbstractAttention method) (transformer_lens.components.attention.Attention method) (transformer_lens.components.grouped_query_attention.GroupedQueryAttention method) (transformer_lens.components.layer_norm.LayerNorm method) (transformer_lens.components.layer_norm_pre.LayerNormPre method) (transformer_lens.components.rms_norm.RMSNorm method) (transformer_lens.components.rms_norm_pre.RMSNormPre method) (transformer_lens.HookedTransformer.HookedTransformer method) (transformer_lens.utils.LocallyOverridenDefaults method) (transformer_lens.utils.Slice method) A AB (transformer_lens.FactoredMatrix.FactoredMatrix property) AbstractAttention (class in transformer_lens.components.abstract_attention) accumulated_bias() (transformer_lens.HookedTransformer.HookedTransformer method) accumulated_resid() (transformer_lens.ActivationCache.ActivationCache method) act_fn (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) ActivationCache (class in transformer_lens.ActivationCache) add_caching_hooks() (transformer_lens.hook_points.HookedRootModule method) add_hook() (transformer_lens.hook_points.HookedRootModule method) (transformer_lens.hook_points.HookPoint method) add_perma_hook() (transformer_lens.hook_points.HookedRootModule method) (transformer_lens.hook_points.HookPoint method) alibi (transformer_lens.components.abstract_attention.AbstractAttention attribute) all_composition_scores() (transformer_lens.HookedTransformer.HookedTransformer method) all_head_labels() (transformer_lens.HookedEncoder.HookedEncoder method) (transformer_lens.HookedEncoderDecoder.HookedEncoderDecoder method) (transformer_lens.HookedTransformer.HookedTransformer method) append() (transformer_lens.past_key_value_caching.HookedTransformerKeyValueCacheEntry method) append_attention_mask() (transformer_lens.past_key_value_caching.HookedTransformerKeyValueCache method) apply() (transformer_lens.utils.Slice method) apply_causal_mask() (transformer_lens.components.abstract_attention.AbstractAttention method) apply_ln_to_stack() (transformer_lens.ActivationCache.ActivationCache method) apply_mlp() (transformer_lens.components.transformer_block.TransformerBlock method) apply_rotary() (transformer_lens.components.abstract_attention.AbstractAttention method) apply_slice_to_batch_dim() (transformer_lens.ActivationCache.ActivationCache method) Attention (class in transformer_lens.components.attention) attention_dir (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) attn_only (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) attn_scale (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) attn_scores_soft_cap (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) attn_types (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) B b_in (transformer_lens.HookedEncoder.HookedEncoder property) (transformer_lens.HookedEncoderDecoder.HookedEncoderDecoder property) (transformer_lens.HookedTransformer.HookedTransformer property) b_K (transformer_lens.components.grouped_query_attention.GroupedQueryAttention property) (transformer_lens.HookedEncoder.HookedEncoder property) (transformer_lens.HookedEncoderDecoder.HookedEncoderDecoder property) (transformer_lens.HookedTransformer.HookedTransformer property) b_O (transformer_lens.HookedEncoder.HookedEncoder property) (transformer_lens.HookedEncoderDecoder.HookedEncoderDecoder property) (transformer_lens.HookedTransformer.HookedTransformer property) b_out (transformer_lens.HookedEncoder.HookedEncoder property) (transformer_lens.HookedEncoderDecoder.HookedEncoderDecoder property) (transformer_lens.HookedTransformer.HookedTransformer property) b_Q (transformer_lens.HookedEncoder.HookedEncoder property) (transformer_lens.HookedEncoderDecoder.HookedEncoderDecoder property) (transformer_lens.HookedTransformer.HookedTransformer property) b_U (transformer_lens.HookedEncoder.HookedEncoder property) (transformer_lens.HookedEncoderDecoder.HookedEncoderDecoder property) (transformer_lens.HookedTransformer.HookedTransformer property) b_V (transformer_lens.components.grouped_query_attention.GroupedQueryAttention property) (transformer_lens.HookedEncoder.HookedEncoder property) (transformer_lens.HookedEncoderDecoder.HookedEncoderDecoder property) (transformer_lens.HookedTransformer.HookedTransformer property) BA (transformer_lens.FactoredMatrix.FactoredMatrix property) batch_addmm() (in module transformer_lens.utilities.addmm) batch_size (transformer_lens.train.HookedTransformerTrainConfig attribute) BertBlock (class in transformer_lens.components.bert_block) BertEmbed (class in transformer_lens.components.bert_embed) BertMLMHead (class in transformer_lens.components.bert_mlm_head) C cache_all() (transformer_lens.hook_points.HookedRootModule method) cache_some() (transformer_lens.hook_points.HookedRootModule method) calc_fan_in_and_fan_out() (in module transformer_lens.utils) calculate_attention_scores() (transformer_lens.components.abstract_attention.AbstractAttention method) (transformer_lens.components.grouped_query_attention.GroupedQueryAttention method) calculate_qkv_matrices() (transformer_lens.components.abstract_attention.AbstractAttention method) (transformer_lens.components.grouped_query_attention.GroupedQueryAttention method) calculate_sin_cos_rotary() (transformer_lens.components.abstract_attention.AbstractAttention method) calculate_z_scores() (transformer_lens.components.abstract_attention.AbstractAttention method) (transformer_lens.components.grouped_query_attention.GroupedQueryAttention method) center_unembed() (transformer_lens.HookedTransformer.HookedTransformer method) center_writing_weights() (transformer_lens.HookedTransformer.HookedTransformer method) check_and_add_hook() (transformer_lens.hook_points.HookedRootModule method) check_hooks_to_add() (transformer_lens.hook_points.HookedRootModule method) (transformer_lens.HookedTransformer.HookedTransformer method) checkpoint_index (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) checkpoint_label_type (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) checkpoint_value (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) clear_context() (transformer_lens.hook_points.HookPoint method) clear_contexts() (transformer_lens.hook_points.HookedRootModule method) collapse_l() (transformer_lens.FactoredMatrix.FactoredMatrix method) collapse_r() (transformer_lens.FactoredMatrix.FactoredMatrix method) complex_attn_linear() (in module transformer_lens.utilities.attention) composition_scores() (in module transformer_lens.utils) compute_head_attention_similarity_score() (in module transformer_lens.head_detector) compute_head_results() (transformer_lens.ActivationCache.ActivationCache method) compute_relative_attention_bias() (transformer_lens.components.t5_attention.T5Attention method) Config (class in transformer_lens.loading_from_pretrained) context_level (transformer_lens.hook_points.LensHandle attribute) convert_bloom_weights() (in module transformer_lens.pretrained.weight_conversions.bloom) convert_coder_weights() (in module transformer_lens.pretrained.weight_conversions.coder) convert_mistral_weights() (in module transformer_lens.pretrained.weight_conversions.mistral) convert_mixtral_weights() (in module transformer_lens.pretrained.weight_conversions.mixtral) convert_phi3_weights() (in module transformer_lens.pretrained.weight_conversions.phi3) convert_phi_weights() (in module transformer_lens.pretrained.weight_conversions.phi) convert_qwen2_weights() (in module transformer_lens.pretrained.weight_conversions.qwen2) convert_qwen_weights() (in module transformer_lens.pretrained.weight_conversions.qwen) convert_t5_weights() (in module transformer_lens.pretrained.weight_conversions.t5) cpu() (transformer_lens.HookedEncoder.HookedEncoder method) (transformer_lens.HookedEncoderDecoder.HookedEncoderDecoder method) (transformer_lens.HookedTransformer.HookedTransformer method) create_alibi_bias() (transformer_lens.components.abstract_attention.AbstractAttention static method) create_alibi_multipliers() (transformer_lens.components.abstract_attention.AbstractAttention static method) create_alibi_slope() (transformer_lens.components.abstract_attention.AbstractAttention static method) cuda() (transformer_lens.HookedEncoder.HookedEncoder method) (transformer_lens.HookedEncoderDecoder.HookedEncoderDecoder method) (transformer_lens.HookedTransformer.HookedTransformer method) D d_head (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) (transformer_lens.loading_from_pretrained.Config attribute) d_mlp (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) (transformer_lens.loading_from_pretrained.Config attribute) d_model (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) (transformer_lens.loading_from_pretrained.Config attribute) d_vocab (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) (transformer_lens.loading_from_pretrained.Config attribute) d_vocab_out (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) debug (transformer_lens.loading_from_pretrained.Config attribute) decoder_start_token_id (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) decompose_resid() (transformer_lens.ActivationCache.ActivationCache method) default_prepend_bos (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) detect_head() (in module transformer_lens.head_detector) device (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) (transformer_lens.train.HookedTransformerTrainConfig attribute) download_file_from_hf() (in module transformer_lens.utils) dtype (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) E eigenvalues (transformer_lens.FactoredMatrix.FactoredMatrix property) Embed (class in transformer_lens.components.embed) entries (transformer_lens.past_key_value_caching.HookedTransformerKeyValueCache attribute) eps (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) evaluate() (in module transformer_lens.evals) evaluate_on_dataset() (in module transformer_lens.evals) experts_per_token (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) F FactoredMatrix (class in transformer_lens.FactoredMatrix) final_rms (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) fold_layer_norm() (transformer_lens.HookedTransformer.HookedTransformer method) fold_value_biases() (transformer_lens.HookedTransformer.HookedTransformer method) forward() (transformer_lens.components.abstract_attention.AbstractAttention method) (transformer_lens.components.bert_block.BertBlock method) (transformer_lens.components.bert_embed.BertEmbed method) (transformer_lens.components.bert_mlm_head.BertMLMHead method) (transformer_lens.components.embed.Embed method) (transformer_lens.components.layer_norm.LayerNorm method) (transformer_lens.components.layer_norm_pre.LayerNormPre method) (transformer_lens.components.pos_embed.PosEmbed method) (transformer_lens.components.rms_norm.RMSNorm method) (transformer_lens.components.rms_norm_pre.RMSNormPre method) (transformer_lens.components.t5_block.T5Block method) (transformer_lens.components.token_typed_embed.TokenTypeEmbed method) (transformer_lens.components.transformer_block.TransformerBlock method) (transformer_lens.components.unembed.Unembed method) (transformer_lens.hook_points.HookPoint method) (transformer_lens.HookedEncoder.HookedEncoder method) (transformer_lens.HookedEncoderDecoder.HookedEncoderDecoder method) (transformer_lens.HookedTransformer.HookedTransformer method) freeze() (transformer_lens.past_key_value_caching.HookedTransformerKeyValueCache method) from_checkpoint (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) from_dict() (transformer_lens.HookedTransformerConfig.HookedTransformerConfig class method) from_pretrained() (transformer_lens.HookedEncoder.HookedEncoder class method) (transformer_lens.HookedEncoderDecoder.HookedEncoderDecoder class method) (transformer_lens.HookedTransformer.HookedTransformer class method) from_pretrained_no_processing() (transformer_lens.HookedTransformer.HookedTransformer class method) frozen (transformer_lens.past_key_value_caching.HookedTransformerKeyValueCache attribute) (transformer_lens.past_key_value_caching.HookedTransformerKeyValueCacheEntry attribute) G gated_mlp (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) gelu_fast() (in module transformer_lens.utils) gelu_new() (in module transformer_lens.utils) generate() (transformer_lens.HookedEncoderDecoder.HookedEncoderDecoder method) (transformer_lens.HookedTransformer.HookedTransformer method) generic_activation_patch() (in module transformer_lens.patching) get_act_name() (in module transformer_lens.utils) get_act_patch_attn_head_all_pos_every() (in module transformer_lens.patching) get_act_patch_attn_head_by_pos_every() (in module transformer_lens.patching) get_act_patch_attn_head_k_all_pos() (in module transformer_lens.patching) get_act_patch_attn_head_k_by_pos() (in module transformer_lens.patching) get_act_patch_attn_head_out_all_pos() (in module transformer_lens.patching) get_act_patch_attn_head_out_by_pos() (in module transformer_lens.patching) get_act_patch_attn_head_pattern_all_pos() (in module transformer_lens.patching) get_act_patch_attn_head_pattern_by_pos() (in module transformer_lens.patching) get_act_patch_attn_head_pattern_dest_src_pos() (in module transformer_lens.patching) get_act_patch_attn_head_q_all_pos() (in module transformer_lens.patching) get_act_patch_attn_head_q_by_pos() (in module transformer_lens.patching) get_act_patch_attn_head_v_all_pos() (in module transformer_lens.patching) get_act_patch_attn_head_v_by_pos() (in module transformer_lens.patching) get_act_patch_attn_out() (in module transformer_lens.patching) get_act_patch_block_every() (in module transformer_lens.patching) get_act_patch_mlp_out() (in module transformer_lens.patching) get_act_patch_resid_mid() (in module transformer_lens.patching) get_act_patch_resid_pre() (in module transformer_lens.patching) get_attention_mask() (in module transformer_lens.utils) get_caching_hooks() (transformer_lens.hook_points.HookedRootModule method) get_checkpoint_labels() (in module transformer_lens.loading_from_pretrained) get_corner() (in module transformer_lens.utils) (transformer_lens.FactoredMatrix.FactoredMatrix method) get_cumsum_along_dim() (in module transformer_lens.utils) get_dataset() (in module transformer_lens.utils) get_default_names() (transformer_lens.evals.IOIDataset static method) get_default_nouns() (transformer_lens.evals.IOIDataset static method) get_default_templates() (transformer_lens.evals.IOIDataset static method) get_device() (in module transformer_lens.utils) get_device_for_block_index() (in module transformer_lens.utilities.devices) get_duplicate_token_head_detection_pattern() (in module transformer_lens.head_detector) get_full_resid_decomposition() (transformer_lens.ActivationCache.ActivationCache method) get_induction_head_detection_pattern() (in module transformer_lens.head_detector) get_input_with_manually_prepended_bos() (in module transformer_lens.utils) get_nested_attr() (in module transformer_lens.utils) get_neuron_results() (transformer_lens.ActivationCache.ActivationCache method) get_num_params_of_pretrained() (in module transformer_lens.loading_from_pretrained) get_offset_position_ids() (in module transformer_lens.utils) get_pretrained_model_config() (in module transformer_lens.loading_from_pretrained) get_previous_token_head_detection_pattern() (in module transformer_lens.head_detector) get_sample() (transformer_lens.evals.IOIDataset method) get_singular_vectors() (transformer_lens.SVDInterpreter.SVDInterpreter method) get_supported_heads() (in module transformer_lens.head_detector) get_token_position() (transformer_lens.HookedTransformer.HookedTransformer method) get_tokenizer_with_bos() (in module transformer_lens.utils) get_tokens_with_bos_removed() (in module transformer_lens.utils) GroupedQueryAttention (class in transformer_lens.components.grouped_query_attention) H hook (transformer_lens.hook_points.LensHandle attribute) hook_dict (transformer_lens.hook_points.HookedRootModule attribute) hook_points() (transformer_lens.hook_points.HookedRootModule method) HookedEncoder (class in transformer_lens.HookedEncoder) HookedEncoderDecoder (class in transformer_lens.HookedEncoderDecoder) HookedRootModule (class in transformer_lens.hook_points) HookedTransformer (class in transformer_lens.HookedTransformer) HookedTransformerConfig (class in transformer_lens.HookedTransformerConfig) HookedTransformerKeyValueCache (class in transformer_lens.past_key_value_caching) HookedTransformerKeyValueCacheEntry (class in transformer_lens.past_key_value_caching) HookedTransformerTrainConfig (class in transformer_lens.train) HookFunction (in module transformer_lens.hook_points) HookPoint (class in transformer_lens.hook_points) hooks() (transformer_lens.hook_points.HookedRootModule method) I indices() (transformer_lens.utils.Slice method) induction_loss() (in module transformer_lens.evals) init_cache() (transformer_lens.past_key_value_caching.HookedTransformerKeyValueCache class method) init_cache_entry() (transformer_lens.past_key_value_caching.HookedTransformerKeyValueCacheEntry class method) init_kaiming_normal_() (in module transformer_lens.utils) init_kaiming_uniform_() (in module transformer_lens.utils) init_mode (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) init_range (transformer_lens.loading_from_pretrained.Config attribute) init_weights (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) init_weights() (transformer_lens.HookedTransformer.HookedTransformer method) init_xavier_normal_() (in module transformer_lens.utils) init_xavier_uniform_() (in module transformer_lens.utils) initializer_range (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) input_to_embed() (transformer_lens.HookedTransformer.HookedTransformer method) ioi_eval() (in module transformer_lens.evals) IOIDataset (class in transformer_lens.evals) is_layer_norm_activation() (transformer_lens.HookedTransformerConfig.HookedTransformerConfig method) is_lower_triangular() (in module transformer_lens.utils) is_permanent (transformer_lens.hook_points.LensHandle attribute) is_square() (in module transformer_lens.utils) items() (transformer_lens.ActivationCache.ActivationCache method) K keep_single_column() (in module transformer_lens.utils) keys() (transformer_lens.ActivationCache.ActivationCache method) L layer() (transformer_lens.hook_points.HookPoint method) layer_head_dest_src_pos_pattern_patch_setter() (in module transformer_lens.patching) layer_head_pattern_patch_setter() (in module transformer_lens.patching) layer_head_pos_pattern_patch_setter() (in module transformer_lens.patching) layer_head_vector_patch_setter() (in module transformer_lens.patching) layer_norm_eps (transformer_lens.loading_from_pretrained.Config attribute) layer_pos_head_vector_patch_setter() (in module transformer_lens.patching) layer_pos_patch_setter() (in module transformer_lens.patching) LayerNorm (class in transformer_lens.components.layer_norm) LayerNormPre (class in transformer_lens.components.layer_norm_pre) LensHandle (class in transformer_lens.hook_points) lm_accuracy() (in module transformer_lens.utils) lm_cross_entropy_loss() (in module transformer_lens.utils) ln1 (transformer_lens.components.transformer_block.TransformerBlock attribute) ln2 (transformer_lens.components.transformer_block.TransformerBlock attribute) ln_final (transformer_lens.HookedTransformer.HookedTransformer attribute) load_and_process_state_dict() (transformer_lens.HookedTransformer.HookedTransformer method) load_in_4bit (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) load_sample_training_dataset() (transformer_lens.HookedTransformer.HookedTransformer method) LocallyOverridenDefaults (class in transformer_lens.utils) logit_attrs() (transformer_lens.ActivationCache.ActivationCache method) logits (transformer_lens.HookedTransformer.Output attribute) loss (transformer_lens.HookedTransformer.Output attribute) loss_fn() (transformer_lens.HookedTransformer.HookedTransformer method) lr (transformer_lens.train.HookedTransformerTrainConfig attribute) M make_code_data_loader() (in module transformer_lens.evals) make_even() (transformer_lens.FactoredMatrix.FactoredMatrix method) make_owt_data_loader() (in module transformer_lens.evals) make_pile_data_loader() (in module transformer_lens.evals) make_wiki_data_loader() (in module transformer_lens.evals) max_grad_norm (transformer_lens.train.HookedTransformerTrainConfig attribute) max_steps (transformer_lens.train.HookedTransformerTrainConfig attribute) mlp (transformer_lens.components.transformer_block.TransformerBlock attribute) mod_dict (transformer_lens.hook_points.HookedRootModule attribute) MODEL_ALIASES (in module transformer_lens.loading_from_pretrained) model_name (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) module transformer_lens.ActivationCache transformer_lens.components.abstract_attention transformer_lens.components.attention transformer_lens.components.bert_block transformer_lens.components.bert_embed transformer_lens.components.bert_mlm_head transformer_lens.components.embed transformer_lens.components.grouped_query_attention transformer_lens.components.layer_norm transformer_lens.components.layer_norm_pre transformer_lens.components.pos_embed transformer_lens.components.rms_norm transformer_lens.components.rms_norm_pre transformer_lens.components.t5_attention transformer_lens.components.t5_block transformer_lens.components.token_typed_embed transformer_lens.components.transformer_block transformer_lens.components.unembed transformer_lens.evals transformer_lens.FactoredMatrix transformer_lens.head_detector transformer_lens.hook_points transformer_lens.HookedEncoder transformer_lens.HookedEncoderDecoder transformer_lens.HookedTransformer transformer_lens.HookedTransformerConfig transformer_lens.loading_from_pretrained transformer_lens.past_key_value_caching transformer_lens.patching transformer_lens.pretrained.weight_conversions.bert transformer_lens.pretrained.weight_conversions.bloom transformer_lens.pretrained.weight_conversions.coder transformer_lens.pretrained.weight_conversions.gemma transformer_lens.pretrained.weight_conversions.gpt2 transformer_lens.pretrained.weight_conversions.gptj transformer_lens.pretrained.weight_conversions.llama transformer_lens.pretrained.weight_conversions.mingpt transformer_lens.pretrained.weight_conversions.mistral transformer_lens.pretrained.weight_conversions.mixtral transformer_lens.pretrained.weight_conversions.nanogpt transformer_lens.pretrained.weight_conversions.neel_solu_old transformer_lens.pretrained.weight_conversions.neo transformer_lens.pretrained.weight_conversions.neox transformer_lens.pretrained.weight_conversions.opt transformer_lens.pretrained.weight_conversions.phi transformer_lens.pretrained.weight_conversions.phi3 transformer_lens.pretrained.weight_conversions.qwen transformer_lens.pretrained.weight_conversions.qwen2 transformer_lens.pretrained.weight_conversions.t5 transformer_lens.SVDInterpreter transformer_lens.train transformer_lens.utilities.activation_functions transformer_lens.utilities.addmm transformer_lens.utilities.attention transformer_lens.utilities.devices transformer_lens.utils momentum (transformer_lens.train.HookedTransformerTrainConfig attribute) move_model_modules_to_device() (transformer_lens.HookedTransformer.HookedTransformer method) move_to_and_update_config() (in module transformer_lens.utilities.devices) mps() (transformer_lens.HookedEncoder.HookedEncoder method) (transformer_lens.HookedEncoderDecoder.HookedEncoderDecoder method) (transformer_lens.HookedTransformer.HookedTransformer method) N n_ctx (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) (transformer_lens.loading_from_pretrained.Config attribute) n_devices (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) n_heads (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) (transformer_lens.loading_from_pretrained.Config attribute) n_key_value_heads (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) n_layers (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) (transformer_lens.loading_from_pretrained.Config attribute) n_params (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) name (transformer_lens.hook_points.HookedRootModule attribute) ndim (transformer_lens.FactoredMatrix.FactoredMatrix property) NON_HF_HOSTED_MODEL_NAMES (in module transformer_lens.loading_from_pretrained) norm() (transformer_lens.FactoredMatrix.FactoredMatrix method) normalization_type (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) NTK_by_parts_factor (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) NTK_by_parts_high_freq_factor (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) NTK_by_parts_low_freq_factor (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) num_epochs (transformer_lens.train.HookedTransformerTrainConfig attribute) num_experts (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) O OFFICIAL_MODEL_NAMES (in module transformer_lens.loading_from_pretrained) optimizer_name (transformer_lens.train.HookedTransformerTrainConfig attribute) original_architecture (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) Output (class in transformer_lens.HookedTransformer) output_logits_soft_cap (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) OV (transformer_lens.components.abstract_attention.AbstractAttention property) (transformer_lens.HookedEncoder.HookedEncoder property) (transformer_lens.HookedEncoderDecoder.HookedEncoderDecoder property) (transformer_lens.HookedTransformer.HookedTransformer property) override_or_use_default_value() (in module transformer_lens.utils) P pair (transformer_lens.FactoredMatrix.FactoredMatrix property) parallel_attn_mlp (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) past_keys (transformer_lens.past_key_value_caching.HookedTransformerKeyValueCacheEntry attribute) past_values (transformer_lens.past_key_value_caching.HookedTransformerKeyValueCacheEntry attribute) PosEmbed (class in transformer_lens.components.pos_embed) positional_embedding_type (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) post_embedding_ln (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) previous_attention_mask (transformer_lens.past_key_value_caching.HookedTransformerKeyValueCache attribute) print_every (transformer_lens.train.HookedTransformerTrainConfig attribute) print_gpu_mem() (in module transformer_lens.utils) process_weights_() (transformer_lens.HookedTransformer.HookedTransformer method) Q QK (transformer_lens.components.abstract_attention.AbstractAttention property) (transformer_lens.HookedEncoder.HookedEncoder property) (transformer_lens.HookedEncoderDecoder.HookedEncoderDecoder property) (transformer_lens.HookedTransformer.HookedTransformer property) R refactor_factored_attn_matrices() (transformer_lens.HookedTransformer.HookedTransformer method) relative_attention_max_distance (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) relative_attention_num_buckets (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) remove_all_hook_fns() (transformer_lens.hook_points.HookedRootModule method) remove_batch_dim() (in module transformer_lens.utils) (transformer_lens.ActivationCache.ActivationCache method) remove_hooks() (transformer_lens.hook_points.HookPoint method) repeat_along_head_dimension() (in module transformer_lens.utils) reset_hooks() (transformer_lens.hook_points.HookedRootModule method) RMSNorm (class in transformer_lens.components.rms_norm) RMSNormPre (class in transformer_lens.components.rms_norm_pre) rotary_adjacent_pairs (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) rotary_base (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) rotary_dim (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) rotate_every_two() (transformer_lens.components.abstract_attention.AbstractAttention method) run_with_cache() (transformer_lens.hook_points.HookedRootModule method) (transformer_lens.HookedEncoder.HookedEncoder method) (transformer_lens.HookedEncoderDecoder.HookedEncoderDecoder method) (transformer_lens.HookedTransformer.HookedTransformer method) run_with_hooks() (transformer_lens.hook_points.HookedRootModule method) S S (transformer_lens.FactoredMatrix.FactoredMatrix property) sample_datapoint() (transformer_lens.HookedTransformer.HookedTransformer method) sample_logits() (in module transformer_lens.utils) sanity_check() (in module transformer_lens.evals) save_dir (transformer_lens.train.HookedTransformerTrainConfig attribute) save_every (transformer_lens.train.HookedTransformerTrainConfig attribute) scale_attn_by_inverse_layer_idx (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) seed (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) (transformer_lens.train.HookedTransformerTrainConfig attribute) set_nested_attr() (in module transformer_lens.utils) set_seed_everywhere() (transformer_lens.HookedTransformerConfig.HookedTransformerConfig method) set_tokenizer() (transformer_lens.HookedTransformer.HookedTransformer method) set_ungroup_grouped_query_attention() (transformer_lens.HookedTransformer.HookedTransformer method) set_use_attn_in() (transformer_lens.HookedTransformer.HookedTransformer method) set_use_attn_result() (transformer_lens.HookedTransformer.HookedTransformer method) set_use_hook_mlp_in() (transformer_lens.HookedTransformer.HookedTransformer method) set_use_split_qkv_input() (transformer_lens.HookedTransformer.HookedTransformer method) setup() (transformer_lens.hook_points.HookedRootModule method) simple_attn_linear() (in module transformer_lens.utilities.attention) Slice (class in transformer_lens.utils) slice (transformer_lens.utils.Slice attribute) SliceInput (in module transformer_lens.utils) solu() (in module transformer_lens.utils) stack_activation() (transformer_lens.ActivationCache.ActivationCache method) stack_head_results() (transformer_lens.ActivationCache.ActivationCache method) stack_neuron_results() (transformer_lens.ActivationCache.ActivationCache method) svd() (transformer_lens.FactoredMatrix.FactoredMatrix method) SVDInterpreter (class in transformer_lens.SVDInterpreter) T T (transformer_lens.FactoredMatrix.FactoredMatrix property) T5Attention (class in transformer_lens.components.t5_attention) T5Block (class in transformer_lens.components.t5_block) test_prompt() (in module transformer_lens.utils) tie_word_embeddings (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) to() (transformer_lens.ActivationCache.ActivationCache method) (transformer_lens.HookedEncoder.HookedEncoder method) (transformer_lens.HookedEncoderDecoder.HookedEncoderDecoder method) (transformer_lens.HookedTransformer.HookedTransformer method) to_dict() (transformer_lens.HookedTransformerConfig.HookedTransformerConfig method) to_numpy() (in module transformer_lens.utils) to_single_str_token() (transformer_lens.HookedTransformer.HookedTransformer method) to_single_token() (transformer_lens.HookedTransformer.HookedTransformer method) to_str_tokens() (transformer_lens.HookedTransformer.HookedTransformer method) to_string() (transformer_lens.HookedTransformer.HookedTransformer method) to_tokens() (transformer_lens.HookedEncoderDecoder.HookedEncoderDecoder method) (transformer_lens.HookedTransformer.HookedTransformer method) toggle_autodiff() (transformer_lens.ActivationCache.ActivationCache method) tokenize_and_concatenate() (in module transformer_lens.utils) tokenizer_name (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) tokenizer_prepends_bos (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) tokens_to_residual_directions() (transformer_lens.HookedTransformer.HookedTransformer method) TokenTypeEmbed (class in transformer_lens.components.token_typed_embed) train() (in module transformer_lens.train) transformer_lens.ActivationCache module transformer_lens.components.abstract_attention module transformer_lens.components.attention module transformer_lens.components.bert_block module transformer_lens.components.bert_embed module transformer_lens.components.bert_mlm_head module transformer_lens.components.embed module transformer_lens.components.grouped_query_attention module transformer_lens.components.layer_norm module transformer_lens.components.layer_norm_pre module transformer_lens.components.pos_embed module transformer_lens.components.rms_norm module transformer_lens.components.rms_norm_pre module transformer_lens.components.t5_attention module transformer_lens.components.t5_block module transformer_lens.components.token_typed_embed module transformer_lens.components.transformer_block module transformer_lens.components.unembed module transformer_lens.evals module transformer_lens.FactoredMatrix module transformer_lens.head_detector module transformer_lens.hook_points module transformer_lens.HookedEncoder module transformer_lens.HookedEncoderDecoder module transformer_lens.HookedTransformer module transformer_lens.HookedTransformerConfig module transformer_lens.loading_from_pretrained module transformer_lens.past_key_value_caching module transformer_lens.patching module transformer_lens.pretrained.weight_conversions.bert module transformer_lens.pretrained.weight_conversions.bloom module transformer_lens.pretrained.weight_conversions.coder module transformer_lens.pretrained.weight_conversions.gemma module transformer_lens.pretrained.weight_conversions.gpt2 module transformer_lens.pretrained.weight_conversions.gptj module transformer_lens.pretrained.weight_conversions.llama module transformer_lens.pretrained.weight_conversions.mingpt module transformer_lens.pretrained.weight_conversions.mistral module transformer_lens.pretrained.weight_conversions.mixtral module transformer_lens.pretrained.weight_conversions.nanogpt module transformer_lens.pretrained.weight_conversions.neel_solu_old module transformer_lens.pretrained.weight_conversions.neo module transformer_lens.pretrained.weight_conversions.neox module transformer_lens.pretrained.weight_conversions.opt module transformer_lens.pretrained.weight_conversions.phi module transformer_lens.pretrained.weight_conversions.phi3 module transformer_lens.pretrained.weight_conversions.qwen module transformer_lens.pretrained.weight_conversions.qwen2 module transformer_lens.pretrained.weight_conversions.t5 module transformer_lens.SVDInterpreter module transformer_lens.train module transformer_lens.utilities.activation_functions module transformer_lens.utilities.addmm module transformer_lens.utilities.attention module transformer_lens.utilities.devices module transformer_lens.utils module TransformerBlock (class in transformer_lens.components.transformer_block) transpose() (in module transformer_lens.utils) trust_remote_code (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) U U (transformer_lens.FactoredMatrix.FactoredMatrix property) Unembed (class in transformer_lens.components.unembed) unfreeze() (transformer_lens.past_key_value_caching.HookedTransformerKeyValueCache method) ungroup_grouped_query_attention (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) unsqueeze() (transformer_lens.FactoredMatrix.FactoredMatrix method) unwrap() (transformer_lens.HookedTransformerConfig.HookedTransformerConfig class method) (transformer_lens.utils.Slice class method) use_attn_in (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) use_attn_result (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) use_attn_scale (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) use_hook_mlp_in (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) use_hook_tokens (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) use_local_attn (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) use_normalization_before_and_after (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) use_NTK_by_parts_rope (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) use_split_qkv_input (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute) V values() (transformer_lens.ActivationCache.ActivationCache method) vanilla_addmm() (in module transformer_lens.utilities.addmm) Vh (transformer_lens.FactoredMatrix.FactoredMatrix property) W W_E (transformer_lens.HookedEncoder.HookedEncoder property) (transformer_lens.HookedEncoderDecoder.HookedEncoderDecoder property) (transformer_lens.HookedTransformer.HookedTransformer property) W_E_pos (transformer_lens.HookedEncoder.HookedEncoder property) (transformer_lens.HookedTransformer.HookedTransformer property) W_gate (transformer_lens.HookedTransformer.HookedTransformer property) W_in (transformer_lens.HookedEncoder.HookedEncoder property) (transformer_lens.HookedEncoderDecoder.HookedEncoderDecoder property) (transformer_lens.HookedTransformer.HookedTransformer property) W_K (transformer_lens.components.grouped_query_attention.GroupedQueryAttention property) (transformer_lens.HookedEncoder.HookedEncoder property) (transformer_lens.HookedEncoderDecoder.HookedEncoderDecoder property) (transformer_lens.HookedTransformer.HookedTransformer property) W_O (transformer_lens.HookedEncoder.HookedEncoder property) (transformer_lens.HookedEncoderDecoder.HookedEncoderDecoder property) (transformer_lens.HookedTransformer.HookedTransformer property) W_out (transformer_lens.HookedEncoder.HookedEncoder property) (transformer_lens.HookedEncoderDecoder.HookedEncoderDecoder property) (transformer_lens.HookedTransformer.HookedTransformer property) W_pos (transformer_lens.HookedEncoder.HookedEncoder property) (transformer_lens.HookedEncoderDecoder.HookedEncoderDecoder property) (transformer_lens.HookedTransformer.HookedTransformer property) W_Q (transformer_lens.HookedEncoder.HookedEncoder property) (transformer_lens.HookedEncoderDecoder.HookedEncoderDecoder property) (transformer_lens.HookedTransformer.HookedTransformer property) W_U (transformer_lens.HookedEncoder.HookedEncoder property) (transformer_lens.HookedEncoderDecoder.HookedEncoderDecoder property) (transformer_lens.HookedTransformer.HookedTransformer property) W_V (transformer_lens.components.grouped_query_attention.GroupedQueryAttention property) (transformer_lens.HookedEncoder.HookedEncoder property) (transformer_lens.HookedEncoderDecoder.HookedEncoderDecoder property) (transformer_lens.HookedTransformer.HookedTransformer property) wandb (transformer_lens.train.HookedTransformerTrainConfig attribute) wandb_project_name (transformer_lens.train.HookedTransformerTrainConfig attribute) warmup_steps (transformer_lens.train.HookedTransformerTrainConfig attribute) weight_decay (transformer_lens.train.HookedTransformerTrainConfig attribute) window_size (transformer_lens.HookedTransformerConfig.HookedTransformerConfig attribute)