{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "db17f2cd",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/rameyjm7/workspace/TML/lpu/llm-preference-unlearning/lpu-env/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n",
"`torch_dtype` is deprecated! Use `dtype` instead!\n",
"Loading checkpoint shards: 100%|██████████| 2/2 [00:01<00:00, 1.08it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[INFO] Loaded Qwen/Qwen2.5-3B-Instruct on cuda with 36 transformer layers.\n",
"[INFO] Saved activations for prompt 1: 36 layers × 2 versions (full & pooled)\n",
"[INFO] Saved activations for prompt 2: 36 layers × 2 versions (full & pooled)\n",
"[INFO] Saved activations for prompt 3: 36 layers × 2 versions (full & pooled)\n",
"[INFO] Saved activations for prompt 4: 36 layers × 2 versions (full & pooled)\n",
"[INFO] Saved activations for prompt 5: 36 layers × 2 versions (full & pooled)\n",
"[INFO] Activation extraction complete → activations/\n"
]
},
{
"ename": "",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n",
"\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n",
"\u001b[1;31mClick here for more info. \n",
"\u001b[1;31mView Jupyter log for further details."
]
}
],
"source": [
"#!/usr/bin/env python3\n",
"\"\"\"\n",
"activation_probe_detailed.py — Phase 3.1–3.2 (Final)\n",
"Captures both full token-wise and mean-pooled activations\n",
"from all transformer layers of Qwen2.5-3B-Instruct.\n",
"\n",
"Output structure:\n",
"activations/\n",
" ├─ prompt01/\n",
" │ ├─ layer00_full.npy\n",
" │ ├─ layer00_pooled.npy\n",
" │ ├─ ...\n",
" │ └─ layer35_pooled.npy\n",
" ├─ prompt02/\n",
" │ └─ ...\n",
"\"\"\"\n",
"import os\n",
"import json\n",
"import torch\n",
"import numpy as np\n",
"from datetime import datetime\n",
"from transformers import AutoTokenizer, AutoModelForCausalLM\n",
"\n",
"\n",
"# ---------------------------------------------------------------------\n",
"# 1. Model Loading\n",
"# ---------------------------------------------------------------------\n",
"def load_model(model_name=\"Qwen/Qwen2.5-3B-Instruct\"):\n",
" device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
" tokenizer = AutoTokenizer.from_pretrained(model_name)\n",
" model = AutoModelForCausalLM.from_pretrained(\n",
" model_name,\n",
" torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,\n",
" device_map=\"auto\"\n",
" )\n",
" model.eval()\n",
" n_layers = len(model.model.layers)\n",
" print(f\"[INFO] Loaded {model_name} on {device} with {n_layers} transformer layers.\")\n",
" return model, tokenizer, device, n_layers\n",
"\n",
"\n",
"# ---------------------------------------------------------------------\n",
"# 2. Hook registration (safe)\n",
"# ---------------------------------------------------------------------\n",
"def register_hooks(model, store):\n",
" \"\"\"Attach forward hooks that safely copy activations to CPU.\"\"\"\n",
" handles = []\n",
" for idx, layer in enumerate(model.model.layers):\n",
" def hook_fn(module, inp, out, layer_idx=idx):\n",
" store[layer_idx] = out[0].detach().cpu()\n",
" handles.append(layer.register_forward_hook(hook_fn))\n",
" return handles\n",
"\n",
"\n",
"# ---------------------------------------------------------------------\n",
"# 3. Activation Capture\n",
"# ---------------------------------------------------------------------\n",
"def capture_activations(model, tokenizer, device, prompts, save_dir=\"activations\"):\n",
" os.makedirs(save_dir, exist_ok=True)\n",
" store = {}\n",
" hooks = register_hooks(model, store)\n",
"\n",
" with torch.no_grad():\n",
" for i, prompt in enumerate(prompts, start=1):\n",
" store.clear()\n",
" inputs = tokenizer(prompt, return_tensors=\"pt\").to(device)\n",
" _ = model(**inputs)\n",
"\n",
" prompt_dir = os.path.join(save_dir, f\"prompt{i:02d}\")\n",
" os.makedirs(prompt_dir, exist_ok=True)\n",
"\n",
" for layer_idx, tensor in store.items():\n",
" # Save full token activations: (seq_len, hidden_dim)\n",
" full = tensor.squeeze(0).cpu().numpy()\n",
" np.save(f\"{prompt_dir}/layer{layer_idx:02d}_full.npy\", full)\n",
"\n",
" # Save mean-pooled activations: (hidden_dim,)\n",
" pooled = full.mean(axis=0)\n",
" np.save(f\"{prompt_dir}/layer{layer_idx:02d}_pooled.npy\", pooled)\n",
"\n",
" print(f\"[INFO] Saved activations for prompt {i}: \"\n",
" f\"{len(store)} layers × 2 versions (full & pooled)\")\n",
"\n",
" # Remove hooks after all prompts processed\n",
" for h in hooks:\n",
" h.remove()\n",
"\n",
" print(f\"[INFO] Activation extraction complete → {save_dir}/\")\n",
"\n",
"\n",
"# ---------------------------------------------------------------------\n",
"# 4. Main Entry\n",
"# ---------------------------------------------------------------------\n",
"def main():\n",
" # Load latest recommender JSON log\n",
" log_dir = \"logs\"\n",
" log_files = sorted([\n",
" f for f in os.listdir(log_dir)\n",
" if f.startswith(\"recommender_\") and f.endswith(\".json\")\n",
" ])\n",
" if not log_files:\n",
" raise FileNotFoundError(\"No recommender_*.json log found.\")\n",
" latest_log = os.path.join(log_dir, log_files[-1])\n",
"\n",
" with open(latest_log, \"r\", encoding=\"utf-8\") as f:\n",
" data = json.load(f)\n",
" prompts = [r[\"question\"] for r in data[\"records\"]]\n",
"\n",
" model, tokenizer, device, n_layers = load_model()\n",
" capture_activations(model, tokenizer, device, prompts)\n",
"\n",
"\n",
"if __name__ == \"__main__\":\n",
" main()\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "lpu-env",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.18"
}
},
"nbformat": 4,
"nbformat_minor": 5
}