{ "cells": [ { "cell_type": "code", "execution_count": null, "id": "db17f2cd", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/rameyjm7/workspace/TML/lpu/llm-preference-unlearning/lpu-env/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", " from .autonotebook import tqdm as notebook_tqdm\n", "`torch_dtype` is deprecated! Use `dtype` instead!\n", "Loading checkpoint shards: 100%|██████████| 2/2 [00:01<00:00, 1.08it/s]\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "[INFO] Loaded Qwen/Qwen2.5-3B-Instruct on cuda with 36 transformer layers.\n", "[INFO] Saved activations for prompt 1: 36 layers × 2 versions (full & pooled)\n", "[INFO] Saved activations for prompt 2: 36 layers × 2 versions (full & pooled)\n", "[INFO] Saved activations for prompt 3: 36 layers × 2 versions (full & pooled)\n", "[INFO] Saved activations for prompt 4: 36 layers × 2 versions (full & pooled)\n", "[INFO] Saved activations for prompt 5: 36 layers × 2 versions (full & pooled)\n", "[INFO] Activation extraction complete → activations/\n" ] }, { "ename": "", "evalue": "", "output_type": "error", "traceback": [ "\u001b[1;31mThe Kernel crashed while executing code in the current cell or a previous cell. \n", "\u001b[1;31mPlease review the code in the cell(s) to identify a possible cause of the failure. \n", "\u001b[1;31mClick here for more info. \n", "\u001b[1;31mView Jupyter log for further details." ] } ], "source": [ "#!/usr/bin/env python3\n", "\"\"\"\n", "activation_probe_detailed.py — Phase 3.1–3.2 (Final)\n", "Captures both full token-wise and mean-pooled activations\n", "from all transformer layers of Qwen2.5-3B-Instruct.\n", "\n", "Output structure:\n", "activations/\n", " ├─ prompt01/\n", " │ ├─ layer00_full.npy\n", " │ ├─ layer00_pooled.npy\n", " │ ├─ ...\n", " │ └─ layer35_pooled.npy\n", " ├─ prompt02/\n", " │ └─ ...\n", "\"\"\"\n", "import os\n", "import json\n", "import torch\n", "import numpy as np\n", "from datetime import datetime\n", "from transformers import AutoTokenizer, AutoModelForCausalLM\n", "\n", "\n", "# ---------------------------------------------------------------------\n", "# 1. Model Loading\n", "# ---------------------------------------------------------------------\n", "def load_model(model_name=\"Qwen/Qwen2.5-3B-Instruct\"):\n", " device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n", " tokenizer = AutoTokenizer.from_pretrained(model_name)\n", " model = AutoModelForCausalLM.from_pretrained(\n", " model_name,\n", " torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,\n", " device_map=\"auto\"\n", " )\n", " model.eval()\n", " n_layers = len(model.model.layers)\n", " print(f\"[INFO] Loaded {model_name} on {device} with {n_layers} transformer layers.\")\n", " return model, tokenizer, device, n_layers\n", "\n", "\n", "# ---------------------------------------------------------------------\n", "# 2. Hook registration (safe)\n", "# ---------------------------------------------------------------------\n", "def register_hooks(model, store):\n", " \"\"\"Attach forward hooks that safely copy activations to CPU.\"\"\"\n", " handles = []\n", " for idx, layer in enumerate(model.model.layers):\n", " def hook_fn(module, inp, out, layer_idx=idx):\n", " store[layer_idx] = out[0].detach().cpu()\n", " handles.append(layer.register_forward_hook(hook_fn))\n", " return handles\n", "\n", "\n", "# ---------------------------------------------------------------------\n", "# 3. Activation Capture\n", "# ---------------------------------------------------------------------\n", "def capture_activations(model, tokenizer, device, prompts, save_dir=\"activations\"):\n", " os.makedirs(save_dir, exist_ok=True)\n", " store = {}\n", " hooks = register_hooks(model, store)\n", "\n", " with torch.no_grad():\n", " for i, prompt in enumerate(prompts, start=1):\n", " store.clear()\n", " inputs = tokenizer(prompt, return_tensors=\"pt\").to(device)\n", " _ = model(**inputs)\n", "\n", " prompt_dir = os.path.join(save_dir, f\"prompt{i:02d}\")\n", " os.makedirs(prompt_dir, exist_ok=True)\n", "\n", " for layer_idx, tensor in store.items():\n", " # Save full token activations: (seq_len, hidden_dim)\n", " full = tensor.squeeze(0).cpu().numpy()\n", " np.save(f\"{prompt_dir}/layer{layer_idx:02d}_full.npy\", full)\n", "\n", " # Save mean-pooled activations: (hidden_dim,)\n", " pooled = full.mean(axis=0)\n", " np.save(f\"{prompt_dir}/layer{layer_idx:02d}_pooled.npy\", pooled)\n", "\n", " print(f\"[INFO] Saved activations for prompt {i}: \"\n", " f\"{len(store)} layers × 2 versions (full & pooled)\")\n", "\n", " # Remove hooks after all prompts processed\n", " for h in hooks:\n", " h.remove()\n", "\n", " print(f\"[INFO] Activation extraction complete → {save_dir}/\")\n", "\n", "\n", "# ---------------------------------------------------------------------\n", "# 4. Main Entry\n", "# ---------------------------------------------------------------------\n", "def main():\n", " # Load latest recommender JSON log\n", " log_dir = \"logs\"\n", " log_files = sorted([\n", " f for f in os.listdir(log_dir)\n", " if f.startswith(\"recommender_\") and f.endswith(\".json\")\n", " ])\n", " if not log_files:\n", " raise FileNotFoundError(\"No recommender_*.json log found.\")\n", " latest_log = os.path.join(log_dir, log_files[-1])\n", "\n", " with open(latest_log, \"r\", encoding=\"utf-8\") as f:\n", " data = json.load(f)\n", " prompts = [r[\"question\"] for r in data[\"records\"]]\n", "\n", " model, tokenizer, device, n_layers = load_model()\n", " capture_activations(model, tokenizer, device, prompts)\n", "\n", "\n", "if __name__ == \"__main__\":\n", " main()\n" ] } ], "metadata": { "kernelspec": { "display_name": "lpu-env", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.18" } }, "nbformat": 4, "nbformat_minor": 5 }