Upload 2 files
complete_example_use_selective_fine_tuning.py
ADDED
@@ -0,0 +1,68 @@
# This script exemplifies a selective fine-tuning method based on the condition number, using freely available data and an open LLM.
# Author: Oswaldo Ludwig (now with AI support)
# Date: 03/07/2025
# In case of publication using this script or ideas in this script, cite:
# Ludwig, Oswaldo. "The Condition Number as a Scale-Invariant Proxy for Information Encoding in Neural Units." arXiv preprint arXiv:2506.16289 (2025).

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from transformers import AutoTokenizer, AutoModelForCausalLM
from datasets import load_dataset
from selective_fine_tuning import SelectiveFineTuningOptimizer

# Dataset using AG News
class RealTextDataset(Dataset):
    def __init__(self, tokenizer, split='train', max_samples=200, seq_len=64):
        dataset = load_dataset("ag_news", split=split)
        self.samples = dataset.select(range(max_samples))
        self.tokenizer = tokenizer
        self.seq_len = seq_len

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        text = self.samples[idx]['text']
        encoding = self.tokenizer(text, truncation=True, padding='max_length', max_length=self.seq_len, return_tensors='pt')
        input_ids = encoding['input_ids'].squeeze(0)
        return input_ids, input_ids.clone()

# Training loop
def train():
    model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)
    if tokenizer.pad_token is None:  # defensive: Llama-family tokenizers may not define a pad token
        tokenizer.pad_token = tokenizer.eos_token
    print("Tokenizer and LLM loaded", flush=True)

    dataset = RealTextDataset(tokenizer=tokenizer, max_samples=200, seq_len=64)
    dataloader = DataLoader(dataset, batch_size=4, shuffle=True)
    print("Data loader and dataset loaded", flush=True)

    criterion = nn.CrossEntropyLoss()  # not used below: the Hugging Face model returns the loss when labels are passed

    optimizer_wrapper = SelectiveFineTuningOptimizer(
        model=model,
        base_optimizer_cls=optim.AdamW,
        optimizer_args={'lr': 5e-5},
        condition_file='condition_numbers.json',
        num_tensors_to_finetune=20,
        recompute=True
    )
    print("Optimizer instantiated", flush=True)

    model.train()
    for epoch in range(3):
        total_loss = 0
        for inputs, targets in dataloader:
            optimizer_wrapper.zero_grad()
            # The model computes the causal-LM loss internally when labels are passed.
            outputs = model(inputs, labels=targets)
            loss = outputs.loss
            loss.backward()
            optimizer_wrapper.step()
            total_loss += loss.item()
        print(f"Epoch {epoch+1}, Loss: {total_loss/len(dataloader):.4f}")

if __name__ == '__main__':
    train()
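
# Optional refinement (a sketch, not part of the uploaded files): mask padded positions in the
# labels with -100 so the causal-LM loss ignores them. Hugging Face models skip label positions
# equal to -100 when computing the loss.
import torch

def make_labels(input_ids: torch.Tensor, pad_token_id: int) -> torch.Tensor:
    labels = input_ids.clone()
    labels[input_ids == pad_token_id] = -100  # ignored by the loss
    return labels

# In RealTextDataset.__getitem__ one could then return:
#   return input_ids, make_labels(input_ids, self.tokenizer.pad_token_id)
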
selective_fine_tuning.py
ADDED
@@ -0,0 +1,251 @@
# The class in this script implements a selective fine-tuning method based on the condition number.
# Author: Oswaldo Ludwig (now with AI support)
# Date: 03/07/2025
# In case of publication using this script or ideas in this script, cite:
# Ludwig, Oswaldo. "The Condition Number as a Scale-Invariant Proxy for Information Encoding in Neural Units." arXiv preprint arXiv:2506.16289 (2025).

import torch
import torch.nn as nn
import torch.optim as optim
import os
import json
import numpy as np
import logging
from typing import Type, Dict, Any, Set, List, Optional

# Configure logging (ensure this is at the top level or configured once)
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)


class SelectiveFineTuningOptimizer:
    """
    A custom optimizer wrapper that selectively fine-tunes a PyTorch model
    based on the condition numbers of its parameters. Parameters with lower
    condition numbers are prioritized for fine-tuning.
    """

    def __init__(self, model: nn.Module, base_optimizer_cls: Type[optim.Optimizer], optimizer_args: Dict[str, Any],
                 condition_file: str = 'condition_numbers.json',
                 num_tensors_to_finetune: int = 100,
                 recompute: bool = False,
                 max_dim_size_to_analyze: Optional[int] = None):  # optional filter on tensor dimensions
        """
        Initializes the SelectiveFineTuningOptimizer.

        Args:
            model (nn.Module): The PyTorch model to be fine-tuned.
            base_optimizer_cls (Type[optim.Optimizer]): The class of the base optimizer (e.g., torch.optim.Adam).
            optimizer_args (Dict[str, Any]): A dictionary of arguments to pass to the base optimizer constructor.
            condition_file (str): Path to the JSON file for storing/loading condition numbers.
            num_tensors_to_finetune (int): The number of top tensors (based on condition number) to fine-tune.
            recompute (bool): If True, recompute condition numbers even if the file exists.
            max_dim_size_to_analyze (int, optional): If provided, any parameter tensor with at least one dimension
                                                     larger than this value is skipped during analysis.
                                                     Useful for ignoring very large embedding matrices, etc.
        """
        self.model = model
        self.condition_file = condition_file
        self.num_tensors_to_finetune = num_tensors_to_finetune
        self.recompute = recompute
        self.max_dim_size_to_analyze = max_dim_size_to_analyze

        self.condition_numbers: Dict[str, float] = {}

        if not os.path.exists(condition_file) or recompute:
            self.condition_numbers = self._analyze_model()
            self._save_condition_numbers()
        else:
            self.condition_numbers = self._load_condition_numbers()

        self.trainable_param_names: Set[str] = self._select_trainable_parameters()
        self._unfreeze_selected_parameters()

        # Initialize the base optimizer with the selected parameters only
        params_to_optimize = [p for n, p in model.named_parameters() if n in self.trainable_param_names]
        if not params_to_optimize:
            logger.warning("No parameters selected for fine-tuning based on the criteria. Optimizer will have no parameters.")
        self.optimizer = base_optimizer_cls(params_to_optimize, **optimizer_args)
        logger.info(f"Optimizer initialized with {len(params_to_optimize)} trainable parameters.")

    def _analyze_model(self) -> Dict[str, float]:
        """
        Analyzes the singular values of model parameters to compute their condition numbers.
        Parameters with fewer than 2 dimensions, or with any dimension larger than
        `max_dim_size_to_analyze`, are ignored.
        SVD is performed on the GPU if the tensor is on CUDA, otherwise on CPU.

        Returns:
            Dict[str, float]: A dictionary mapping parameter names to their condition numbers.
        """
        condition_numbers = {}
        logger.info("Analyzing the model tensors...")

        initial_requires_grad_state = {}
        for name, param in self.model.named_parameters():
            initial_requires_grad_state[name] = param.requires_grad
            param.requires_grad = False  # Temporarily disable gradients during analysis

        analyzed_count = 0
        skipped_ndim_count = 0
        skipped_dim_size_count = 0
        skipped_svd_error_count = 0
        total_params_in_model = 0

        try:
            for name, param in self.model.named_parameters():
                total_params_in_model += 1
                # Filter 1: skip tensors with fewer than 2 dimensions
                if param.ndim < 2:
                    logger.debug(f"Skipping {name} due to fewer than 2 dimensions (ndim={param.ndim}).")
                    skipped_ndim_count += 1
                    continue
                # Filter 2: skip tensors with any dimension exceeding the threshold
                if self.max_dim_size_to_analyze is not None:
                    if any(dim_size > self.max_dim_size_to_analyze for dim_size in param.shape):
                        logger.debug(f"Skipping {name} due to a dimension larger than {self.max_dim_size_to_analyze} (shape={param.shape}).")
                        skipped_dim_size_count += 1
                        continue

                try:
                    data = param.detach()  # Keep on GPU if already there
                    if data.is_cuda:
                        # Perform SVD on GPU
                        u, s, v = torch.linalg.svd(data, full_matrices=False)
                    else:
                        # Fall back to CPU if not on CUDA
                        u, s, v = torch.linalg.svd(data.cpu(), full_matrices=False)

                    cond_number = (s[0] / s[-1]).item() if s[-1] > 0 else float('inf')
                    condition_numbers[name] = cond_number
                    analyzed_count += 1
                    logger.debug(f"Analyzed {name}: condition_number={cond_number:.4f}")
                except torch.linalg.LinAlgError as e:
                    logger.warning(f"Skipping {name} due to SVD linear algebra error: {e}")
                    skipped_svd_error_count += 1
                except Exception as e:
                    logger.error(f"Skipping {name} due to unexpected error during SVD: {e}")
                    skipped_svd_error_count += 1
        finally:
            # Restore the initial requires_grad state (later overridden by _unfreeze_selected_parameters)
            for name, param in self.model.named_parameters():
                param.requires_grad = initial_requires_grad_state[name]

        logger.info(f"Done analyzing model tensors. Total parameters in model: {total_params_in_model}")
        logger.info(f"Parameters analyzed for condition numbers: {analyzed_count}")
        logger.info(f"Skipped due to ndim < 2: {skipped_ndim_count}")
        logger.info(f"Skipped due to dimension size > {self.max_dim_size_to_analyze}: {skipped_dim_size_count}")
        logger.info(f"Skipped due to SVD errors: {skipped_svd_error_count}")
        return condition_numbers

    def _save_condition_numbers(self):
        """
        Saves the computed condition numbers to a JSON file.
        """
        try:
            with open(self.condition_file, 'w') as f:
                json.dump(self.condition_numbers, f, indent=2)
            logger.info(f"Condition numbers saved to {self.condition_file}")
        except IOError as e:
            logger.error(f"Failed to save condition numbers to {self.condition_file}: {e}")

    def _load_condition_numbers(self) -> Dict[str, float]:
        """
        Loads condition numbers from a JSON file. If the file is corrupted,
        it triggers a recomputation.

        Returns:
            Dict[str, float]: The loaded condition numbers.
        """
        try:
            with open(self.condition_file, 'r') as f:
                data = json.load(f)
            logger.info(f"Condition numbers loaded from {self.condition_file}")
            return data
        except json.JSONDecodeError as e:
            logger.warning(f"Condition file '{self.condition_file}' is corrupted or invalid. Error: {e}. Recomputing.")
            if os.path.exists(self.condition_file):
                try:
                    os.remove(self.condition_file)  # Remove the corrupted file
                    logger.info(f"Removed corrupted condition file: {self.condition_file}")
                except OSError as err:
                    logger.error(f"Error removing corrupted file {self.condition_file}: {err}")
            return self._analyze_model()  # Recompute if loading fails
        except IOError as e:
            logger.error(f"Failed to load condition numbers from {self.condition_file}: {e}. Recomputing.")
            return self._analyze_model()  # Recompute if the file is missing or another IO error occurs

    def _select_trainable_parameters(self) -> Set[str]:
        """
        Selects the top `num_tensors_to_finetune` parameters based on their condition numbers
        (lower condition number is better).

        Returns:
            Set[str]: A set of names of the parameters chosen for fine-tuning.
        """
        if not self.condition_numbers:
            logger.warning("No condition numbers available to select trainable parameters.")
            return set()

        sorted_params = sorted(self.condition_numbers.items(), key=lambda x: x[1])
        selected = [name for name, _ in sorted_params[:self.num_tensors_to_finetune]]
        logger.info(f"Selected {len(selected)} parameters for fine-tuning out of {len(self.condition_numbers)} analyzed.")
        logger.debug(f"Selected parameters: {selected}")
        return set(selected)

    def _unfreeze_selected_parameters(self):
        """
        Sets `requires_grad=True` for the selected trainable parameters
        and `requires_grad=False` for all other parameters in the model.
        """
        total_params = 0
        frozen_params_count = 0
        unfrozen_params_count = 0

        for name, param in self.model.named_parameters():
            total_params += 1
            if name in self.trainable_param_names:
                if not param.requires_grad:  # only toggle if the state differs
                    param.requires_grad = True
                    logger.debug(f"Parameter '{name}' set to requires_grad=True.")
                unfrozen_params_count += 1
            else:
                if param.requires_grad:  # only toggle if the state differs
                    param.requires_grad = False
                    logger.debug(f"Parameter '{name}' set to requires_grad=False.")
                frozen_params_count += 1

        logger.info(f"Model parameters configured: {unfrozen_params_count} unfrozen, {frozen_params_count} frozen (out of {total_params} total).")

    def step(self):
        """
        Performs a single optimization step (parameter update).
        Delegates to the base optimizer's step method.
        """
        self.optimizer.step()

    def zero_grad(self):
        """
        Clears the gradients of all optimized parameters.
        Delegates to the base optimizer's zero_grad method.
        """
        self.optimizer.zero_grad()

    def state_dict(self) -> Dict[str, Any]:
        """
        Returns a serializable dictionary containing the current state of the optimizer.
        Delegates to the base optimizer's state_dict method.
        """
        return self.optimizer.state_dict()

    def load_state_dict(self, state_dict: Dict[str, Any]):
        """
        Loads the optimizer's state from a state_dict.
        Delegates to the base optimizer's load_state_dict method.

        Args:
            state_dict (Dict[str, Any]): A dictionary containing the optimizer's state.
        """
        self.optimizer.load_state_dict(state_dict)
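
# Closing illustration (a minimal sketch with a random stand-in matrix, not part of the uploaded
# files): the score the wrapper ranks tensors by is the condition number of each 2-D parameter,
# i.e. the ratio of its largest to smallest singular value; the lowest-scoring tensors are unfrozen first.
import torch

W = torch.randn(256, 64)                 # stand-in for one 2-D parameter tensor
s = torch.linalg.svdvals(W)              # singular values, in descending order
cond = (s[0] / s[-1]).item() if s[-1] > 0 else float('inf')
print(f"condition number = {cond:.2f}")  # lower values are selected first for fine-tuning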