
Commit 5a7dfb4

UNDIAL
1 parent bed6430 commit 5a7dfb4

File tree

4 files changed: +163, -0 lines changed


community/methods/UNDIAL/README.md

Lines changed: 24 additions & 0 deletions
@@ -0,0 +1,24 @@
# UNDIAL: Self-Distillation with Adjusted Logits for Robust Unlearning in Large Language Models (NAACL 2025)

- Authors: Yijiang River Dong, Hongzhou Lin, Mikhail Belkin, Ramón Huerta, Ivan Vulić
- Link: https://arxiv.org/pdf/2402.10052

# Setup
- Hyperparameters: The original paper uses Llama-2 7B with LoRA (rank=8, alpha=16) and a learning rate of 1e-4. It is suggested to search the learning rate over [1e-5, 3e-4, 1e-4], the number of epochs over [3, 10], and to use an effective batch size of 16 (batch_size * gradient_accumulation). The other important hyperparameter is beta, the strength of the penalty, which typically takes a value in [3, 10, 30]. When switching to other models, adjust the learning rate accordingly. A minimal LoRA sketch is given after these notes.
- Computation Setup: All experiments are run on a single A100.
- Other Details: The original paper does not use the retain set and aims to retain knowledge across all domains, not just on the retain set, so alpha is set to 0. Practitioners could search over gamma to better retain performance on the retain set.
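The LoRA setup described above can be reproduced with the `peft` library. The following is only a sketch under stated assumptions: the base checkpoint name and the attention-projection `target_modules` are illustrative choices, not taken from this commit.

```python
# Hypothetical LoRA setup matching the hyperparameters above (rank=8, alpha=16).
# The checkpoint name and target_modules are assumptions for illustration.
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM

base_model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf")
lora_config = LoraConfig(
    r=8,                                  # LoRA rank from the paper
    lora_alpha=16,                        # LoRA alpha from the paper
    target_modules=["q_proj", "v_proj"],  # assumed; adjust per model architecture
    lora_dropout=0.0,
    task_type="CAUSAL_LM",
)
model = get_peft_model(base_model, lora_config)
model.print_trainable_parameters()
# Train with learning rate 1e-4; search over [1e-5, 3e-4, 1e-4] when changing models.
```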
# Results
Run the `run.sh` script. The results can be found in `leaderboard.md`.

# Citation
@misc{dong2024undial,
    title={UNDIAL: Self-Distillation with Adjusted Logits for Robust Unlearning in Large Language Models},
    author={Yijiang River Dong and Hongzhou Lin and Mikhail Belkin and Ramon Huerta and Ivan Vulić},
    year={2024},
    eprint={2402.10052},
    archivePrefix={arXiv},
    primaryClass={cs.CL},
    url={https://arxiv.org/abs/2402.10052},
}

community/methods/UNDIAL/run.sh

Lines changed: 103 additions & 0 deletions
@@ -0,0 +1,103 @@
#!/bin/bash

models=(
    "Llama-3.2-1B-Instruct"
)
trainers_experiments=(
    "UNDIAL unlearn/tofu/default.yaml"
)
forget_retain_splits=(
    "forget05 retain95"
    "forget10 retain90"
    "forget01 retain99"
)

per_device_train_batch_size=4 # 4 per device x 4 accumulation steps x 2 GPUs = effective batch size 32
gradient_accumulation_steps=4


########################################################################################################################
########################################### Unlearn TOFU models ########################################################
########################################################################################################################

for split in "${forget_retain_splits[@]}"; do
    forget_split=$(echo $split | cut -d' ' -f1)
    retain_split=$(echo $split | cut -d' ' -f2)
    for model in "${models[@]}"; do
        for trainer_experiment in "${trainers_experiments[@]}"; do
            trainer=$(echo $trainer_experiment | cut -d' ' -f1)
            experiment=$(echo $trainer_experiment | cut -d' ' -f2)

            task_name=tofu_${model}_${forget_split}_${trainer}
            model_path=open-unlearning/tofu_${model}_full
            echo ${task_name}: Unlearning ${model_path} using ${trainer}

            # Unlearn
            python src/train.py --config-name=unlearn.yaml \
                experiment=${experiment} \
                trainer=${trainer} \
                task_name=${task_name} \
                model=${model} \
                forget_split=${forget_split} \
                retain_split=${retain_split} \
                model.model_args.pretrained_model_name_or_path=${model_path} \
                retain_logs_path=saves/eval/tofu_${model}_${retain_split}/TOFU_EVAL.json \
                trainer.args.per_device_train_batch_size=$per_device_train_batch_size \
                trainer.args.gradient_accumulation_steps=$gradient_accumulation_steps

            # Eval
            CUDA_VISIBLE_DEVICES=0 python src/eval.py \
                experiment=eval/tofu/default.yaml \
                forget_split=${forget_split} \
                model=${model} \
                task_name=${task_name} \
                model.model_args.pretrained_model_name_or_path=saves/unlearn/${task_name} \
                paths.output_dir=saves/unlearn/${task_name}/evals \
                retain_logs_path=saves/eval/tofu_${model}_${retain_split}/TOFU_EVAL.json
        done
    done
done

# #########################################################
# #################### MUSE Unlearning ####################
# #########################################################

model=Llama-3.2-1B-Instruct

data_splits=(
    "News"
    "Books"
)

trainers=(
    "UNDIAL"
)

for data_split in "${data_splits[@]}"; do
    for trainer in "${trainers[@]}"; do

        task_name=muse_${model}_${data_split}_${trainer}

        python src/train.py --config-name=unlearn.yaml \
            experiment=unlearn/muse/default.yaml \
            model=${model} \
            data_split=${data_split} \
            trainer=${trainer} \
            task_name=${task_name} \
            retain_logs_path=saves/eval/muse_${model}_${data_split}_retrain/MUSE_EVAL.json \
            trainer.args.per_device_train_batch_size=${per_device_train_batch_size} \
            trainer.args.gradient_accumulation_steps=${gradient_accumulation_steps}

        CUDA_VISIBLE_DEVICES=0 python src/eval.py \
            experiment=eval/muse/default.yaml \
            data_split=${data_split} \
            task_name=${task_name} \
            model=${model} \
            model.model_args.pretrained_model_name_or_path=saves/unlearn/${task_name} \
            paths.output_dir=saves/unlearn/${trainer}/evals \
            retain_logs_path=saves/eval/muse_${model}_${data_split}_retrain/MUSE_EVAL.json
    done
done

configs/trainer/UNDIAL.yaml

Lines changed: 12 additions & 0 deletions
@@ -0,0 +1,12 @@
handler: UNDIAL # corresponds to the class defined in src/trainer/unlearn/undial.py
args: # HuggingFace TrainingArguments
  per_device_train_batch_size: 2
  per_device_eval_batch_size: 16
  gradient_accumulation_steps: 8
  learning_rate: 1e-4
  num_train_epochs: 10
method_args: # Your own method-specific arguments
  gamma: 1.0
  alpha: 0.0
  beta: 10.0 # the strength of the penalty for memorized tokens
  retain_loss_type: NLL
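For context, `retain_loss_type: NLL` is handled by the `GradDiff` base trainer (not part of this commit) and presumably corresponds to the standard next-token negative log-likelihood on the retain batch. A minimal sketch, assuming a HuggingFace causal-LM model; note that with `alpha: 0.0` above this term does not contribute to the UNDIAL objective.

```python
# Illustrative only: an NLL retain loss as selected by retain_loss_type: NLL.
# HuggingFace causal-LM models return the mean cross-entropy over non-masked
# labels when `labels` are passed along with `input_ids`.
def nll_retain_loss(model, retain_inputs):
    outputs = model(
        input_ids=retain_inputs["input_ids"],
        attention_mask=retain_inputs["attention_mask"],
        labels=retain_inputs["labels"],
    )
    return outputs.loss
```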

src/trainer/unlearn/undial.py

Lines changed: 24 additions & 0 deletions
@@ -0,0 +1,24 @@
from trainer.utils import compute_undial_loss
from trainer.unlearn.grad_diff import GradDiff


class UNDIAL(GradDiff):
    def __init__(self, beta=1.0, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.beta = beta  # strength of the logit penalty on memorized tokens
        if self.ref_model is None:
            self.ref_model = self._prepare_ref_model(self.model)

    def compute_loss(self, model, inputs, return_outputs=False):
        # Self-distillation loss on the forget batch, using the reference model.
        forget_inputs = inputs["forget"]
        forget_loss, forget_outputs = compute_undial_loss(
            model, self.ref_model, forget_inputs, self.beta
        )

        # Retain loss (weighted by alpha; the provided config sets alpha to 0).
        retain_inputs = inputs["retain"]
        retain_inputs = {
            "input_ids": retain_inputs["input_ids"],
            "attention_mask": retain_inputs["attention_mask"],
            "labels": retain_inputs["labels"],
        }
        retain_loss = self.compute_retain_loss(model=model, retain_inputs=retain_inputs)

        loss = self.gamma * forget_loss + self.alpha * retain_loss
        return (loss, forget_outputs) if return_outputs else loss
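The `compute_undial_loss` helper imported above lives in `trainer.utils` and is not included in this commit. The sketch below is a hypothetical reconstruction of the paper's adjusted-logit self-distillation, not the repository's actual implementation: the reference model's logit for the ground-truth (memorized) token is reduced by `beta`, and the student is distilled toward the resulting softened distribution on the forget batch. The exact signature and masking details in `trainer.utils` may differ.

```python
# Hypothetical sketch of compute_undial_loss (the real helper in trainer.utils may differ).
import torch
import torch.nn.functional as F


def compute_undial_loss(model, ref_model, inputs, beta):
    outputs = model(
        input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"]
    )
    with torch.no_grad():
        ref_outputs = ref_model(
            input_ids=inputs["input_ids"], attention_mask=inputs["attention_mask"]
        )

    # Standard causal-LM shift: positions < t predict token t.
    logits = outputs.logits[:, :-1, :]
    ref_logits = ref_outputs.logits[:, :-1, :]
    shift_labels = inputs["labels"][:, 1:]

    valid = shift_labels != -100             # ignore prompt/padding positions
    safe_labels = shift_labels.clamp(min=0)  # placeholder index for masked positions

    # Adjusted teacher: subtract beta from the logit of the memorized (target) token.
    penalty = beta * F.one_hot(safe_labels, num_classes=ref_logits.size(-1)).to(ref_logits.dtype)
    teacher_probs = F.softmax(ref_logits - penalty, dim=-1)

    # Self-distillation: cross-entropy of the student against the adjusted teacher.
    student_log_probs = F.log_softmax(logits, dim=-1)
    token_loss = -(teacher_probs * student_log_probs).sum(dim=-1)
    loss = (token_loss * valid).sum() / valid.sum().clamp(min=1)
    return loss, outputs
```

The intent, per the paper, is to avoid the instability of gradient-ascent-style unlearning: the teacher distribution only down-weights the memorized token, so the student is pulled toward plausible alternatives rather than pushed to maximize loss without bound.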
