I'm working on a multi-GPU server and I want to use a single GPU (cuda:2) for training. Setting the GPU for training:
import torch
from transformers import AutoModelForCausalLM, TrainingArguments

device = torch.device("cuda:2")
torch.cuda.set_device(device)
device_map = {"": torch.cuda.current_device()}
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    quantization_config=bnb_config,
    device_map=device_map,
)
model.config.use_cache = False
model.config.pretraining_tp = 1
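
At this point the placement looks correct. For reference, a quick sanity check (not part of my original script) that confirms the weights landed on the intended GPU:

# Both should point at GPU 2 right after loading
print(torch.cuda.current_device())      # -> 2
print(next(model.parameters()).device)  # -> cuda:2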
But as soon as I run the TrainingArguments() part, torch.cuda.current_device() changes to 0:
training_arguments = TrainingArguments(
    output_dir=output_dir,
    num_train_epochs=num_train_epochs,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    weight_decay=weight_decay,
    fp16=fp16,
    bf16=bf16,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=group_by_length,
    lr_scheduler_type=lr_scheduler_type,
    report_to=["tensorboard"],
)
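
The problem reproduces even with a stripped-down TrainingArguments (as far as I know, output_dir is the only required argument), so it seems to be the construction itself, not any of my specific settings:

print(torch.cuda.current_device())  # -> 2, as set above
training_arguments = TrainingArguments(output_dir=output_dir)
print(torch.cuda.current_device())  # -> 0, the device has been reset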
How can I keep the training pinned to my chosen GPU (cuda:2)?
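
For reference, a sketch of the one workaround I'm aware of: hiding the other GPUs with CUDA_VISIBLE_DEVICES at the very top of the script. This assumes it runs before anything initializes CUDA, and note that the remaining physical GPU 2 is then renumbered to cuda:0 inside the process:

import os
os.environ["CUDA_VISIBLE_DEVICES"] = "2"  # must be set before CUDA is initialized

import torch  # physical GPU 2 is now the only visible device, seen as cuda:0
assert torch.cuda.device_count() == 1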