I have refactored the code, but it converges much more slowly than the original code. I don't know what details I might have missed.
1.githut original code page:https://github.com/gist-ailab/IITNet-official 2.my code:
def setProjectPath(projectPath):
import os
import sys
root=os.path.abspath(projectPath)
print(root)
sys.path.append(root)
os.environ["TORCH_HOME"]=r"E:\Data\torch-model"
os.environ["KMP_DUPLICATE_LIB_OK"]='TRUE'
import warnings
warnings.filterwarnings("ignore", category=RuntimeWarning, module="mne")
projectPath=r'/mount/mount_project/test'
setProjectPath(projectPath)
iitnet_config_10={
"max_epochs": 500,
"dataset": "Sleep-EDF",
"signal_type": "Fpz-Cz",
"sampling_rate": 100,
"seq_len": 10,
"target_idx": -1,
"n_splits": 20,
"hidden_dim": 128,
"batch_size": 256,
"patience": 10,
"num_layers": 50,
"dropout_rate": 0.5,
"num_classes": 5,
"early_stopping_mode": "min",
"bidirectional": True,
"learning_rate": 0.005,
"weight_decay": 0.000001
}
iitnet_config_01={
"max_epochs": 500,
"dataset": "Sleep-EDF",
"signal_type": "Fpz-Cz",
"sampling_rate": 100,
"seq_len": 1,
"target_idx": -1,
"n_splits": 20,
"hidden_dim": 128,
"batch_size": 256,
"patience": 10,
"num_layers": 50,#resnet_18,34,50,101,152
"dropout_rate": 0.5,
"num_classes": 5,
"early_stopping_mode": "min",
"bidirectional": True,
"learning_rate": 0.005,
"weight_decay": 0.000001
}
from tqdm import tqdm
from project.dataset import loader
from torch.utils.data import *
train_dataset=loader.EEGDataLoader(iitnet_config_01,1)
train_dataloader=DataLoader(dataset=train_dataset,batch_size=128)
val_dataset=loader.EEGDataLoader(iitnet_config_01,1,mode='val')
val_dataloader=DataLoader(dataset=val_dataset,batch_size=128)
import project.model.iitnet.models.main_models as iitnet
import torch
model=iitnet.MainModel(config=iitnet_config_01)
model = torch.nn.DataParallel(model, device_ids=list(range(len('0'.split(",")))))
optimizer = torch.optim.Adam(model.parameters(),lr=iitnet_config_01['learning_rate'],weight_decay=iitnet_config_01['weight_decay'])
loss_fn=torch.nn.CrossEntropyLoss()
device=torch.device('cuda')
model.to(device)
for j in range(100):
model.train()
with tqdm(enumerate(train_dataloader),total=len(train_dataloader)) as t:
correct, total, train_loss = 0, 0, 0
for i,batch_data in t:
x,y=batch_data
x=x.to(device)
y=y.to(device)
total += y.size(0)
y_hat=model(x)
loss=loss_fn(y_hat,y)
optimizer.zero_grad()
loss.backward()
optimizer.step()
train_loss += loss.item()
predicted=torch.argmax(y_hat,1)
correct+=predicted.eq(y).sum().item()
t.set_description_str(f'第{j}epoch')
t.set_postfix_str('Loss: %.3f | TRAIN_Acc: %.3f%% (%d/%d)'
% (train_loss / (i + 1), 100. * correct / total, correct, total))
model.eval()
with tqdm(enumerate(val_dataloader),total=len(val_dataloader)) as t:
correct, total, val_loss = 0, 0, 0
for i,batch_data in t:
x,y=batch_data
x=x.to(device)
y=y.to(device)
total += y.size(0)
y_hat=model(x)
loss=loss_fn(y_hat,y)
val_loss += loss.item()
predicted=torch.argmax(y_hat,1)
correct+=predicted.eq(y).sum().item()
t.set_description_str(f'第{j}epoch')
t.set_postfix_str('Loss: %.3f | EVAL_Acc: %.3f%% (%d/%d)'
% (train_loss / (i + 1), 100. * correct / total, correct, total))
I want to know what detail I miss that make my code converges much more slowly than the original code