How do I reconstruct an audio signal using phase?

I have a neural network that separates noise from people's voices (or at least tries to), and I want to test it on a file. I used the script below, but I got back a buzzing sound that is not recognizable at all. I think the problem is that I am reconstructing the signal from the magnitude only, whereas I need to use the phase as well, as required by my project.

How do I do that?
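From what I have read, the usual approach is to keep the phase of the noisy input and recombine it with the magnitude that the network predicts. To make sure I understand that part in isolation, here is a minimal round-trip sketch (no model involved, just STFT → magnitude/phase → ISTFT on a dummy signal); my full script follows after it.

import torch

# Dummy mono signal standing in for a real recording
waveform = torch.randn(16000)
window = torch.hann_window(1024)

spec = torch.stft(waveform, n_fft=1024, hop_length=512, win_length=1024,
                  window=window, return_complex=True)

magnitude = torch.abs(spec)    # this is what the network would predict
phase = torch.angle(spec)      # this is kept from the (noisy) input

# Recombine magnitude and phase into a complex spectrogram and invert it
reconstructed = torch.istft(torch.polar(magnitude, phase),
                            n_fft=1024, hop_length=512, win_length=1024,
                            window=window, length=waveform.shape[0])

print(torch.allclose(waveform, reconstructed, atol=1e-4))  # expected: True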

import os
import soundfile as sf
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

# Define the model architecture
class AudioUNet(nn.Module):
    ...
# Create an instance of the model
model = AudioUNet(input_channels=1, start_neurons=16)

# Load the trained model weights
checkpoint_path = 'models/checkpoint_epoch.pth'
checkpoint = torch.load(checkpoint_path, map_location='cpu')  # load on CPU even if trained on GPU
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()  # Set the model to evaluation mode

# Load the noisy audio file
noisy_audio_file = r'F:\IRRI\combinate\mixed_audio_90.wav'  # raw string so the backslashes are not treated as escapes
noisy_waveform, sample_rate = sf.read(noisy_audio_file, always_2d=True)
noisy_waveform = torch.from_numpy(noisy_waveform).to(torch.float32)

# Apply the STFT to the noisy audio (squeeze() assumes the file is mono)
noisy_specgram = torch.stft(
    noisy_waveform.squeeze(),
    n_fft=1024,
    hop_length=512,
    win_length=1024,
    window=torch.hann_window(1024),
    return_complex=True,  # Ensure complex output for istft
)
noisy_magnitude = torch.abs(noisy_specgram)    # |X|, same as sqrt(real**2 + imag**2)
noisy_phase = torch.angle(noisy_specgram)      # phase of the noisy input, reused below

# Forward pass through the model
with torch.no_grad():
    output_magnitude = model(noisy_magnitude.unsqueeze(0))

# Drop the singleton batch (and channel) dimensions added for the forward pass
output_magnitude = output_magnitude.squeeze()

# Reconstruct the complex spectrogram: denoised magnitude combined with the noisy phase
output_specgram = torch.polar(output_magnitude, noisy_phase)  # same as magnitude * exp(1j * phase)
denoised_waveform = torch.istft(output_specgram, n_fft=1024, hop_length=512, win_length=1024,
                                window=torch.hann_window(1024), length=len(noisy_waveform))

# Convert the denoised waveform to a numpy array
denoised_waveform = denoised_waveform.numpy()

# Save the denoised audio as a new WAV file
denoised_audio_file = 'denoised.wav'
sf.write(denoised_audio_file, denoised_waveform, sample_rate)

print(f"Denoised audio saved at: {denoised_audio_file}")
