INMP441 audio data not correctly saved by flask server, why?

19 Views Asked by At

This is my ESP32 code. I'm using the ESP32-WROOM-32 Wi-Fi and Bluetooth module:

#include <WiFi.h>
#include <HTTPClient.h>
#include <driver/i2s.h>
#include <ArduinoJson.h>
#include <Base64.h>
// Access point the board joins at boot.
const char* ssid     = "SSID";
const char* password = "PASSWORD";

// Flask route that receives each base64-encoded audio chunk.
const char* serverName = "FLASK URL/route";

// GPIOs wired to the INMP441 microphone (word select, serial data, bit clock).
constexpr int I2S_WS  = 25;
constexpr int I2S_SD  = 33;
constexpr int I2S_SCK = 32;

// GPIOs for the record push-button and the "recording" indicator LED.
constexpr int BUTTON_PIN = 12;
constexpr int LED_PIN    = 13;

// millis() timestamp taken when the LED was last switched on.
unsigned long ledOnTime{0};
// True while a recording session is in progress.
bool recording{false};

void setup() {
  Serial.begin(115200);

  // Connect to WiFi
  WiFi.begin(ssid, password);
  while (WiFi.status() != WL_CONNECTED) {
    delay(1000);
    Serial.println("Connecting to WiFi...");
  }
  Serial.println("Connected to WiFi");
 if(WiFi.status() == WL_CONNECTED) {
    HTTPClient http;
    http.begin(serverName);
    int httpResponseCode = http.GET();
    Serial.println(httpResponseCode);
    if(httpResponseCode!=405){
      while(httpResponseCode!=405){
         int httpResponseCode = http.GET();
            Serial.println(httpResponseCode);
            if(httpResponseCode==405){
              break;
            }
      };
      http.end();
    };
    

  }
  i2s_config_t i2s_config = {
    .mode = (i2s_mode_t)(I2S_MODE_MASTER | I2S_MODE_RX | I2S_MODE_PDM),
    .sample_rate =  44100,
    .bits_per_sample = I2S_BITS_PER_SAMPLE_16BIT,
    .channel_format = I2S_CHANNEL_FMT_ALL_LEFT,
    .communication_format = I2S_COMM_FORMAT_I2S,
    .intr_alloc_flags = ESP_INTR_FLAG_LEVEL1,
    .dma_buf_count = 2,
    .dma_buf_len = 1024
  };

  i2s_pin_config_t pin_config;
  pin_config.bck_io_num = I2S_SCK;
  pin_config.ws_io_num = I2S_WS;
  pin_config.data_out_num = -1;
  pin_config.data_in_num = I2S_SD;

  i2s_driver_install(I2S_NUM_0, &i2s_config, 0, NULL);
  i2s_set_pin(I2S_NUM_0, &pin_config);
  i2s_set_clk(I2S_NUM_0, 44100, I2S_BITS_PER_SAMPLE_16BIT, I2S_CHANNEL_MONO);

  // Set the button as input
  pinMode(BUTTON_PIN, INPUT_PULLUP);

  // Set the LED as output
  pinMode(LED_PIN, OUTPUT);
}

void loop() {
  // Check if the button is pressed
   if (digitalRead(BUTTON_PIN) == 0 && !recording) {
    digitalWrite(LED_PIN, HIGH); // Turn on the LED
    ledOnTime = millis();
    recording = true;
  }
  if (recording) {
     // Turn on the LED

    uint16_t i2s_read_buff[1024];
    size_t bytes_read;

    // Record for 10 seconds or until the button is released
     for(int i = 0; i < 10 * 44100 / sizeof(i2s_read_buff); i++) {
    i2s_read(I2S_NUM_0, i2s_read_buff, sizeof(i2s_read_buff), &bytes_read, portMAX_DELAY);

    // Send audio data to Flask app
    if(WiFi.status()== WL_CONNECTED){
      HTTPClient http;
      http.begin(serverName);
      http.addHeader("Content-Type", "application/json");

      // Base64 encode the audio data
      String base64Audio = base64::encode((uint8_t*)i2s_read_buff, bytes_read);

      // Create a JSON object with the audio data
      StaticJsonDocument<200> doc;
      doc["audio"] = base64Audio;
      String json;
      serializeJson(doc, json);

      // Send the JSON object in the POST request
      int httpResponseCode = http.POST(json);
      http.end();
    }
    else{
      Serial.println("WiFi Disconnected");
    }
  }
  if (millis() - ledOnTime >= 10000) {
      digitalWrite(LED_PIN, LOW); // Turn off the LED
       if(WiFi.status() == WL_CONNECTED){
  HTTPClient http;
  http.begin( "flask URL /save");
  int httpResponseCode = http.POST("");
  http.end();
}
      recording = false;
    }
   

  }
 
}

There is a simple button connected to the ESP32. When the button is pushed, an LED turns on, signaling that the Flask server is starting to receive audio data from the INMP441 mic. I did enter my SSID and password as well as the correct server URL. I'm also sending a GET request just to establish the initial Flask connection, as my ESP32 takes about 10 minutes to establish an initial connection to the Flask server.

Currently, this is my server code:

from flask import Flask, request
from pydub import AudioSegment
import io
import base64

# Application object that the route decorators in this file register against.
app = Flask(__name__)

# Decoded audio chunks, kept in arrival order until they are exported.
audio_segments = list()

@app.route('/endpoint', methods=['POST'])
def handle_audio():
    """Receive one base64-encoded chunk of raw PCM audio and queue it.

    Expects a JSON body of the form ``{"audio": "<base64 string>"}``. The
    decoded bytes are interpreted as 16-bit mono samples at 44100 Hz and
    appended to ``audio_segments``.

    Returns:
        ``'Audio data received!'`` on success, or an explanatory message with
        HTTP status 400 when the body is missing, malformed, or empty.
    """
    # silent=True yields None for a missing/invalid JSON body instead of
    # raising, so a bad request gets a clear 400 rather than an error page.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or not isinstance(data.get('audio'), str):
        return 'Request body must be JSON with a string "audio" field', 400

    try:
        audio_data = base64.b64decode(data['audio'])
    except ValueError:
        # binascii.Error (bad padding etc.) is a subclass of ValueError.
        return 'The "audio" field is not valid base64', 400

    # Samples are 2 bytes wide; drop a trailing partial sample so the raw
    # buffer is always a whole number of frames.
    audio_data = audio_data[:len(audio_data) - (len(audio_data) % 2)]
    if not audio_data:
        return 'The "audio" field contained no samples', 400

    # Create an AudioSegment from the raw data
    incoming_audio = AudioSegment.from_raw(
        io.BytesIO(audio_data), sample_width=2, frame_rate=44100, channels=1
    )

    # Add the incoming audio to the list of segments
    audio_segments.append(incoming_audio)

    return 'Audio data received!'

@app.route('/save', methods=['POST'])
def save_audio():
    """Concatenate every queued chunk, export it as FLAC, and reset the queue.

    Returns:
        ``'Audio data saved as FLAC file!'`` on success, or a message with
        HTTP status 400 when no chunks have been received.
    """
    # sum() of an empty list is the integer 0, which has no .export();
    # answer with a clear error instead of crashing.
    if not audio_segments:
        return 'No audio data has been received', 400

    # Concatenate all audio segments
    combined_audio = sum(audio_segments)

    # Export as FLAC
    combined_audio.export("PATH/Desktop/output.flac", format="flac")

    # Start the next recording from scratch; without this every later file
    # would also contain all previously recorded audio.
    audio_segments.clear()

    return 'Audio data saved as FLAC file!'

# When run as a script, start Flask's built-in server on port 5000, bound to
# all interfaces.
if __name__ == '__main__':
    app.run(port=5000, host='0.0.0.0')

Yes, I know that triggering the save with another POST request is not the best approach, but it's temporary. My problem is that the audio data I save is simply noise, and that even though the recording is 10 seconds long, only 4 seconds are saved in the file. The audio data is received as separate chunks; after all the chunks have been received, the server saves the file.

What I want to do is simply record voice audio for 10 seconds and save it as an audio file through a Flask server. Can anyone please help? Thanks.

0

There are 0 best solutions below