Speech-to-Text API polling timeout with LRO for files on Google Cloud Storage

20 Views Asked by At

I am trying to transcribe audio files (.wav) with an average duration of 20–30 minutes, but after exactly 4 minutes and 30 seconds Google cancels the task. I have looked at their documentation, which refers to a polling algorithm with a timeout of 24 hours; moreover, when the error is received, this GitHub link is given to better understand the problem: https://github.com/googleapis/google-cloud-java?tab=readme-ov-file#lro-timeouts

I have overridden the polling algorithm, but it seems that it is not applied correctly. The file is successfully uploaded to the bucket, and the URI is also obtained successfully.

import com.google.api.gax.core.CredentialsProvider;
import com.google.api.gax.core.FixedCredentialsProvider;
import com.google.api.gax.longrunning.OperationFuture;
import com.google.api.gax.longrunning.OperationTimedPollAlgorithm;
import com.google.api.gax.retrying.RetrySettings;
import com.google.api.gax.retrying.TimedRetryAlgorithm;
import com.google.auth.oauth2.GoogleCredentials;
import com.google.cloud.speech.v1p1beta1.*;

import com.google.cloud.storage.BlobId;
import com.google.cloud.storage.BlobInfo;
import com.google.cloud.storage.Storage;
import com.google.cloud.storage.StorageOptions;
import org.threeten.bp.Duration;


import java.io.*;


import java.nio.file.Paths;

import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;


public class Main {

    // Name of the transcription output file; assigned in
    // performAudioConversionAndTranscription as "<audioFileName>_TRASCRIZIONE_.txt".
    private static String textFileName = "";
    // Full path of the transcription output file; assigned in
    // performAudioConversionAndTranscription from a fixed Downloads folder plus textFileName.
    private static String textFilePath = "";
    // Enum constant name passed to RecognitionConfig.AudioEncoding.valueOf(...).
    private static final String AUDIO_ENCODING_LINEAR16 = "LINEAR16";
    // Recognition configuration built in performAudioConversionAndTranscription
    // and passed to longRunningRecognizeAsync.
    private static RecognitionConfig config;
    // Used as the folder prefix inside the "cartabia" bucket and as a suffix of the
    // uploaded file name ("<name>_<comune>.wav").
    private static String comune = "porco";
    // Language code passed to RecognitionConfig.Builder.setLanguageCode(...).
    private static String linguaTraduzione = "it-IT";

    /**
     * Program entry point. Calls {@code performAudioConversionAndTranscription}
     * with a hard-coded credentials JSON file and a hard-coded WAV file path.
     *
     * @param args command-line arguments; not used
     * @throws RuntimeException wrapping any exception thrown while building the
     *     arguments or running the call
     */
    public static void main(String[] args) {
        try {
            // Both locations are fixed to the author's machine.
            File credentialsFile = new File("C:\\Users\\aless\\IdeaProjects\\google\\src\\main\\java\\org\\example\\sa.json");
            String wavPath = "C:\\Users\\aless\\Downloads\\prova.wav";
            performAudioConversionAndTranscription(credentialsFile, wavPath);
        } catch (Exception failure) {
            // Keep the original exception as the cause so the stack trace is preserved.
            throw new RuntimeException(failure);
        }
    }

    private static void performAudioConversionAndTranscription(File credenziali, String audioPath) {

        try {

            //--------------------CONFIGURAZIONE E CREAZIONE CREDENZIALI STT------------------------//
            GoogleCredentials googleCredentials = GoogleCredentials.fromStream(new FileInputStream(credenziali));
            CredentialsProvider credentialsProvider = FixedCredentialsProvider.create(googleCredentials);
            SpeechSettings settings = SpeechSettings.newBuilder().setCredentialsProvider(credentialsProvider).build();
            SpeechClient speechClient = SpeechClient.create(settings);



            //----------------------SET CONFIGURAZIONE POLLING------------------------------//
            SpeechSettings.Builder speechSettings = SpeechSettings.newBuilder();
            TimedRetryAlgorithm timedRetryAlgorithm =
                    OperationTimedPollAlgorithm.create(
                            RetrySettings.newBuilder()
                                    .setInitialRetryDelay(Duration.ofMillis(500L))
                                    .setRetryDelayMultiplier(1.5)
                                    .setMaxRetryDelay(Duration.ofMillis(5000L))
                                    .setTotalTimeout(Duration.ofHours(24L)) // set polling timeout to 24 hours
                                    .build());



            speechSettings.longRunningRecognizeOperationSettings().setPollingAlgorithm(timedRetryAlgorithm);

            //----------------CONFIGURAZIONE OGGETTO PER LA DIARIZZAZIONE----------------------//
            SpeakerDiarizationConfig speakerDiarizationConfig = SpeakerDiarizationConfig.newBuilder()
                    .setEnableSpeakerDiarization(true)
                    .setMinSpeakerCount(1)
                    .setMaxSpeakerCount(6)
                    .build();

            String audioFileName = "rdfd";


            //----------------------------CONFIGURAZIONE STT---------------------------//
            config = RecognitionConfig.newBuilder()
                    .setEncoding(RecognitionConfig.AudioEncoding.valueOf(AUDIO_ENCODING_LINEAR16))
                    .setLanguageCode(linguaTraduzione)
                    .setDiarizationConfig(speakerDiarizationConfig)
                    .setEnableAutomaticPunctuation(true)
                    .setEnableWordConfidence(true)
                    .setAudioChannelCount(2)
                    .build();

            textFileName = audioFileName + "_TRASCRIZIONE_.txt";
            String audioFolderPath = new File(audioPath).getParent();
            textFilePath = "C:\\Users\\aless\\Downloads\\" + textFileName;
            System.out.println(textFilePath);
            Storage storage =  StorageOptions.newBuilder().setCredentials(googleCredentials).build().getService();

            // Verifica se la cartella esiste già
            if (storage.get("cartabia", comune) == null) {
                // Crea l'oggetto BlobInfo per la cartella
                BlobInfo blobInfo = BlobInfo.newBuilder("cartabia", comune + "/").build();

                // Carica l'oggetto BlobInfo nel bucket
                storage.create(blobInfo);
            }

            //------------------------CARTELLA CREATA--------------------//
            String fileName = new File(audioPath).getName();

            // Rimuovi l'estensione .wav se presente
            if (fileName.endsWith(".wav")) {
                fileName = fileName.substring(0, fileName.length() - 4);
            }

            // Aggiungi _comune.wav alla fine del nome del file
            fileName = fileName + "_" + comune + ".wav";

            // Costruisci il percorso completo del file nella cartella
            String fullPath = comune + "/" + fileName;
            System.out.println(fullPath);
            // Crea l'oggetto BlobId con il percorso completo
            BlobId blobId = BlobId.of("cartabia", fullPath);

            // Crea l'oggetto BlobInfo con l'ID del blob
            BlobInfo blobInfo = BlobInfo.newBuilder(blobId).build();

            //---------------EVITA RACE CONDITION---------------------//
            Storage.BlobWriteOption precondition;
            if (storage.get("cartabia", fullPath) == null) {
                precondition = Storage.BlobWriteOption.doesNotExist();
            } else {
                precondition =
                        Storage.BlobWriteOption.generationMatch(
                        );
            }

            //--------------CARICA FILE WAV NEL BUCKET-----------------//
            storage.createFrom(blobInfo, Paths.get(audioPath), precondition);

            //---------------OTTIENE URI ASSOCIATA AL WAV APPENA CARICATO-------------------//
            String gcsUri = String.format("gs://cartabia/%s",fullPath);

            System.out.println(gcsUri);

            RecognitionAudio audio = RecognitionAudio.newBuilder().setUri(gcsUri).build();


            OperationFuture<LongRunningRecognizeResponse, LongRunningRecognizeMetadata> response =
                    speechClient.longRunningRecognizeAsync(config, audio);

           while(!response.isDone()){
               Thread.sleep(10000);
           }

            System.out.println(response.get());
            List<SpeechRecognitionResult> resultaa = response.get().getResultsList();
0

There are 0 best solutions below