Pocketsphinx simply listening for one command

981 Views Asked by At

I have successfully created a speech-to-text app that opens activities when it recognises keywords. At the moment I need to click a button to start the speech-to-text feature. I no longer want to press the button manually; instead, I want a continuous listener to trigger it.

I have been looking into using pocketsphinx and have added the keyphrase "listen to command". Once this keyphrase is heard, I want the button to be pressed automatically, followed by a sequence of commands that I can add to the code. I do not need offline speech to text, so I am using Google's speech-to-text option, but I am planning to use pocketsphinx to trigger Google's speech-to-text feature.

Below is most of the code that I partly modified from pocketsphinx:

public class PocketSphinxActivity extends Activity implements RecognitionListener {

 /* Names of the searches registered with the recognizer; used to switch the active search */
 private static final String KWS_SEARCH = "wakeup";
 private static final String FORECAST_SEARCH = "forecast";
 private static final String DIGITS_SEARCH = "digits";
 private static final String PHONE_SEARCH = "phones";
 private static final String MENU_SEARCH = "menu";

 /* Keyphrase that, once spotted, switches the recognizer to the menu search */
 private static final String KEYPHRASE = "listen to command"; //adjust this keyphrase!

 /* Recognizer instance; assigned in setupRecognizer(), which is called from the background task */
 private SpeechRecognizer recognizer;
 /* Maps a search name to the string resource id of the caption shown for it */
 private HashMap < String, Integer > captions;

 /**
  * Builds the caption table, shows the layout with a "preparing" message and
  * starts recognizer initialization in an async task.
  */
 @Override
 public void onCreate(Bundle state) {
  super.onCreate(state);

  // Caption resource for every search the UI can display
  captions = new HashMap<String, Integer>();
  captions.put(KWS_SEARCH, R.string.kws_caption);
  captions.put(MENU_SEARCH, R.string.menu_caption);
  captions.put(DIGITS_SEARCH, R.string.digits_caption);
  captions.put(PHONE_SEARCH, R.string.phone_caption);
  captions.put(FORECAST_SEARCH, R.string.forecast_caption);

  setContentView(R.layout.main);
  TextView captionView = (TextView) findViewById(R.id.caption_text);
  captionView.setText("Preparing the recognizer");

  // Recognizer initialization is time-consuming and involves IO,
  // so it is executed in an async task.
  new AsyncTask<Void, Void, Exception>() {
   @Override
   protected Exception doInBackground(Void... params) {
    Exception failure = null;
    try {
     Assets assets = new Assets(PocketSphinxActivity.this);
     File assetDir = assets.syncAssets();
     setupRecognizer(assetDir);
    } catch (IOException e) {
     failure = e;
    }
    return failure;
   }

   @Override
   protected void onPostExecute(Exception result) {
    if (result == null) {
     // Initialization succeeded: begin keyword spotting
     switchSearch(KWS_SEARCH);
     return;
    }
    TextView failureView = (TextView) findViewById(R.id.caption_text);
    failureView.setText("Failed to init recognizer " + result);
   }
  }.execute();
 }

 /**
  * Releases the recognizer when the activity is destroyed.
  */
 @Override
 public void onDestroy() {
  super.onDestroy();

  // The recognizer is assigned by the background setup task, so it is still
  // null if the activity is destroyed before setup finishes or if setup failed.
  if (recognizer != null) {
   recognizer.cancel();
   recognizer.shutdown();
  }
 }

 /**
  * Receives intermediate hypotheses while decoding is in progress.
  * A hypothesis equal to the keyphrase opens the menu search, one equal to a
  * demo search name opens that search, and anything else is shown as the
  * current result text.
  */
 @Override
 public void onPartialResult(Hypothesis hypothesis) {
  if (hypothesis == null) {
   return;
  }

  final String heard = hypothesis.getHypstr();
  if (heard.equals(KEYPHRASE)) {
   switchSearch(MENU_SEARCH);
   return;
  }
  if (heard.equals(DIGITS_SEARCH)) {
   switchSearch(DIGITS_SEARCH);
   return;
  }
  if (heard.equals(PHONE_SEARCH)) {
   switchSearch(PHONE_SEARCH);
   return;
  }
  if (heard.equals(FORECAST_SEARCH)) {
   switchSearch(FORECAST_SEARCH);
   return;
  }

  // Not a command: just display what has been heard so far
  TextView resultView = (TextView) findViewById(R.id.result_text);
  resultView.setText(heard);
 }

 /**
  * Called when the recognizer is stopped. Clears the result text and,
  * if a hypothesis is available, shows it in a short toast.
  */
 @Override
 public void onResult(Hypothesis hypothesis) {
  TextView resultView = (TextView) findViewById(R.id.result_text);
  resultView.setText("");

  if (hypothesis == null) {
   return;
  }

  String finalText = hypothesis.getHypstr();
  makeText(getApplicationContext(), finalText, Toast.LENGTH_SHORT).show();
 }

 /** Intentionally empty: nothing is done when speech begins. */
 @Override
 public void onBeginningOfSpeech() {}

 /**
  * When speech ends during any search other than keyword spotting,
  * switch back to keyword spotting (which stops the recognizer first).
  */
 @Override
 public void onEndOfSpeech() {
  if (recognizer.getSearchName().equals(KWS_SEARCH)) {
   // Already spotting the keyphrase; keep listening
   return;
  }
  switchSearch(KWS_SEARCH);
 }

 /**
  * Stops the recognizer, restarts listening on the given search and updates
  * the caption text to the one registered for that search.
  *
  * @param searchName name of the search to activate; must have an entry in captions
  */
 private void switchSearch(String searchName) {
  recognizer.stop();

  // Keyword spotting listens without a timeout; every other search
  // listens with a timeout of 10000 ms (10 seconds).
  final boolean keywordSpotting = searchName.equals(KWS_SEARCH);
  if (keywordSpotting) {
   recognizer.startListening(searchName);
  } else {
   recognizer.startListening(searchName, 10000);
  }

  int captionId = captions.get(searchName);
  String caption = getResources().getString(captionId);
  TextView captionView = (TextView) findViewById(R.id.caption_text);
  captionView.setText(caption);
 }

 private void setupRecognizer(File assetsDir) throws IOException {
  // The recognizer can be configured to perform multiple searches
  // of different kind and switch between them

  recognizer = defaultSetup()
   .setAcousticModel(new File(assetsDir, "en-us-ptm"))
   .setDictionary(new File(assetsDir, "cmudict-en-us.dict"))

  // To disable logging of raw audio comment out this call (takes a lot of space on the device)
  .setRawLogDir(assetsDir)

  // Threshold to tune for keyphrase to balance between false alarms and misses
  .setKeywordThreshold(1e-45 f)

  // Use context-independent phonetic search, context-dependent is too slow for mobile
  .setBoolean("-allphone_ci", true)

  .getRecognizer();
  recognizer.addListener(this);

  /** In your application you might not need to add all those searches.
   * They are added here for demonstration. You can leave just one.
   */

  // Create keyword-activation search.
  recognizer.addKeyphraseSearch(KWS_SEARCH, KEYPHRASE);

  // Create grammar-based search for selection between demos
  File menuGrammar = new File(assetsDir, "menu.gram");
  recognizer.addGrammarSearch(MENU_SEARCH, menuGrammar);

  // Create grammar-based search for digit recognition
  File digitsGrammar = new File(assetsDir, "digits.gram");
  recognizer.addGrammarSearch(DIGITS_SEARCH, digitsGrammar);

  // Create language model search
  File languageModel = new File(assetsDir, "weather.dmp");
  recognizer.addNgramSearch(FORECAST_SEARCH, languageModel);

  // Phonetic search
  File phoneticModel = new File(assetsDir, "en-phone.dmp");
  recognizer.addAllphoneSearch(PHONE_SEARCH, phoneticModel);
 }

 /**
  * Shows the error's message in the caption text view.
  */
 @Override
 public void onError(Exception error) {
  TextView captionView = (TextView) findViewById(R.id.caption_text);
  String message = error.getMessage();
  captionView.setText(message);
 }

 /** On timeout, go back to keyword spotting. */
 @Override
 public void onTimeout() {
  switchSearch(KWS_SEARCH);
 }

I am not interested in most of the commands. I simply want the app to listen as soon as it is opened (which it currently does) and, as soon as a person says "listen to command", to press a button to which I have given the id bVoice.

If someone could kindly help me modify the above code, showing what to delete and what to add, that would be greatly appreciated. Also, if there is a much easier way of doing this, please feel free to share it (a custom keyphrase isn't necessary either).

1

There is 1 best solution below

9
On

Something like this. Note that you do not need to "push the button": you can call the button's callback directly instead and perform the actual steps you are interested in.

public class PocketSphinxActivity extends Activity implements RecognitionListener {

 /* Name under which the keyphrase search is registered with the recognizer */
 private static final String KWS_SEARCH = "wakeup";
 /* Phrase whose detection triggers the action */
 private static final String KEYPHRASE = "listen to command"; //adjust this keyphrase!

 /* Recognizer instance; assigned in setupRecognizer(), which is called from the background task */
 private SpeechRecognizer recognizer;

 /**
  * Shows the layout with a "preparing" message, initializes the recognizer
  * in an async task and starts keyphrase listening once it is ready.
  */
 @Override
 public void onCreate(Bundle state) {
  super.onCreate(state);

  setContentView(R.layout.main);
  TextView captionView = (TextView) findViewById(R.id.caption_text);
  captionView.setText("Preparing the recognizer");

  new AsyncTask<Void, Void, Exception>() {
   @Override
   protected Exception doInBackground(Void... params) {
    Exception failure = null;
    try {
     Assets assets = new Assets(PocketSphinxActivity.this);
     File assetDir = assets.syncAssets();
     setupRecognizer(assetDir);
    } catch (IOException e) {
     failure = e;
    }
    return failure;
   }

   @Override
   protected void onPostExecute(Exception result) {
    if (result == null) {
     // Setup succeeded: listen for the keyphrase
     recognizer.startListening(KWS_SEARCH);
     return;
    }
    TextView failureView = (TextView) findViewById(R.id.caption_text);
    failureView.setText("Failed to init recognizer " + result);
   }
  }.execute();
 }

 /**
  * Releases the recognizer when the activity is destroyed.
  */
 @Override
 public void onDestroy() {
  super.onDestroy();

  // The recognizer is assigned by the background setup task, so it is still
  // null if the activity is destroyed before setup finishes or if setup failed.
  if (recognizer != null) {
   recognizer.cancel();
   recognizer.shutdown();
  }
 }

 /**
  * Receives intermediate hypotheses. When the hypothesis equals the
  * keyphrase, the recognizer is cancelled, the action is performed and
  * keyphrase listening is started again.
  */
 @Override
 public void onPartialResult(Hypothesis hypothesis) {
  if (hypothesis == null) {
   return;
  }

  final boolean keyphraseHeard = hypothesis.getHypstr().equals(KEYPHRASE);
  if (!keyphraseHeard) {
   return;
  }

  recognizer.cancel();
  performAction();     // <- You have to implement this
  recognizer.startListening(KWS_SEARCH);
 }

 /** Intentionally empty: the keyphrase is handled in onPartialResult. */
 @Override
 public void onResult(Hypothesis hypothesis) {}

 /** Intentionally empty: nothing is done when speech begins. */
 @Override
 public void onBeginningOfSpeech() {}

 /** Intentionally empty: nothing is done when speech ends. */
 @Override
 public void onEndOfSpeech() {}

 /** Intentionally empty: nothing is done on timeout. */
 @Override
 public void onTimeout() {}

 /**
  * Creates the recognizer from the synced assets, registers this activity as
  * its listener and adds the single keyphrase search.
  *
  * @param assetsDir directory holding the acoustic model and the dictionary
  * @throws IOException declared for the recognizer setup calls below
  */
 private void setupRecognizer(File assetsDir) throws IOException {
  File acousticModel = new File(assetsDir, "en-us-ptm");
  File dictionary = new File(assetsDir, "cmudict-en-us.dict");

  recognizer = defaultSetup()
   .setAcousticModel(acousticModel)
   .setDictionary(dictionary)
   .getRecognizer();
  recognizer.addListener(this);

  // Only the keyword-activation search is needed here
  recognizer.addKeyphraseSearch(KWS_SEARCH, KEYPHRASE);
 }

 /**
  * Shows the error's message in the caption text view.
  */
 @Override
 public void onError(Exception error) {
  TextView captionView = (TextView) findViewById(R.id.caption_text);
  String message = error.getMessage();
  captionView.setText(message);
 }

 /**
  * Hook called from onPartialResult when the keyphrase is heard.
  * The name now matches that call site (it was misspelled "peformAction",
  * which left the call unresolved).
  */
 public void performAction() {
    // do here whatever you want
 }