Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
- /* ====================================================================
- * Copyright (c) 1999-2010 Carnegie Mellon University. All rights
- * reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the
- * distribution.
- *
- * This work was supported in part by funding from the Defense Advanced
- * Research Projects Agency and the National Science Foundation of the
- * United States of America, and the CMU Sphinx Speech Consortium.
- *
- * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
- * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
- * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
- * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
- * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
- * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
- * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
- * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * ====================================================================
- *
- */
- /*
- * continuous.c - Simple pocketsphinx command-line application to test
- * both continuous listening/silence filtering from microphone
- * and continuous file transcription.
- */
- /*
- * This is a simple example of pocketsphinx application that uses continuous listening
- * with silence filtering to automatically segment a continuous stream of audio input
- * into utterances that are then decoded.
- *
- * Remarks:
- * - Each utterance is ended when a silence segment of at least 1 sec is recognized.
- * - Single-threaded implementation for portability.
- * - Uses audio library; can be replaced with an equivalent custom library.
- */
- #include <stdio.h>
- #include <string.h>
- #if !defined(_WIN32_WCE)
- #include <signal.h>
- #include <setjmp.h>
- #endif
- #if defined(WIN32) && !defined(GNUWINCE)
- #include <time.h>
- #else
- #include <sys/types.h>
- #include <sys/time.h>
- #endif
- #include <sphinxbase/err.h>
- #include <sphinxbase/ad.h>
- #include <sphinxbase/cont_ad.h>
- #include <unistd.h>
- #include "pocketsphinx.h"
- static const arg_t cont_args_def[] = {
- POCKETSPHINX_OPTIONS,
- /* Argument file. */
- { "-argfile",
- ARG_STRING,
- NULL,
- "Argument file giving extra arguments." },
- { "-adcdev",
- ARG_STRING,
- NULL,
- "Name of audio device to use for input." },
- { "-infile",
- ARG_STRING,
- NULL,
- "Audio file to transcribe." },
- { "-time",
- ARG_BOOLEAN,
- "no",
- "Print word times in file transcription." },
- CMDLN_EMPTY_OPTION
- };
- static ps_decoder_t *ps;
- static cmd_ln_t *config;
- static FILE* rawfd;
- static int32
- ad_file_read(ad_rec_t * ad, int16 * buf, int32 max)
- {
- size_t nread;
- nread = fread(buf, sizeof(int16), max, rawfd);
- return (nread > 0 ? nread : -1);
- }
- static void
- print_word_times(int32 start)
- {
- ps_seg_t *iter = ps_seg_iter(ps, NULL);
- while (iter != NULL) {
- int32 sf, ef, pprob;
- float conf;
- ps_seg_frames (iter, &sf, &ef);
- pprob = ps_seg_prob (iter, NULL, NULL, NULL);
- conf = logmath_exp(ps_get_logmath(ps), pprob);
- printf ("%s %f %f %f\n", ps_seg_word (iter), (sf + start) / 100.0, (ef + start) / 100.0, conf);
- iter = ps_seg_next (iter);
- }
- }
- /*
- * Continuous recognition from a file
- */
- static void
- recognize_from_file() {
- cont_ad_t *cont;
- ad_rec_t file_ad = {0};
- int16 adbuf[4096];
- const char* hyp;
- const char* uttid;
- int32 k, ts, start;
- char waveheader[44];
- if ((rawfd = fopen(cmd_ln_str_r(config, "-infile"), "rb")) == NULL) {
- E_FATAL_SYSTEM("Failed to open file '%s' for reading",
- cmd_ln_str_r(config, "-infile"));
- }
- fread(waveheader, 1, 44, rawfd);
- file_ad.sps = (int32)cmd_ln_float32_r(config, "-samprate");
- file_ad.bps = sizeof(int16);
- if ((cont = cont_ad_init(&file_ad, ad_file_read)) == NULL) {
- E_FATAL("Failed to initialize voice activity detection");
- }
- if (cont_ad_calib(cont) < 0)
- E_FATAL("Failed to calibrate voice activity detection\n");
- rewind (rawfd);
- for (;;) {
- while ((k = cont_ad_read(cont, adbuf, 4096)) == 0);
- if (k < 0) {
- break;
- }
- if (ps_start_utt(ps, NULL) < 0)
- E_FATAL("ps_start_utt() failed\n");
- ps_process_raw(ps, adbuf, k, FALSE, FALSE);
- ts = cont->read_ts;
- start = ((ts - k) * 100.0) / file_ad.sps;
- for (;;) {
- if ((k = cont_ad_read(cont, adbuf, 4096)) < 0)
- break;
- if (k == 0) {
- /*
- * No speech data available; check current timestamp with most recent
- * speech to see if more than 1 sec elapsed. If so, end of utterance.
- */
- if ((cont->read_ts - ts) > DEFAULT_SAMPLES_PER_SEC)
- break;
- }
- else {
- /* New speech data received; note current timestamp */
- ts = cont->read_ts;
- }
- ps_process_raw(ps, adbuf, k, FALSE, FALSE);
- }
- ps_end_utt(ps);
- if (cmd_ln_boolean_r(config, "-time")) {
- print_word_times(start);
- } else {
- hyp = ps_get_hyp(ps, NULL, &uttid);
- printf("%s: %s\n", uttid, hyp);
- }
- fflush(stdout);
- }
- cont_ad_close(cont);
- fclose(rawfd);
- }
- /* Sleep for specified msec */
- static void
- sleep_msec(int32 ms)
- {
- #if (defined(WIN32) && !defined(GNUWINCE)) || defined(_WIN32_WCE)
- Sleep(ms);
- #else
- /* ------------------- Unix ------------------ */
- struct timeval tmo;
- tmo.tv_sec = 0;
- tmo.tv_usec = ms * 1000;
- select(0, NULL, NULL, NULL, &tmo);
- #endif
- }
- /*
- * Main utterance processing loop:
- * for (;;) {
- * wait for start of next utterance;
- * decode utterance until silence of at least 1 sec observed;
- * print utterance result;
- * }
- */
- static void
- recognize_from_microphone()
- {
- ad_rec_t *ad;
- int16 adbuf[4096];
- int32 k, ts, rem;
- char const *hyp;
- char const *uttid;
- cont_ad_t *cont;
- char word[256];
- if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"),
- (int)cmd_ln_float32_r(config, "-samprate"))) == NULL)
- E_FATAL("Failed to open audio device\n");
- /* Initialize continuous listening module */
- if ((cont = cont_ad_init(ad, ad_read)) == NULL)
- E_FATAL("Failed to initialize voice activity detection\n");
- if (ad_start_rec(ad) < 0)
- E_FATAL("Failed to start recording\n");
- if (cont_ad_calib(cont) < 0)
- E_FATAL("Failed to calibrate voice activity detection\n");
- for (;;) {
- /* Indicate listening for next utterance */
- printf("READY....\n");
- fflush(stdout);
- fflush(stderr);
- /* Wait data for next utterance */
- while ((k = cont_ad_read(cont, adbuf, 4096)) == 0)
- sleep_msec(100);
- if (k < 0)
- E_FATAL("Failed to read audio\n");
- /*
- * Non-zero amount of data received; start recognition of new utterance.
- * NULL argument to uttproc_begin_utt => automatic generation of utterance-id.
- */
- if (ps_start_utt(ps, NULL) < 0)
- E_FATAL("Failed to start utterance\n");
- ps_process_raw(ps, adbuf, k, FALSE, FALSE);
- printf("Listening...\n");
- fflush(stdout);
- /* Note timestamp for this first block of data */
- ts = cont->read_ts;
- /* Decode utterance until end (marked by a "long" silence, >1sec) */
- for (;;) {
- /* Read non-silence audio data, if any, from continuous listening module */
- if ((k = cont_ad_read(cont, adbuf, 4096)) < 0)
- E_FATAL("Failed to read audio\n");
- if (k == 0) {
- /*
- * No speech data available; check current timestamp with most recent
- * speech to see if more than 1 sec elapsed. If so, end of utterance.
- */
- if ((cont->read_ts - ts) > DEFAULT_SAMPLES_PER_SEC)
- break;
- }
- else {
- /* New speech data received; note current timestamp */
- ts = cont->read_ts;
- }
- /*
- * Decode whatever data was read above.
- */
- rem = ps_process_raw(ps, adbuf, k, FALSE, FALSE);
- /* If no work to be done, sleep a bit */
- if ((rem == 0) && (k == 0))
- sleep_msec(20);
- }
- /*
- * Utterance ended; flush any accumulated, unprocessed A/D data and stop
- * listening until current utterance completely decoded
- */
- ad_stop_rec(ad);
- while (ad_read(ad, adbuf, 4096) >= 0);
- cont_ad_reset(cont);
- printf("Stopped listening, please wait...\n");
- fflush(stdout);
- /* Finish decoding, obtain and print result */
- ps_end_utt(ps);
- hyp = ps_get_hyp(ps, NULL, &uttid);
- printf("%s: %s\n", uttid, hyp);
- fflush(stdout);
- /* Exit if the first word spoken was GOODBYE */
- if (hyp) {
- sscanf(hyp, "%s", word);
- if (strcmp(word, "GOODBYE") == 0)
- {
- system("espeak -v es-la \"Gracias por venir a la presentacion del Barcamp\"");
- //break;
- }
- else if(strcmp(word, "HELLO") == 0)
- {
- system("espeak -v es-la \"Hola, bienvenidos a la presentacion del Barcamp\"");
- //break;
- }
- else if(strcmp(word, "MUSIC") == 0)
- {
- FILE *fpipe;
- char *command = (char *)"aplay BobMarley.wav";
- char line[256];
- if ( !(fpipe = (FILE*)popen(command,"r")) )
- { // If fpipe is NULL
- perror("Problems with pipe");
- exit(1);
- }
- fgets( line, sizeof line, fpipe);
- pid_t pid = strtoul(line, NULL, 10);
- printf("The id is %d\n", pid);
- //system("aplay BobMarley.wav");
- //break;
- }
- }
- /* Resume A/D recording for next utterance */
- if (ad_start_rec(ad) < 0)
- E_FATAL("Failed to start recording\n");
- }
- cont_ad_close(cont);
- ad_close(ad);
- }
- static jmp_buf jbuf;
- static void
- sighandler(int signo)
- {
- longjmp(jbuf, 1);
- }
- int
- main(int argc, char *argv[])
- {
- char const *cfg;
- if (argc == 2) {
- config = cmd_ln_parse_file_r(NULL, cont_args_def, argv[1], TRUE);
- }
- else {
- config = cmd_ln_parse_r(NULL, cont_args_def, argc, argv, FALSE);
- }
- /* Handle argument file as -argfile. */
- if (config && (cfg = cmd_ln_str_r(config, "-argfile")) != NULL) {
- config = cmd_ln_parse_file_r(config, cont_args_def, cfg, FALSE);
- }
- if (config == NULL)
- return 1;
- ps = ps_init(config);
- if (ps == NULL)
- return 1;
- E_INFO("%s COMPILED ON: %s, AT: %s\n\n", argv[0], __DATE__, __TIME__);
- if (cmd_ln_str_r(config, "-infile") != NULL) {
- recognize_from_file();
- } else {
- /* Make sure we exit cleanly (needed for profiling among other things) */
- /* Signals seem to be broken in arm-wince-pe. */
- #if !defined(GNUWINCE) && !defined(_WIN32_WCE) && !defined(__SYMBIAN32__)
- signal(SIGINT, &sighandler);
- #endif
- if (setjmp(jbuf) == 0) {
- recognize_from_microphone();
- }
- }
- ps_free(ps);
- return 0;
- }
- /** Silvio Moioli: Windows CE/Mobile entry point added. */
- #if defined(_WIN32_WCE)
- #pragma comment(linker,"/entry:mainWCRTStartup")
- #include <windows.h>
- //Windows Mobile has the Unicode main only
- int wmain(int32 argc, wchar_t *wargv[]) {
- char** argv;
- size_t wlen;
- size_t len;
- int i;
- argv = malloc(argc*sizeof(char*));
- for (i=0; i<argc; i++){
- wlen = lstrlenW(wargv[i]);
- len = wcstombs(NULL, wargv[i], wlen);
- argv[i] = malloc(len+1);
- wcstombs(argv[i], wargv[i], wlen);
- }
- //assuming ASCII parameters
- return main(argc, argv);
- }
- #endif
Advertisement
Add Comment
Please, Sign In to add comment