Guest User

C

a guest
Nov 10th, 2014
366
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C 13.12 KB | None | 0 0
  1. /* -*- c-basic-offset: 4; indent-tabs-mode: nil -*- */
  2. /* ====================================================================
  3.  * Copyright (c) 1999-2010 Carnegie Mellon University.  All rights
  4.  * reserved.
  5.  *
  6.  * Redistribution and use in source and binary forms, with or without
  7.  * modification, are permitted provided that the following conditions
  8.  * are met:
  9.  *
  10.  * 1. Redistributions of source code must retain the above copyright
  11.  *    notice, this list of conditions and the following disclaimer.
  12.  *
  13.  * 2. Redistributions in binary form must reproduce the above copyright
  14.  *    notice, this list of conditions and the following disclaimer in
  15.  *    the documentation and/or other materials provided with the
  16.  *    distribution.
  17.  *
  18.  * This work was supported in part by funding from the Defense Advanced
  19.  * Research Projects Agency and the National Science Foundation of the
  20.  * United States of America, and the CMU Sphinx Speech Consortium.
  21.  *
  22.  * THIS SOFTWARE IS PROVIDED BY CARNEGIE MELLON UNIVERSITY ``AS IS'' AND
  23.  * ANY EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  24.  * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  25.  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL CARNEGIE MELLON UNIVERSITY
  26.  * NOR ITS EMPLOYEES BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  27.  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  28.  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  29.  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  30.  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  31.  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  32.  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  33.  *
  34.  * ====================================================================
  35.  *
  36.  */
  37. /*
  38.  * continuous.c - Simple pocketsphinx command-line application to test
  39.  *                both continuous listening/silence filtering from microphone
  40.  *                and continuous file transcription.
  41.  */
  42.  
  43. /*
  44.  * This is a simple example of pocketsphinx application that uses continuous listening
  45.  * with silence filtering to automatically segment a continuous stream of audio input
  46.  * into utterances that are then decoded.
  47.  *
  48.  * Remarks:
  49.  *   - Each utterance is ended when a silence segment of at least 1 sec is recognized.
  50.  *   - Single-threaded implementation for portability.
  51.  *   - Uses audio library; can be replaced with an equivalent custom library.
  52.  */
  53.  
  54.  
  55. #include <stdio.h>
  56. #include <string.h>
  57.  
  58. #if !defined(_WIN32_WCE)
  59. #include <signal.h>
  60. #include <setjmp.h>
  61. #endif
  62. #if defined(WIN32) && !defined(GNUWINCE)
  63. #include <time.h>
  64. #else
  65. #include <sys/types.h>
  66. #include <sys/time.h>
  67. #endif
  68.  
  69. #include <sphinxbase/err.h>
  70. #include <sphinxbase/ad.h>
  71. #include <sphinxbase/cont_ad.h>
  72. #include <unistd.h>
  73.  
  74.  
  75. #include "pocketsphinx.h"
  76.  
  77. static const arg_t cont_args_def[] = {
  78.     POCKETSPHINX_OPTIONS,
  79.     /* Argument file. */
  80.     { "-argfile",
  81.       ARG_STRING,
  82.       NULL,
  83.       "Argument file giving extra arguments." },
  84.     { "-adcdev",
  85.       ARG_STRING,
  86.       NULL,
  87.       "Name of audio device to use for input." },
  88.     { "-infile",
  89.       ARG_STRING,
  90.       NULL,
  91.       "Audio file to transcribe." },
  92.     { "-time",
  93.       ARG_BOOLEAN,
  94.       "no",
  95.       "Print word times in file transcription." },
  96.     CMDLN_EMPTY_OPTION
  97. };
  98.  
  99. static ps_decoder_t *ps;
  100. static cmd_ln_t *config;
  101. static FILE* rawfd;
  102.  
  103. static int32
  104. ad_file_read(ad_rec_t * ad, int16 * buf, int32 max)
  105. {
  106.     size_t nread;
  107.    
  108.     nread = fread(buf, sizeof(int16), max, rawfd);
  109.    
  110.     return (nread > 0 ? nread : -1);
  111. }
  112.  
  113. static void
  114. print_word_times(int32 start)
  115. {
  116.     ps_seg_t *iter = ps_seg_iter(ps, NULL);
  117.     while (iter != NULL) {
  118.         int32 sf, ef, pprob;
  119.         float conf;
  120.        
  121.         ps_seg_frames (iter, &sf, &ef);
  122.         pprob = ps_seg_prob (iter, NULL, NULL, NULL);
  123.         conf = logmath_exp(ps_get_logmath(ps), pprob);
  124.         printf ("%s %f %f %f\n", ps_seg_word (iter), (sf + start) / 100.0, (ef + start) / 100.0, conf);
  125.         iter = ps_seg_next (iter);
  126.     }
  127. }
  128.  
  129. /*
  130.  * Continuous recognition from a file
  131.  */
  132. static void
  133. recognize_from_file() {
  134.     cont_ad_t *cont;
  135.     ad_rec_t file_ad = {0};
  136.     int16 adbuf[4096];
  137.     const char* hyp;
  138.     const char* uttid;
  139.     int32 k, ts, start;
  140.  
  141.     char waveheader[44];
  142.     if ((rawfd = fopen(cmd_ln_str_r(config, "-infile"), "rb")) == NULL) {
  143.     E_FATAL_SYSTEM("Failed to open file '%s' for reading",
  144.             cmd_ln_str_r(config, "-infile"));
  145.     }
  146.    
  147.     fread(waveheader, 1, 44, rawfd);
  148.  
  149.     file_ad.sps = (int32)cmd_ln_float32_r(config, "-samprate");
  150.     file_ad.bps = sizeof(int16);
  151.  
  152.     if ((cont = cont_ad_init(&file_ad, ad_file_read)) == NULL) {
  153.         E_FATAL("Failed to initialize voice activity detection");
  154.     }
  155.     if (cont_ad_calib(cont) < 0)
  156.         E_FATAL("Failed to calibrate voice activity detection\n");
  157.     rewind (rawfd);
  158.  
  159.     for (;;) {
  160.  
  161.     while ((k = cont_ad_read(cont, adbuf, 4096)) == 0);
  162.    
  163.         if (k < 0) {
  164.             break;
  165.         }
  166.  
  167.         if (ps_start_utt(ps, NULL) < 0)
  168.             E_FATAL("ps_start_utt() failed\n");
  169.  
  170.         ps_process_raw(ps, adbuf, k, FALSE, FALSE);
  171.        
  172.         ts = cont->read_ts;
  173.         start = ((ts - k) * 100.0) / file_ad.sps;
  174.        
  175.         for (;;) {
  176.             if ((k = cont_ad_read(cont, adbuf, 4096)) < 0)
  177.                 break;
  178.  
  179.             if (k == 0) {
  180.                 /*
  181.                  * No speech data available; check current timestamp with most recent
  182.                  * speech to see if more than 1 sec elapsed.  If so, end of utterance.
  183.                  */
  184.                 if ((cont->read_ts - ts) > DEFAULT_SAMPLES_PER_SEC)
  185.                     break;
  186.             }
  187.             else {
  188.                 /* New speech data received; note current timestamp */
  189.                 ts = cont->read_ts;
  190.             }
  191.  
  192.  
  193.             ps_process_raw(ps, adbuf, k, FALSE, FALSE);
  194.         }
  195.  
  196.         ps_end_utt(ps);
  197.        
  198.         if (cmd_ln_boolean_r(config, "-time")) {
  199.         print_word_times(start);
  200.     } else {
  201.         hyp = ps_get_hyp(ps, NULL, &uttid);
  202.             printf("%s: %s\n", uttid, hyp);
  203.         }
  204.         fflush(stdout);
  205.     }
  206.  
  207.     cont_ad_close(cont);
  208.     fclose(rawfd);
  209. }
  210.  
  211. /* Sleep for specified msec */
  212. static void
  213. sleep_msec(int32 ms)
  214. {
  215. #if (defined(WIN32) && !defined(GNUWINCE)) || defined(_WIN32_WCE)
  216.     Sleep(ms);
  217. #else
  218.     /* ------------------- Unix ------------------ */
  219.     struct timeval tmo;
  220.  
  221.     tmo.tv_sec = 0;
  222.     tmo.tv_usec = ms * 1000;
  223.  
  224.     select(0, NULL, NULL, NULL, &tmo);
  225. #endif
  226. }
  227.  
  228. /*
  229.  * Main utterance processing loop:
  230.  *     for (;;) {
  231.  *     wait for start of next utterance;
  232.  *     decode utterance until silence of at least 1 sec observed;
  233.  *     print utterance result;
  234.  *     }
  235.  */
  236. static void
  237. recognize_from_microphone()
  238. {
  239.     ad_rec_t *ad;
  240.     int16 adbuf[4096];
  241.     int32 k, ts, rem;
  242.     char const *hyp;
  243.     char const *uttid;
  244.     cont_ad_t *cont;
  245.     char word[256];
  246.  
  247.     if ((ad = ad_open_dev(cmd_ln_str_r(config, "-adcdev"),
  248.                           (int)cmd_ln_float32_r(config, "-samprate"))) == NULL)
  249.         E_FATAL("Failed to open audio device\n");
  250.  
  251.     /* Initialize continuous listening module */
  252.     if ((cont = cont_ad_init(ad, ad_read)) == NULL)
  253.         E_FATAL("Failed to initialize voice activity detection\n");
  254.     if (ad_start_rec(ad) < 0)
  255.         E_FATAL("Failed to start recording\n");
  256.     if (cont_ad_calib(cont) < 0)
  257.         E_FATAL("Failed to calibrate voice activity detection\n");
  258.  
  259.     for (;;) {
  260.         /* Indicate listening for next utterance */
  261.         printf("READY....\n");
  262.         fflush(stdout);
  263.         fflush(stderr);
  264.  
  265.         /* Wait data for next utterance */
  266.         while ((k = cont_ad_read(cont, adbuf, 4096)) == 0)
  267.             sleep_msec(100);
  268.  
  269.         if (k < 0)
  270.             E_FATAL("Failed to read audio\n");
  271.  
  272.         /*
  273.          * Non-zero amount of data received; start recognition of new utterance.
  274.          * NULL argument to uttproc_begin_utt => automatic generation of utterance-id.
  275.          */
  276.         if (ps_start_utt(ps, NULL) < 0)
  277.             E_FATAL("Failed to start utterance\n");
  278.         ps_process_raw(ps, adbuf, k, FALSE, FALSE);
  279.         printf("Listening...\n");
  280.         fflush(stdout);
  281.  
  282.         /* Note timestamp for this first block of data */
  283.         ts = cont->read_ts;
  284.  
  285.         /* Decode utterance until end (marked by a "long" silence, >1sec) */
  286.         for (;;) {
  287.             /* Read non-silence audio data, if any, from continuous listening module */
  288.             if ((k = cont_ad_read(cont, adbuf, 4096)) < 0)
  289.                 E_FATAL("Failed to read audio\n");
  290.             if (k == 0) {
  291.                 /*
  292.                  * No speech data available; check current timestamp with most recent
  293.                  * speech to see if more than 1 sec elapsed.  If so, end of utterance.
  294.                  */
  295.                 if ((cont->read_ts - ts) > DEFAULT_SAMPLES_PER_SEC)
  296.                     break;
  297.             }
  298.             else {
  299.                 /* New speech data received; note current timestamp */
  300.                 ts = cont->read_ts;
  301.             }
  302.  
  303.             /*
  304.              * Decode whatever data was read above.
  305.              */
  306.             rem = ps_process_raw(ps, adbuf, k, FALSE, FALSE);
  307.  
  308.             /* If no work to be done, sleep a bit */
  309.             if ((rem == 0) && (k == 0))
  310.                 sleep_msec(20);
  311.         }
  312.  
  313.         /*
  314.          * Utterance ended; flush any accumulated, unprocessed A/D data and stop
  315.          * listening until current utterance completely decoded
  316.          */
  317.         ad_stop_rec(ad);
  318.         while (ad_read(ad, adbuf, 4096) >= 0);
  319.         cont_ad_reset(cont);
  320.  
  321.         printf("Stopped listening, please wait...\n");
  322.         fflush(stdout);
  323.         /* Finish decoding, obtain and print result */
  324.         ps_end_utt(ps);
  325.         hyp = ps_get_hyp(ps, NULL, &uttid);
  326.         printf("%s: %s\n", uttid, hyp);
  327.         fflush(stdout);
  328.  
  329.         /* Exit if the first word spoken was GOODBYE */
  330.         if (hyp) {
  331.             sscanf(hyp, "%s", word);
  332.             if (strcmp(word, "GOODBYE") == 0)
  333.             {
  334.                 system("espeak -v es-la \"Gracias por venir a la presentacion del Barcamp\"");
  335.                 //break;
  336.             }
  337.             else if(strcmp(word, "HELLO") == 0)
  338.             {
  339.                 system("espeak -v es-la \"Hola, bienvenidos a la presentacion del Barcamp\"");
  340.                 //break;
  341.             }
  342.             else if(strcmp(word, "MUSIC") == 0)
  343.             {
  344.                 FILE *fpipe;
  345.                 char *command = (char *)"aplay BobMarley.wav";
  346.                 char line[256];
  347.                
  348.                 if ( !(fpipe = (FILE*)popen(command,"r")) )
  349.                 {  // If fpipe is NULL
  350.                   perror("Problems with pipe");
  351.                   exit(1);
  352.                 }
  353.                 fgets( line, sizeof line, fpipe);                
  354.                 pid_t pid = strtoul(line, NULL, 10);
  355.                 printf("The id is %d\n", pid);
  356.  
  357.                          
  358.  
  359.                 //system("aplay BobMarley.wav");
  360.                 //break;
  361.             }
  362.         }
  363.  
  364.         /* Resume A/D recording for next utterance */
  365.         if (ad_start_rec(ad) < 0)
  366.             E_FATAL("Failed to start recording\n");
  367.     }
  368.  
  369.     cont_ad_close(cont);
  370.     ad_close(ad);
  371. }
  372.  
  373. static jmp_buf jbuf;
  374. static void
  375. sighandler(int signo)
  376. {
  377.     longjmp(jbuf, 1);
  378. }
  379.  
  380. int
  381. main(int argc, char *argv[])
  382. {
  383.     char const *cfg;
  384.  
  385.     if (argc == 2) {
  386.         config = cmd_ln_parse_file_r(NULL, cont_args_def, argv[1], TRUE);
  387.     }
  388.     else {
  389.         config = cmd_ln_parse_r(NULL, cont_args_def, argc, argv, FALSE);
  390.     }
  391.     /* Handle argument file as -argfile. */
  392.     if (config && (cfg = cmd_ln_str_r(config, "-argfile")) != NULL) {
  393.         config = cmd_ln_parse_file_r(config, cont_args_def, cfg, FALSE);
  394.     }
  395.     if (config == NULL)
  396.         return 1;
  397.  
  398.     ps = ps_init(config);
  399.     if (ps == NULL)
  400.         return 1;
  401.  
  402.     E_INFO("%s COMPILED ON: %s, AT: %s\n\n", argv[0], __DATE__, __TIME__);
  403.  
  404.     if (cmd_ln_str_r(config, "-infile") != NULL) {
  405.     recognize_from_file();
  406.     } else {
  407.  
  408.         /* Make sure we exit cleanly (needed for profiling among other things) */
  409.     /* Signals seem to be broken in arm-wince-pe. */
  410. #if !defined(GNUWINCE) && !defined(_WIN32_WCE) && !defined(__SYMBIAN32__)
  411.     signal(SIGINT, &sighandler);
  412. #endif
  413.  
  414.         if (setjmp(jbuf) == 0) {
  415.         recognize_from_microphone();
  416.     }
  417.     }
  418.  
  419.     ps_free(ps);
  420.     return 0;
  421. }
  422.  
  423. /** Silvio Moioli: Windows CE/Mobile entry point added. */
  424. #if defined(_WIN32_WCE)
  425. #pragma comment(linker,"/entry:mainWCRTStartup")
  426. #include <windows.h>
  427.  
  428. //Windows Mobile has the Unicode main only
  429. int wmain(int32 argc, wchar_t *wargv[]) {
  430.     char** argv;
  431.     size_t wlen;
  432.     size_t len;
  433.     int i;
  434.  
  435.     argv = malloc(argc*sizeof(char*));
  436.     for (i=0; i<argc; i++){
  437.         wlen = lstrlenW(wargv[i]);
  438.         len = wcstombs(NULL, wargv[i], wlen);
  439.         argv[i] = malloc(len+1);
  440.         wcstombs(argv[i], wargv[i], wlen);
  441.     }
  442.  
  443.     //assuming ASCII parameters
  444.     return main(argc, argv);
  445. }
  446. #endif
Advertisement
Add Comment
Please, Sign In to add comment