Using Espeak SAPI / dll on Windows?

Question: I am trying to use a text-to-speech mechanism. So I got it working wounderfully on linux (code below). Now I also wanted to transfer this basic program to windows, but it is almost impossible ...

Part of the problem is that in dll windows only for AUDIO_OUTPUT_SYNCHRONOUS, which means it needs a callback, but I can’t figure out how to play the sound from the callback ... It crashed first, and then I realized, I need callback function, now I get the data in the callback function, but I don’t know how to play it ... because it is not a wav file and does not play automatically, as in Linux.

The sourceforge site is pretty useless because it mainly uses the SAPI version, but then there is no example on how to use the sapi espeak dll ...

Anyway, here is my code, can anyone help?

#ifdef __cplusplus #include <cstdio> #include <cstdlib> #include <cstring> #else #include <stdio.h> #include <stdlib.h> #include <string.h> #endif #include <assert.h> #include <ctype.h> //#include "speak_lib.h" #include "espeak/speak_lib.h" // libespeak-dev: /usr/include/espeak/speak_lib.h // apt-get install libespeak-dev // apt-get install libportaudio-dev // g++ -o mine mine.cpp -lespeak // g++ -o mine mine.cpp -I/usr/include/espeak/ -lespeak // gcc -o mine mine.cpp -I/usr/include/espeak/ -lespeak char voicename[40]; int samplerate; int quiet = 0; static char genders[4] = {' ','M','F',' '}; //const char *data_path = "/usr/share/"; // /usr/share/espeak-data/ const char *data_path = NULL; // use default path for espeak-data int strrcmp(const char *s, const char *sub) { int slen = strlen(s); int sublen = strlen(sub); return memcmp(s + slen - sublen, sub, sublen); } char * strrcpy(char *dest, const char *source) { // Pre assertions assert(dest != NULL); assert(source != NULL); assert(dest != source); // tk: parentheses while((*dest++ = *source++)) ; return(--dest); } const char* GetLanguageVoiceName(const char* pszShortSign) { #define LANGUAGE_LENGTH 30 static char szReturnValue[LANGUAGE_LENGTH] ; memset(szReturnValue, 0, LANGUAGE_LENGTH); for (int i = 0; pszShortSign[i] != '\0'; ++i) szReturnValue[i] = (char) tolower(pszShortSign[i]); const espeak_VOICE **voices; espeak_VOICE voice_select; voices = espeak_ListVoices(NULL); const espeak_VOICE *v; for(int ix=0; (v = voices[ix]) != NULL; ix++) { if( !strrcmp( v->languages, szReturnValue) ) { strcpy(szReturnValue, v->name); return szReturnValue; } } // End for strcpy(szReturnValue, "default"); return szReturnValue; } // End function getvoicename void ListVoices() { const espeak_VOICE **voices; espeak_VOICE voice_select; voices = espeak_ListVoices(NULL); const espeak_VOICE *v; for(int ix=0; (v = voices[ix]) != NULL; ix++) { printf("Shortsign: %s\n", v->languages); printf("age: %d\n", v->age); printf("gender: %c\n", genders[v->gender]); printf("name: %s\n", v->name); printf("\n\n"); } // End for } // End function getvoicename int main() { printf("Hello World!\n"); const char* szVersionInfo = espeak_Info(NULL); printf("Espeak version: %s\n", szVersionInfo); samplerate = espeak_Initialize(AUDIO_OUTPUT_PLAYBACK,0,data_path,0); strcpy(voicename, "default"); // espeak --voices strcpy(voicename, "german"); strcpy(voicename, GetLanguageVoiceName("DE")); if(espeak_SetVoiceByName(voicename) != EE_OK) { printf("Espeak setvoice error...\n"); } static char word[200] = "Hello World" ; strcpy(word, "TV-fäns aufgepasst, es ist 20 Uhr 15. Zeit für Rambo 3"); strcpy(word, "Unnamed Player wurde zum Opfer von GSG9"); int speed = 220; int volume = 500; // volume in range 0-100 0=silence int pitch = 50; // base pitch, range 0-100. 50=normal // espeak.cpp 625 espeak_SetParameter(espeakRATE, speed, 0); espeak_SetParameter(espeakVOLUME,volume,0); espeak_SetParameter(espeakPITCH,pitch,0); // espeakRANGE: pitch range, range 0-100. 0-monotone, 50=normal // espeakPUNCTUATION: which punctuation characters to announce: // value in espeak_PUNCT_TYPE (none, all, some), espeak_VOICE *voice_spec = espeak_GetCurrentVoice(); voice_spec->gender=2; // 0=none 1=male, 2=female, //voice_spec->age = age; espeak_SetVoiceByProperties(voice_spec); espeak_Synth( (char*) word, strlen(word)+1, 0, POS_CHARACTER, 0, espeakCHARS_AUTO, NULL, NULL); espeak_Synchronize(); strcpy(voicename, GetLanguageVoiceName("EN")); espeak_SetVoiceByName(voicename); strcpy(word, "Geany was fragged by GSG9 Googlebot"); strcpy(word, "Googlebot"); espeak_Synth( (char*) word, strlen(word)+1, 0, POS_CHARACTER, 0, espeakCHARS_AUTO, NULL, NULL); espeak_Synchronize(); espeak_Terminate(); printf("Espeak terminated\n"); return EXIT_SUCCESS; } /* if(espeak_SetVoiceByName(voicename) != EE_OK) { memset(&voice_select,0,sizeof(voice_select)); voice_select.languages = voicename; if(espeak_SetVoiceByProperties(&voice_select) != EE_OK) { fprintf(stderr,"%svoice '%s'\n",err_load,voicename); exit(2); } } */ 

The above code is for Linux. Below is the code on how far I got Vista x64 (32 bit emu):

 #ifdef __cplusplus #include <cstdio> #include <cstdlib> #include <cstring> #else #include <stdio.h> #include <stdlib.h> #include <string.h> #endif #include <assert.h> #include <ctype.h> #include "speak_lib.h" //#include "espeak/speak_lib.h" // libespeak-dev: /usr/include/espeak/speak_lib.h // apt-get install libespeak-dev // apt-get install libportaudio-dev // g++ -o mine mine.cpp -lespeak // g++ -o mine mine.cpp -I/usr/include/espeak/ -lespeak // gcc -o mine mine.cpp -I/usr/include/espeak/ -lespeak char voicename[40]; int iSampleRate; int quiet = 0; static char genders[4] = {' ','M','F',' '}; //const char *data_path = "/usr/share/"; // /usr/share/espeak-data/ //const char *data_path = NULL; // use default path for espeak-data const char *data_path = "C:\\Users\\Username\\Desktop\\espeak-1.43-source\\espeak-1.43-source\\"; int strrcmp(const char *s, const char *sub) { int slen = strlen(s); int sublen = strlen(sub); return memcmp(s + slen - sublen, sub, sublen); } char * strrcpy(char *dest, const char *source) { // Pre assertions assert(dest != NULL); assert(source != NULL); assert(dest != source); // tk: parentheses while((*dest++ = *source++)) ; return(--dest); } const char* GetLanguageVoiceName(const char* pszShortSign) { #define LANGUAGE_LENGTH 30 static char szReturnValue[LANGUAGE_LENGTH] ; memset(szReturnValue, 0, LANGUAGE_LENGTH); for (int i = 0; pszShortSign[i] != '\0'; ++i) szReturnValue[i] = (char) tolower(pszShortSign[i]); const espeak_VOICE **voices; espeak_VOICE voice_select; voices = espeak_ListVoices(NULL); const espeak_VOICE *v; for(int ix=0; (v = voices[ix]) != NULL; ix++) { if( !strrcmp( v->languages, szReturnValue) ) { strcpy(szReturnValue, v->name); return szReturnValue; } } // End for strcpy(szReturnValue, "default"); return szReturnValue; } // End function getvoicename void ListVoices() { const espeak_VOICE **voices; espeak_VOICE voice_select; voices = espeak_ListVoices(NULL); const espeak_VOICE *v; for(int ix=0; (v = voices[ix]) != NULL; ix++) { printf("Shortsign: %s\n", v->languages); printf("age: %d\n", v->age); printf("gender: %c\n", genders[v->gender]); printf("name: %s\n", v->name); printf("\n\n"); } // End for } // End function getvoicename /* Callback from espeak. Directly speaks using AudioTrack. */ #define LOGI(x) printf("%s\n", x) static int AndroidEspeakDirectSpeechCallback(short *wav, int numsamples, espeak_EVENT *events) { char buf[100]; sprintf(buf, "AndroidEspeakDirectSpeechCallback: %d samples", numsamples); LOGI(buf); if (wav == NULL) { LOGI("Null: speech has completed"); } if (numsamples > 0) { //audout->write(wav, sizeof(short) * numsamples); sprintf(buf, "AudioTrack wrote: %d bytes", sizeof(short) * numsamples); LOGI(buf); } return 0; // continue synthesis (1 is to abort) } static int AndroidEspeakSynthToFileCallback(short *wav, int numsamples,espeak_EVENT *events) { char buf[100]; sprintf(buf, "AndroidEspeakSynthToFileCallback: %d samples", numsamples); LOGI(buf); if (wav == NULL) { LOGI("Null: speech has completed"); } // The user data should contain the file pointer of the file to write to //void* user_data = events->user_data; FILE* user_data = fopen ( "myfile1.wav" , "ab" ); FILE* fp = static_cast<FILE *>(user_data); // Write all of the samples fwrite(wav, sizeof(short), numsamples, fp); return 0; // continue synthesis (1 is to abort) } int main() { printf("Hello World!\n"); const char* szVersionInfo = espeak_Info(NULL); printf("Espeak version: %s\n", szVersionInfo); iSampleRate = espeak_Initialize(AUDIO_OUTPUT_SYNCHRONOUS, 4096, data_path, 0); if (iSampleRate <= 0) { printf("Unable to initialize espeak"); return EXIT_FAILURE; } //samplerate = espeak_Initialize(AUDIO_OUTPUT_PLAYBACK,0,data_path,0); //ListVoices(); strcpy(voicename, "default"); // espeak --voices //strcpy(voicename, "german"); //strcpy(voicename, GetLanguageVoiceName("DE")); if(espeak_SetVoiceByName(voicename) != EE_OK) { printf("Espeak setvoice error...\n"); } static char word[200] = "Hello World" ; strcpy(word, "TV-fäns aufgepasst, es ist 20 Uhr 15. Zeit für Rambo 3"); strcpy(word, "Unnamed Player wurde zum Opfer von GSG9"); int speed = 220; int volume = 500; // volume in range 0-100 0=silence int pitch = 50; // base pitch, range 0-100. 50=normal // espeak.cpp 625 espeak_SetParameter(espeakRATE, speed, 0); espeak_SetParameter(espeakVOLUME,volume,0); espeak_SetParameter(espeakPITCH,pitch,0); // espeakRANGE: pitch range, range 0-100. 0-monotone, 50=normal // espeakPUNCTUATION: which punctuation characters to announce: // value in espeak_PUNCT_TYPE (none, all, some), //espeak_VOICE *voice_spec = espeak_GetCurrentVoice(); //voice_spec->gender=2; // 0=none 1=male, 2=female, //voice_spec->age = age; //espeak_SetVoiceByProperties(voice_spec); //espeak_SetSynthCallback(AndroidEspeakDirectSpeechCallback); espeak_SetSynthCallback(AndroidEspeakSynthToFileCallback); unsigned int unique_identifier; espeak_ERROR err = espeak_Synth( (char*) word, strlen(word)+1, 0, POS_CHARACTER, 0, espeakCHARS_AUTO, &unique_identifier, NULL); err = espeak_Synchronize(); /* strcpy(voicename, GetLanguageVoiceName("EN")); espeak_SetVoiceByName(voicename); strcpy(word, "Geany was fragged by GSG9 Googlebot"); strcpy(word, "Googlebot"); espeak_Synth( (char*) word, strlen(word)+1, 0, POS_CHARACTER, 0, espeakCHARS_AUTO, NULL, NULL); espeak_Synchronize(); */ // espeak_Cancel(); espeak_Terminate(); printf("Espeak terminated\n"); system("pause"); return EXIT_SUCCESS; } 
+6
c ++ c espeak text-to-speech
source share
2 answers

A few changes to the source code are necessary so that the window library has the same functionality as Linux. I have listed the changes here . A ready-to-use binary is also available.

All patches and descriptions were also sent to a third-party developer (publicly, through the mailing list and tracking patches), so it may be available directly in the future.

+3
source share

Have you tried passing the buffer you received in your callback to sndplaysnd () ??

 Declare Function sndPlaySound Lib "winmm.dll" Alias "sndPlaySoundA" (ByVal lpszSoundName As String, ByVal uFlags As Long) As Long 

Its standard winAPI is as follows:

  sndPlaySound(buffer[0], SND_ASYNC | SND_MEMORY) 

Alternatively, if you have a wav file that has sound in it:

  sndPlaySound(filename, SND_ASYNC) 

playound has an ASYNC mode that does not block the execution of your program during audio playback.

NOTE. I used it in VB, and the above snippets are used for VB. If you are coding in VC ++, you may need to modify them accordingly. But the basic intention remains unchanged; pass buffer to sndPlaySound with ASYNC flag set.

GOOD LUCK !!

+1
source share

All Articles