diff --git a/include/sphinxbase/cmn.h b/include/sphinxbase/cmn.h
index d91555e5..9a9f22c9 100644
--- a/include/sphinxbase/cmn.h
+++ b/include/sphinxbase/cmn.h
@@ -110,7 +110,8 @@ extern "C" {
 typedef enum cmn_type_e {
     CMN_NONE = 0,
     CMN_CURRENT,
-    CMN_PRIOR
+    CMN_PRIOR,
+    CMN_ADAPT
 } cmn_type_t;
 
 /** String representations of cmn_type_t values. */
@@ -127,6 +128,8 @@ cmn_type_t cmn_type_from_str(const char *str);
 
 typedef struct {
     mfcc_t *cmn_mean;   /**< Temporary variable: current means */
+    mfcc_t *max;        /**< Per-coefficient maximum magnitude tracked across calls by cmn_adapt() */
+    mfcc_t *cur;        /**< Per-coefficient maximum magnitude of the block last seen by cmn_adapt() */
     mfcc_t *cmn_var;    /**< Temporary variables: stored the cmn variance */
     mfcc_t *sum;        /**< The sum of the cmn frames */
     int32 nframe;       /**< Number of frames */
@@ -184,6 +187,18 @@ void cmn_prior_get(cmn_t *cmn, mfcc_t *vec);
 SPHINXBASE_EXPORT
 void cmn_free (cmn_t *cmn);
 
+/**
+ * CMN for one block of data, using adapted mean
+ */
+SPHINXBASE_EXPORT
+void cmn_adapt(cmn_t *cmn,      /**< In/Out: cmn normalization state; cmn_mean, max, cur,
+                                   sum and nframe are updated */
+               mfcc_t **incep,  /**< In/Out: mfc[f] = mfc vector in frame f */
+               int32 varnorm,   /**< In: if non-zero, also divide each coefficient by its tracked maximum */
+               int32 nfr        /**< In: number of incoming frames */
+    );
+
+
 #ifdef __cplusplus
 }
 #endif
diff --git a/include/sphinxbase/feat.h b/include/sphinxbase/feat.h
index 5f16a646..c7deef09 100644
--- a/include/sphinxbase/feat.h
+++ b/include/sphinxbase/feat.h
@@ -77,15 +77,15 @@ extern "C" {
 { "-cmn", \
   ARG_STRING, \
   "current", \
-  "Cepstral mean normalization scheme ('current', 'prior', or 'none')" }, \
+  "Cepstral mean normalization scheme ('current', 'prior', 'adapt' or 'none')" }, \
 { "-cmninit", \
   ARG_STRING, \
   "8.0", \
-  "Initial values (comma-separated) for cepstral mean when 'prior' is used" }, \
+  "Initial values (comma-separated) for cepstral mean when 'prior' or 'adapt' is used" }, \
 { "-varnorm", \
   ARG_BOOLEAN, \
   "no", \
-  "Variance normalize each utterance (only if CMN == current)" }, \
+  "Variance normalize each utterance (only if CMN == 'current' or 'adapt')" }, \
 { "-agc", \
   ARG_STRING, \
   "none", \
diff --git a/src/libsphinxbase/feat/Makefile.am b/src/libsphinxbase/feat/Makefile.am
index a2ad62d5..42893f3d 100644
--- a/src/libsphinxbase/feat/Makefile.am
+++ b/src/libsphinxbase/feat/Makefile.am
@@ -4,6 +4,7 @@ libsphinxfeat_la_SOURCES = \
 	agc.c \
 	cmn.c \
 	cmn_prior.c \
+	cmn_adapt.c \
 	lda.c \
 	feat.c
 
diff --git a/src/libsphinxbase/feat/cmn.c b/src/libsphinxbase/feat/cmn.c
index c133c19a..51e7e295 100644
--- a/src/libsphinxbase/feat/cmn.c
+++ b/src/libsphinxbase/feat/cmn.c
@@ -110,7 +110,8 @@
 const char *cmn_type_str[] = {
     "none",
     "current",
-    "prior"
+    "prior",
+    "adapt"
 };
 static const int n_cmn_type_str = sizeof(cmn_type_str)/sizeof(cmn_type_str[0]);
 
@@ -136,8 +137,11 @@ cmn_init(int32 veclen)
     cmn->cmn_mean = (mfcc_t *) ckd_calloc(veclen, sizeof(mfcc_t));
     cmn->cmn_var = (mfcc_t *) ckd_calloc(veclen, sizeof(mfcc_t));
     cmn->sum = (mfcc_t *) ckd_calloc(veclen, sizeof(mfcc_t));
+    cmn->max = (mfcc_t *) ckd_calloc(veclen, sizeof(mfcc_t));
+    cmn->cur = (mfcc_t *) ckd_calloc(veclen, sizeof(mfcc_t));
     /* A front-end dependent magic number */
     cmn->cmn_mean[0] = FLOAT2MFCC(12.0);
+    cmn->max[0] = FLOAT2MFCC(24.0);
     cmn->nframe = 0;
     E_INFO("mean[0]= %.2f, mean[1..%d]= 0.0\n",
            MFCC2FLOAT(cmn->cmn_mean[0]), veclen - 1);
@@ -233,6 +237,12 @@ cmn_free(cmn_t * cmn)
         if (cmn->sum)
             ckd_free((void *) cmn->sum);
 
+        if (cmn->max)
+            ckd_free((void *) cmn->max);
+
+        if (cmn->cur)
+            ckd_free((void *) cmn->cur);
+
         ckd_free((void *) cmn);
     }
 }
diff --git a/src/libsphinxbase/feat/cmn_adapt.c b/src/libsphinxbase/feat/cmn_adapt.c
new file mode 100644
index 00000000..fff461ff
--- /dev/null
+++ b/src/libsphinxbase/feat/cmn_adapt.c
@@ -0,0 +1,113 @@
+/*
+ * Warning: supporting only floating point operations
+ *
+ * 16-Feb-2016 Zamir Ostroukhov
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#ifdef _MSC_VER
+#pragma warning (disable: 4244)
+#endif
+
+#include "sphinxbase/ckd_alloc.h"
+#include "sphinxbase/cmn.h"
+
+/* You can change it for best result */
+#define ZERO_SUBSTITUTION 0.0000001f
+#define COEF_MEAN_PER_FRAME 0.001f /* speed ratio adaptation for mean (per frame) */
+#define COEF_POWER_PER_FRAME 0.01f /* speed ratio adaptation for max power (per frame) */
+#define COEF_MAX_PER_CALL 0.1f /* speed ratio adaptation for maximums (per function call) */
+#define COEF_MAX_PER_ERROR 1.1f /* speed ratio adaptation for maximums, when an error is detected (per function call) */
+#define THRESHOLD_MAX_ERROR 2.0f /* threshold for error detector */
+
+/*
+ * Magnitude of one cepstral value.  The integer abs() must not be used
+ * here: it would convert a floating point mfcc_t to int first and so
+ * truncate it.
+ */
+static mfcc_t
+cmn_adapt_abs(mfcc_t v)
+{
+    return (v < 0) ? -v : v;
+}
+
+/*
+ * Normalize nfr frames of incep in place with an adaptively tracked mean.
+ *
+ * Once per call, for every coefficient j, cmn->cur[j] is set to the largest
+ * magnitude found in this block and cmn->max[j] is moved towards it.  Then,
+ * frame by frame, cmn->cmn_mean is moved towards the frame (weighted by the
+ * frame's incep[i][0] relative to cmn->max[0]) and subtracted from it.  When
+ * varnorm is non-zero each coefficient is additionally divided by
+ * cmn->max[j].  Does nothing when nfr <= 0.
+ */
+void
+cmn_adapt(cmn_t *cmn, mfcc_t **incep, int32 varnorm, int32 nfr)
+{
+    mfcc_t u_prob, prob0, e_prob;
+    int32 i, j;
+
+    if (nfr <= 0)
+        return;
+
+    for (j = 0; j < cmn->veclen; j++) {
+        /* Peak magnitude of coefficient j within this block. */
+        cmn->cur[j] = 0.0f;
+        for (i = 0; i < nfr; i++) {
+            mfcc_t mag = cmn_adapt_abs(incep[i][j]);
+
+            if (mag > cmn->cur[j])
+                cmn->cur[j] = mag;
+        }
+
+        /* Keep the block peak strictly positive. */
+        if (cmn->cur[j] == 0.0f)
+            cmn->cur[j] = ZERO_SUBSTITUTION;
+
+        /* A tracked maximum of zero has not been set yet: seed it. */
+        if (cmn->max[j] == 0.0f)
+            cmn->max[j] = cmn->cur[j] * COEF_MAX_PER_ERROR;
+
+        if (cmn->cur[j] > cmn->max[j] * THRESHOLD_MAX_ERROR) {
+            /* Tracked maximum is far too small: jump to the new peak. */
+            cmn->max[j] = cmn->cur[j] * COEF_MAX_PER_ERROR;
+        }
+        else {
+            /* Blend towards the block peak; the weight is capped. */
+            u_prob = (cmn->cur[j] / cmn->max[j]) * COEF_MAX_PER_CALL;
+            if (u_prob > COEF_MAX_PER_CALL)
+                u_prob = COEF_MAX_PER_CALL;
+
+            cmn->max[j] = cmn->cur[j] * u_prob + cmn->max[j] * (1.0f - u_prob);
+        }
+    }
+
+    prob0 = (cmn->cur[0] / cmn->max[0]) * COEF_POWER_PER_FRAME;
+    if (prob0 > COEF_POWER_PER_FRAME)
+        prob0 = COEF_POWER_PER_FRAME;
+
+    for (i = 0; i < nfr; i++) {
+        if (cmn->cur[0] > cmn->max[0])
+            cmn->max[0] = cmn->cur[0] * prob0 + cmn->max[0] * (1.0f - prob0);
+
+        /* Frames whose first coefficient is not positive leave the mean unchanged. */
+        if (incep[i][0] <= 0.0f)
+            e_prob = 0.0f;
+        else
+            e_prob = incep[i][0] / cmn->max[0] * COEF_MEAN_PER_FRAME;
+
+        for (j = 0; j < cmn->veclen; j++) {
+            /* sum and nframe are maintained for compatibility with the prior method */
+            cmn->sum[j] += incep[i][j];
+            cmn->cmn_mean[j] = incep[i][j] * e_prob + cmn->cmn_mean[j] * (1.0f - e_prob);
+            incep[i][j] -= cmn->cmn_mean[j];
+            if (varnorm)
+                incep[i][j] /= cmn->max[j];
+        }
+
+        ++cmn->nframe;
+    }
+}
diff --git a/src/libsphinxbase/feat/feat.c b/src/libsphinxbase/feat/feat.c
index d2252fd8..57be1cc7 100644
--- a/src/libsphinxbase/feat/feat.c
+++ b/src/libsphinxbase/feat/feat.c
@@ -918,7 +918,7 @@ feat_cmn(feat_t *fcb, mfcc_t **mfc, int32 nfr, int32 beginutt, int32 endutt)
     cmn_type_t cmn_type = fcb->cmn;
 
     if (!(beginutt && endutt)
-        && cmn_type != CMN_NONE) /* Only cmn_prior in block computation mode. */
+        && cmn_type != CMN_NONE && cmn_type != CMN_ADAPT ) /* Only cmn_prior or cmn_adapt in block computation mode. */
         fcb->cmn = cmn_type = CMN_PRIOR;
 
     switch (cmn_type) {
@@ -929,6 +929,9 @@ feat_cmn(feat_t *fcb, mfcc_t **mfc, int32 nfr, int32 beginutt, int32 endutt)
         cmn_prior(fcb->cmn_struct, mfc, fcb->varnorm, nfr);
         if (endutt)
             cmn_prior_update(fcb->cmn_struct);
+        break;
+    case CMN_ADAPT:
+        cmn_adapt(fcb->cmn_struct, mfc, fcb->varnorm, nfr);
         break;
     default:
         ;
diff --git a/win32/sphinxbase/sphinxbase.vcxproj b/win32/sphinxbase/sphinxbase.vcxproj
index 7a97cdf8..68c40d1b 100755
--- a/win32/sphinxbase/sphinxbase.vcxproj
+++ b/win32/sphinxbase/sphinxbase.vcxproj
@@ -62,7 +62,7 @@
 MaxSpeed
 AnySuitable
 ../../include/win32;../../include;%(AdditionalIncludeDirectories)
-NDEBUG;_USRDLL;SPHINX_DLL;SPHINXBASE_EXPORTS;HAVE_CONFIG_H;_CRT_SECURE_NO_DEPRECATE;%(PreprocessorDefinitions)
+NDEBUG;_USRDLL;SPHINXBASE_EXPORTS;HAVE_CONFIG_H;_CRT_SECURE_NO_DEPRECATE;%(PreprocessorDefinitions)
 MultiThreadedDLL
 true
 .\$(Configuration)\$(Platform)/sphinxbase.pch
@@ -102,7 +102,7 @@
 Disabled
 ../../include/win32;../../include;%(AdditionalIncludeDirectories)
-_DEBUG;_USRDLL;SPHINX_DLL;SPHINXBASE_EXPORTS;HAVE_CONFIG_H;_CRT_SECURE_NO_DEPRECATE;%(PreprocessorDefinitions)
+_DEBUG;_USRDLL;SPHINXBASE_EXPORTS;HAVE_CONFIG_H;_CRT_SECURE_NO_DEPRECATE;%(PreprocessorDefinitions)
 true
 EnableFastChecks
 MultiThreadedDebugDLL
@@ -137,6 +137,7 @@
+
@@ -174,6 +175,7 @@
+
@@ -209,11 +211,13 @@
+
+
diff --git a/win32/sphinxbase/sphinxbase.vcxproj.filters b/win32/sphinxbase/sphinxbase.vcxproj.filters
index a80d32e2..25989464 100755
--- a/win32/sphinxbase/sphinxbase.vcxproj.filters
+++ b/win32/sphinxbase/sphinxbase.vcxproj.filters
@@ -35,6 +35,9 @@
 Source Files
+
+Source Files
+
 Source Files
@@ -86,6 +89,9 @@
 Source Files
+
+Source Files
+
 Source Files
@@ -247,6 +253,9 @@
 Header Files
+
+Header Files
+
 Header Files
@@ -271,6 +280,9 @@
 Header Files
+
+Header Files
+
 Header Files