OLD | NEW |
(Empty) | |
| 1 /* Copyright (C) 2002 Jean-Marc Valin |
| 2 File: vbr.c |
| 3 |
| 4 VBR-related routines |
| 5 |
| 6 Redistribution and use in source and binary forms, with or without |
| 7 modification, are permitted provided that the following conditions |
| 8 are met: |
| 9 |
| 10 - Redistributions of source code must retain the above copyright |
| 11 notice, this list of conditions and the following disclaimer. |
| 12 |
| 13 - Redistributions in binary form must reproduce the above copyright |
| 14 notice, this list of conditions and the following disclaimer in the |
| 15 documentation and/or other materials provided with the distribution. |
| 16 |
| 17 - Neither the name of the Xiph.org Foundation nor the names of its |
| 18 contributors may be used to endorse or promote products derived from |
| 19 this software without specific prior written permission. |
| 20 |
| 21 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 22 ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 23 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 24 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR |
| 25 CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| 26 EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| 27 PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| 28 PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
| 29 LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
| 30 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
| 31 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 32 |
| 33 */ |
| 34 |
| 35 #ifdef HAVE_CONFIG_H |
| 36 #include "config.h" |
| 37 #endif |
| 38 |
| 39 #include "vbr.h" |
| 40 #include <math.h> |
| 41 |
| 42 |
/* Square of x.  The argument is expanded twice, so it must not have side
   effects (no sqr(i++), no sqr(f()) where f has side effects). */
#define sqr(x) ((x)*(x))

/* Energy constant: added to the frame energy before taking its log, and used
   as the threshold a frame's energy must exceed before it may update the
   noise estimate. */
#define MIN_ENERGY 6000
/* Exponent applied to energies (pow(ener, NOISE_POW)) before they are
   accumulated into, or compared against, the noise estimate. */
#define NOISE_POW .3
| 47 |
| 48 #ifndef DISABLE_VBR |
| 49 |
/*
  Narrowband VBR thresholds: 9 rows of 11 values each.  The label on each
  row is the bit-rate noted by the original author for that row.

  NOTE(review): the 11 columns are presumably indexed by the VBR quality
  setting (0..10) and the rows by narrowband mode number -- confirm against
  the encoder code that reads this table.
*/
const float vbr_nb_thresh[9][11]={
   /* CNG */
   {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f},
   /* 2 kbps */
   { 4.0f,  2.5f,  2.0f,  1.2f,  0.5f,  0.0f, -0.5f, -0.7f, -0.8f, -0.9f, -1.0f},
   /* 6 kbps */
   {10.0f,  6.5f,  5.2f,  4.5f,  3.9f,  3.5f,  3.0f,  2.5f,  2.3f,  1.8f,  1.0f},
   /* 8 kbps */
   {11.0f,  8.8f,  7.5f,  6.5f,  5.0f,  3.9f,  3.9f,  3.9f,  3.5f,  3.0f,  1.0f},
   /* 11 kbps */
   {11.0f, 11.0f,  9.9f,  8.5f,  7.0f,  6.0f,  4.5f,  4.0f,  4.0f,  4.0f,  2.0f},
   /* 15 kbps */
   {11.0f, 11.0f, 11.0f, 11.0f,  9.5f,  8.5f,  8.0f,  7.0f,  6.0f,  5.0f,  3.0f},
   /* 18 kbps */
   {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f,  9.5f,  8.5f,  7.0f,  6.0f,  5.0f},
   /* 24 kbps */
   {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f,  9.8f,  9.5f,  7.5f},
   /* 4 kbps */
   { 7.0f,  4.5f,  3.7f,  3.0f,  2.5f,  2.0f,  1.8f,  1.5f,  1.0f,  0.0f,  0.0f}
};
| 61 |
| 62 |
/*
  High-band VBR thresholds: 5 rows of 11 values each.  The label on each
  row is the one given by the original author for that row.

  NOTE(review): presumably indexed [high-band mode][quality 0..10], in the
  same way as the narrowband table -- confirm against the wideband encoder.
*/
const float vbr_hb_thresh[5][11]={
   /* silence */
   {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f},
   /* 2 kbps */
   {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f},
   /* 6 kbps */
   {11.0f, 11.0f,  9.5f,  8.5f,  7.5f,  6.0f,  5.0f,  3.9f,  3.0f,  2.0f,  1.0f},
   /* 10 kbps */
   {11.0f, 11.0f, 11.0f, 11.0f, 11.0f,  9.5f,  8.7f,  7.8f,  7.0f,  6.5f,  4.0f},
   /* 18 kbps */
   {11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f, 11.0f,  9.8f,  7.5f,  5.5f}
};
| 70 |
/*
  Ultra-high-band VBR thresholds: 2 rows of 11 values each, labelled as in
  the original source.

  NOTE(review): presumably indexed [mode][quality 0..10] like the other
  threshold tables -- confirm against the code that reads it.
*/
const float vbr_uhb_thresh[2][11]={
   /* silence */
   {-1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f, -1.0f},
   /* 2 kbps */
   { 3.9f,  2.5f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f,  0.0f, -1.0f}
};
| 75 |
| 76 void vbr_init(VBRState *vbr) |
| 77 { |
| 78 int i; |
| 79 |
| 80 vbr->average_energy=0; |
| 81 vbr->last_energy=1; |
| 82 vbr->accum_sum=0; |
| 83 vbr->energy_alpha=.1; |
| 84 vbr->soft_pitch=0; |
| 85 vbr->last_pitch_coef=0; |
| 86 vbr->last_quality=0; |
| 87 |
| 88 vbr->noise_accum = .05*pow(MIN_ENERGY, NOISE_POW); |
| 89 vbr->noise_accum_count=.05; |
| 90 vbr->noise_level=vbr->noise_accum/vbr->noise_accum_count; |
| 91 vbr->consec_noise=0; |
| 92 |
| 93 |
| 94 for (i=0;i<VBR_MEMORY_SIZE;i++) |
| 95 vbr->last_log_energy[i] = log(MIN_ENERGY); |
| 96 } |
| 97 |
| 98 |
| 99 /* |
| 100 This function should analyse the signal and decide how critical the |
| 101 coding error will be perceptually. The following factors should be |
| 102 taken into account: |
| 103 |
| 104 -Attacks (positive energy derivative) should be coded with more bits |
| 105 |
| 106 -Stationary voiced segments should receive more bits |
| 107 |
| 108 -Segments with (very) low absolute energy should receive less bits (maybe |
| 109 only shaped noise?) |
| 110 |
| 111 -DTX for near-zero energy? |
| 112 |
| 113 -Stationary fricative segments should have less bits |
| 114 |
| 115 -Temporal masking: when energy slope is decreasing, decrease the bit-rate |
| 116 |
| 117 -Decrease bit-rate for males (low pitch)? |
| 118 |
| 119 -(wideband only) less bits in the high-band when signal is very |
| 120 non-stationary (harder to notice high-frequency noise)??? |
| 121 |
| 122 */ |
| 123 |
| 124 float vbr_analysis(VBRState *vbr, spx_word16_t *sig, int len, int pitch, float p
itch_coef) |
| 125 { |
| 126 int i; |
| 127 float ener=0, ener1=0, ener2=0; |
| 128 float qual=7; |
| 129 int va; |
| 130 float log_energy; |
| 131 float non_st=0; |
| 132 float voicing; |
| 133 float pow_ener; |
| 134 |
| 135 for (i=0;i<len>>1;i++) |
| 136 ener1 += ((float)sig[i])*sig[i]; |
| 137 |
| 138 for (i=len>>1;i<len;i++) |
| 139 ener2 += ((float)sig[i])*sig[i]; |
| 140 ener=ener1+ener2; |
| 141 |
| 142 log_energy = log(ener+MIN_ENERGY); |
| 143 for (i=0;i<VBR_MEMORY_SIZE;i++) |
| 144 non_st += sqr(log_energy-vbr->last_log_energy[i]); |
| 145 non_st = non_st/(30*VBR_MEMORY_SIZE); |
| 146 if (non_st>1) |
| 147 non_st=1; |
| 148 |
| 149 voicing = 3*(pitch_coef-.4)*fabs(pitch_coef-.4); |
| 150 vbr->average_energy = (1-vbr->energy_alpha)*vbr->average_energy + vbr->energy
_alpha*ener; |
| 151 vbr->noise_level=vbr->noise_accum/vbr->noise_accum_count; |
| 152 pow_ener = pow(ener,NOISE_POW); |
| 153 if (vbr->noise_accum_count<.06 && ener>MIN_ENERGY) |
| 154 vbr->noise_accum = .05*pow_ener; |
| 155 |
| 156 if ((voicing<.3 && non_st < .2 && pow_ener < 1.2*vbr->noise_level) |
| 157 || (voicing<.3 && non_st < .05 && pow_ener < 1.5*vbr->noise_level) |
| 158 || (voicing<.4 && non_st < .05 && pow_ener < 1.2*vbr->noise_level) |
| 159 || (voicing<0 && non_st < .05)) |
| 160 { |
| 161 float tmp; |
| 162 va = 0; |
| 163 vbr->consec_noise++; |
| 164 if (pow_ener > 3*vbr->noise_level) |
| 165 tmp = 3*vbr->noise_level; |
| 166 else |
| 167 tmp = pow_ener; |
| 168 if (vbr->consec_noise>=4) |
| 169 { |
| 170 vbr->noise_accum = .95*vbr->noise_accum + .05*tmp; |
| 171 vbr->noise_accum_count = .95*vbr->noise_accum_count + .05; |
| 172 } |
| 173 } else { |
| 174 va = 1; |
| 175 vbr->consec_noise=0; |
| 176 } |
| 177 |
| 178 if (pow_ener < vbr->noise_level && ener>MIN_ENERGY) |
| 179 { |
| 180 vbr->noise_accum = .95*vbr->noise_accum + .05*pow_ener; |
| 181 vbr->noise_accum_count = .95*vbr->noise_accum_count + .05; |
| 182 } |
| 183 |
| 184 /* Checking for very low absolute energy */ |
| 185 if (ener < 30000) |
| 186 { |
| 187 qual -= .7; |
| 188 if (ener < 10000) |
| 189 qual-=.7; |
| 190 if (ener < 3000) |
| 191 qual-=.7; |
| 192 } else { |
| 193 float short_diff, long_diff; |
| 194 short_diff = log((ener+1)/(1+vbr->last_energy)); |
| 195 long_diff = log((ener+1)/(1+vbr->average_energy)); |
| 196 /*fprintf (stderr, "%f %f\n", short_diff, long_diff);*/ |
| 197 |
| 198 if (long_diff<-5) |
| 199 long_diff=-5; |
| 200 if (long_diff>2) |
| 201 long_diff=2; |
| 202 |
| 203 if (long_diff>0) |
| 204 qual += .6*long_diff; |
| 205 if (long_diff<0) |
| 206 qual += .5*long_diff; |
| 207 if (short_diff>0) |
| 208 { |
| 209 if (short_diff>5) |
| 210 short_diff=5; |
| 211 qual += .5*short_diff; |
| 212 } |
| 213 /* Checking for energy increases */ |
| 214 if (ener2 > 1.6*ener1) |
| 215 qual += .5; |
| 216 } |
| 217 vbr->last_energy = ener; |
| 218 vbr->soft_pitch = .6*vbr->soft_pitch + .4*pitch_coef; |
| 219 qual += 2.2*((pitch_coef-.4) + (vbr->soft_pitch-.4)); |
| 220 |
| 221 if (qual < vbr->last_quality) |
| 222 qual = .5*qual + .5*vbr->last_quality; |
| 223 if (qual<4) |
| 224 qual=4; |
| 225 if (qual>10) |
| 226 qual=10; |
| 227 |
| 228 /* |
| 229 if (vbr->consec_noise>=2) |
| 230 qual-=1.3; |
| 231 if (vbr->consec_noise>=5) |
| 232 qual-=1.3; |
| 233 if (vbr->consec_noise>=12) |
| 234 qual-=1.3; |
| 235 */ |
| 236 if (vbr->consec_noise>=3) |
| 237 qual=4; |
| 238 |
| 239 if (vbr->consec_noise) |
| 240 qual -= 1.0 * (log(3.0 + vbr->consec_noise)-log(3)); |
| 241 if (qual<0) |
| 242 qual=0; |
| 243 |
| 244 if (ener<60000) |
| 245 { |
| 246 if (vbr->consec_noise>2) |
| 247 qual-=0.5*(log(3.0 + vbr->consec_noise)-log(3)); |
| 248 if (ener<10000&&vbr->consec_noise>2) |
| 249 qual-=0.5*(log(3.0 + vbr->consec_noise)-log(3)); |
| 250 if (qual<0) |
| 251 qual=0; |
| 252 qual += .3*log(.0001+ener/60000.0); |
| 253 } |
| 254 if (qual<-1) |
| 255 qual=-1; |
| 256 |
| 257 /*printf ("%f %f %f %f %d\n", qual, voicing, non_st, pow_ener/(.01+vbr->noise
_level), va);*/ |
| 258 |
| 259 vbr->last_pitch_coef = pitch_coef; |
| 260 vbr->last_quality = qual; |
| 261 |
| 262 for (i=VBR_MEMORY_SIZE-1;i>0;i--) |
| 263 vbr->last_log_energy[i] = vbr->last_log_energy[i-1]; |
| 264 vbr->last_log_energy[0] = log_energy; |
| 265 |
| 266 /*printf ("VBR: %f %f %f %d %f\n", (float)(log_energy-log(vbr->average_energy
+MIN_ENERGY)), non_st, voicing, va, vbr->noise_level);*/ |
| 267 |
| 268 return qual; |
| 269 } |
| 270 |
| 271 void vbr_destroy(VBRState *vbr) |
| 272 { |
| 273 } |
| 274 |
| 275 #endif /* #ifndef DISABLE_VBR */ |
OLD | NEW |