patched-ffmpeg-mt/libavcodec/wmavoice.c - Issue 789004: ffmpeg roll of source to mar 9 version...

Side by Side Diff: patched-ffmpeg-mt/libavcodec/wmavoice.c

Issue 789004: ffmpeg roll of source to mar 9 version... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/ffmpeg/

Patch Set: '' Created 10 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

OLD	NEW
(Empty)
	1 /*

	2 * Windows Media Audio Voice decoder.

	3 * Copyright (c) 2009 Ronald S. Bultje

	4 *

	5 * This file is part of FFmpeg.

	6 *

	7 * FFmpeg is free software; you can redistribute it and/or

	8 * modify it under the terms of the GNU Lesser General Public

	9 * License as published by the Free Software Foundation; either

	10 * version 2.1 of the License, or (at your option) any later version.

	11 *

	12 * FFmpeg is distributed in the hope that it will be useful,

	13 * but WITHOUT ANY WARRANTY; without even the implied warranty of

	14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU

	15 * Lesser General Public License for more details.

	16 *

	17 * You should have received a copy of the GNU Lesser General Public

	18 * License along with FFmpeg; if not, write to the Free Software

	19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

	20 */

	21

	22 /**

	23 * @file libavcodec/wmavoice.c

	24 * @brief Windows Media Audio Voice compatible decoder

	25 * @author Ronald S. Bultje <rsbultje@gmail.com>

	26 */

	27

	28 #include <math.h>

	29 #include "avcodec.h"

	30 #include "get_bits.h"

	31 #include "put_bits.h"

	32 #include "wmavoice_data.h"

	33 #include "celp_math.h"

	34 #include "celp_filters.h"

	35 #include "acelp_vectors.h"

	36 #include "acelp_filters.h"

	37 #include "lsp.h"

	38 #include "libavutil/lzo.h"

	39

	40 #define MAX_BLOCKS 8 ///< maximum number of blocks per frame

	41 #define MAX_LSPS 16 ///< maximum filter order

	42 #define MAX_FRAMES 3 ///< maximum number of frames per superframe

	43 #define MAX_FRAMESIZE 160 ///< maximum number of samples per frame

	44 #define MAX_SIGNAL_HISTORY 416 ///< maximum excitation signal history

	45 #define MAX_SFRAMESIZE (MAX_FRAMESIZE * MAX_FRAMES)

	46 ///< maximum number of samples per superframe

	47 #define SFRAME_CACHE_MAXSIZE 256 ///< maximum cache size for frame data that

	48 ///< was split over two packets

	49 #define VLC_NBITS 6 ///< number of bits to read per VLC iteration

	50

	51 /**

	52 * Frame type VLC coding.

	53 */

	54 static VLC frame_type_vlc;

	55

	/**
	 * Adaptive codebook types.
	 */
	enum {
	    /** no adaptive codebook (only hardcoded fixed) */
	    ACB_TYPE_NONE       = 0,
	    /**
	     * adaptive codebook with per-frame pitch, which we interpolate to
	     * get a per-sample pitch. Signal is generated using an asymmetric
	     * sinc window function.
	     * @note see #wmavoice_ipol1_coeffs
	     */
	    ACB_TYPE_ASYMMETRIC = 1,
	    /**
	     * per-block pitch with signal generation using a Hamming sinc
	     * window function.
	     * @note see #wmavoice_ipol2_coeffs
	     */
	    ACB_TYPE_HAMMING    = 2
	};

	70

	/**
	 * Fixed codebook types.
	 */
	enum {
	    /**
	     * comfort noise during silence, generated from a hardcoded (fixed)
	     * codebook with per-frame (low) gain values
	     */
	    FCB_TYPE_SILENCE    = 0,
	    /** hardcoded (fixed) codebook with per-block gain values */
	    FCB_TYPE_HARDCODED  = 1,
	    /**
	     * pitch-adaptive window (AW) pulse signals, used in particular for
	     * low-bitrate streams
	     */
	    FCB_TYPE_AW_PULSES  = 2,
	    /**
	     * innovation (fixed) codebook pulse sets in combinations of either
	     * single pulses or pulse pairs
	     */
	    FCB_TYPE_EXC_PULSES = 3,
	};

	86

	87 /**

	88 * Description of frame types.

	89 */

	90 static const struct frame_type_desc {

	91 uint8_t n_blocks; ///< amount of blocks per frame (each block

	92 ///< (contains 160/#n_blocks samples)

	93 uint8_t log_n_blocks; ///< log2(#n_blocks)

	94 uint8_t acb_type; ///< Adaptive codebook type (ACB_TYPE_*)

	95 uint8_t fcb_type; ///< Fixed codebook type (FCB_TYPE_*)

	96 uint8_t dbl_pulses; ///< how many pulse vectors have pulse pairs

	97 ///< (rather than just one single pulse)

	98 ///< only if #fcb_type == #FCB_TYPE_EXC_PULSES

	99 uint16_t frame_size; ///< the amount of bits that make up the block

	100 ///< data (per frame)

	101 } frame_descs[17] = {

	102 { 1, 0, ACB_TYPE_NONE, FCB_TYPE_SILENCE, 0, 0 },

	103 { 2, 1, ACB_TYPE_NONE, FCB_TYPE_HARDCODED, 0, 28 },

	104 { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_AW_PULSES, 0, 46 },

	105 { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2, 80 },

	106 { 2, 1, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 104 },

	107 { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 0, 108 },

	108 { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 2, 132 },

	109 { 4, 2, ACB_TYPE_ASYMMETRIC, FCB_TYPE_EXC_PULSES, 5, 168 },

	110 { 2, 1, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 0, 64 },

	111 { 2, 1, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 2, 80 },

	112 { 2, 1, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 5, 104 },

	113 { 4, 2, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 0, 108 },

	114 { 4, 2, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 2, 132 },

	115 { 4, 2, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 5, 168 },

	116 { 8, 3, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 0, 176 },

	117 { 8, 3, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 2, 208 },

	118 { 8, 3, ACB_TYPE_HAMMING, FCB_TYPE_EXC_PULSES, 5, 256 }

	119 };

	120

	121 /**

	122 * WMA Voice decoding context.

	123 */

	124 typedef struct {

	125 /**

	126 * @defgroup struct_global Global values

	127 * Global values, specified in the stream header / extradata or used

	128 * all over.

	129 * @{

	130 */

	131 GetBitContext gb; ///< packet bitreader. During decoder init,

	132 ///< it contains the extradata from the

	133 ///< demuxer. During decoding, it contains

	134 ///< packet data.

	135 int8_t vbm_tree[25]; ///< converts VLC codes to frame type

	136

	137 int spillover_bitsize; ///< number of bits used to specify

	138 ///< #spillover_nbits in the packet header

	139 ///< = ceil(log2(ctx->block_align << 3))

	140 int history_nsamples; ///< number of samples in history for signal

	141 ///< prediction (through ACB)

	142

	143 int do_apf; ///< whether to apply the averaged

	144 ///< projection filter (APF)

	145

	146 int lsps; ///< number of LSPs per frame [10 or 16]

	147 int lsp_q_mode; ///< defines quantizer defaults [0, 1]

	148 int lsp_def_mode; ///< defines different sets of LSP defaults

	149 ///< [0, 1]

	150 int frame_lsp_bitsize; ///< size (in bits) of LSPs, when encoded

	151 ///< per-frame (independent coding)

	152 int sframe_lsp_bitsize; ///< size (in bits) of LSPs, when encoded

	153 ///< per superframe (residual coding)

	154

	155 int min_pitch_val; ///< base value for pitch parsing code

	156 int max_pitch_val; ///< max value + 1 for pitch parsing

	157 int pitch_nbits; ///< number of bits used to specify the

	158 ///< pitch value in the frame header

	159 int block_pitch_nbits; ///< number of bits used to specify the

	160 ///< first block's pitch value

	161 int block_pitch_range; ///< range of the block pitch

	162 int block_delta_pitch_nbits; ///< number of bits used to specify the

	163 ///< delta pitch between this and the last

	164 ///< block's pitch value, used in all but

	165 ///< first block

	166 int block_delta_pitch_hrange; ///< 1/2 range of the delta (full range is

	167 ///< from -this to +this-1)

	168 uint16_t block_conv_table[4]; ///< boundaries for block pitch unit/scale

	169 ///< conversion

	170

	171 /**

	172 * @}

	173 * @defgroup struct_packet Packet values

	174 * Packet values, specified in the packet header or related to a packet.

	175 * A packet is considered to be a single unit of data provided to this

	176 * decoder by the demuxer.

	177 * @{

	178 */

	179 int spillover_nbits; ///< number of bits of the previous packet's

	180 ///< last superframe preceeding this

	181 ///< packet's first full superframe (useful

	182 ///< for re-synchronization also)

	183 int has_residual_lsps; ///< if set, superframes contain one set of

	184 ///< LSPs that cover all frames, encoded as

	185 ///< independent and residual LSPs; if not

	186 ///< set, each frame contains its own, fully

	187 ///< independent, LSPs

	188 int skip_bits_next; ///< number of bits to skip at the next call

	189 ///< to #wmavoice_decode_packet() (since

	190 ///< they're part of the previous superframe)

	191

	192 uint8_t sframe_cache[SFRAME_CACHE_MAXSIZE + FF_INPUT_BUFFER_PADDING_SIZE];

	193 ///< cache for superframe data split over

	194 ///< multiple packets

	195 int sframe_cache_size; ///< set to >0 if we have data from an

	196 ///< (incomplete) superframe from a previous

	197 ///< packet that spilled over in the current

	198 ///< packet; specifies the amount of bits in

	199 ///< #sframe_cache

	200 PutBitContext pb; ///< bitstream writer for #sframe_cache

	201

	202 /**

	203 * @}

	204 * @defgroup struct_frame Frame and superframe values

	205 * Superframe and frame data - these can change from frame to frame,

	206 * although some of them do in that case serve as a cache / history for

	207 * the next frame or superframe.

	208 * @{

	209 */

	210 double prev_lsps[MAX_LSPS]; ///< LSPs of the last frame of the previous

	211 ///< superframe

	212 int last_pitch_val; ///< pitch value of the previous frame

	213 int last_acb_type; ///< frame type [0-2] of the previous frame

	214 int pitch_diff_sh16; ///< ((cur_pitch_val - #last_pitch_val)

	215 ///< << 16) / #MAX_FRAMESIZE

	216 float silence_gain; ///< set for use in blocks if #ACB_TYPE_NONE

	217

	218 int aw_idx_is_ext; ///< whether the AW index was encoded in

	219 ///< 8 bits (instead of 6)

	220 int aw_pulse_range; ///< the range over which #aw_pulse_set1()

	221 ///< can apply the pulse, relative to the

	222 ///< value in aw_first_pulse_off. The exact

	223 ///< position of the first AW-pulse is within

	224 ///< [pulse_off, pulse_off + this], and

	225 ///< depends on bitstream values; [16 or 24]

	226 int aw_n_pulses[2]; ///< number of AW-pulses in each block; note

	227 ///< that this number can be negative (in

	228 ///< which case it basically means "zero")

	229 int aw_first_pulse_off[2]; ///< index of first sample to which to

	230 ///< apply AW-pulses, or -0xff if unset

	231 int aw_next_pulse_off_cache; ///< the position (relative to start of the

	232 ///< second block) at which pulses should

	233 ///< start to be positioned, serves as a

	234 ///< cache for pitch-adaptive window pulses

	235 ///< between blocks

	236

	237 int frame_cntr; ///< current frame index [0 - 0xFFFE]; is

	238 ///< only used for comfort noise in #pRNG()

	239 float gain_pred_err[6]; ///< cache for gain prediction

	240 float excitation_history[MAX_SIGNAL_HISTORY];

	241 ///< cache of the signal of previous

	242 ///< superframes, used as a history for

	243 ///< signal generation

	244 float synth_history[MAX_LSPS]; ///< see #excitation_history

	245 /**

	246 * @}

	247 */

	248 } WMAVoiceContext;

	249

	250 /**

	251 * Sets up the variable bit mode (VBM) tree from container extradata.

	252 * @param gb bit I/O context.

	253 * The bit context (s->gb) should be loaded with byte 23-46 of the

	254 * container extradata (i.e. the ones containing the VBM tree).

	255 * @param vbm_tree pointer to array to which the decoded VBM tree will be

	256 * written.

	257 * @return 0 on success, <0 on error.

	258 */

	259 static av_cold int decode_vbmtree(GetBitContext *gb, int8_t vbm_tree[25])

	260 {

	261 static const uint8_t bits[] = {

	262 2, 2, 2, 4, 4, 4,

	263 6, 6, 6, 8, 8, 8,

	264 10, 10, 10, 12, 12, 12,

	265 14, 14, 14, 14

	266 };

	267 static const uint16_t codes[] = {

	268 0x0000, 0x0001, 0x0002, // 00/01/10

	269 0x000c, 0x000d, 0x000e, // 11+00/01/10

	270 0x003c, 0x003d, 0x003e, // 1111+00/01/10

	271 0x00fc, 0x00fd, 0x00fe, // 111111+00/01/10

	272 0x03fc, 0x03fd, 0x03fe, // 11111111+00/01/10

	273 0x0ffc, 0x0ffd, 0x0ffe, // 1111111111+00/01/10

	274 0x3ffc, 0x3ffd, 0x3ffe, 0x3fff // 111111111111+xx

	275 };

	276 int cntr[8], n, res;

	277

	278 memset(vbm_tree, 0xff, sizeof(vbm_tree));

	279 memset(cntr, 0, sizeof(cntr));

	280 for (n = 0; n < 17; n++) {

	281 res = get_bits(gb, 3);

	282 if (cntr[res] > 3) // should be >= 3 + (res == 7))

	283 return -1;

	284 vbm_tree[res * 3 + cntr[res]++] = n;

	285 }

	286 INIT_VLC_STATIC(&frame_type_vlc, VLC_NBITS, sizeof(bits),

	287 bits, 1, 1, codes, 2, 2, 132);

	288 return 0;

	289 }

	290

	291 /**

	292 * Set up decoder with parameters from demuxer (extradata etc.).

	293 */

	294 static av_cold int wmavoice_decode_init(AVCodecContext *ctx)

	295 {

	296 int n, flags, pitch_range, lsp16_flag;

	297 WMAVoiceContext *s = ctx->priv_data;

	298

	299 /**

	300 * Extradata layout:

	301 * - byte 0-18: WMAPro-in-WMAVoice extradata (see wmaprodec.c),

	302 * - byte 19-22: flags field (annoyingly in LE; see below for known

	303 * values),

	304 * - byte 23-46: variable bitmode tree (really just 17 * 3 bits,

	305 * rest is 0).

	306 */

	307 if (ctx->extradata_size != 46) {

	308 av_log(ctx, AV_LOG_ERROR,

	309 "Invalid extradata size %d (should be 46)\n",

	310 ctx->extradata_size);

	311 return -1;

	312 }

	313 flags = AV_RL32(ctx->extradata + 18);

	314 s->spillover_bitsize = 3 + av_ceil_log2(ctx->block_align);

	315 s->do_apf = flags & 0x1;

	316 s->lsp_q_mode = !!(flags & 0x2000);

	317 s->lsp_def_mode = !!(flags & 0x4000);

	318 lsp16_flag = flags & 0x1000;

	319 if (lsp16_flag) {

	320 s->lsps = 16;

	321 s->frame_lsp_bitsize = 34;

	322 s->sframe_lsp_bitsize = 60;

	323 } else {

	324 s->lsps = 10;

	325 s->frame_lsp_bitsize = 24;

	326 s->sframe_lsp_bitsize = 48;

	327 }

	328 for (n = 0; n < s->lsps; n++)

	329 s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);

	330

	331 init_get_bits(&s->gb, ctx->extradata + 22, (ctx->extradata_size - 22) << 3);

	332 if (decode_vbmtree(&s->gb, s->vbm_tree) < 0) {

	333 av_log(ctx, AV_LOG_ERROR, "Invalid VBM tree; broken extradata?\n");

	334 return -1;

	335 }

	336

	337 s->min_pitch_val = ((ctx->sample_rate << 8) / 400 + 50) >> 8;

	338 s->max_pitch_val = ((ctx->sample_rate << 8) * 37 / 2000 + 50) >> 8;

	339 pitch_range = s->max_pitch_val - s->min_pitch_val;

	340 s->pitch_nbits = av_ceil_log2(pitch_range);

	341 s->last_pitch_val = 40;

	342 s->last_acb_type = ACB_TYPE_NONE;

	343 s->history_nsamples = s->max_pitch_val + 8;

	344

	345 if (s->min_pitch_val < 1 \|\| s->history_nsamples > MAX_SIGNAL_HISTORY) {

	346 int min_sr = ((((1 << 8) - 50) * 400) + 0xFF) >> 8,

	347 max_sr = ((((MAX_SIGNAL_HISTORY - 8) << 8) + 205) * 2000 / 37) >> 8;

	348

	349 av_log(ctx, AV_LOG_ERROR,

	350 "Unsupported samplerate %d (min=%d, max=%d)\n",

	351 ctx->sample_rate, min_sr, max_sr); // 322-22097 Hz

	352

	353 return -1;

	354 }

	355

	356 s->block_conv_table[0] = s->min_pitch_val;

	357 s->block_conv_table[1] = (pitch_range * 25) >> 6;

	358 s->block_conv_table[2] = (pitch_range * 44) >> 6;

	359 s->block_conv_table[3] = s->max_pitch_val - 1;

	360 s->block_delta_pitch_hrange = (pitch_range >> 3) & ~0xF;

	361 s->block_delta_pitch_nbits = 1 + av_ceil_log2(s->block_delta_pitch_hrange);

	362 s->block_pitch_range = s->block_conv_table[2] +

	363 s->block_conv_table[3] + 1 +

	364 2 * (s->block_conv_table[1] - 2 * s->min_pitch _val);

	365 s->block_pitch_nbits = av_ceil_log2(s->block_pitch_range);

	366

	367 ctx->sample_fmt = SAMPLE_FMT_FLT;

	368

	369 return 0;

	370 }

	371

	/**
	 * Dequantize LSPs
	 *
	 * The output is zeroed and then, for each stage n, the table row selected
	 * by values[n] is scaled by mul_q[n], offset by base_q[n] and added to the
	 * output. The tables of consecutive stages are stored back to back, each
	 * occupying sizes[n] rows of num entries.
	 *
	 * @param lsps output pointer to the array that will hold the LSPs
	 * @param num number of LSPs to be dequantized
	 * @param values quantized values, contains n_stages values
	 * @param sizes range (i.e. max value) of each quantized value
	 * @param n_stages number of dequantization runs
	 * @param table dequantization table to be used
	 * @param mul_q LSF multiplier
	 * @param base_q base (lowest) LSF values
	 */
	static void dequant_lsps(double *lsps, int num,
	                         const uint16_t *values,
	                         const uint16_t *sizes,
	                         int n_stages, const uint8_t *table,
	                         const double *mul_q,
	                         const double *base_q)
	{
	    int n, m;

	    memset(lsps, 0, num * sizeof(*lsps));
	    for (n = 0; n < n_stages; n++) {
	        /* row of this stage's table that the quantized value points at */
	        const uint8_t *t_off = &table[values[n] * num];
	        double base = base_q[n], mul = mul_q[n];

	        for (m = 0; m < num; m++)
	            lsps[m] += base + mul * t_off[m];

	        /* advance to the start of the next stage's table */
	        table += sizes[n] * num;
	    }
	}

	403

	404 /**

	405 * @defgroup lsp_dequant LSP dequantization routines

	406 * LSP dequantization routines, for 10/16LSPs and independent/residual coding.

	407 * @note we assume enough bits are available, caller should check.

	408 * lsp10i() consumes 24 bits; lsp10r() consumes an additional 24 bits;

	409 * lsp16i() consumes 34 bits; lsp16r() consumes an additional 26 bits.

	410 * @{

	411 */

	412 /**

	413 * Parse 10 independently-coded LSPs.

	414 */

	415 static void dequant_lsp10i(GetBitContext gb, double lsps)

	416 {

	417 static const uint16_t vec_sizes[4] = { 256, 64, 32, 32 };

	418 static const double mul_lsf[4] = {

	419 5.2187144800e-3, 1.4626986422e-3,

	420 9.6179549166e-4, 1.1325736225e-3

	421 };

	422 static const double base_lsf[4] = {

	423 M_PI * -2.15522e-1, M_PI * -6.1646e-2,

	424 M_PI * -3.3486e-2, M_PI * -5.7408e-2

	425 };

	426 uint16_t v[4];

	427

	428 v[0] = get_bits(gb, 8);

	429 v[1] = get_bits(gb, 6);

	430 v[2] = get_bits(gb, 5);

	431 v[3] = get_bits(gb, 5);

	432

	433 dequant_lsps(lsps, 10, v, vec_sizes, 4, wmavoice_dq_lsp10i,

	434 mul_lsf, base_lsf);

	435 }

	436

	437 /**

	438 * Parse 10 independently-coded LSPs, and then derive the tables to

	439 * generate LSPs for the other frames from them (residual coding).

	440 */

	441 static void dequant_lsp10r(GetBitContext *gb,

	442 double i_lsps, const double old,

	443 double a1, double a2, int q_mode)

	444 {

	445 static const uint16_t vec_sizes[3] = { 128, 64, 64 };

	446 static const double mul_lsf[3] = {

	447 2.5807601174e-3, 1.2354460219e-3, 1.1763821673e-3

	448 };

	449 static const double base_lsf[3] = {

	450 M_PI * -1.07448e-1, M_PI * -5.2706e-2, M_PI * -5.1634e-2

	451 };

	452 const float (*ipol_tab)[2][10] = q_mode ?

	453 wmavoice_lsp10_intercoeff_b : wmavoice_lsp10_intercoeff_a;

	454 uint16_t interpol, v[3];

	455 int n;

	456

	457 dequant_lsp10i(gb, i_lsps);

	458

	459 interpol = get_bits(gb, 5);

	460 v[0] = get_bits(gb, 7);

	461 v[1] = get_bits(gb, 6);

	462 v[2] = get_bits(gb, 6);

	463

	464 for (n = 0; n < 10; n++) {

	465 double delta = old[n] - i_lsps[n];

	466 a1[n] = ipol_tab[interpol][0][n] * delta + i_lsps[n];

	467 a1[10 + n] = ipol_tab[interpol][1][n] * delta + i_lsps[n];

	468 }

	469

	470 dequant_lsps(a2, 20, v, vec_sizes, 3, wmavoice_dq_lsp10r,

	471 mul_lsf, base_lsf);

	472 }

	473

	474 /**

	475 * Parse 16 independently-coded LSPs.

	476 */

	477 static void dequant_lsp16i(GetBitContext gb, double lsps)

	478 {

	479 static const uint16_t vec_sizes[5] = { 256, 64, 128, 64, 128 };

	480 static const double mul_lsf[5] = {

	481 3.3439586280e-3, 6.9908173703e-4,

	482 3.3216608306e-3, 1.0334960326e-3,

	483 3.1899104283e-3

	484 };

	485 static const double base_lsf[5] = {

	486 M_PI * -1.27576e-1, M_PI * -2.4292e-2,

	487 M_PI * -1.28094e-1, M_PI * -3.2128e-2,

	488 M_PI * -1.29816e-1

	489 };

	490 uint16_t v[5];

	491

	492 v[0] = get_bits(gb, 8);

	493 v[1] = get_bits(gb, 6);

	494 v[2] = get_bits(gb, 7);

	495 v[3] = get_bits(gb, 6);

	496 v[4] = get_bits(gb, 7);

	497

	498 dequant_lsps( lsps, 5, v, vec_sizes, 2,

	499 wmavoice_dq_lsp16i1, mul_lsf, base_lsf);

	500 dequant_lsps(&lsps[5], 5, &v[2], &vec_sizes[2], 2,

	501 wmavoice_dq_lsp16i2, &mul_lsf[2], &base_lsf[2]);

	502 dequant_lsps(&lsps[10], 6, &v[4], &vec_sizes[4], 1,

	503 wmavoice_dq_lsp16i3, &mul_lsf[4], &base_lsf[4]);

	504 }

	505

	506 /**

	507 * Parse 16 independently-coded LSPs, and then derive the tables to

	508 * generate LSPs for the other frames from them (residual coding).

	509 */

	510 static void dequant_lsp16r(GetBitContext *gb,

	511 double i_lsps, const double old,

	512 double a1, double a2, int q_mode)

	513 {

	514 static const uint16_t vec_sizes[3] = { 128, 128, 128 };

	515 static const double mul_lsf[3] = {

	516 1.2232979501e-3, 1.4062241527e-3, 1.6114744851e-3

	517 };

	518 static const double base_lsf[3] = {

	519 M_PI * -5.5830e-2, M_PI * -5.2908e-2, M_PI * -5.4776e-2

	520 };

	521 const float (*ipol_tab)[2][16] = q_mode ?

	522 wmavoice_lsp16_intercoeff_b : wmavoice_lsp16_intercoeff_a;

	523 uint16_t interpol, v[3];

	524 int n;

	525

	526 dequant_lsp16i(gb, i_lsps);

	527

	528 interpol = get_bits(gb, 5);

	529 v[0] = get_bits(gb, 7);

	530 v[1] = get_bits(gb, 7);

	531 v[2] = get_bits(gb, 7);

	532

	533 for (n = 0; n < 16; n++) {

	534 double delta = old[n] - i_lsps[n];

	535 a1[n] = ipol_tab[interpol][0][n] * delta + i_lsps[n];

	536 a1[16 + n] = ipol_tab[interpol][1][n] * delta + i_lsps[n];

	537 }

	538

	539 dequant_lsps( a2, 10, v, vec_sizes, 1,

	540 wmavoice_dq_lsp16r1, mul_lsf, base_lsf);

	541 dequant_lsps(&a2[10], 10, &v[1], &vec_sizes[1], 1,

	542 wmavoice_dq_lsp16r2, &mul_lsf[1], &base_lsf[1]);

	543 dequant_lsps(&a2[20], 12, &v[2], &vec_sizes[2], 1,

	544 wmavoice_dq_lsp16r3, &mul_lsf[2], &base_lsf[2]);

	545 }

	546

	547 /**

	548 * @}

	549 * @defgroup aw Pitch-adaptive window coding functions

	550 * The next few functions are for pitch-adaptive window coding.

	551 * @{

	552 */

	553 /**

	554 * Parse the offset of the first pitch-adaptive window pulses, and

	555 * the distribution of pulses between the two blocks in this frame.

	556 * @param s WMA Voice decoding context private data

	557 * @param gb bit I/O context

	558 * @param pitch pitch for each block in this frame

	559 */

	560 static void aw_parse_coords(WMAVoiceContext s, GetBitContext gb,

	561 const int *pitch)

	562 {

	563 static const int16_t start_offset[94] = {

	564 -11, -9, -7, -5, -3, -1, 1, 3, 5, 7, 9, 11,

	565 13, 15, 18, 17, 19, 20, 21, 22, 23, 24, 25, 26,

	566 27, 28, 29, 30, 31, 32, 33, 35, 37, 39, 41, 43,

	567 45, 47, 49, 51, 53, 55, 57, 59, 61, 63, 65, 67,

	568 69, 71, 73, 75, 77, 79, 81, 83, 85, 87, 89, 91,

	569 93, 95, 97, 99, 101, 103, 105, 107, 109, 111, 113, 115,

	570 117, 119, 121, 123, 125, 127, 129, 131, 133, 135, 137, 139,

	571 141, 143, 145, 147, 149, 151, 153, 155, 157, 159

	572 };

	573 int bits, offset;

	574

	575 /* position of pulse */

	576 s->aw_idx_is_ext = 0;

	577 if ((bits = get_bits(gb, 6)) >= 54) {

	578 s->aw_idx_is_ext = 1;

	579 bits += (bits - 54) * 3 + get_bits(gb, 2);

	580 }

	581

	582 /* for a repeated pulse at pulse_off with a pitch_lag of pitch[], count

	583 * the distribution of the pulses in each block contained in this frame. */

	584 s->aw_pulse_range = FFMIN(pitch[0], pitch[1]) > 32 ? 24 : 16;

	585 for (offset = start_offset[bits]; offset < 0; offset += pitch[0]) ;

	586 s->aw_n_pulses[0] = (pitch[0] - 1 + MAX_FRAMESIZE / 2 - offset) / pit ch[0];

	587 s->aw_first_pulse_off[0] = offset - s->aw_pulse_range / 2;

	588 offset += s->aw_n_pulses[0] * pitch[0];

	589 s->aw_n_pulses[1] = (pitch[1] - 1 + MAX_FRAMESIZE - offset) / pitch[1 ];

	590 s->aw_first_pulse_off[1] = offset - (MAX_FRAMESIZE + s->aw_pulse_range) / 2;

	591

	592 /* if continuing from a position before the block, reset position to

	593 * start of block (when corrected for the range over which it can be

	594 * spread in aw_pulse_set1()). */

	595 if (start_offset[bits] < MAX_FRAMESIZE / 2) {

	596 while (s->aw_first_pulse_off[1] - pitch[1] + s->aw_pulse_range > 0)

	597 s->aw_first_pulse_off[1] -= pitch[1];

	598 if (start_offset[bits] < 0)

	599 while (s->aw_first_pulse_off[0] - pitch[0] + s->aw_pulse_range > 0)

	600 s->aw_first_pulse_off[0] -= pitch[0];

	601 }

	602 }

	603

	604 /**

	605 * Apply second set of pitch-adaptive window pulses.

	606 * @param s WMA Voice decoding context private data

	607 * @param gb bit I/O context

	608 * @param block_idx block index in frame [0, 1]

	609 * @param fcb structure containing fixed codebook vector info

	610 */

	611 static void aw_pulse_set2(WMAVoiceContext s, GetBitContext gb,

	612 int block_idx, AMRFixed *fcb)

	613 {

	614 uint16_t use_mask[7]; // only 5 are used, rest is padding

	615 /* in this function, idx is the index in the 80-bit (+ padding) use_mask

	616 * bit-array. Since use_mask consists of 16-bit values, the lower 4 bits

	617 * of idx are the position of the bit within a particular item in the

	618 * array (0 being the most significant bit, and 15 being the least

	619 * significant bit), and the remainder (>> 4) is the index in the

	620 * use_mask[]-array. This is faster and uses less memory than using a

	621 * 80-byte/80-int array. */

	622 int pulse_off = s->aw_first_pulse_off[block_idx],

	623 pulse_start, n, idx, range, aidx, start_off = 0;

	624

	625 /* set offset of first pulse to within this block */

	626 if (s->aw_n_pulses[block_idx] > 0)

	627 while (pulse_off + s->aw_pulse_range < 1)

	628 pulse_off += fcb->pitch_lag;

	629

	630 /* find range per pulse */

	631 if (s->aw_n_pulses[0] > 0) {

	632 if (block_idx == 0) {

	633 range = 32;

	634 } else /* block_idx = 1 */ {

	635 range = 8;

	636 if (s->aw_n_pulses[block_idx] > 0)

	637 pulse_off = s->aw_next_pulse_off_cache;

	638 }

	639 } else

	640 range = 16;

	641 pulse_start = s->aw_n_pulses[block_idx] > 0 ? pulse_off - range / 2 : 0;

	642

	643 /* aw_pulse_set1() already applies pulses around pulse_off (to be exactly,

	644 * in the range of [pulse_off, pulse_off + s->aw_pulse_range], and thus

	645 * we exclude that range from being pulsed again in this function. */

	646 memset( use_mask, -1, 5 * sizeof(use_mask[0]));

	647 memset(&use_mask[5], 0, 2 * sizeof(use_mask[0]));

	648 if (s->aw_n_pulses[block_idx] > 0)

	649 for (idx = pulse_off; idx < MAX_FRAMESIZE / 2; idx += fcb->pitch_lag) {

	650 int excl_range = s->aw_pulse_range; // always 16 or 24

	651 uint16_t *use_mask_ptr = &use_mask[idx >> 4];

	652 int first_sh = 16 - (idx & 15);

	653 *use_mask_ptr++ &= 0xFFFF << first_sh;

	654 excl_range -= first_sh;

	655 if (excl_range >= 16) {

	656 *use_mask_ptr++ = 0;

	657 *use_mask_ptr &= 0xFFFF >> (excl_range - 16);

	658 } else

	659 *use_mask_ptr &= 0xFFFF >> excl_range;

	660 }

	661

	662 /* find the 'aidx'th offset that is not excluded */

	663 aidx = get_bits(gb, s->aw_n_pulses[0] > 0 ? 5 - 2 * block_idx : 4);

	664 for (n = 0; n <= aidx; pulse_start++) {

	665 for (idx = pulse_start; idx < 0; idx += fcb->pitch_lag) ;

	666 if (idx >= MAX_FRAMESIZE / 2) { // find from zero

	667 if (use_mask[0]) idx = 0x0F;

	668 else if (use_mask[1]) idx = 0x1F;

	669 else if (use_mask[2]) idx = 0x2F;

	670 else if (use_mask[3]) idx = 0x3F;

	671 else if (use_mask[4]) idx = 0x4F;

	672 else return;

	673 idx -= av_log2_16bit(use_mask[idx >> 4]);

	674 }

	675 if (use_mask[idx >> 4] & (0x8000 >> (idx & 15))) {

	676 use_mask[idx >> 4] &= ~(0x8000 >> (idx & 15));

	677 n++;

	678 start_off = idx;

	679 }

	680 }

	681

	682 fcb->x[fcb->n] = start_off;

	683 fcb->y[fcb->n] = get_bits1(gb) ? -1.0 : 1.0;

	684 fcb->n++;

	685

	686 /* set offset for next block, relative to start of that block */

	687 n = (MAX_FRAMESIZE / 2 - start_off) % fcb->pitch_lag;

	688 s->aw_next_pulse_off_cache = n ? fcb->pitch_lag - n : 0;

	689 }

	690

	691 /**

	692 * Apply first set of pitch-adaptive window pulses.

	693 * @param s WMA Voice decoding context private data

	694 * @param gb bit I/O context

	695 * @param block_idx block index in frame [0, 1]

	696 * @param fcb storage location for fixed codebook pulse info

	697 */

	698 static void aw_pulse_set1(WMAVoiceContext s, GetBitContext gb,

	699 int block_idx, AMRFixed *fcb)

	700 {

	701 int val = get_bits(gb, 12 - 2 * (s->aw_idx_is_ext && !block_idx));

	702 float v;

	703

	704 if (s->aw_n_pulses[block_idx] > 0) {

	705 int n, v_mask, i_mask, sh, n_pulses;

	706

	707 if (s->aw_pulse_range == 24) { // 3 pulses, 1:sign + 3:index each

	708 n_pulses = 3;

	709 v_mask = 8;

	710 i_mask = 7;

	711 sh = 4;

	712 } else { // 4 pulses, 1:sign + 2:index each

	713 n_pulses = 4;

	714 v_mask = 4;

	715 i_mask = 3;

	716 sh = 3;

	717 }

	718

	719 for (n = n_pulses - 1; n >= 0; n--, val >>= sh) {

	720 fcb->y[fcb->n] = (val & v_mask) ? -1.0 : 1.0;

	721 fcb->x[fcb->n] = (val & i_mask) * n_pulses + n +

	722 s->aw_first_pulse_off[block_idx];

	723 while (fcb->x[fcb->n] < 0)

	724 fcb->x[fcb->n] += fcb->pitch_lag;

	725 if (fcb->x[fcb->n] < MAX_FRAMESIZE / 2)

	726 fcb->n++;

	727 }

	728 } else {

	729 int num2 = (val & 0x1FF) >> 1, delta, idx;

	730

	731 if (num2 < 1 * 79) { delta = 1; idx = num2 + 1; }

	732 else if (num2 < 2 * 78) { delta = 3; idx = num2 + 1 - 1 * 77; }

	733 else if (num2 < 3 * 77) { delta = 5; idx = num2 + 1 - 2 * 76; }

	734 else { delta = 7; idx = num2 + 1 - 3 * 75; }

	735 v = (val & 0x200) ? -1.0 : 1.0;

	736

	737 fcb->no_repeat_mask \|= 3 << fcb->n;

	738 fcb->x[fcb->n] = idx - delta;

	739 fcb->y[fcb->n] = v;

	740 fcb->x[fcb->n + 1] = idx;

	741 fcb->y[fcb->n + 1] = (val & 1) ? -v : v;

	742 fcb->n += 2;

	743 }

	744 }

	745

	/**
	 * @}
	 *
	 * Generate a random number from frame_cntr and block_idx, which will lie
	 * in the range [0, 1000 - block_size) (so it can be used as an index in a
	 * table of size 1000 of which you want to read block_size entries).
	 *
	 * @param frame_cntr current frame number
	 * @param block_num current block index
	 * @param block_size amount of entries we want to read from a table
	 *                   that has 1000 entries
	 * @returns a (non-)random number, reduced modulo (1000 - block_size).
	 */
	static int pRNG(int frame_cntr, int block_num, int block_size)
	{
	    /* Lookup table that replaces the division in
	     *   y = (x % 9) * 5 + 6;
	     *   z = (49995 * x) / y;
	     * y can only take 9 values, so for each of them z is rewritten as
	     *   z = x * (49995 / y) + x * ((49995 % y) / y)
	     * Each row belongs to one value of y: the first column is 49995 / y,
	     * the second is the FASTDIV-style multiplier for (49995 % y) / y. */
	    static const unsigned int div_tbl[9][2] = {
	        { 8332,  3 * 715827883U }, // y =  6
	        { 4545,  0 * 390451573U }, // y = 11
	        { 3124, 11 * 268435456U }, // y = 16
	        { 2380, 15 * 204522253U }, // y = 21
	        { 1922, 23 * 165191050U }, // y = 26
	        { 1612, 23 * 138547333U }, // y = 31
	        { 1388, 27 * 119304648U }, // y = 36
	        { 1219, 16 * 104755300U }, // y = 41
	        { 1086, 39 * 93368855U  }  // y = 46
	    };
	    unsigned int scaled, row;
	    unsigned int seed = MUL16(block_num, 1877) + frame_cntr;

	    /* seed is at most 8 * 1877 + 0xFFFE = 0x13AA6, so a single
	     * subtraction is effectively a modulo (%) */
	    if (seed >= 0xFFFF)
	        seed -= 0xFFFF;

	    row    = seed - 9 * MULH(477218589, seed); // seed % 9
	    // scaled = seed * 49995 / (row * 5 + 6), truncated to 16 bits
	    scaled = (uint16_t) (seed * div_tbl[row][0] +
	                         UMULH(seed, div_tbl[row][1]));

	    return scaled % (1000 - block_size);
	}

	790

	791 /**

	792 * Parse hardcoded signal for a single block.

	793 * @note see #synth_block().

	794 */

	795 static void synth_block_hardcoded(WMAVoiceContext s, GetBitContext gb,

	796 int block_idx, int size,

	797 const struct frame_type_desc *frame_desc,

	798 float *excitation)

	799 {

	800 float gain;

	801 int n, r_idx;

	802

	803 assert(size <= MAX_FRAMESIZE);

	804

	805 /* Set the offset from which we start reading wmavoice_std_codebook */

	806 if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {

	807 r_idx = pRNG(s->frame_cntr, block_idx, size);

	808 gain = s->silence_gain;

	809 } else /* FCB_TYPE_HARDCODED */ {

	810 r_idx = get_bits(gb, 8);

	811 gain = wmavoice_gain_universal[get_bits(gb, 6)];

	812 }

	813

	814 /* Clear gain prediction parameters */

	815 memset(s->gain_pred_err, 0, sizeof(s->gain_pred_err));

	816

	817 /* Apply gain to hardcoded codebook and use that as excitation signal */

	818 for (n = 0; n < size; n++)

	819 excitation[n] = wmavoice_std_codebook[r_idx + n] * gain;

	820 }

	821

	822 /**

	823 * Parse FCB/ACB signal for a single block.

	824 * @note see #synth_block().

	825 */

	826 static void synth_block_fcb_acb(WMAVoiceContext s, GetBitContext gb,

	827 int block_idx, int size,

	828 int block_pitch_sh2,

	829 const struct frame_type_desc *frame_desc,

	830 float *excitation)

	831 {

	832 static const float gain_coeff[6] = {

	833 0.8169, -0.06545, 0.1726, 0.0185, -0.0359, 0.0458

	834 };

	835 float pulses[MAX_FRAMESIZE / 2], pred_err, acb_gain, fcb_gain;

	836 int n, idx, gain_weight;

	837 AMRFixed fcb;

	838

	839 assert(size <= MAX_FRAMESIZE / 2);

	840 memset(pulses, 0, sizeof(pulses) size);

	841

	842 fcb.pitch_lag = block_pitch_sh2 >> 2;

	843 fcb.pitch_fac = 1.0;

	844 fcb.no_repeat_mask = 0;

	845 fcb.n = 0;

	846

	847 /* For the other frame types, this is where we apply the innovation

	848 * (fixed) codebook pulses of the speech signal. */

	849 if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {

	850 aw_pulse_set1(s, gb, block_idx, &fcb);

	851 aw_pulse_set2(s, gb, block_idx, &fcb);

	852 } else /* FCB_TYPE_EXC_PULSES */ {

	853 int offset_nbits = 5 - frame_desc->log_n_blocks;

	854

	855 fcb.no_repeat_mask = -1;

	856 /* similar to ff_decode_10_pulses_35bits(), but with single pulses

	857 * (instead of double) for a subset of pulses */

	858 for (n = 0; n < 5; n++) {

	859 float sign;

	860 int pos1, pos2;

	861

	862 sign = get_bits1(gb) ? 1.0 : -1.0;

	863 pos1 = get_bits(gb, offset_nbits);

	864 fcb.x[fcb.n] = n + 5 * pos1;

	865 fcb.y[fcb.n++] = sign;

	866 if (n < frame_desc->dbl_pulses) {

	867 pos2 = get_bits(gb, offset_nbits);

	868 fcb.x[fcb.n] = n + 5 * pos2;

	869 fcb.y[fcb.n++] = (pos1 < pos2) ? -sign : sign;

	870 }

	871 }

	872 }

	873 ff_set_fixed_vector(pulses, &fcb, 1.0, size);

	874

	875 /* Calculate gain for adaptive & fixed codebook signal.

	876 * see ff_amr_set_fixed_gain(). */

	877 idx = get_bits(gb, 7);

	878 fcb_gain = expf(ff_dot_productf(s->gain_pred_err, gain_coeff, 6) -

	879 5.2409161640 + wmavoice_gain_codebook_fcb[idx]);

	880 acb_gain = wmavoice_gain_codebook_acb[idx];

	881 pred_err = av_clipf(wmavoice_gain_codebook_fcb[idx],

	882 -2.9957322736 /* log(0.05) */,

	883 1.6094379124 /* log(5.0) */);

	884

	885 gain_weight = 8 >> frame_desc->log_n_blocks;

	886 memmove(&s->gain_pred_err[gain_weight], s->gain_pred_err,

	887 sizeof(s->gain_pred_err) (6 - gain_weight));

	888 for (n = 0; n < gain_weight; n++)

	889 s->gain_pred_err[n] = pred_err;

	890

	891 /* Calculation of adaptive codebook */

	892 if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {

	893 int len;

	894 for (n = 0; n < size; n += len) {

	895 int next_idx_sh16;

	896 int abs_idx = block_idx * size + n;

	897 int pitch_sh16 = (s->last_pitch_val << 16) +

	898 s->pitch_diff_sh16 * abs_idx;

	899 int pitch = (pitch_sh16 + 0x6FFF) >> 16;

	900 int idx_sh16 = ((pitch << 16) - pitch_sh16) * 8 + 0x58000;

	901 idx = idx_sh16 >> 16;

	902 if (s->pitch_diff_sh16) {

	903 if (s->pitch_diff_sh16 > 0) {

	904 next_idx_sh16 = (idx_sh16) &~ 0xFFFF;

	905 } else

	906 next_idx_sh16 = (idx_sh16 + 0x10000) &~ 0xFFFF;

	907 len = av_clip((idx_sh16 - next_idx_sh16) / s->pitch_diff_sh16 / 8,

	908 1, size - n);

	909 } else

	910 len = size;

	911

	912 ff_acelp_interpolatef(&excitation[n], &excitation[n - pitch],

	913 wmavoice_ipol1_coeffs, 17,

	914 idx, 9, len);

	915 }

	916 } else /* ACB_TYPE_HAMMING */ {

	917 int block_pitch = block_pitch_sh2 >> 2;

	918 idx = block_pitch_sh2 & 3;

	919 if (idx) {

	920 ff_acelp_interpolatef(excitation, &excitation[-block_pitch],

	921 wmavoice_ipol2_coeffs, 4,

	922 idx, 8, size);

	923 } else

	924 av_memcpy_backptr(excitation, sizeof(float) * block_pitch,

	925 sizeof(float) * size);

	926 }

	927

	928 /* Interpolate ACB/FCB and use as excitation signal */

	929 ff_weighted_vector_sumf(excitation, excitation, pulses,

	930 acb_gain, fcb_gain, size);

	931 }

	932

	933 /**

	934 * Parse data in a single block.

	935 * @note we assume enough bits are available, caller should check.

	936 *

	937 * @param s WMA Voice decoding context private data

	938 * @param gb bit I/O context

	939 * @param block_idx index of the to-be-read block

	940 * @param size amount of samples to be read in this block

	941 * @param block_pitch_sh2 pitch for this block << 2

	942 * @param lsps LSPs for (the end of) this frame

	943 * @param prev_lsps LSPs for the last frame

	944 * @param frame_desc frame type descriptor

	945 * @param excitation target memory for the ACB+FCB interpolated signal

	946 * @param synth target memory for the speech synthesis filter output

	947 * @return 0 on success, <0 on error.

	948 */

	949 static void synth_block(WMAVoiceContext s, GetBitContext gb,

	950 int block_idx, int size,

	951 int block_pitch_sh2,

	952 const double lsps, const double prev_lsps,

	953 const struct frame_type_desc *frame_desc,

	954 float excitation, float synth)

	955 {

	956 double i_lsps[MAX_LSPS];

	957 float lpcs[MAX_LSPS];

	958 float fac;

	959 int n;

	960

	961 if (frame_desc->acb_type == ACB_TYPE_NONE)

	962 synth_block_hardcoded(s, gb, block_idx, size, frame_desc, excitation);

	963 else

	964 synth_block_fcb_acb(s, gb, block_idx, size, block_pitch_sh2,

	965 frame_desc, excitation);

	966

	967 /* convert interpolated LSPs to LPCs */

	968 fac = (block_idx + 0.5) / frame_desc->n_blocks;

	969 for (n = 0; n < s->lsps; n++) // LSF -> LSP

	970 i_lsps[n] = cos(prev_lsps[n] + fac * (lsps[n] - prev_lsps[n]));

	971 ff_acelp_lspd2lpc(i_lsps, lpcs, s->lsps >> 1);

	972

	973 /* Speech synthesis */

	974 ff_celp_lp_synthesis_filterf(synth, lpcs, excitation, size, s->lsps);

	975 }

	976

	977 /**

	978 * Synthesize output samples for a single frame.

	979 * @note we assume enough bits are available, caller should check.

	980 *

	981 * @param ctx WMA Voice decoder context

	982 * @param gb bit I/O context (s->gb or one for cross-packet superframes)

	983 * @param samples pointer to output sample buffer, has space for at least 160

	984 * samples

	985 * @param lsps LSP array

	986 * @param prev_lsps array of previous frame's LSPs

	987 * @param excitation target buffer for excitation signal

	988 * @param synth target buffer for synthesized speech data

	989 * @return 0 on success, <0 on error.

	990 */

	991 static int synth_frame(AVCodecContext ctx, GetBitContext gb,

	992 float *samples,

	993 const double lsps, const double prev_lsps,

	994 float excitation, float synth)

	995 {

	996 WMAVoiceContext *s = ctx->priv_data;

	997 int n, n_blocks_x2, log_n_blocks_x2, cur_pitch_val;

	998 int pitch[MAX_BLOCKS], last_block_pitch;

	999

	1000 /* Parse frame type ("frame header"), see frame_descs */

	1001 int bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)],

	1002 block_nsamples = MAX_FRAMESIZE / frame_descs[bd_idx].n_blocks;

	1003

	1004 if (bd_idx < 0) {

	1005 av_log(ctx, AV_LOG_ERROR,

	1006 "Invalid frame type VLC code, skipping\n");

	1007 return -1;

	1008 }

	1009

	1010 /* Pitch calculation for ACB_TYPE_ASYMMETRIC ("pitch-per-frame") */

	1011 if (frame_descs[bd_idx].acb_type == ACB_TYPE_ASYMMETRIC) {

	1012 /* Pitch is provided per frame, which is interpreted as the pitch of

	1013 * the last sample of the last block of this frame. We can interpolate

	1014 * the pitch of other blocks (and even pitch-per-sample) by gradually

	1015 * incrementing/decrementing prev_frame_pitch to cur_pitch_val. */

	1016 n_blocks_x2 = frame_descs[bd_idx].n_blocks << 1;

	1017 log_n_blocks_x2 = frame_descs[bd_idx].log_n_blocks + 1;

	1018 cur_pitch_val = s->min_pitch_val + get_bits(gb, s->pitch_nbits);

	1019 cur_pitch_val = FFMIN(cur_pitch_val, s->max_pitch_val - 1);

	1020 if (s->last_acb_type == ACB_TYPE_NONE \|\|

	1021 20 * abs(cur_pitch_val - s->last_pitch_val) >

	1022 (cur_pitch_val + s->last_pitch_val))

	1023 s->last_pitch_val = cur_pitch_val;

	1024

	1025 /* pitch per block */

	1026 for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {

	1027 int fac = n * 2 + 1;

	1028

	1029 pitch[n] = (MUL16(fac, cur_pitch_val) +

	1030 MUL16((n_blocks_x2 - fac), s->last_pitch_val) +

	1031 frame_descs[bd_idx].n_blocks) >> log_n_blocks_x2;

	1032 }

	1033

	1034 /* "pitch-diff-per-sample" for calculation of pitch per sample */

	1035 s->pitch_diff_sh16 =

	1036 ((cur_pitch_val - s->last_pitch_val) << 16) / MAX_FRAMESIZE;

	1037 }

	1038

	1039 /* Global gain (if silence) and pitch-adaptive window coordinates */

	1040 switch (frame_descs[bd_idx].fcb_type) {

	1041 case FCB_TYPE_SILENCE:

	1042 s->silence_gain = wmavoice_gain_silence[get_bits(gb, 8)];

	1043 break;

	1044 case FCB_TYPE_AW_PULSES:

	1045 aw_parse_coords(s, gb, pitch);

	1046 break;

	1047 }

	1048

	1049 for (n = 0; n < frame_descs[bd_idx].n_blocks; n++) {

	1050 int bl_pitch_sh2;

	1051

	1052 /* Pitch calculation for ACB_TYPE_HAMMING ("pitch-per-block") */

	1053 switch (frame_descs[bd_idx].acb_type) {

	1054 case ACB_TYPE_HAMMING: {

	1055 /* Pitch is given per block. Per-block pitches are encoded as an

	1056 * absolute value for the first block, and then delta values

	1057 * relative to this value) for all subsequent blocks. The scale of

	1058 * this pitch value is semi-logaritmic compared to its use in the

	1059 * decoder, so we convert it to normal scale also. */

	1060 int block_pitch,

	1061 t1 = (s->block_conv_table[1] - s->block_conv_table[0]) << 2,

	1062 t2 = (s->block_conv_table[2] - s->block_conv_table[1]) << 1,

	1063 t3 = s->block_conv_table[3] - s->block_conv_table[2] + 1;

	1064

	1065 if (n == 0) {

	1066 block_pitch = get_bits(gb, s->block_pitch_nbits);

	1067 } else

	1068 block_pitch = last_block_pitch - s->block_delta_pitch_hrange +

	1069 get_bits(gb, s->block_delta_pitch_nbits);

	1070 /* Convert last_ so that any next delta is within _range */

	1071 last_block_pitch = av_clip(block_pitch,

	1072 s->block_delta_pitch_hrange,

	1073 s->block_pitch_range -

	1074 s->block_delta_pitch_hrange);

	1075

	1076 /* Convert semi-log-style scale back to normal scale */

	1077 if (block_pitch < t1) {

	1078 bl_pitch_sh2 = (s->block_conv_table[0] << 2) + block_pitch;

	1079 } else {

	1080 block_pitch -= t1;

	1081 if (block_pitch < t2) {

	1082 bl_pitch_sh2 =

	1083 (s->block_conv_table[1] << 2) + (block_pitch << 1);

	1084 } else {

	1085 block_pitch -= t2;

	1086 if (block_pitch < t3) {

	1087 bl_pitch_sh2 =

	1088 (s->block_conv_table[2] + block_pitch) << 2;

	1089 } else

	1090 bl_pitch_sh2 = s->block_conv_table[3] << 2;

	1091 }

	1092 }

	1093 pitch[n] = bl_pitch_sh2 >> 2;

	1094 break;

	1095 }

	1096

	1097 case ACB_TYPE_ASYMMETRIC: {

	1098 bl_pitch_sh2 = pitch[n] << 2;

	1099 break;

	1100 }

	1101

	1102 default: // ACB_TYPE_NONE has no pitch

	1103 bl_pitch_sh2 = 0;

	1104 break;

	1105 }

	1106

	1107 synth_block(s, gb, n, block_nsamples, bl_pitch_sh2,

	1108 lsps, prev_lsps, &frame_descs[bd_idx],

	1109 &excitation[n * block_nsamples],

	1110 &synth[n * block_nsamples]);

	1111 }

	1112

	1113 /* Averaging projection filter, if applicable. Else, just copy samples

	1114 * from synthesis buffer */

	1115 if (s->do_apf) {

	1116 // FIXME this is where APF would take place, currently not implemented

	1117 av_log_missing_feature(ctx, "APF", 0);

	1118 s->do_apf = 0;

	1119 } //else

	1120 for (n = 0; n < 160; n++)

	1121 samples[n] = av_clipf(synth[n], -1.0, 1.0);

	1122

	1123 /* Cache values for next frame */

	1124 s->frame_cntr++;

	1125 if (s->frame_cntr >= 0xFFFF) s->frame_cntr -= 0xFFFF; // i.e. modulo (%)

	1126 s->last_acb_type = frame_descs[bd_idx].acb_type;

	1127 switch (frame_descs[bd_idx].acb_type) {

	1128 case ACB_TYPE_NONE:

	1129 s->last_pitch_val = 0;

	1130 break;

	1131 case ACB_TYPE_ASYMMETRIC:

	1132 s->last_pitch_val = cur_pitch_val;

	1133 break;

	1134 case ACB_TYPE_HAMMING:

	1135 s->last_pitch_val = pitch[frame_descs[bd_idx].n_blocks - 1];

	1136 break;

	1137 }

	1138

	1139 return 0;

	1140 }

	1141

	/**
	 * Enforce a lower bound on the first LSP, an upper bound on the last LSP,
	 * a minimum distance between neighbouring values, and ascending order.
	 *
	 * @param lsps array of LSPs, modified in place
	 * @param num  size of LSP array
	 *
	 * @note basically a double version of #ff_acelp_reorder_lsf(), might be
	 *       useful to put in a generic location later on. Parts are also
	 *       present in #ff_set_min_dist_lsf() + #ff_sort_nearly_sorted_floats(),
	 *       which is in float.
	 */
	static void stabilize_lsps(double *lsps, int num)
	{
	    const double first_min = 0.0015 * M_PI;
	    const double min_gap   = 0.0125 * M_PI;
	    const double last_max  = 0.9985 * M_PI;
	    int i, j, out_of_order = 0;

	    /* Clamp the first value from below, push every following value at
	     * least min_gap above its predecessor, then clamp the last value
	     * from above. Very similar to ff_set_min_dist_lsf(), but in double. */
	    if (!(lsps[0] > first_min))
	        lsps[0] = first_min;
	    for (i = 1; i < num; i++) {
	        const double lower = lsps[i - 1] + min_gap;

	        if (!(lsps[i] > lower))
	            lsps[i] = lower;
	    }
	    if (lsps[num - 1] > last_max)
	        lsps[num - 1] = last_max;

	    /* The upper clamp can break the ordering; look for any descending
	     * neighbour pair. */
	    for (i = 1; i < num; i++) {
	        if (lsps[i] < lsps[i - 1]) {
	            out_of_order = 1;
	            break;
	        }
	    }
	    if (!out_of_order)
	        return;

	    /* One insertion-sort pass over the whole array. Very similar to
	     * ff_sort_nearly_sorted_floats(), but in double. */
	    for (i = 1; i < num; i++) {
	        const double val = lsps[i];

	        j = i - 1;
	        while (j >= 0 && !(lsps[j] <= val)) {
	            lsps[j + 1] = lsps[j];
	            j--;
	        }
	        lsps[j + 1] = val;
	    }
	}

	1182

	1183 /**

	1184 * Test if there's enough bits to read 1 superframe.

	1185 *

	1186 * @param orig_gb bit I/O context used for reading. This function

	1187 * does not modify the state of the bitreader; it

	1188 * only uses it to copy the current stream position

	1189 * @param s WMA Voice decoding context private data

	1190 * @returns -1 if unsupported, 1 on not enough bits or 0 if OK.

	1191 */

	1192 static int check_bits_for_superframe(GetBitContext *orig_gb,

	1193 WMAVoiceContext *s)

	1194 {

	1195 GetBitContext s_gb, *gb = &s_gb;

	1196 int n, need_bits, bd_idx;

	1197 const struct frame_type_desc *frame_desc;

	1198

	1199 /* initialize a copy */

	1200 init_get_bits(gb, orig_gb->buffer, orig_gb->size_in_bits);

	1201 skip_bits_long(gb, get_bits_count(orig_gb));

	1202 assert(get_bits_left(gb) == get_bits_left(orig_gb));

	1203

	1204 /* superframe header */

	1205 if (get_bits_left(gb) < 14)

	1206 return 1;

	1207 if (!get_bits1(gb))

	1208 return -1; // WMAPro-in-WMAVoice superframe

	1209 if (get_bits1(gb)) skip_bits(gb, 12); // number of samples in superframe

	1210 if (s->has_residual_lsps) { // residual LSPs (for all frames)

	1211 if (get_bits_left(gb) < s->sframe_lsp_bitsize)

	1212 return 1;

	1213 skip_bits_long(gb, s->sframe_lsp_bitsize);

	1214 }

	1215

	1216 /* frames */

	1217 for (n = 0; n < MAX_FRAMES; n++) {

	1218 int aw_idx_is_ext = 0;

	1219

	1220 if (!s->has_residual_lsps) { // independent LSPs (per-frame)

	1221 if (get_bits_left(gb) < s->frame_lsp_bitsize) return 1;

	1222 skip_bits_long(gb, s->frame_lsp_bitsize);

	1223 }

	1224 bd_idx = s->vbm_tree[get_vlc2(gb, frame_type_vlc.table, 6, 3)];

	1225 if (bd_idx < 0)

	1226 return -1; // invalid frame type VLC code

	1227 frame_desc = &frame_descs[bd_idx];

	1228 if (frame_desc->acb_type == ACB_TYPE_ASYMMETRIC) {

	1229 if (get_bits_left(gb) < s->pitch_nbits)

	1230 return 1;

	1231 skip_bits_long(gb, s->pitch_nbits);

	1232 }

	1233 if (frame_desc->fcb_type == FCB_TYPE_SILENCE) {

	1234 skip_bits(gb, 8);

	1235 } else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {

	1236 int tmp = get_bits(gb, 6);

	1237 if (tmp >= 0x36) {

	1238 skip_bits(gb, 2);

	1239 aw_idx_is_ext = 1;

	1240 }

	1241 }

	1242

	1243 /* blocks */

	1244 if (frame_desc->acb_type == ACB_TYPE_HAMMING) {

	1245 need_bits = s->block_pitch_nbits +

	1246 (frame_desc->n_blocks - 1) * s->block_delta_pitch_nbits;

	1247 } else if (frame_desc->fcb_type == FCB_TYPE_AW_PULSES) {

	1248 need_bits = 2 * !aw_idx_is_ext;

	1249 } else

	1250 need_bits = 0;

	1251 need_bits += frame_desc->frame_size;

	1252 if (get_bits_left(gb) < need_bits)

	1253 return 1;

	1254 skip_bits_long(gb, need_bits);

	1255 }

	1256

	1257 return 0;

	1258 }

	1259

	1260 /**

	1261 * Synthesize output samples for a single superframe. If we have any data

	1262 * cached in s->sframe_cache, that will be used instead of whatever is loaded

	1263 * in s->gb.

	1264 *

	1265 * WMA Voice superframes contain 3 frames, each containing 160 audio samples,

	1266 * to give a total of 480 samples per frame. See #synth_frame() for frame

	1267 * parsing. In addition to 3 frames, superframes can also contain the LSPs

	1268 * (if these are globally specified for all frames (residually); they can

	1269 * also be specified individually per-frame. See the s->has_residual_lsps

	1270 * option), and can specify the number of samples encoded in this superframe

	1271 * (if less than 480), usually used to prevent blanks at track boundaries.

	1272 *

	1273 * @param ctx WMA Voice decoder context

	1274 * @param samples pointer to output buffer for voice samples

	1275 * @param data_size pointer containing the size of #samples on input, and the

	1276 * amount of #samples filled on output

	1277 * @return 0 on success, <0 on error or 1 if there was not enough data to

	1278 * fully parse the superframe

	1279 */

	1280 static int synth_superframe(AVCodecContext *ctx,

	1281 float samples, int data_size)

	1282 {

	1283 WMAVoiceContext *s = ctx->priv_data;

	1284 GetBitContext *gb = &s->gb, s_gb;

	1285 int n, res, n_samples = 480;

	1286 double lsps[MAX_FRAMES][MAX_LSPS];

	1287 const double *mean_lsf = s->lsps == 16 ?

	1288 wmavoice_mean_lsf16[s->lsp_def_mode] : wmavoice_mean_lsf10[s->lsp_def_mo de];

	1289 float excitation[MAX_SIGNAL_HISTORY + MAX_SFRAMESIZE + 12];

	1290 float synth[MAX_LSPS + MAX_SFRAMESIZE];

	1291

	1292 memcpy(synth, s->synth_history,

	1293 s->lsps * sizeof(*synth));

	1294 memcpy(excitation, s->excitation_history,

	1295 s->history_nsamples * sizeof(*excitation));

	1296

	1297 if (s->sframe_cache_size > 0) {

	1298 gb = &s_gb;

	1299 init_get_bits(gb, s->sframe_cache, s->sframe_cache_size);

	1300 s->sframe_cache_size = 0;

	1301 }

	1302

	1303 if ((res = check_bits_for_superframe(gb, s)) == 1) return 1;

	1304

	1305 /* First bit is speech/music bit, it differentiates between WMAVoice

	1306 * speech samples (the actual codec) and WMAVoice music samples, which

	1307 * are really WMAPro-in-WMAVoice-superframes. I've never seen those in

	1308 * the wild yet. */

	1309 if (!get_bits1(gb)) {

	1310 av_log_missing_feature(ctx, "WMAPro-in-WMAVoice support", 1);

	1311 return -1;

	1312 }

	1313

	1314 /* (optional) nr. of samples in superframe; always <= 480 and >= 0 */

	1315 if (get_bits1(gb)) {

	1316 if ((n_samples = get_bits(gb, 12)) > 480) {

	1317 av_log(ctx, AV_LOG_ERROR,

	1318 "Superframe encodes >480 samples (%d), not allowed\n",

	1319 n_samples);

	1320 return -1;

	1321 }

	1322 }

	1323 /* Parse LSPs, if global for the superframe (can also be per-frame). */

	1324 if (s->has_residual_lsps) {

	1325 double prev_lsps[MAX_LSPS], a1[MAX_LSPS * 2], a2[MAX_LSPS * 2];

	1326

	1327 for (n = 0; n < s->lsps; n++)

	1328 prev_lsps[n] = s->prev_lsps[n] - mean_lsf[n];

	1329

	1330 if (s->lsps == 10) {

	1331 dequant_lsp10r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);

	1332 } else /* s->lsps == 16 */

	1333 dequant_lsp16r(gb, lsps[2], prev_lsps, a1, a2, s->lsp_q_mode);

	1334

	1335 for (n = 0; n < s->lsps; n++) {

	1336 lsps[0][n] = mean_lsf[n] + (a1[n] - a2[n * 2]);

	1337 lsps[1][n] = mean_lsf[n] + (a1[s->lsps + n] - a2[n * 2 + 1]);

	1338 lsps[2][n] += mean_lsf[n];

	1339 }

	1340 for (n = 0; n < 3; n++)

	1341 stabilize_lsps(lsps[n], s->lsps);

	1342 }

	1343

	1344 /* Parse frames, optionally preceeded by per-frame (independent) LSPs. */

	1345 for (n = 0; n < 3; n++) {

	1346 if (!s->has_residual_lsps) {

	1347 int m;

	1348

	1349 if (s->lsps == 10) {

	1350 dequant_lsp10i(gb, lsps[n]);

	1351 } else /* s->lsps == 16 */

	1352 dequant_lsp16i(gb, lsps[n]);

	1353

	1354 for (m = 0; m < s->lsps; m++)

	1355 lsps[n][m] += mean_lsf[m];

	1356 stabilize_lsps(lsps[n], s->lsps);

	1357 }

	1358

	1359 if ((res = synth_frame(ctx, gb,

	1360 &samples[n * MAX_FRAMESIZE],

	1361 lsps[n], n == 0 ? s->prev_lsps : lsps[n - 1],

	1362 &excitation[s->history_nsamples + n * MAX_FRAMESI ZE],

	1363 &synth[s->lsps + n * MAX_FRAMESIZE])))

	1364 return res;

	1365 }

	1366

	1367 /* Statistics? FIXME - we don't check for length, a slight overrun

	1368 * will be caught by internal buffer padding, and anything else

	1369 * will be skipped, not read. */

	1370 if (get_bits1(gb)) {

	1371 res = get_bits(gb, 4);

	1372 skip_bits(gb, 10 * (res + 1));

	1373 }

	1374

	1375 /* Specify nr. of output samples */

	1376 data_size = n_samples sizeof(float);

	1377

	1378 /* Update history */

	1379 memcpy(s->prev_lsps, lsps[2],

	1380 s->lsps * sizeof(*s->prev_lsps));

	1381 memcpy(s->synth_history, &synth[MAX_SFRAMESIZE],

	1382 s->lsps * sizeof(*synth));

	1383 memcpy(s->excitation_history, &excitation[MAX_SFRAMESIZE],

	1384 s->history_nsamples * sizeof(*excitation));

	1385

	1386 return 0;

	1387 }

	1388

	1389 /**

	1390 * Parse the packet header at the start of each packet (input data to this

	1391 * decoder).

	1392 *

	1393 * @param s WMA Voice decoding context private data

	1394 * @returns 1 if not enough bits were available, or 0 on success.

	1395 */

	1396 static int parse_packet_header(WMAVoiceContext *s)

	1397 {

	1398 GetBitContext *gb = &s->gb;

	1399 unsigned int res;

	1400

	1401 if (get_bits_left(gb) < 11)

	1402 return 1;

	1403 skip_bits(gb, 4); // packet sequence number

	1404 s->has_residual_lsps = get_bits1(gb);

	1405 do {

	1406 res = get_bits(gb, 6); // number of superframes per packet

	1407 // (minus first one if there is spillover)

	1408 if (get_bits_left(gb) < 6 * (res == 0x3F) + s->spillover_bitsize)

	1409 return 1;

	1410 } while (res == 0x3F);

	1411 s->spillover_nbits = get_bits(gb, s->spillover_bitsize);

	1412

	1413 return 0;

	1414 }

	1415

	1416 /**

	1417 * Copy (unaligned) bits from gb/data/size to pb.

	1418 *

	1419 * @param pb target buffer to copy bits into

	1420 * @param data source buffer to copy bits from

	1421 * @param size size of the source data, in bytes

	1422 * @param gb bit I/O context specifying the current position in the source.

	1423 * data. This function might use this to align the bit position to

	1424 * a whole-byte boundary before calling #ff_copy_bits() on aligned

	1425 * source data

	1426 * @param nbits the amount of bits to copy from source to target

	1427 *

	1428 * @note after calling this function, the current position in the input bit

	1429 * I/O context is undefined.

	1430 */

	1431 static void copy_bits(PutBitContext *pb,

	1432 const uint8_t *data, int size,

	1433 GetBitContext *gb, int nbits)

	1434 {

	1435 int rmn_bytes, rmn_bits;

	1436

	1437 rmn_bits = rmn_bytes = get_bits_left(gb);

	1438 if (rmn_bits < nbits)

	1439 return;

	1440 rmn_bits &= 7; rmn_bytes >>= 3;

	1441 if ((rmn_bits = FFMIN(rmn_bits, nbits)) > 0)

	1442 put_bits(pb, rmn_bits, get_bits(gb, rmn_bits));

	1443 ff_copy_bits(pb, data + size - rmn_bytes,

	1444 FFMIN(nbits - rmn_bits, rmn_bytes << 3));

	1445 }

	1446

	1447 /**

	1448 * Packet decoding: a packet is anything that the (ASF) demuxer contains,

	1449 * and we expect that the demuxer / application provides it to us as such

	1450 * (else you'll probably get garbage as output). Every packet has a size of

	1451 * ctx->block_align bytes, starts with a packet header (see

	1452 * #parse_packet_header()), and then a series of superframes. Superframe

	1453 * boundaries may exceed packets, i.e. superframes can split data over

	1454 * multiple (two) packets.

	1455 *

	1456 * For more information about frames, see #synth_superframe().

	1457 */

	1458 static int wmavoice_decode_packet(AVCodecContext ctx, void data,

	1459 int data_size, AVPacket avpkt)

	1460 {

	1461 WMAVoiceContext *s = ctx->priv_data;

	1462 GetBitContext *gb = &s->gb;

	1463 int size, res, pos;

	1464

	1465 if (data_size < 480 sizeof(float)) {

	1466 av_log(ctx, AV_LOG_ERROR,

	1467 "Output buffer too small (%d given - %lu needed)\n",

	1468 data_size, 480 sizeof(float));

	1469 return -1;

	1470 }

	1471 *data_size = 0;

	1472

	1473 /* Packets are sometimes a multiple of ctx->block_align, with a packet

	1474 * header at each ctx->block_align bytes. However, FFmpeg's ASF demuxer

	1475 * feeds us ASF packets, which may concatenate multiple "codec" packets

	1476 * in a single "muxer" packet, so we artificially emulate that by

	1477 * capping the packet size at ctx->block_align. */

	1478 for (size = avpkt->size; size > ctx->block_align; size -= ctx->block_align);

	1479 if (!size)

	1480 return 0;

	1481 init_get_bits(&s->gb, avpkt->data, size << 3);

	1482

	1483 /* size == ctx->block_align is used to indicate whether we are dealing with

	1484 * a new packet or a packet of which we already read the packet header

	1485 * previously. */

	1486 if (size == ctx->block_align) { // new packet header

	1487 if ((res = parse_packet_header(s)) < 0)

	1488 return res;

	1489

	1490 /* If the packet header specifies a s->spillover_nbits, then we want

	1491 * to push out all data of the previous packet (+ spillover) before

	1492 * continuing to parse new superframes in the current packet. */

	1493 if (s->spillover_nbits > 0) {

	1494 if (s->sframe_cache_size > 0) {

	1495 int cnt = get_bits_count(gb);

	1496 copy_bits(&s->pb, avpkt->data, size, gb, s->spillover_nbits);

	1497 flush_put_bits(&s->pb);

	1498 s->sframe_cache_size += s->spillover_nbits;

	1499 if ((res = synth_superframe(ctx, data, data_size)) == 0 &&

	1500 *data_size > 0) {

	1501 cnt += s->spillover_nbits;

	1502 s->skip_bits_next = cnt & 7;

	1503 return cnt >> 3;

	1504 } else

	1505 skip_bits_long (gb, s->spillover_nbits - cnt +

	1506 get_bits_count(gb)); // resync

	1507 } else

	1508 skip_bits_long(gb, s->spillover_nbits); // resync

	1509 }

	1510 } else if (s->skip_bits_next)

	1511 skip_bits(gb, s->skip_bits_next);

	1512

	1513 /* Try parsing superframes in current packet */

	1514 s->sframe_cache_size = 0;

	1515 s->skip_bits_next = 0;

	1516 pos = get_bits_left(gb);

	1517 if ((res = synth_superframe(ctx, data, data_size)) < 0) {

	1518 return res;

	1519 } else if (*data_size > 0) {

	1520 int cnt = get_bits_count(gb);

	1521 s->skip_bits_next = cnt & 7;

	1522 return cnt >> 3;

	1523 } else if ((s->sframe_cache_size = pos) > 0) {

	1524 /* rewind bit reader to start of last (incomplete) superframe... */

	1525 init_get_bits(gb, avpkt->data, size << 3);

	1526 skip_bits_long(gb, (size << 3) - pos);

	1527 assert(get_bits_left(gb) == pos);

	1528

	1529 /* ...and cache it for spillover in next packet */

	1530 init_put_bits(&s->pb, s->sframe_cache, SFRAME_CACHE_MAXSIZE);

	1531 copy_bits(&s->pb, avpkt->data, size, gb, s->sframe_cache_size);

	1532 // FIXME bad - just copy bytes as whole and add use the

	1533 // skip_bits_next field

	1534 }

	1535

	1536 return size;

	1537 }

	1538

	1539 static av_cold void wmavoice_flush(AVCodecContext *ctx)

	1540 {

	1541 WMAVoiceContext *s = ctx->priv_data;

	1542 int n;

	1543

	1544 s->sframe_cache_size = 0;

	1545 s->skip_bits_next = 0;

	1546 for (n = 0; n < s->lsps; n++)

	1547 s->prev_lsps[n] = M_PI * (n + 1.0) / (s->lsps + 1.0);

	1548 memset(s->excitation_history, 0,

	1549 sizeof(s->excitation_history) MAX_SIGNAL_HISTORY);

	1550 memset(s->synth_history, 0,

	1551 sizeof(s->synth_history) MAX_LSPS);

	1552 memset(s->gain_pred_err, 0,

	1553 sizeof(s->gain_pred_err));

	1554 }

	1555

	1556 AVCodec wmavoice_decoder = {

	1557 "wmavoice",

	1558 CODEC_TYPE_AUDIO,

	1559 CODEC_ID_WMAVOICE,

	1560 sizeof(WMAVoiceContext),

	1561 wmavoice_decode_init,

	1562 NULL,

	1563 NULL,

	1564 wmavoice_decode_packet,

	1565 CODEC_CAP_SUBFRAMES,

	1566 .flush = wmavoice_flush,

	1567 .long_name = NULL_IF_CONFIG_SMALL("Windows Media Audio Voice"),

	1568 };

OLD	NEW

« patched-ffmpeg-mt/libavcodec/mpeg4video_es_bsf.c ('K') | « patched-ffmpeg-mt/libavcodec/wmaprodec.c ('k') | patched-ffmpeg-mt/libavcodec/wmavoice_data.h » ('j') | no next file with comments »