Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -89,3 +89,9 @@ src/x64
.vs/
.vscode/
CMakeSettings.json

# Downloaded model files (autogen.sh / dnn/download_model.sh)
dnn/*_data.c
dnn/*_data.h
dnn/dred_rdovae_constants.h
*.pth
179 changes: 173 additions & 6 deletions celt/bands.c
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ int oaci_hysteresis_decision(oac_val16 val, const oac_val16 *thresholds, const o
}

oac_uint32 oaci_celt_lcg_rand(oac_uint32 seed) {
return 1664525*seed + 1013904223;
return (oac_uint32)((oac_uint64)1664525 * seed + 1013904223);
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure why that change is needed, but if it is, it should definitely go in a separate commit/PR

}

/* This is a cos() approximation designed to be bit-exact on any platform. Bit exactness
Expand Down Expand Up @@ -986,15 +986,15 @@ static unsigned oaci_quant_partition(struct band_ctx *ctx, celt_norm *X,
if (mbits >= sbits) {
cm = oaci_quant_partition(ctx, X, N, mbits, B, lowband, LM,
MULT32_32_Q31(gain, mid), fill, left_split_mem);
rebalance = mbits - (oaci_ec_tell_frac(ec) - tell);
rebalance = mbits - (oac_int32)(oaci_ec_tell_frac(ec) - (oac_uint32)tell);
if (rebalance > 3<<BITRES && itheta != 0)
sbits += rebalance - (3<<BITRES);
cm |= oaci_quant_partition(ctx, Y, N, sbits, B, next_lowband2, LM,
MULT32_32_Q31(gain, side), fill>>B, right_split_mem)<<(B0>>1);
} else {
cm = oaci_quant_partition(ctx, Y, N, sbits, B, next_lowband2, LM,
MULT32_32_Q31(gain, side), fill>>B, right_split_mem)<<(B0>>1);
rebalance = sbits - (oaci_ec_tell_frac(ec) - tell);
rebalance = sbits - (oac_int32)(oaci_ec_tell_frac(ec) - (oac_uint32)tell);
if (rebalance > 3<<BITRES && itheta != 16384)
mbits += rebalance - (3<<BITRES);
cm |= oaci_quant_partition(ctx, X, N, mbits, B, lowband, LM,
Expand Down Expand Up @@ -1352,7 +1352,7 @@ static unsigned oaci_quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_
mid for folding later. */
cm = oaci_quant_band(ctx, X, N, mbits, B, lowband, LM, lowband_out, Q31ONE,
lowband_scratch, fill, split_mem[0]);
rebalance = mbits - (oaci_ec_tell_frac(ec) - tell);
rebalance = mbits - (oac_int32)(oaci_ec_tell_frac(ec) - (oac_uint32)tell);
if (rebalance > 3<<BITRES && itheta != 0)
sbits += rebalance - (3<<BITRES);

Expand All @@ -1363,7 +1363,7 @@ static unsigned oaci_quant_band_stereo(struct band_ctx *ctx, celt_norm *X, celt_
/* For a stereo split, the high bits of fill are always zero, so no
folding will be done to the side. */
cm = oaci_quant_band(ctx, Y, N, sbits, B, NULL, LM, NULL, side, NULL, fill>>B, split_mem[1]);
rebalance = sbits - (oaci_ec_tell_frac(ec) - tell);
rebalance = sbits - (oac_int32)(oaci_ec_tell_frac(ec) - (oac_uint32)tell);
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See above. Don't lump these changes into an Ambisonics patch.

if (rebalance > 3<<BITRES && itheta != 16384)
mbits += rebalance - (3<<BITRES);
/* In stereo mode, we do not apply a scaling to the mid because we need the normalized
Expand Down Expand Up @@ -1400,7 +1400,7 @@ static void oaci_special_hybrid_folding(const CELTMode *m, celt_norm *norm, celt
OAC_COPY(&norm2[n1], &norm2[2*n1 - n2], n2 - n1);
}

void oaci_quant_all_bands(int encode, const CELTMode *m, int start, int end,
static void quant_all_bands_twoch(int encode, const CELTMode *m, int start, int end,
celt_norm *X_, celt_norm *Y_, unsigned char *collapse_masks,
const celt_ener *bandE, int *pulses, int shortBlocks, int spread,
int dual_stereo, int intensity, int *tf_res, oac_int32 total_bits,
Expand Down Expand Up @@ -1664,3 +1664,170 @@ void oaci_quant_all_bands(int encode, const CELTMode *m, int start, int end,

RESTORE_STACK;
}
/* Multi-channel "multi-mono" band quantisation: every channel is coded on its
   own with oaci_quant_band(), one band at a time (bands start..end-1, and
   channels 0..C-1 inside each band).

   Layouts used by this function:
    - X_: channel c begins at X_ + c*frame_size, with
      frame_size = M*m->shortMdctSize and M = 1<<LM.
    - bandE: channel c begins at bandE + c*m->nbEBands.
    - collapse_masks: stored interleaved as [band*C + channel] so that
      oaci_anti_collapse can read them correctly.
    - _norm: C consecutive regions of norm_size entries, one per channel,
      used as the folding source/destination for that channel only.
    - split_mem: 15 ints per channel, cleared before the first band.

   Bit budget: one budget b is derived per band from pulses[i] and the running
   balance, limited to total_bits minus the bits already used and clamped to
   [0, 16383]; bands at or above codedBands get b = 0. Each channel is then
   given b / C, and ctx.total_bits is set per channel to the current
   oaci_ec_tell_frac() position plus remaining_bits / C.

   ctx.intensity and ctx.theta_round are fixed at 0 here. *seed is copied into
   the band context on entry and written back on exit. */
static void quant_all_bands_multi(int encode, const CELTMode *m, int start, int end,
celt_norm *X_, int C, unsigned char *collapse_masks,
const celt_ener *bandE, int *pulses, int shortBlocks, int spread,
int *tf_res, oac_int32 total_bits,
oac_int32 balance, ec_ctx *ec, int LM, int codedBands,
oac_uint32 *seed, int arch, int disable_inv) {
int i, c;
oac_int32 remaining_bits;
const oac_int16 * OAC_RESTRICT eBands = m->eBands;
int B;
int M;
int frame_size;
int norm_offset;
int norm_size;
int lowband_offset;
int update_lowband = 1;
int resynth_alloc;
struct band_ctx ctx;
VARDECL(celt_norm, _norm);
VARDECL(celt_norm, _lowband_scratch);
VARDECL(int, split_mem);
/* resynth is forced on when RESYNTH is defined; otherwise it is on only when
   decoding (!encode). */
#ifdef RESYNTH
int resynth = 1;
#else
int resynth = !encode;
#endif
SAVE_STACK;
M = 1<<LM;
B = shortBlocks ? M : 1;
frame_size = M*m->shortMdctSize;
/* The per-channel norm buffer covers the range from the first coded band up
   to the start of the last band (eBands[nbEBands - 1]), scaled by M. */
norm_offset = M*eBands[start];
norm_size = M*eBands[m->nbEBands - 1] - norm_offset;
/* One norm array per channel for spectral folding */
ALLOC(_norm, C*norm_size, celt_norm);
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you think this may be quite big memory use in memory-constrained systems?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

LM is at most 3, so the memory is bounded by C * 8 * 18 = 144*C so we are talking about ~20kB for 5th order ambisonics. That's still a lot less than the buffer size for holding the time-domain input samples. If you can afford one, you can probably afford the other.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If it's actually a problem down the road we can try to figure out something later, but for now I'm not concerned with memory here. As long as mono/stereo don't significantly regress, we can assume that people doing 5th order Ambisonics have the resources to do it.

if (encode && resynth)
resynth_alloc = M*(eBands[m->nbEBands] - eBands[m->nbEBands - 1]);
else
resynth_alloc = ALLOC_NONE;
ALLOC(_lowband_scratch, resynth_alloc, celt_norm);
ALLOC(split_mem, C*15, int);
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same memory use question here.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's comparably small.

OAC_CLEAR(split_mem, C*15);
ctx.ec = ec;
ctx.encode = encode;
ctx.intensity = 0;
ctx.m = m;
ctx.seed = *seed;
ctx.spread = spread;
ctx.arch = arch;
ctx.disable_inv = disable_inv;
ctx.resynth = resynth;
ctx.theta_round = 0;
ctx.avoid_split_noise = B > 1;
lowband_offset = 0;
for (i = start; i < end; i++) {
oac_int32 tell;
int b;
int band_N;
oac_int32 curr_balance;
int effective_lowband = -1;
int tf_change;
int last = (i == end - 1);
ctx.i = i;
band_N = M*eBands[i + 1] - M*eBands[i];
celt_assert(band_N > 0);
tell = oaci_ec_tell_frac(ec);
if (i != start)
balance -= tell;
remaining_bits = total_bits - tell;
if (i <= codedBands - 1) {
curr_balance = oaci_celt_sudiv(balance, IMIN(3, codedBands - i));
b = IMAX(0, IMIN(16383, IMIN(remaining_bits, pulses[i] + curr_balance)));
} else {
b = 0;
}
/* Update lowband offset for spectral folding */
if (resynth && (M*eBands[i] - band_N >= M*eBands[start] || i == start + 1)
&& (update_lowband || lowband_offset == 0))
lowband_offset = i;
/* Handle hybrid mode band boundary */
if (i == start + 1) {
int n1 = M*(eBands[start + 1] - eBands[start]);
int n2 = M*(eBands[start + 2] - eBands[start + 1]);
for (c = 0; c < C; c++) {
celt_norm *norm_c = _norm + c*norm_size;
OAC_COPY(&norm_c[n1], &norm_c[2*n1 - n2], n2 - n1);
}
}
tf_change = tf_res[i];
ctx.tf_change = tf_change;
/* Encode each channel independently for this band */
{
oac_int32 remaining_per_chan = remaining_bits / C;
for (c = 0; c < C; c++) {
celt_norm *X_c;
celt_norm *norm_c = _norm + c*norm_size;
celt_norm *lowband_ptr = NULL;
celt_norm *lowband_out;
celt_norm *lb_scratch;
unsigned x_cm;
int chan_b = b / C;
/* Each channel's PVQ budget cap: current tell + per-channel share */
ctx.total_bits = oaci_ec_tell_frac(ec) + remaining_per_chan;
ctx.bandE = bandE + c*m->nbEBands;
X_c = X_ + c*frame_size + M*eBands[i];
lowband_out = last ? NULL : norm_c + M*eBands[i] - norm_offset;
/* Setup lowband for folding */
if (lowband_offset != 0 && (spread != SPREAD_AGGRESSIVE || B > 1 || tf_change < 0)) {
int fold_start, fold_end, fold_i;
effective_lowband = IMAX(0, M*eBands[lowband_offset] - norm_offset - band_N);
fold_start = lowband_offset;
while (M*eBands[--fold_start] > effective_lowband + norm_offset) ;
fold_end = lowband_offset - 1;
while (++fold_end < i && M*eBands[fold_end] < effective_lowband + norm_offset + band_N) ;
x_cm = 0;
fold_i = fold_start; do {
x_cm |= collapse_masks[fold_i*C + c];
} while (++fold_i < fold_end);
lowband_ptr = norm_c + effective_lowband;
} else {
x_cm = (1<<B) - 1;
}
if (i >= m->effEBands) {
X_c = norm_c;
lb_scratch = NULL;
} else if (encode && resynth) {
lb_scratch = _lowband_scratch;
} else {
lb_scratch = X_ + c*frame_size + M*eBands[m->effEBands - 1];
}
if (last)
lb_scratch = NULL;
x_cm = oaci_quant_band(&ctx, X_c, band_N, chan_b, B,
lowband_ptr, LM, lowband_out, Q31ONE, lb_scratch, x_cm, split_mem + c*15);
collapse_masks[i*C + c] = (unsigned char)x_cm;
}
}
balance += pulses[i] + tell;
update_lowband = b > (band_N<<BITRES);
ctx.avoid_split_noise = 0;
}
*seed = ctx.seed;
RESTORE_STACK;
}
/* Public entry point for band quantisation; dispatches on the channel count.
   More than two channels go to quant_all_bands_multi(), which is not passed
   dual_stereo, intensity or complexity. Mono and stereo go to
   quant_all_bands_twoch(); for C == 2 the second channel starts one frame
   ((1<<LM)*m->shortMdctSize coefficients) after X_, otherwise it is NULL. */
void oaci_quant_all_bands(int encode, const CELTMode *m, int start, int end,
      celt_norm *X_, int C, unsigned char *collapse_masks,
      const celt_ener *bandE, int *pulses, int shortBlocks, int spread,
      int dual_stereo, int intensity, int *tf_res, oac_int32 total_bits,
      oac_int32 balance, ec_ctx *ec, int LM, int codedBands,
      oac_uint32 *seed, int complexity, int arch, int disable_inv) {
   int chan_stride;
   celt_norm *second_chan = NULL;

   if (C > 2) {
      quant_all_bands_multi(encode, m, start, end, X_, C, collapse_masks,
            bandE, pulses, shortBlocks, spread,
            tf_res, total_bits, balance, ec, LM, codedBands,
            seed, arch, disable_inv);
      return;
   }

   /* Channels are stored back to back, one frame apart. */
   chan_stride = (1<<LM)*m->shortMdctSize;
   if (C == 2)
      second_chan = X_ + chan_stride;

   quant_all_bands_twoch(encode, m, start, end, X_, second_chan, collapse_masks,
         bandE, pulses, shortBlocks, spread, dual_stereo, intensity,
         tf_res, total_bits, balance, ec, LM, codedBands,
         seed, complexity, arch, disable_inv);
}
14 changes: 7 additions & 7 deletions celt/bands.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,15 +118,15 @@ void oaci_haar1(celt_norm *X, int N0, int stride);
* @param m Mode data
* @param start First band to process
* @param end Last band to process + 1
* @param X Residual (normalised)
* @param Y Residual (normalised) for second channel (or NULL for mono)
* @param collapse_masks Anti-collapse tracking mask
* @param bandE Square root of the energy for each band
* @param X Residual (normalised), all C channels contiguous (channel stride = M*shortMdctSize)
* @param C Number of channels
* @param collapse_masks Anti-collapse tracking mask (C*nbEBands, interleaved as [band*C + channel])
* @param bandE Square root of the energy for each band (C*nbEBands, stride nbEBands per channel)
* @param pulses Bit allocation (per band) for PVQ
* @param shortBlocks Zero for long blocks, non-zero for short blocks
* @param spread Amount of spreading to use
* @param dual_stereo Zero for MS stereo, non-zero for dual stereo
* @param intensity First band to use intensity stereo
* @param dual_stereo Zero for MS stereo, non-zero for dual stereo (C<=2 only)
* @param intensity First band to use intensity stereo (C<=2 only)
* @param tf_res Time-frequency resolution change
* @param total_bits Total number of bits that can be used for the frame (including the ones already spent)
* @param balance Number of unallocated bits
Expand All @@ -137,7 +137,7 @@ void oaci_haar1(celt_norm *X, int N0, int stride);
* @param arch Run-time architecture (see oac_select_arch())
*/
void oaci_quant_all_bands(int encode, const CELTMode *m, int start, int end,
celt_norm * X, celt_norm * Y, unsigned char *collapse_masks,
celt_norm * X, int C, unsigned char *collapse_masks,
const celt_ener *bandE, int *pulses, int shortBlocks, int spread,
int dual_stereo, int intensity, int *tf_res, oac_int32 total_bits,
oac_int32 balance, ec_ctx *ec, int M, int codedBands, oac_uint32 *seed,
Expand Down
4 changes: 3 additions & 1 deletion celt/celt.c
Original file line number Diff line number Diff line change
Expand Up @@ -346,10 +346,12 @@ const signed char oaci_tf_select_table[4][8] = {

void oaci_init_caps(const CELTMode *m, int *cap, int LM, int C) {
int i;
/* Use mono cap table for C > 2 since channels are coded independently */
int C_cap = C == 2 ? 2 : 1;
for (i = 0; i < m->nbEBands; i++) {
int N;
N = (m->eBands[i + 1] - m->eBands[i])<<LM;
cap[i] = (m->cache.caps[m->nbEBands*(2*LM + C - 1) + i] + 64)*C*N>>2;
cap[i] = (m->cache.caps[m->nbEBands*(2*LM + C_cap - 1) + i] + 64)*C*N>>2;
}
}

Expand Down
10 changes: 8 additions & 2 deletions celt/celt.h
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,12 @@
#define CELTDecoder OacCustomDecoder
#define CELTMode OacCustomMode

/** Maximum supported ambisonics order. */
#define OAC_MAX_AMBISONICS_ORDER 7
#define OAC_MAX_AMBISONICS_CHANNELS ((OAC_MAX_AMBISONICS_ORDER+1)*(OAC_MAX_AMBISONICS_ORDER+1))
#define OAC_MAX_CHANNELS 255
/* Check that OAC_MAX_CHANNELS is large enough */
typedef char oac_assert_max_channels_sufficient[(OAC_MAX_CHANNELS >= OAC_MAX_AMBISONICS_CHANNELS) ? 1 : -1];
#define LEAK_BANDS 19

typedef struct {
Expand Down Expand Up @@ -184,7 +190,7 @@ int oaci_celt_encode_with_ec(OacCustomEncoder * OAC_RESTRICT st, const oac_res *
unsigned char *compressed, int nbCompressedBytes, ec_enc *enc);

int oaci_celt_encoder_init(CELTEncoder *st, oac_int32 sampling_rate, int channels,
int arch);
int arch, int format);



Expand All @@ -193,7 +199,7 @@ int oaci_celt_encoder_init(CELTEncoder *st, oac_int32 sampling_rate, int channel
int oaci_celt_decoder_get_size(int channels);


int oaci_celt_decoder_init(CELTDecoder *st, oac_int32 sampling_rate, int channels);
int oaci_celt_decoder_init(CELTDecoder *st, oac_int32 sampling_rate, int channels, int format);

int oaci_celt_decode_with_ec_dred(CELTDecoder * OAC_RESTRICT st, const unsigned char *data,
int len, oac_res * OAC_RESTRICT pcm, int frame_size, ec_dec *dec, int accum
Expand Down
Loading
Loading