Diffstat (limited to 'thirdparty/libvpx/vp9')
65 files changed, 17166 insertions, 0 deletions
diff --git a/thirdparty/libvpx/vp9/common/arm/neon/vp9_iht4x4_add_neon.c b/thirdparty/libvpx/vp9/common/arm/neon/vp9_iht4x4_add_neon.c new file mode 100644 index 0000000000..1761fada2f --- /dev/null +++ b/thirdparty/libvpx/vp9/common/arm/neon/vp9_iht4x4_add_neon.c @@ -0,0 +1,248 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <arm_neon.h> +#include <assert.h> + +#include "./vp9_rtcd.h" +#include "./vpx_config.h" +#include "vp9/common/vp9_common.h" + +static int16_t sinpi_1_9 = 0x14a3; +static int16_t sinpi_2_9 = 0x26c9; +static int16_t sinpi_3_9 = 0x3441; +static int16_t sinpi_4_9 = 0x3b6c; +static int16_t cospi_8_64 = 0x3b21; +static int16_t cospi_16_64 = 0x2d41; +static int16_t cospi_24_64 = 0x187e; + +static INLINE void TRANSPOSE4X4( + int16x8_t *q8s16, + int16x8_t *q9s16) { + int32x4_t q8s32, q9s32; + int16x4x2_t d0x2s16, d1x2s16; + int32x4x2_t q0x2s32; + + d0x2s16 = vtrn_s16(vget_low_s16(*q8s16), vget_high_s16(*q8s16)); + d1x2s16 = vtrn_s16(vget_low_s16(*q9s16), vget_high_s16(*q9s16)); + + q8s32 = vreinterpretq_s32_s16(vcombine_s16(d0x2s16.val[0], d0x2s16.val[1])); + q9s32 = vreinterpretq_s32_s16(vcombine_s16(d1x2s16.val[0], d1x2s16.val[1])); + q0x2s32 = vtrnq_s32(q8s32, q9s32); + + *q8s16 = vreinterpretq_s16_s32(q0x2s32.val[0]); + *q9s16 = vreinterpretq_s16_s32(q0x2s32.val[1]); + return; +} + +static INLINE void GENERATE_COSINE_CONSTANTS( + int16x4_t *d0s16, + int16x4_t *d1s16, + int16x4_t *d2s16) { + *d0s16 = vdup_n_s16(cospi_8_64); + *d1s16 = vdup_n_s16(cospi_16_64); + *d2s16 = vdup_n_s16(cospi_24_64); + return; +} + +static INLINE void GENERATE_SINE_CONSTANTS( + int16x4_t *d3s16, + int16x4_t *d4s16, + int16x4_t *d5s16, + int16x8_t *q3s16) { + *d3s16 = vdup_n_s16(sinpi_1_9); + *d4s16 = vdup_n_s16(sinpi_2_9); + *q3s16 = vdupq_n_s16(sinpi_3_9); + *d5s16 = vdup_n_s16(sinpi_4_9); + return; +} + +static INLINE void IDCT4x4_1D( + int16x4_t *d0s16, + int16x4_t *d1s16, + int16x4_t *d2s16, + int16x8_t *q8s16, + int16x8_t *q9s16) { + int16x4_t d16s16, d17s16, d18s16, d19s16, d23s16, d24s16; + int16x4_t d26s16, d27s16, d28s16, d29s16; + int32x4_t q10s32, q13s32, q14s32, q15s32; + int16x8_t q13s16, q14s16; + + d16s16 = vget_low_s16(*q8s16); + d17s16 = vget_high_s16(*q8s16); + d18s16 = vget_low_s16(*q9s16); + d19s16 = vget_high_s16(*q9s16); + + d23s16 = vadd_s16(d16s16, d18s16); + d24s16 = vsub_s16(d16s16, d18s16); + + q15s32 = vmull_s16(d17s16, *d2s16); + q10s32 = vmull_s16(d17s16, *d0s16); + q13s32 = vmull_s16(d23s16, *d1s16); + q14s32 = vmull_s16(d24s16, *d1s16); + q15s32 = vmlsl_s16(q15s32, d19s16, *d0s16); + q10s32 = vmlal_s16(q10s32, d19s16, *d2s16); + + d26s16 = vqrshrn_n_s32(q13s32, 14); + d27s16 = vqrshrn_n_s32(q14s32, 14); + d29s16 = vqrshrn_n_s32(q15s32, 14); + d28s16 = vqrshrn_n_s32(q10s32, 14); + + q13s16 = vcombine_s16(d26s16, d27s16); + q14s16 = vcombine_s16(d28s16, d29s16); + *q8s16 = vaddq_s16(q13s16, q14s16); + *q9s16 = vsubq_s16(q13s16, q14s16); + *q9s16 = vcombine_s16(vget_high_s16(*q9s16), + vget_low_s16(*q9s16)); // vswp + return; +} + +static INLINE void IADST4x4_1D( + int16x4_t *d3s16, + int16x4_t *d4s16, + int16x4_t *d5s16, + int16x8_t *q3s16, + int16x8_t *q8s16, + int16x8_t *q9s16) { + int16x4_t 
d6s16, d16s16, d17s16, d18s16, d19s16; + int32x4_t q8s32, q9s32, q10s32, q11s32, q12s32, q13s32, q14s32, q15s32; + + d6s16 = vget_low_s16(*q3s16); + + d16s16 = vget_low_s16(*q8s16); + d17s16 = vget_high_s16(*q8s16); + d18s16 = vget_low_s16(*q9s16); + d19s16 = vget_high_s16(*q9s16); + + q10s32 = vmull_s16(*d3s16, d16s16); + q11s32 = vmull_s16(*d4s16, d16s16); + q12s32 = vmull_s16(d6s16, d17s16); + q13s32 = vmull_s16(*d5s16, d18s16); + q14s32 = vmull_s16(*d3s16, d18s16); + q15s32 = vmovl_s16(d16s16); + q15s32 = vaddw_s16(q15s32, d19s16); + q8s32 = vmull_s16(*d4s16, d19s16); + q15s32 = vsubw_s16(q15s32, d18s16); + q9s32 = vmull_s16(*d5s16, d19s16); + + q10s32 = vaddq_s32(q10s32, q13s32); + q10s32 = vaddq_s32(q10s32, q8s32); + q11s32 = vsubq_s32(q11s32, q14s32); + q8s32 = vdupq_n_s32(sinpi_3_9); + q11s32 = vsubq_s32(q11s32, q9s32); + q15s32 = vmulq_s32(q15s32, q8s32); + + q13s32 = vaddq_s32(q10s32, q12s32); + q10s32 = vaddq_s32(q10s32, q11s32); + q14s32 = vaddq_s32(q11s32, q12s32); + q10s32 = vsubq_s32(q10s32, q12s32); + + d16s16 = vqrshrn_n_s32(q13s32, 14); + d17s16 = vqrshrn_n_s32(q14s32, 14); + d18s16 = vqrshrn_n_s32(q15s32, 14); + d19s16 = vqrshrn_n_s32(q10s32, 14); + + *q8s16 = vcombine_s16(d16s16, d17s16); + *q9s16 = vcombine_s16(d18s16, d19s16); + return; +} + +void vp9_iht4x4_16_add_neon(const tran_low_t *input, uint8_t *dest, + int dest_stride, int tx_type) { + uint8x8_t d26u8, d27u8; + int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16; + uint32x2_t d26u32, d27u32; + int16x8_t q3s16, q8s16, q9s16; + uint16x8_t q8u16, q9u16; + + d26u32 = d27u32 = vdup_n_u32(0); + + q8s16 = vld1q_s16(input); + q9s16 = vld1q_s16(input + 8); + + TRANSPOSE4X4(&q8s16, &q9s16); + + switch (tx_type) { + case 0: // idct_idct is not supported. Fall back to C + vp9_iht4x4_16_add_c(input, dest, dest_stride, tx_type); + return; + break; + case 1: // iadst_idct + // generate constants + GENERATE_COSINE_CONSTANTS(&d0s16, &d1s16, &d2s16); + GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16); + + // first transform rows + IDCT4x4_1D(&d0s16, &d1s16, &d2s16, &q8s16, &q9s16); + + // transpose the matrix + TRANSPOSE4X4(&q8s16, &q9s16); + + // then transform columns + IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16); + break; + case 2: // idct_iadst + // generate constants + GENERATE_COSINE_CONSTANTS(&d0s16, &d1s16, &d2s16); + GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16); + + // first transform rows + IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16); + + // transpose the matrix + TRANSPOSE4X4(&q8s16, &q9s16); + + // then transform columns + IDCT4x4_1D(&d0s16, &d1s16, &d2s16, &q8s16, &q9s16); + break; + case 3: // iadst_iadst + // generate constants + GENERATE_SINE_CONSTANTS(&d3s16, &d4s16, &d5s16, &q3s16); + + // first transform rows + IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16); + + // transpose the matrix + TRANSPOSE4X4(&q8s16, &q9s16); + + // then transform columns + IADST4x4_1D(&d3s16, &d4s16, &d5s16, &q3s16, &q8s16, &q9s16); + break; + default: // iadst_idct + assert(0); + break; + } + + q8s16 = vrshrq_n_s16(q8s16, 4); + q9s16 = vrshrq_n_s16(q9s16, 4); + + d26u32 = vld1_lane_u32((const uint32_t *)dest, d26u32, 0); + dest += dest_stride; + d26u32 = vld1_lane_u32((const uint32_t *)dest, d26u32, 1); + dest += dest_stride; + d27u32 = vld1_lane_u32((const uint32_t *)dest, d27u32, 0); + dest += dest_stride; + d27u32 = vld1_lane_u32((const uint32_t *)dest, d27u32, 1); + + q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), vreinterpret_u8_u32(d26u32)); + q9u16 = 
vaddw_u8(vreinterpretq_u16_s16(q9s16), vreinterpret_u8_u32(d27u32)); + + d26u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16)); + d27u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16)); + + vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d27u8), 1); + dest -= dest_stride; + vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d27u8), 0); + dest -= dest_stride; + vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d26u8), 1); + dest -= dest_stride; + vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d26u8), 0); + return; +} diff --git a/thirdparty/libvpx/vp9/common/arm/neon/vp9_iht8x8_add_neon.c b/thirdparty/libvpx/vp9/common/arm/neon/vp9_iht8x8_add_neon.c new file mode 100644 index 0000000000..04b342c3d3 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/arm/neon/vp9_iht8x8_add_neon.c @@ -0,0 +1,624 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <arm_neon.h> +#include <assert.h> + +#include "./vp9_rtcd.h" +#include "./vpx_config.h" +#include "vp9/common/vp9_common.h" + +static int16_t cospi_2_64 = 16305; +static int16_t cospi_4_64 = 16069; +static int16_t cospi_6_64 = 15679; +static int16_t cospi_8_64 = 15137; +static int16_t cospi_10_64 = 14449; +static int16_t cospi_12_64 = 13623; +static int16_t cospi_14_64 = 12665; +static int16_t cospi_16_64 = 11585; +static int16_t cospi_18_64 = 10394; +static int16_t cospi_20_64 = 9102; +static int16_t cospi_22_64 = 7723; +static int16_t cospi_24_64 = 6270; +static int16_t cospi_26_64 = 4756; +static int16_t cospi_28_64 = 3196; +static int16_t cospi_30_64 = 1606; + +static INLINE void TRANSPOSE8X8( + int16x8_t *q8s16, + int16x8_t *q9s16, + int16x8_t *q10s16, + int16x8_t *q11s16, + int16x8_t *q12s16, + int16x8_t *q13s16, + int16x8_t *q14s16, + int16x8_t *q15s16) { + int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16; + int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16; + int32x4x2_t q0x2s32, q1x2s32, q2x2s32, q3x2s32; + int16x8x2_t q0x2s16, q1x2s16, q2x2s16, q3x2s16; + + d16s16 = vget_low_s16(*q8s16); + d17s16 = vget_high_s16(*q8s16); + d18s16 = vget_low_s16(*q9s16); + d19s16 = vget_high_s16(*q9s16); + d20s16 = vget_low_s16(*q10s16); + d21s16 = vget_high_s16(*q10s16); + d22s16 = vget_low_s16(*q11s16); + d23s16 = vget_high_s16(*q11s16); + d24s16 = vget_low_s16(*q12s16); + d25s16 = vget_high_s16(*q12s16); + d26s16 = vget_low_s16(*q13s16); + d27s16 = vget_high_s16(*q13s16); + d28s16 = vget_low_s16(*q14s16); + d29s16 = vget_high_s16(*q14s16); + d30s16 = vget_low_s16(*q15s16); + d31s16 = vget_high_s16(*q15s16); + + *q8s16 = vcombine_s16(d16s16, d24s16); // vswp d17, d24 + *q9s16 = vcombine_s16(d18s16, d26s16); // vswp d19, d26 + *q10s16 = vcombine_s16(d20s16, d28s16); // vswp d21, d28 + *q11s16 = vcombine_s16(d22s16, d30s16); // vswp d23, d30 + *q12s16 = vcombine_s16(d17s16, d25s16); + *q13s16 = vcombine_s16(d19s16, d27s16); + *q14s16 = vcombine_s16(d21s16, d29s16); + *q15s16 = vcombine_s16(d23s16, d31s16); + + q0x2s32 = vtrnq_s32(vreinterpretq_s32_s16(*q8s16), + vreinterpretq_s32_s16(*q10s16)); + q1x2s32 = vtrnq_s32(vreinterpretq_s32_s16(*q9s16), + vreinterpretq_s32_s16(*q11s16)); + q2x2s32 = vtrnq_s32(vreinterpretq_s32_s16(*q12s16), + 
vreinterpretq_s32_s16(*q14s16)); + q3x2s32 = vtrnq_s32(vreinterpretq_s32_s16(*q13s16), + vreinterpretq_s32_s16(*q15s16)); + + q0x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q0x2s32.val[0]), // q8 + vreinterpretq_s16_s32(q1x2s32.val[0])); // q9 + q1x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q0x2s32.val[1]), // q10 + vreinterpretq_s16_s32(q1x2s32.val[1])); // q11 + q2x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q2x2s32.val[0]), // q12 + vreinterpretq_s16_s32(q3x2s32.val[0])); // q13 + q3x2s16 = vtrnq_s16(vreinterpretq_s16_s32(q2x2s32.val[1]), // q14 + vreinterpretq_s16_s32(q3x2s32.val[1])); // q15 + + *q8s16 = q0x2s16.val[0]; + *q9s16 = q0x2s16.val[1]; + *q10s16 = q1x2s16.val[0]; + *q11s16 = q1x2s16.val[1]; + *q12s16 = q2x2s16.val[0]; + *q13s16 = q2x2s16.val[1]; + *q14s16 = q3x2s16.val[0]; + *q15s16 = q3x2s16.val[1]; + return; +} + +static INLINE void IDCT8x8_1D( + int16x8_t *q8s16, + int16x8_t *q9s16, + int16x8_t *q10s16, + int16x8_t *q11s16, + int16x8_t *q12s16, + int16x8_t *q13s16, + int16x8_t *q14s16, + int16x8_t *q15s16) { + int16x4_t d0s16, d1s16, d2s16, d3s16; + int16x4_t d8s16, d9s16, d10s16, d11s16, d12s16, d13s16, d14s16, d15s16; + int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16; + int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16; + int16x8_t q0s16, q1s16, q2s16, q3s16, q4s16, q5s16, q6s16, q7s16; + int32x4_t q2s32, q3s32, q5s32, q6s32, q8s32, q9s32; + int32x4_t q10s32, q11s32, q12s32, q13s32, q15s32; + + d0s16 = vdup_n_s16(cospi_28_64); + d1s16 = vdup_n_s16(cospi_4_64); + d2s16 = vdup_n_s16(cospi_12_64); + d3s16 = vdup_n_s16(cospi_20_64); + + d16s16 = vget_low_s16(*q8s16); + d17s16 = vget_high_s16(*q8s16); + d18s16 = vget_low_s16(*q9s16); + d19s16 = vget_high_s16(*q9s16); + d20s16 = vget_low_s16(*q10s16); + d21s16 = vget_high_s16(*q10s16); + d22s16 = vget_low_s16(*q11s16); + d23s16 = vget_high_s16(*q11s16); + d24s16 = vget_low_s16(*q12s16); + d25s16 = vget_high_s16(*q12s16); + d26s16 = vget_low_s16(*q13s16); + d27s16 = vget_high_s16(*q13s16); + d28s16 = vget_low_s16(*q14s16); + d29s16 = vget_high_s16(*q14s16); + d30s16 = vget_low_s16(*q15s16); + d31s16 = vget_high_s16(*q15s16); + + q2s32 = vmull_s16(d18s16, d0s16); + q3s32 = vmull_s16(d19s16, d0s16); + q5s32 = vmull_s16(d26s16, d2s16); + q6s32 = vmull_s16(d27s16, d2s16); + + q2s32 = vmlsl_s16(q2s32, d30s16, d1s16); + q3s32 = vmlsl_s16(q3s32, d31s16, d1s16); + q5s32 = vmlsl_s16(q5s32, d22s16, d3s16); + q6s32 = vmlsl_s16(q6s32, d23s16, d3s16); + + d8s16 = vqrshrn_n_s32(q2s32, 14); + d9s16 = vqrshrn_n_s32(q3s32, 14); + d10s16 = vqrshrn_n_s32(q5s32, 14); + d11s16 = vqrshrn_n_s32(q6s32, 14); + q4s16 = vcombine_s16(d8s16, d9s16); + q5s16 = vcombine_s16(d10s16, d11s16); + + q2s32 = vmull_s16(d18s16, d1s16); + q3s32 = vmull_s16(d19s16, d1s16); + q9s32 = vmull_s16(d26s16, d3s16); + q13s32 = vmull_s16(d27s16, d3s16); + + q2s32 = vmlal_s16(q2s32, d30s16, d0s16); + q3s32 = vmlal_s16(q3s32, d31s16, d0s16); + q9s32 = vmlal_s16(q9s32, d22s16, d2s16); + q13s32 = vmlal_s16(q13s32, d23s16, d2s16); + + d14s16 = vqrshrn_n_s32(q2s32, 14); + d15s16 = vqrshrn_n_s32(q3s32, 14); + d12s16 = vqrshrn_n_s32(q9s32, 14); + d13s16 = vqrshrn_n_s32(q13s32, 14); + q6s16 = vcombine_s16(d12s16, d13s16); + q7s16 = vcombine_s16(d14s16, d15s16); + + d0s16 = vdup_n_s16(cospi_16_64); + + q2s32 = vmull_s16(d16s16, d0s16); + q3s32 = vmull_s16(d17s16, d0s16); + q13s32 = vmull_s16(d16s16, d0s16); + q15s32 = vmull_s16(d17s16, d0s16); + + q2s32 = vmlal_s16(q2s32, d24s16, d0s16); + q3s32 = vmlal_s16(q3s32, d25s16, d0s16); + q13s32 = 
vmlsl_s16(q13s32, d24s16, d0s16); + q15s32 = vmlsl_s16(q15s32, d25s16, d0s16); + + d0s16 = vdup_n_s16(cospi_24_64); + d1s16 = vdup_n_s16(cospi_8_64); + + d18s16 = vqrshrn_n_s32(q2s32, 14); + d19s16 = vqrshrn_n_s32(q3s32, 14); + d22s16 = vqrshrn_n_s32(q13s32, 14); + d23s16 = vqrshrn_n_s32(q15s32, 14); + *q9s16 = vcombine_s16(d18s16, d19s16); + *q11s16 = vcombine_s16(d22s16, d23s16); + + q2s32 = vmull_s16(d20s16, d0s16); + q3s32 = vmull_s16(d21s16, d0s16); + q8s32 = vmull_s16(d20s16, d1s16); + q12s32 = vmull_s16(d21s16, d1s16); + + q2s32 = vmlsl_s16(q2s32, d28s16, d1s16); + q3s32 = vmlsl_s16(q3s32, d29s16, d1s16); + q8s32 = vmlal_s16(q8s32, d28s16, d0s16); + q12s32 = vmlal_s16(q12s32, d29s16, d0s16); + + d26s16 = vqrshrn_n_s32(q2s32, 14); + d27s16 = vqrshrn_n_s32(q3s32, 14); + d30s16 = vqrshrn_n_s32(q8s32, 14); + d31s16 = vqrshrn_n_s32(q12s32, 14); + *q13s16 = vcombine_s16(d26s16, d27s16); + *q15s16 = vcombine_s16(d30s16, d31s16); + + q0s16 = vaddq_s16(*q9s16, *q15s16); + q1s16 = vaddq_s16(*q11s16, *q13s16); + q2s16 = vsubq_s16(*q11s16, *q13s16); + q3s16 = vsubq_s16(*q9s16, *q15s16); + + *q13s16 = vsubq_s16(q4s16, q5s16); + q4s16 = vaddq_s16(q4s16, q5s16); + *q14s16 = vsubq_s16(q7s16, q6s16); + q7s16 = vaddq_s16(q7s16, q6s16); + d26s16 = vget_low_s16(*q13s16); + d27s16 = vget_high_s16(*q13s16); + d28s16 = vget_low_s16(*q14s16); + d29s16 = vget_high_s16(*q14s16); + + d16s16 = vdup_n_s16(cospi_16_64); + + q9s32 = vmull_s16(d28s16, d16s16); + q10s32 = vmull_s16(d29s16, d16s16); + q11s32 = vmull_s16(d28s16, d16s16); + q12s32 = vmull_s16(d29s16, d16s16); + + q9s32 = vmlsl_s16(q9s32, d26s16, d16s16); + q10s32 = vmlsl_s16(q10s32, d27s16, d16s16); + q11s32 = vmlal_s16(q11s32, d26s16, d16s16); + q12s32 = vmlal_s16(q12s32, d27s16, d16s16); + + d10s16 = vqrshrn_n_s32(q9s32, 14); + d11s16 = vqrshrn_n_s32(q10s32, 14); + d12s16 = vqrshrn_n_s32(q11s32, 14); + d13s16 = vqrshrn_n_s32(q12s32, 14); + q5s16 = vcombine_s16(d10s16, d11s16); + q6s16 = vcombine_s16(d12s16, d13s16); + + *q8s16 = vaddq_s16(q0s16, q7s16); + *q9s16 = vaddq_s16(q1s16, q6s16); + *q10s16 = vaddq_s16(q2s16, q5s16); + *q11s16 = vaddq_s16(q3s16, q4s16); + *q12s16 = vsubq_s16(q3s16, q4s16); + *q13s16 = vsubq_s16(q2s16, q5s16); + *q14s16 = vsubq_s16(q1s16, q6s16); + *q15s16 = vsubq_s16(q0s16, q7s16); + return; +} + +static INLINE void IADST8X8_1D( + int16x8_t *q8s16, + int16x8_t *q9s16, + int16x8_t *q10s16, + int16x8_t *q11s16, + int16x8_t *q12s16, + int16x8_t *q13s16, + int16x8_t *q14s16, + int16x8_t *q15s16) { + int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16, d6s16, d7s16; + int16x4_t d8s16, d9s16, d10s16, d11s16, d12s16, d13s16, d14s16, d15s16; + int16x4_t d16s16, d17s16, d18s16, d19s16, d20s16, d21s16, d22s16, d23s16; + int16x4_t d24s16, d25s16, d26s16, d27s16, d28s16, d29s16, d30s16, d31s16; + int16x8_t q2s16, q4s16, q5s16, q6s16; + int32x4_t q0s32, q1s32, q2s32, q3s32, q4s32, q5s32, q6s32, q7s32, q8s32; + int32x4_t q9s32, q10s32, q11s32, q12s32, q13s32, q14s32, q15s32; + + d16s16 = vget_low_s16(*q8s16); + d17s16 = vget_high_s16(*q8s16); + d18s16 = vget_low_s16(*q9s16); + d19s16 = vget_high_s16(*q9s16); + d20s16 = vget_low_s16(*q10s16); + d21s16 = vget_high_s16(*q10s16); + d22s16 = vget_low_s16(*q11s16); + d23s16 = vget_high_s16(*q11s16); + d24s16 = vget_low_s16(*q12s16); + d25s16 = vget_high_s16(*q12s16); + d26s16 = vget_low_s16(*q13s16); + d27s16 = vget_high_s16(*q13s16); + d28s16 = vget_low_s16(*q14s16); + d29s16 = vget_high_s16(*q14s16); + d30s16 = vget_low_s16(*q15s16); + d31s16 = vget_high_s16(*q15s16); + + d14s16 = 
vdup_n_s16(cospi_2_64); + d15s16 = vdup_n_s16(cospi_30_64); + + q1s32 = vmull_s16(d30s16, d14s16); + q2s32 = vmull_s16(d31s16, d14s16); + q3s32 = vmull_s16(d30s16, d15s16); + q4s32 = vmull_s16(d31s16, d15s16); + + d30s16 = vdup_n_s16(cospi_18_64); + d31s16 = vdup_n_s16(cospi_14_64); + + q1s32 = vmlal_s16(q1s32, d16s16, d15s16); + q2s32 = vmlal_s16(q2s32, d17s16, d15s16); + q3s32 = vmlsl_s16(q3s32, d16s16, d14s16); + q4s32 = vmlsl_s16(q4s32, d17s16, d14s16); + + q5s32 = vmull_s16(d22s16, d30s16); + q6s32 = vmull_s16(d23s16, d30s16); + q7s32 = vmull_s16(d22s16, d31s16); + q8s32 = vmull_s16(d23s16, d31s16); + + q5s32 = vmlal_s16(q5s32, d24s16, d31s16); + q6s32 = vmlal_s16(q6s32, d25s16, d31s16); + q7s32 = vmlsl_s16(q7s32, d24s16, d30s16); + q8s32 = vmlsl_s16(q8s32, d25s16, d30s16); + + q11s32 = vaddq_s32(q1s32, q5s32); + q12s32 = vaddq_s32(q2s32, q6s32); + q1s32 = vsubq_s32(q1s32, q5s32); + q2s32 = vsubq_s32(q2s32, q6s32); + + d22s16 = vqrshrn_n_s32(q11s32, 14); + d23s16 = vqrshrn_n_s32(q12s32, 14); + *q11s16 = vcombine_s16(d22s16, d23s16); + + q12s32 = vaddq_s32(q3s32, q7s32); + q15s32 = vaddq_s32(q4s32, q8s32); + q3s32 = vsubq_s32(q3s32, q7s32); + q4s32 = vsubq_s32(q4s32, q8s32); + + d2s16 = vqrshrn_n_s32(q1s32, 14); + d3s16 = vqrshrn_n_s32(q2s32, 14); + d24s16 = vqrshrn_n_s32(q12s32, 14); + d25s16 = vqrshrn_n_s32(q15s32, 14); + d6s16 = vqrshrn_n_s32(q3s32, 14); + d7s16 = vqrshrn_n_s32(q4s32, 14); + *q12s16 = vcombine_s16(d24s16, d25s16); + + d0s16 = vdup_n_s16(cospi_10_64); + d1s16 = vdup_n_s16(cospi_22_64); + q4s32 = vmull_s16(d26s16, d0s16); + q5s32 = vmull_s16(d27s16, d0s16); + q2s32 = vmull_s16(d26s16, d1s16); + q6s32 = vmull_s16(d27s16, d1s16); + + d30s16 = vdup_n_s16(cospi_26_64); + d31s16 = vdup_n_s16(cospi_6_64); + + q4s32 = vmlal_s16(q4s32, d20s16, d1s16); + q5s32 = vmlal_s16(q5s32, d21s16, d1s16); + q2s32 = vmlsl_s16(q2s32, d20s16, d0s16); + q6s32 = vmlsl_s16(q6s32, d21s16, d0s16); + + q0s32 = vmull_s16(d18s16, d30s16); + q13s32 = vmull_s16(d19s16, d30s16); + + q0s32 = vmlal_s16(q0s32, d28s16, d31s16); + q13s32 = vmlal_s16(q13s32, d29s16, d31s16); + + q10s32 = vmull_s16(d18s16, d31s16); + q9s32 = vmull_s16(d19s16, d31s16); + + q10s32 = vmlsl_s16(q10s32, d28s16, d30s16); + q9s32 = vmlsl_s16(q9s32, d29s16, d30s16); + + q14s32 = vaddq_s32(q2s32, q10s32); + q15s32 = vaddq_s32(q6s32, q9s32); + q2s32 = vsubq_s32(q2s32, q10s32); + q6s32 = vsubq_s32(q6s32, q9s32); + + d28s16 = vqrshrn_n_s32(q14s32, 14); + d29s16 = vqrshrn_n_s32(q15s32, 14); + d4s16 = vqrshrn_n_s32(q2s32, 14); + d5s16 = vqrshrn_n_s32(q6s32, 14); + *q14s16 = vcombine_s16(d28s16, d29s16); + + q9s32 = vaddq_s32(q4s32, q0s32); + q10s32 = vaddq_s32(q5s32, q13s32); + q4s32 = vsubq_s32(q4s32, q0s32); + q5s32 = vsubq_s32(q5s32, q13s32); + + d30s16 = vdup_n_s16(cospi_8_64); + d31s16 = vdup_n_s16(cospi_24_64); + + d18s16 = vqrshrn_n_s32(q9s32, 14); + d19s16 = vqrshrn_n_s32(q10s32, 14); + d8s16 = vqrshrn_n_s32(q4s32, 14); + d9s16 = vqrshrn_n_s32(q5s32, 14); + *q9s16 = vcombine_s16(d18s16, d19s16); + + q5s32 = vmull_s16(d2s16, d30s16); + q6s32 = vmull_s16(d3s16, d30s16); + q7s32 = vmull_s16(d2s16, d31s16); + q0s32 = vmull_s16(d3s16, d31s16); + + q5s32 = vmlal_s16(q5s32, d6s16, d31s16); + q6s32 = vmlal_s16(q6s32, d7s16, d31s16); + q7s32 = vmlsl_s16(q7s32, d6s16, d30s16); + q0s32 = vmlsl_s16(q0s32, d7s16, d30s16); + + q1s32 = vmull_s16(d4s16, d30s16); + q3s32 = vmull_s16(d5s16, d30s16); + q10s32 = vmull_s16(d4s16, d31s16); + q2s32 = vmull_s16(d5s16, d31s16); + + q1s32 = vmlsl_s16(q1s32, d8s16, d31s16); + q3s32 = vmlsl_s16(q3s32, d9s16, 
d31s16); + q10s32 = vmlal_s16(q10s32, d8s16, d30s16); + q2s32 = vmlal_s16(q2s32, d9s16, d30s16); + + *q8s16 = vaddq_s16(*q11s16, *q9s16); + *q11s16 = vsubq_s16(*q11s16, *q9s16); + q4s16 = vaddq_s16(*q12s16, *q14s16); + *q12s16 = vsubq_s16(*q12s16, *q14s16); + + q14s32 = vaddq_s32(q5s32, q1s32); + q15s32 = vaddq_s32(q6s32, q3s32); + q5s32 = vsubq_s32(q5s32, q1s32); + q6s32 = vsubq_s32(q6s32, q3s32); + + d18s16 = vqrshrn_n_s32(q14s32, 14); + d19s16 = vqrshrn_n_s32(q15s32, 14); + d10s16 = vqrshrn_n_s32(q5s32, 14); + d11s16 = vqrshrn_n_s32(q6s32, 14); + *q9s16 = vcombine_s16(d18s16, d19s16); + + q1s32 = vaddq_s32(q7s32, q10s32); + q3s32 = vaddq_s32(q0s32, q2s32); + q7s32 = vsubq_s32(q7s32, q10s32); + q0s32 = vsubq_s32(q0s32, q2s32); + + d28s16 = vqrshrn_n_s32(q1s32, 14); + d29s16 = vqrshrn_n_s32(q3s32, 14); + d14s16 = vqrshrn_n_s32(q7s32, 14); + d15s16 = vqrshrn_n_s32(q0s32, 14); + *q14s16 = vcombine_s16(d28s16, d29s16); + + d30s16 = vdup_n_s16(cospi_16_64); + + d22s16 = vget_low_s16(*q11s16); + d23s16 = vget_high_s16(*q11s16); + q2s32 = vmull_s16(d22s16, d30s16); + q3s32 = vmull_s16(d23s16, d30s16); + q13s32 = vmull_s16(d22s16, d30s16); + q1s32 = vmull_s16(d23s16, d30s16); + + d24s16 = vget_low_s16(*q12s16); + d25s16 = vget_high_s16(*q12s16); + q2s32 = vmlal_s16(q2s32, d24s16, d30s16); + q3s32 = vmlal_s16(q3s32, d25s16, d30s16); + q13s32 = vmlsl_s16(q13s32, d24s16, d30s16); + q1s32 = vmlsl_s16(q1s32, d25s16, d30s16); + + d4s16 = vqrshrn_n_s32(q2s32, 14); + d5s16 = vqrshrn_n_s32(q3s32, 14); + d24s16 = vqrshrn_n_s32(q13s32, 14); + d25s16 = vqrshrn_n_s32(q1s32, 14); + q2s16 = vcombine_s16(d4s16, d5s16); + *q12s16 = vcombine_s16(d24s16, d25s16); + + q13s32 = vmull_s16(d10s16, d30s16); + q1s32 = vmull_s16(d11s16, d30s16); + q11s32 = vmull_s16(d10s16, d30s16); + q0s32 = vmull_s16(d11s16, d30s16); + + q13s32 = vmlal_s16(q13s32, d14s16, d30s16); + q1s32 = vmlal_s16(q1s32, d15s16, d30s16); + q11s32 = vmlsl_s16(q11s32, d14s16, d30s16); + q0s32 = vmlsl_s16(q0s32, d15s16, d30s16); + + d20s16 = vqrshrn_n_s32(q13s32, 14); + d21s16 = vqrshrn_n_s32(q1s32, 14); + d12s16 = vqrshrn_n_s32(q11s32, 14); + d13s16 = vqrshrn_n_s32(q0s32, 14); + *q10s16 = vcombine_s16(d20s16, d21s16); + q6s16 = vcombine_s16(d12s16, d13s16); + + q5s16 = vdupq_n_s16(0); + + *q9s16 = vsubq_s16(q5s16, *q9s16); + *q11s16 = vsubq_s16(q5s16, q2s16); + *q13s16 = vsubq_s16(q5s16, q6s16); + *q15s16 = vsubq_s16(q5s16, q4s16); + return; +} + +void vp9_iht8x8_64_add_neon(const tran_low_t *input, uint8_t *dest, + int dest_stride, int tx_type) { + int i; + uint8_t *d1, *d2; + uint8x8_t d0u8, d1u8, d2u8, d3u8; + uint64x1_t d0u64, d1u64, d2u64, d3u64; + int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16, q13s16, q14s16, q15s16; + uint16x8_t q8u16, q9u16, q10u16, q11u16; + + q8s16 = vld1q_s16(input); + q9s16 = vld1q_s16(input + 8); + q10s16 = vld1q_s16(input + 8 * 2); + q11s16 = vld1q_s16(input + 8 * 3); + q12s16 = vld1q_s16(input + 8 * 4); + q13s16 = vld1q_s16(input + 8 * 5); + q14s16 = vld1q_s16(input + 8 * 6); + q15s16 = vld1q_s16(input + 8 * 7); + + TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, + &q12s16, &q13s16, &q14s16, &q15s16); + + switch (tx_type) { + case 0: // idct_idct is not supported. 
Fall back to C + vp9_iht8x8_64_add_c(input, dest, dest_stride, tx_type); + return; + break; + case 1: // iadst_idct + // generate IDCT constants + // GENERATE_IDCT_CONSTANTS + + // first transform rows + IDCT8x8_1D(&q8s16, &q9s16, &q10s16, &q11s16, + &q12s16, &q13s16, &q14s16, &q15s16); + + // transpose the matrix + TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, + &q12s16, &q13s16, &q14s16, &q15s16); + + // generate IADST constants + // GENERATE_IADST_CONSTANTS + + // then transform columns + IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, + &q12s16, &q13s16, &q14s16, &q15s16); + break; + case 2: // idct_iadst + // generate IADST constants + // GENERATE_IADST_CONSTANTS + + // first transform rows + IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, + &q12s16, &q13s16, &q14s16, &q15s16); + + // transpose the matrix + TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, + &q12s16, &q13s16, &q14s16, &q15s16); + + // generate IDCT constants + // GENERATE_IDCT_CONSTANTS + + // then transform columns + IDCT8x8_1D(&q8s16, &q9s16, &q10s16, &q11s16, + &q12s16, &q13s16, &q14s16, &q15s16); + break; + case 3: // iadst_iadst + // generate IADST constants + // GENERATE_IADST_CONSTANTS + + // first transform rows + IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, + &q12s16, &q13s16, &q14s16, &q15s16); + + // transpose the matrix + TRANSPOSE8X8(&q8s16, &q9s16, &q10s16, &q11s16, + &q12s16, &q13s16, &q14s16, &q15s16); + + // then transform columns + IADST8X8_1D(&q8s16, &q9s16, &q10s16, &q11s16, + &q12s16, &q13s16, &q14s16, &q15s16); + break; + default: // iadst_idct + assert(0); + break; + } + + q8s16 = vrshrq_n_s16(q8s16, 5); + q9s16 = vrshrq_n_s16(q9s16, 5); + q10s16 = vrshrq_n_s16(q10s16, 5); + q11s16 = vrshrq_n_s16(q11s16, 5); + q12s16 = vrshrq_n_s16(q12s16, 5); + q13s16 = vrshrq_n_s16(q13s16, 5); + q14s16 = vrshrq_n_s16(q14s16, 5); + q15s16 = vrshrq_n_s16(q15s16, 5); + + for (d1 = d2 = dest, i = 0; i < 2; i++) { + if (i != 0) { + q8s16 = q12s16; + q9s16 = q13s16; + q10s16 = q14s16; + q11s16 = q15s16; + } + + d0u64 = vld1_u64((uint64_t *)d1); + d1 += dest_stride; + d1u64 = vld1_u64((uint64_t *)d1); + d1 += dest_stride; + d2u64 = vld1_u64((uint64_t *)d1); + d1 += dest_stride; + d3u64 = vld1_u64((uint64_t *)d1); + d1 += dest_stride; + + q8u16 = vaddw_u8(vreinterpretq_u16_s16(q8s16), + vreinterpret_u8_u64(d0u64)); + q9u16 = vaddw_u8(vreinterpretq_u16_s16(q9s16), + vreinterpret_u8_u64(d1u64)); + q10u16 = vaddw_u8(vreinterpretq_u16_s16(q10s16), + vreinterpret_u8_u64(d2u64)); + q11u16 = vaddw_u8(vreinterpretq_u16_s16(q11s16), + vreinterpret_u8_u64(d3u64)); + + d0u8 = vqmovun_s16(vreinterpretq_s16_u16(q8u16)); + d1u8 = vqmovun_s16(vreinterpretq_s16_u16(q9u16)); + d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q10u16)); + d3u8 = vqmovun_s16(vreinterpretq_s16_u16(q11u16)); + + vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d0u8)); + d2 += dest_stride; + vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d1u8)); + d2 += dest_stride; + vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d2u8)); + d2 += dest_stride; + vst1_u64((uint64_t *)d2, vreinterpret_u64_u8(d3u8)); + d2 += dest_stride; + } + return; +} diff --git a/thirdparty/libvpx/vp9/common/vp9_alloccommon.c b/thirdparty/libvpx/vp9/common/vp9_alloccommon.c new file mode 100644 index 0000000000..7dd1005d3f --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_alloccommon.c @@ -0,0 +1,201 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "./vpx_config.h" +#include "vpx_mem/vpx_mem.h" + +#include "vp9/common/vp9_alloccommon.h" +#include "vp9/common/vp9_blockd.h" +#include "vp9/common/vp9_entropymode.h" +#include "vp9/common/vp9_entropymv.h" +#include "vp9/common/vp9_onyxc_int.h" + +// TODO(hkuang): Don't need to lock the whole pool after implementing atomic +// frame reference count. +void lock_buffer_pool(BufferPool *const pool) { +#if CONFIG_MULTITHREAD + pthread_mutex_lock(&pool->pool_mutex); +#else + (void)pool; +#endif +} + +void unlock_buffer_pool(BufferPool *const pool) { +#if CONFIG_MULTITHREAD + pthread_mutex_unlock(&pool->pool_mutex); +#else + (void)pool; +#endif +} + +void vp9_set_mb_mi(VP9_COMMON *cm, int width, int height) { + const int aligned_width = ALIGN_POWER_OF_TWO(width, MI_SIZE_LOG2); + const int aligned_height = ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2); + + cm->mi_cols = aligned_width >> MI_SIZE_LOG2; + cm->mi_rows = aligned_height >> MI_SIZE_LOG2; + cm->mi_stride = calc_mi_size(cm->mi_cols); + + cm->mb_cols = (cm->mi_cols + 1) >> 1; + cm->mb_rows = (cm->mi_rows + 1) >> 1; + cm->MBs = cm->mb_rows * cm->mb_cols; +} + +static int alloc_seg_map(VP9_COMMON *cm, int seg_map_size) { + int i; + + for (i = 0; i < NUM_PING_PONG_BUFFERS; ++i) { + cm->seg_map_array[i] = (uint8_t *)vpx_calloc(seg_map_size, 1); + if (cm->seg_map_array[i] == NULL) + return 1; + } + cm->seg_map_alloc_size = seg_map_size; + + // Init the index. + cm->seg_map_idx = 0; + cm->prev_seg_map_idx = 1; + + cm->current_frame_seg_map = cm->seg_map_array[cm->seg_map_idx]; + if (!cm->frame_parallel_decode) + cm->last_frame_seg_map = cm->seg_map_array[cm->prev_seg_map_idx]; + + return 0; +} + +static void free_seg_map(VP9_COMMON *cm) { + int i; + + for (i = 0; i < NUM_PING_PONG_BUFFERS; ++i) { + vpx_free(cm->seg_map_array[i]); + cm->seg_map_array[i] = NULL; + } + + cm->current_frame_seg_map = NULL; + + if (!cm->frame_parallel_decode) { + cm->last_frame_seg_map = NULL; + } +} + +void vp9_free_ref_frame_buffers(BufferPool *pool) { + int i; + + for (i = 0; i < FRAME_BUFFERS; ++i) { + if (pool->frame_bufs[i].ref_count > 0 && + pool->frame_bufs[i].raw_frame_buffer.data != NULL) { + pool->release_fb_cb(pool->cb_priv, &pool->frame_bufs[i].raw_frame_buffer); + pool->frame_bufs[i].ref_count = 0; + } + vpx_free(pool->frame_bufs[i].mvs); + pool->frame_bufs[i].mvs = NULL; + vpx_free_frame_buffer(&pool->frame_bufs[i].buf); + } +} + +void vp9_free_postproc_buffers(VP9_COMMON *cm) { +#if CONFIG_VP9_POSTPROC + vpx_free_frame_buffer(&cm->post_proc_buffer); + vpx_free_frame_buffer(&cm->post_proc_buffer_int); +#else + (void)cm; +#endif +} + +void vp9_free_context_buffers(VP9_COMMON *cm) { + cm->free_mi(cm); + free_seg_map(cm); + vpx_free(cm->above_context); + cm->above_context = NULL; + vpx_free(cm->above_seg_context); + cm->above_seg_context = NULL; + vpx_free(cm->lf.lfm); + cm->lf.lfm = NULL; +} + + +int vp9_alloc_loop_filter(VP9_COMMON *cm) { + vpx_free(cm->lf.lfm); + // Each lfm holds bit masks for all the 8x8 blocks in a 64x64 region. The + // stride and rows are rounded up / truncated to a multiple of 8. 
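+ // Illustrative arithmetic (an added example, not from the upstream
+ // source): a 1920x1080 frame has mi_cols = 240 and mi_rows = 135 in 8x8
+ // MI units, so lfm_stride = (240 + 7) >> 3 = 30 and the calloc below
+ // covers ((135 + 7) >> 3) * 30 = 17 * 30 = 510 LOOP_FILTER_MASK entries,
+ // one per 64x64 superblock.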
+ cm->lf.lfm_stride = (cm->mi_cols + (MI_BLOCK_SIZE - 1)) >> 3; + cm->lf.lfm = (LOOP_FILTER_MASK *)vpx_calloc( + ((cm->mi_rows + (MI_BLOCK_SIZE - 1)) >> 3) * cm->lf.lfm_stride, + sizeof(*cm->lf.lfm)); + if (!cm->lf.lfm) + return 1; + return 0; +} + +int vp9_alloc_context_buffers(VP9_COMMON *cm, int width, int height) { + int new_mi_size; + + vp9_set_mb_mi(cm, width, height); + new_mi_size = cm->mi_stride * calc_mi_size(cm->mi_rows); + if (cm->mi_alloc_size < new_mi_size) { + cm->free_mi(cm); + if (cm->alloc_mi(cm, new_mi_size)) + goto fail; + } + + if (cm->seg_map_alloc_size < cm->mi_rows * cm->mi_cols) { + // Create the segmentation map structure and set to 0. + free_seg_map(cm); + if (alloc_seg_map(cm, cm->mi_rows * cm->mi_cols)) + goto fail; + } + + if (cm->above_context_alloc_cols < cm->mi_cols) { + vpx_free(cm->above_context); + cm->above_context = (ENTROPY_CONTEXT *)vpx_calloc( + 2 * mi_cols_aligned_to_sb(cm->mi_cols) * MAX_MB_PLANE, + sizeof(*cm->above_context)); + if (!cm->above_context) goto fail; + + vpx_free(cm->above_seg_context); + cm->above_seg_context = (PARTITION_CONTEXT *)vpx_calloc( + mi_cols_aligned_to_sb(cm->mi_cols), sizeof(*cm->above_seg_context)); + if (!cm->above_seg_context) goto fail; + cm->above_context_alloc_cols = cm->mi_cols; + } + + if (vp9_alloc_loop_filter(cm)) + goto fail; + + return 0; + + fail: + vp9_free_context_buffers(cm); + return 1; +} + +void vp9_remove_common(VP9_COMMON *cm) { + vp9_free_context_buffers(cm); + + vpx_free(cm->fc); + cm->fc = NULL; + vpx_free(cm->frame_contexts); + cm->frame_contexts = NULL; +} + +void vp9_init_context_buffers(VP9_COMMON *cm) { + cm->setup_mi(cm); + if (cm->last_frame_seg_map && !cm->frame_parallel_decode) + memset(cm->last_frame_seg_map, 0, cm->mi_rows * cm->mi_cols); +} + +void vp9_swap_current_and_last_seg_map(VP9_COMMON *cm) { + // Swap indices. + const int tmp = cm->seg_map_idx; + cm->seg_map_idx = cm->prev_seg_map_idx; + cm->prev_seg_map_idx = tmp; + + cm->current_frame_seg_map = cm->seg_map_array[cm->seg_map_idx]; + cm->last_frame_seg_map = cm->seg_map_array[cm->prev_seg_map_idx]; +} diff --git a/thirdparty/libvpx/vp9/common/vp9_alloccommon.h b/thirdparty/libvpx/vp9/common/vp9_alloccommon.h new file mode 100644 index 0000000000..e53955b998 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_alloccommon.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VP9_COMMON_VP9_ALLOCCOMMON_H_ +#define VP9_COMMON_VP9_ALLOCCOMMON_H_ + +#define INVALID_IDX -1 // Invalid buffer index. 
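+
+// Typical call order for the allocation API declared below (an illustrative
+// sketch reconstructed from vp9_alloccommon.c, not a normative part of this
+// header):
+//
+//   if (vp9_alloc_context_buffers(cm, width, height))  // nonzero on failure
+//     vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "alloc failed");
+//   vp9_init_context_buffers(cm);
+//   /* ... decode frames ... */
+//   vp9_remove_common(cm);  // frees context buffers and frame contexts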
+ +#ifdef __cplusplus +extern "C" { +#endif + +struct VP9Common; +struct BufferPool; + +void vp9_remove_common(struct VP9Common *cm); + +int vp9_alloc_loop_filter(struct VP9Common *cm); +int vp9_alloc_context_buffers(struct VP9Common *cm, int width, int height); +void vp9_init_context_buffers(struct VP9Common *cm); +void vp9_free_context_buffers(struct VP9Common *cm); + +void vp9_free_ref_frame_buffers(struct BufferPool *pool); +void vp9_free_postproc_buffers(struct VP9Common *cm); + +int vp9_alloc_state_buffers(struct VP9Common *cm, int width, int height); +void vp9_free_state_buffers(struct VP9Common *cm); + +void vp9_set_mb_mi(struct VP9Common *cm, int width, int height); + +void vp9_swap_current_and_last_seg_map(struct VP9Common *cm); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_COMMON_VP9_ALLOCCOMMON_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_blockd.c b/thirdparty/libvpx/vp9/common/vp9_blockd.c new file mode 100644 index 0000000000..7bab27d4fd --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_blockd.c @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "vp9/common/vp9_blockd.h" + +PREDICTION_MODE vp9_left_block_mode(const MODE_INFO *cur_mi, + const MODE_INFO *left_mi, int b) { + if (b == 0 || b == 2) { + if (!left_mi || is_inter_block(left_mi)) + return DC_PRED; + + return get_y_mode(left_mi, b + 1); + } else { + assert(b == 1 || b == 3); + return cur_mi->bmi[b - 1].as_mode; + } +} + +PREDICTION_MODE vp9_above_block_mode(const MODE_INFO *cur_mi, + const MODE_INFO *above_mi, int b) { + if (b == 0 || b == 1) { + if (!above_mi || is_inter_block(above_mi)) + return DC_PRED; + + return get_y_mode(above_mi, b + 2); + } else { + assert(b == 2 || b == 3); + return cur_mi->bmi[b - 2].as_mode; + } +} + +void vp9_foreach_transformed_block_in_plane( + const MACROBLOCKD *const xd, BLOCK_SIZE bsize, int plane, + foreach_transformed_block_visitor visit, void *arg) { + const struct macroblockd_plane *const pd = &xd->plane[plane]; + const MODE_INFO* mi = xd->mi[0]; + // block and transform sizes, in number of 4x4 blocks log 2 ("*_b") + // 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8 + // transform size varies per plane, look it up in a common way. + const TX_SIZE tx_size = plane ? get_uv_tx_size(mi, pd) + : mi->tx_size; + const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); + const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; + const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; + const int step = 1 << (tx_size << 1); + int i = 0, r, c; + + // If mb_to_right_edge is < 0 we are in a situation in which + // the current block size extends into the UMV and we won't + // visit the sub blocks that are wholly within the UMV. + const int max_blocks_wide = num_4x4_w + (xd->mb_to_right_edge >= 0 ? 0 : + xd->mb_to_right_edge >> (5 + pd->subsampling_x)); + const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ? 
0 : + xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); + const int extra_step = ((num_4x4_w - max_blocks_wide) >> tx_size) * step; + + // Keep track of the row and column of the blocks we use so that we know + // if we are in the unrestricted motion border. + for (r = 0; r < max_blocks_high; r += (1 << tx_size)) { + // Skip visiting the sub blocks that are wholly within the UMV. + for (c = 0; c < max_blocks_wide; c += (1 << tx_size)) { + visit(plane, i, plane_bsize, tx_size, arg); + i += step; + } + i += extra_step; + } +} + +void vp9_foreach_transformed_block(const MACROBLOCKD* const xd, + BLOCK_SIZE bsize, + foreach_transformed_block_visitor visit, + void *arg) { + int plane; + + for (plane = 0; plane < MAX_MB_PLANE; ++plane) + vp9_foreach_transformed_block_in_plane(xd, bsize, plane, visit, arg); +} + +void vp9_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd, + BLOCK_SIZE plane_bsize, TX_SIZE tx_size, int has_eob, + int aoff, int loff) { + ENTROPY_CONTEXT *const a = pd->above_context + aoff; + ENTROPY_CONTEXT *const l = pd->left_context + loff; + const int tx_size_in_blocks = 1 << tx_size; + + // above + if (has_eob && xd->mb_to_right_edge < 0) { + int i; + const int blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize] + + (xd->mb_to_right_edge >> (5 + pd->subsampling_x)); + int above_contexts = tx_size_in_blocks; + if (above_contexts + aoff > blocks_wide) + above_contexts = blocks_wide - aoff; + + for (i = 0; i < above_contexts; ++i) + a[i] = has_eob; + for (i = above_contexts; i < tx_size_in_blocks; ++i) + a[i] = 0; + } else { + memset(a, has_eob, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks); + } + + // left + if (has_eob && xd->mb_to_bottom_edge < 0) { + int i; + const int blocks_high = num_4x4_blocks_high_lookup[plane_bsize] + + (xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); + int left_contexts = tx_size_in_blocks; + if (left_contexts + loff > blocks_high) + left_contexts = blocks_high - loff; + + for (i = 0; i < left_contexts; ++i) + l[i] = has_eob; + for (i = left_contexts; i < tx_size_in_blocks; ++i) + l[i] = 0; + } else { + memset(l, has_eob, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks); + } +} + +void vp9_setup_block_planes(MACROBLOCKD *xd, int ss_x, int ss_y) { + int i; + + for (i = 0; i < MAX_MB_PLANE; i++) { + xd->plane[i].subsampling_x = i ? ss_x : 0; + xd->plane[i].subsampling_y = i ? ss_y : 0; + } +} diff --git a/thirdparty/libvpx/vp9/common/vp9_blockd.h b/thirdparty/libvpx/vp9/common/vp9_blockd.h new file mode 100644 index 0000000000..3d26fb2b5d --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_blockd.h @@ -0,0 +1,305 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + + +#ifndef VP9_COMMON_VP9_BLOCKD_H_ +#define VP9_COMMON_VP9_BLOCKD_H_ + +#include "./vpx_config.h" + +#include "vpx_dsp/vpx_dsp_common.h" +#include "vpx_ports/mem.h" +#include "vpx_scale/yv12config.h" + +#include "vp9/common/vp9_common_data.h" +#include "vp9/common/vp9_entropy.h" +#include "vp9/common/vp9_entropymode.h" +#include "vp9/common/vp9_mv.h" +#include "vp9/common/vp9_scale.h" +#include "vp9/common/vp9_seg_common.h" +#include "vp9/common/vp9_tile_common.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define MAX_MB_PLANE 3 + +typedef enum { + KEY_FRAME = 0, + INTER_FRAME = 1, + FRAME_TYPES, +} FRAME_TYPE; + +static INLINE int is_inter_mode(PREDICTION_MODE mode) { + return mode >= NEARESTMV && mode <= NEWMV; +} + +/* For keyframes, intra block modes are predicted by the (already decoded) + modes for the Y blocks to the left and above us; for interframes, there + is a single probability table. */ + +typedef struct { + PREDICTION_MODE as_mode; + int_mv as_mv[2]; // first, second inter predictor motion vectors +} b_mode_info; + +// Note that the rate-distortion optimization loop, bit-stream writer, and +// decoder implementation modules critically rely on the defined entry values +// specified herein. They should be refactored concurrently. + +#define NONE -1 +#define INTRA_FRAME 0 +#define LAST_FRAME 1 +#define GOLDEN_FRAME 2 +#define ALTREF_FRAME 3 +#define MAX_REF_FRAMES 4 +typedef int8_t MV_REFERENCE_FRAME; + +// This structure now relates to 8x8 block regions. +typedef struct MODE_INFO { + // Common for both INTER and INTRA blocks + BLOCK_SIZE sb_type; + PREDICTION_MODE mode; + TX_SIZE tx_size; + int8_t skip; + int8_t segment_id; + int8_t seg_id_predicted; // valid only when temporal_update is enabled + + // Only for INTRA blocks + PREDICTION_MODE uv_mode; + + // Only for INTER blocks + INTERP_FILTER interp_filter; + MV_REFERENCE_FRAME ref_frame[2]; + + // TODO(slavarnway): Delete and use bmi[3].as_mv[] instead. + int_mv mv[2]; + + b_mode_info bmi[4]; +} MODE_INFO; + +static INLINE PREDICTION_MODE get_y_mode(const MODE_INFO *mi, int block) { + return mi->sb_type < BLOCK_8X8 ? 
mi->bmi[block].as_mode + : mi->mode; +} + +static INLINE int is_inter_block(const MODE_INFO *mi) { + return mi->ref_frame[0] > INTRA_FRAME; +} + +static INLINE int has_second_ref(const MODE_INFO *mi) { + return mi->ref_frame[1] > INTRA_FRAME; +} + +PREDICTION_MODE vp9_left_block_mode(const MODE_INFO *cur_mi, + const MODE_INFO *left_mi, int b); + +PREDICTION_MODE vp9_above_block_mode(const MODE_INFO *cur_mi, + const MODE_INFO *above_mi, int b); + +enum mv_precision { + MV_PRECISION_Q3, + MV_PRECISION_Q4 +}; + +struct buf_2d { + uint8_t *buf; + int stride; +}; + +struct macroblockd_plane { + tran_low_t *dqcoeff; + int subsampling_x; + int subsampling_y; + struct buf_2d dst; + struct buf_2d pre[2]; + ENTROPY_CONTEXT *above_context; + ENTROPY_CONTEXT *left_context; + int16_t seg_dequant[MAX_SEGMENTS][2]; + + // number of 4x4s in current block + uint16_t n4_w, n4_h; + // log2 of n4_w, n4_h + uint8_t n4_wl, n4_hl; + + // encoder + const int16_t *dequant; +}; + +#define BLOCK_OFFSET(x, i) ((x) + (i) * 16) + +typedef struct RefBuffer { + // TODO(dkovalev): idx is not really required and should be removed, now it + // is used in vp9_onyxd_if.c + int idx; + YV12_BUFFER_CONFIG *buf; + struct scale_factors sf; +} RefBuffer; + +typedef struct macroblockd { + struct macroblockd_plane plane[MAX_MB_PLANE]; + uint8_t bmode_blocks_wl; + uint8_t bmode_blocks_hl; + + FRAME_COUNTS *counts; + TileInfo tile; + + int mi_stride; + + MODE_INFO **mi; + MODE_INFO *left_mi; + MODE_INFO *above_mi; + + unsigned int max_blocks_wide; + unsigned int max_blocks_high; + + const vpx_prob (*partition_probs)[PARTITION_TYPES - 1]; + + /* Distance of MB away from frame edges */ + int mb_to_left_edge; + int mb_to_right_edge; + int mb_to_top_edge; + int mb_to_bottom_edge; + + FRAME_CONTEXT *fc; + + /* pointers to reference frames */ + RefBuffer *block_refs[2]; + + /* pointer to current frame */ + const YV12_BUFFER_CONFIG *cur_buf; + + ENTROPY_CONTEXT *above_context[MAX_MB_PLANE]; + ENTROPY_CONTEXT left_context[MAX_MB_PLANE][16]; + + PARTITION_CONTEXT *above_seg_context; + PARTITION_CONTEXT left_seg_context[8]; + +#if CONFIG_VP9_HIGHBITDEPTH + /* Bit depth: 8, 10, 12 */ + int bd; +#endif + + int lossless; + int corrupted; + + struct vpx_internal_error_info *error_info; +} MACROBLOCKD; + +static INLINE PLANE_TYPE get_plane_type(int plane) { + return (PLANE_TYPE)(plane > 0); +} + +static INLINE BLOCK_SIZE get_subsize(BLOCK_SIZE bsize, + PARTITION_TYPE partition) { + return subsize_lookup[partition][bsize]; +} + +extern const TX_TYPE intra_mode_to_tx_type_lookup[INTRA_MODES]; + +static INLINE TX_TYPE get_tx_type(PLANE_TYPE plane_type, + const MACROBLOCKD *xd) { + const MODE_INFO *const mi = xd->mi[0]; + + if (plane_type != PLANE_TYPE_Y || xd->lossless || is_inter_block(mi)) + return DCT_DCT; + + return intra_mode_to_tx_type_lookup[mi->mode]; +} + +static INLINE TX_TYPE get_tx_type_4x4(PLANE_TYPE plane_type, + const MACROBLOCKD *xd, int ib) { + const MODE_INFO *const mi = xd->mi[0]; + + if (plane_type != PLANE_TYPE_Y || xd->lossless || is_inter_block(mi)) + return DCT_DCT; + + return intra_mode_to_tx_type_lookup[get_y_mode(mi, ib)]; +} + +void vp9_setup_block_planes(MACROBLOCKD *xd, int ss_x, int ss_y); + +static INLINE TX_SIZE get_uv_tx_size_impl(TX_SIZE y_tx_size, BLOCK_SIZE bsize, + int xss, int yss) { + if (bsize < BLOCK_8X8) { + return TX_4X4; + } else { + const BLOCK_SIZE plane_bsize = ss_size_lookup[bsize][xss][yss]; + return VPXMIN(y_tx_size, max_txsize_lookup[plane_bsize]); + } +} + +static INLINE TX_SIZE get_uv_tx_size(const 
MODE_INFO *mi, + const struct macroblockd_plane *pd) { + return get_uv_tx_size_impl(mi->tx_size, mi->sb_type, pd->subsampling_x, + pd->subsampling_y); +} + +static INLINE BLOCK_SIZE get_plane_block_size(BLOCK_SIZE bsize, + const struct macroblockd_plane *pd) { + return ss_size_lookup[bsize][pd->subsampling_x][pd->subsampling_y]; +} + +static INLINE void reset_skip_context(MACROBLOCKD *xd, BLOCK_SIZE bsize) { + int i; + for (i = 0; i < MAX_MB_PLANE; i++) { + struct macroblockd_plane *const pd = &xd->plane[i]; + const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); + memset(pd->above_context, 0, + sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide_lookup[plane_bsize]); + memset(pd->left_context, 0, + sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high_lookup[plane_bsize]); + } +} + +static INLINE const vpx_prob *get_y_mode_probs(const MODE_INFO *mi, + const MODE_INFO *above_mi, + const MODE_INFO *left_mi, + int block) { + const PREDICTION_MODE above = vp9_above_block_mode(mi, above_mi, block); + const PREDICTION_MODE left = vp9_left_block_mode(mi, left_mi, block); + return vp9_kf_y_mode_prob[above][left]; +} + +typedef void (*foreach_transformed_block_visitor)(int plane, int block, + BLOCK_SIZE plane_bsize, + TX_SIZE tx_size, + void *arg); + +void vp9_foreach_transformed_block_in_plane( + const MACROBLOCKD *const xd, BLOCK_SIZE bsize, int plane, + foreach_transformed_block_visitor visit, void *arg); + + +void vp9_foreach_transformed_block( + const MACROBLOCKD* const xd, BLOCK_SIZE bsize, + foreach_transformed_block_visitor visit, void *arg); + +static INLINE void txfrm_block_to_raster_xy(BLOCK_SIZE plane_bsize, + TX_SIZE tx_size, int block, + int *x, int *y) { + const int bwl = b_width_log2_lookup[plane_bsize]; + const int tx_cols_log2 = bwl - tx_size; + const int tx_cols = 1 << tx_cols_log2; + const int raster_mb = block >> (tx_size << 1); + *x = (raster_mb & (tx_cols - 1)) << tx_size; + *y = (raster_mb >> tx_cols_log2) << tx_size; +} + +void vp9_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd, + BLOCK_SIZE plane_bsize, TX_SIZE tx_size, int has_eob, + int aoff, int loff); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_COMMON_VP9_BLOCKD_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_common.h b/thirdparty/libvpx/vp9/common/vp9_common.h new file mode 100644 index 0000000000..908fa80a31 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_common.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_COMMON_VP9_COMMON_H_ +#define VP9_COMMON_VP9_COMMON_H_ + +/* Interface header for common constant data structures and lookup tables */ + +#include <assert.h> + +#include "./vpx_config.h" +#include "vpx_dsp/vpx_dsp_common.h" +#include "vpx_mem/vpx_mem.h" +#include "vpx/vpx_integer.h" +#include "vpx_ports/bitops.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// Only need this for fixed-size arrays, for structs just assign. +#define vp9_copy(dest, src) { \ + assert(sizeof(dest) == sizeof(src)); \ + memcpy(dest, src, sizeof(src)); \ + } + +// Use this for variably-sized arrays. 
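+// (Illustrative example with hypothetical names: vp9_copy_array(dst_probs,
+// src_probs, n) asserts that the element sizes match and copies n elements
+// with memcpy.)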
+#define vp9_copy_array(dest, src, n) { \ + assert(sizeof(*dest) == sizeof(*src)); \ + memcpy(dest, src, n * sizeof(*src)); \ + } + +#define vp9_zero(dest) memset(&(dest), 0, sizeof(dest)) +#define vp9_zero_array(dest, n) memset(dest, 0, n * sizeof(*dest)) + +static INLINE int get_unsigned_bits(unsigned int num_values) { + return num_values > 0 ? get_msb(num_values) + 1 : 0; +} + +#if CONFIG_DEBUG +#define CHECK_MEM_ERROR(cm, lval, expr) do { \ + lval = (expr); \ + if (!lval) \ + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, \ + "Failed to allocate "#lval" at %s:%d", \ + __FILE__, __LINE__); \ + } while (0) +#else +#define CHECK_MEM_ERROR(cm, lval, expr) do { \ + lval = (expr); \ + if (!lval) \ + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, \ + "Failed to allocate "#lval); \ + } while (0) +#endif + +#define VP9_SYNC_CODE_0 0x49 +#define VP9_SYNC_CODE_1 0x83 +#define VP9_SYNC_CODE_2 0x42 + +#define VP9_FRAME_MARKER 0x2 + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_COMMON_VP9_COMMON_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_common_data.c b/thirdparty/libvpx/vp9/common/vp9_common_data.c new file mode 100644 index 0000000000..3409d04844 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_common_data.c @@ -0,0 +1,176 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "vp9/common/vp9_common_data.h" +#include "vpx_dsp/vpx_dsp_common.h" + +// Log 2 conversion lookup tables for block width and height +const uint8_t b_width_log2_lookup[BLOCK_SIZES] = + {0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4}; +const uint8_t b_height_log2_lookup[BLOCK_SIZES] = + {0, 1, 0, 1, 2, 1, 2, 3, 2, 3, 4, 3, 4}; +const uint8_t num_4x4_blocks_wide_lookup[BLOCK_SIZES] = + {1, 1, 2, 2, 2, 4, 4, 4, 8, 8, 8, 16, 16}; +const uint8_t num_4x4_blocks_high_lookup[BLOCK_SIZES] = + {1, 2, 1, 2, 4, 2, 4, 8, 4, 8, 16, 8, 16}; +// Log 2 conversion lookup tables for modeinfo width and height +const uint8_t mi_width_log2_lookup[BLOCK_SIZES] = + {0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3}; +const uint8_t num_8x8_blocks_wide_lookup[BLOCK_SIZES] = + {1, 1, 1, 1, 1, 2, 2, 2, 4, 4, 4, 8, 8}; +const uint8_t num_8x8_blocks_high_lookup[BLOCK_SIZES] = + {1, 1, 1, 1, 2, 1, 2, 4, 2, 4, 8, 4, 8}; + +// VPXMIN(3, VPXMIN(b_width_log2(bsize), b_height_log2(bsize))) +const uint8_t size_group_lookup[BLOCK_SIZES] = + {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3}; + +const uint8_t num_pels_log2_lookup[BLOCK_SIZES] = + {4, 5, 5, 6, 7, 7, 8, 9, 9, 10, 11, 11, 12}; + +const PARTITION_TYPE partition_lookup[][BLOCK_SIZES] = { + { // 4X4 + // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 + PARTITION_NONE, PARTITION_INVALID, PARTITION_INVALID, + PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, + PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, + PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, + PARTITION_INVALID + }, { // 8X8 + // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 + PARTITION_SPLIT, PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE, + PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, + PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, + PARTITION_INVALID, PARTITION_INVALID, 
PARTITION_INVALID + }, { // 16X16 + // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 + PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, + PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE, PARTITION_INVALID, + PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, + PARTITION_INVALID, PARTITION_INVALID + }, { // 32X32 + // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 + PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, + PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_VERT, + PARTITION_HORZ, PARTITION_NONE, PARTITION_INVALID, + PARTITION_INVALID, PARTITION_INVALID + }, { // 64X64 + // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 + PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, + PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, + PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_VERT, PARTITION_HORZ, + PARTITION_NONE + } +}; + +const BLOCK_SIZE subsize_lookup[PARTITION_TYPES][BLOCK_SIZES] = { + { // PARTITION_NONE + BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, + BLOCK_8X8, BLOCK_8X16, BLOCK_16X8, + BLOCK_16X16, BLOCK_16X32, BLOCK_32X16, + BLOCK_32X32, BLOCK_32X64, BLOCK_64X32, + BLOCK_64X64, + }, { // PARTITION_HORZ + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, + BLOCK_8X4, BLOCK_INVALID, BLOCK_INVALID, + BLOCK_16X8, BLOCK_INVALID, BLOCK_INVALID, + BLOCK_32X16, BLOCK_INVALID, BLOCK_INVALID, + BLOCK_64X32, + }, { // PARTITION_VERT + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, + BLOCK_4X8, BLOCK_INVALID, BLOCK_INVALID, + BLOCK_8X16, BLOCK_INVALID, BLOCK_INVALID, + BLOCK_16X32, BLOCK_INVALID, BLOCK_INVALID, + BLOCK_32X64, + }, { // PARTITION_SPLIT + BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, + BLOCK_4X4, BLOCK_INVALID, BLOCK_INVALID, + BLOCK_8X8, BLOCK_INVALID, BLOCK_INVALID, + BLOCK_16X16, BLOCK_INVALID, BLOCK_INVALID, + BLOCK_32X32, + } +}; + +const TX_SIZE max_txsize_lookup[BLOCK_SIZES] = { + TX_4X4, TX_4X4, TX_4X4, + TX_8X8, TX_8X8, TX_8X8, + TX_16X16, TX_16X16, TX_16X16, + TX_32X32, TX_32X32, TX_32X32, TX_32X32 +}; + +const BLOCK_SIZE txsize_to_bsize[TX_SIZES] = { + BLOCK_4X4, // TX_4X4 + BLOCK_8X8, // TX_8X8 + BLOCK_16X16, // TX_16X16 + BLOCK_32X32, // TX_32X32 +}; + +const TX_SIZE tx_mode_to_biggest_tx_size[TX_MODES] = { + TX_4X4, // ONLY_4X4 + TX_8X8, // ALLOW_8X8 + TX_16X16, // ALLOW_16X16 + TX_32X32, // ALLOW_32X32 + TX_32X32, // TX_MODE_SELECT +}; + +const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES][2][2] = { +// ss_x == 0 ss_x == 0 ss_x == 1 ss_x == 1 +// ss_y == 0 ss_y == 1 ss_y == 0 ss_y == 1 + {{BLOCK_4X4, BLOCK_INVALID}, {BLOCK_INVALID, BLOCK_INVALID}}, + {{BLOCK_4X8, BLOCK_4X4}, {BLOCK_INVALID, BLOCK_INVALID}}, + {{BLOCK_8X4, BLOCK_INVALID}, {BLOCK_4X4, BLOCK_INVALID}}, + {{BLOCK_8X8, BLOCK_8X4}, {BLOCK_4X8, BLOCK_4X4}}, + {{BLOCK_8X16, BLOCK_8X8}, {BLOCK_INVALID, BLOCK_4X8}}, + {{BLOCK_16X8, BLOCK_INVALID}, {BLOCK_8X8, BLOCK_8X4}}, + {{BLOCK_16X16, BLOCK_16X8}, {BLOCK_8X16, BLOCK_8X8}}, + {{BLOCK_16X32, BLOCK_16X16}, {BLOCK_INVALID, BLOCK_8X16}}, + {{BLOCK_32X16, BLOCK_INVALID}, {BLOCK_16X16, BLOCK_16X8}}, + {{BLOCK_32X32, BLOCK_32X16}, {BLOCK_16X32, BLOCK_16X16}}, + {{BLOCK_32X64, BLOCK_32X32}, {BLOCK_INVALID, BLOCK_16X32}}, + {{BLOCK_64X32, BLOCK_INVALID}, {BLOCK_32X32, BLOCK_32X16}}, + {{BLOCK_64X64, BLOCK_64X32}, {BLOCK_32X64, BLOCK_32X32}}, +}; + +// Generates 4 bit field in which each bit set to 1 represents +// a blocksize partition 1111 means we split 64x64, 32x32, 16x16 +// and 8x8. 
1000 means we just split the 64x64 to 32x32 +const struct { + PARTITION_CONTEXT above; + PARTITION_CONTEXT left; +} partition_context_lookup[BLOCK_SIZES]= { + {15, 15}, // 4X4 - {0b1111, 0b1111} + {15, 14}, // 4X8 - {0b1111, 0b1110} + {14, 15}, // 8X4 - {0b1110, 0b1111} + {14, 14}, // 8X8 - {0b1110, 0b1110} + {14, 12}, // 8X16 - {0b1110, 0b1100} + {12, 14}, // 16X8 - {0b1100, 0b1110} + {12, 12}, // 16X16 - {0b1100, 0b1100} + {12, 8 }, // 16X32 - {0b1100, 0b1000} + {8, 12}, // 32X16 - {0b1000, 0b1100} + {8, 8 }, // 32X32 - {0b1000, 0b1000} + {8, 0 }, // 32X64 - {0b1000, 0b0000} + {0, 8 }, // 64X32 - {0b0000, 0b1000} + {0, 0 }, // 64X64 - {0b0000, 0b0000} +}; + +#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH +const uint8_t need_top_left[INTRA_MODES] = { + 0, // DC_PRED + 0, // V_PRED + 0, // H_PRED + 0, // D45_PRED + 1, // D135_PRED + 1, // D117_PRED + 1, // D153_PRED + 0, // D207_PRED + 0, // D63_PRED + 1, // TM_PRED +}; +#endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH diff --git a/thirdparty/libvpx/vp9/common/vp9_common_data.h b/thirdparty/libvpx/vp9/common/vp9_common_data.h new file mode 100644 index 0000000000..0ae24dad54 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_common_data.h @@ -0,0 +1,44 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_COMMON_VP9_COMMON_DATA_H_ +#define VP9_COMMON_VP9_COMMON_DATA_H_ + +#include "vp9/common/vp9_enums.h" +#include "vpx/vpx_integer.h" + +#ifdef __cplusplus +extern "C" { +#endif + +extern const uint8_t b_width_log2_lookup[BLOCK_SIZES]; +extern const uint8_t b_height_log2_lookup[BLOCK_SIZES]; +extern const uint8_t mi_width_log2_lookup[BLOCK_SIZES]; +extern const uint8_t num_8x8_blocks_wide_lookup[BLOCK_SIZES]; +extern const uint8_t num_8x8_blocks_high_lookup[BLOCK_SIZES]; +extern const uint8_t num_4x4_blocks_high_lookup[BLOCK_SIZES]; +extern const uint8_t num_4x4_blocks_wide_lookup[BLOCK_SIZES]; +extern const uint8_t size_group_lookup[BLOCK_SIZES]; +extern const uint8_t num_pels_log2_lookup[BLOCK_SIZES]; +extern const PARTITION_TYPE partition_lookup[][BLOCK_SIZES]; +extern const BLOCK_SIZE subsize_lookup[PARTITION_TYPES][BLOCK_SIZES]; +extern const TX_SIZE max_txsize_lookup[BLOCK_SIZES]; +extern const BLOCK_SIZE txsize_to_bsize[TX_SIZES]; +extern const TX_SIZE tx_mode_to_biggest_tx_size[TX_MODES]; +extern const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES][2][2]; +#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH +extern const uint8_t need_top_left[INTRA_MODES]; +#endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_COMMON_VP9_COMMON_DATA_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_debugmodes.c b/thirdparty/libvpx/vp9/common/vp9_debugmodes.c new file mode 100644 index 0000000000..d9c1fd9686 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_debugmodes.c @@ -0,0 +1,91 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <stdio.h> + +#include "vp9/common/vp9_blockd.h" +#include "vp9/common/vp9_onyxc_int.h" + +static void log_frame_info(VP9_COMMON *cm, const char *str, FILE *f) { + fprintf(f, "%s", str); + fprintf(f, "(Frame %d, Show:%d, Q:%d): \n", cm->current_video_frame, + cm->show_frame, cm->base_qindex); +} +/* This function dereferences a pointer to the mbmi structure + * and uses the passed-in member offset to print out the value of an integer + * member for each mi structure in the mode-info grid. + */ +static void print_mi_data(VP9_COMMON *cm, FILE *file, const char *descriptor, + size_t member_offset) { + int mi_row, mi_col; + MODE_INFO **mi = cm->mi_grid_visible; + int rows = cm->mi_rows; + int cols = cm->mi_cols; + char prefix = descriptor[0]; + + log_frame_info(cm, descriptor, file); + for (mi_row = 0; mi_row < rows; mi_row++) { + fprintf(file, "%c ", prefix); + for (mi_col = 0; mi_col < cols; mi_col++) { + fprintf(file, "%2d ", + *((int*) ((char *) (mi[0]) + + member_offset))); + mi++; + } + fprintf(file, "\n"); + mi += 8; + } + fprintf(file, "\n"); +} + +void vp9_print_modes_and_motion_vectors(VP9_COMMON *cm, const char *file) { + int mi_row; + int mi_col; + FILE *mvs = fopen(file, "a"); + MODE_INFO **mi = cm->mi_grid_visible; + int rows = cm->mi_rows; + int cols = cm->mi_cols; + + print_mi_data(cm, mvs, "Partitions:", offsetof(MODE_INFO, sb_type)); + print_mi_data(cm, mvs, "Modes:", offsetof(MODE_INFO, mode)); + print_mi_data(cm, mvs, "Ref frame:", offsetof(MODE_INFO, ref_frame[0])); + print_mi_data(cm, mvs, "Transform:", offsetof(MODE_INFO, tx_size)); + print_mi_data(cm, mvs, "UV Modes:", offsetof(MODE_INFO, uv_mode)); + + // Output skip information. + log_frame_info(cm, "Skips:", mvs); + for (mi_row = 0; mi_row < rows; mi_row++) { + fprintf(mvs, "S "); + for (mi_col = 0; mi_col < cols; mi_col++) { + fprintf(mvs, "%2d ", mi[0]->skip); + mi++; + } + fprintf(mvs, "\n"); + mi += 8; + } + fprintf(mvs, "\n"); + + // Output motion vectors. + log_frame_info(cm, "Vectors ", mvs); + mi = cm->mi_grid_visible; + for (mi_row = 0; mi_row < rows; mi_row++) { + fprintf(mvs, "V "); + for (mi_col = 0; mi_col < cols; mi_col++) { + fprintf(mvs, "%4d:%4d ", mi[0]->mv[0].as_mv.row, + mi[0]->mv[0].as_mv.col); + mi++; + } + fprintf(mvs, "\n"); + mi += 8; + } + fprintf(mvs, "\n"); + + fclose(mvs); +} diff --git a/thirdparty/libvpx/vp9/common/vp9_entropy.c b/thirdparty/libvpx/vp9/common/vp9_entropy.c new file mode 100644 index 0000000000..7b490af34f --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_entropy.c @@ -0,0 +1,802 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "vp9/common/vp9_entropy.h" +#include "vp9/common/vp9_blockd.h" +#include "vp9/common/vp9_onyxc_int.h" +#include "vp9/common/vp9_entropymode.h" +#include "vpx_mem/vpx_mem.h" +#include "vpx/vpx_integer.h" + +// Unconstrained Node Tree +const vpx_tree_index vp9_coef_con_tree[TREE_SIZE(ENTROPY_TOKENS)] = { + 2, 6, // 0 = LOW_VAL + -TWO_TOKEN, 4, // 1 = TWO + -THREE_TOKEN, -FOUR_TOKEN, // 2 = THREE + 8, 10, // 3 = HIGH_LOW + -CATEGORY1_TOKEN, -CATEGORY2_TOKEN, // 4 = CAT_ONE + 12, 14, // 5 = CAT_THREEFOUR + -CATEGORY3_TOKEN, -CATEGORY4_TOKEN, // 6 = CAT_THREE + -CATEGORY5_TOKEN, -CATEGORY6_TOKEN // 7 = CAT_FIVE +}; + +const vpx_prob vp9_cat1_prob[] = { 159 }; +const vpx_prob vp9_cat2_prob[] = { 165, 145 }; +const vpx_prob vp9_cat3_prob[] = { 173, 148, 140 }; +const vpx_prob vp9_cat4_prob[] = { 176, 155, 140, 135 }; +const vpx_prob vp9_cat5_prob[] = { 180, 157, 141, 134, 130 }; +const vpx_prob vp9_cat6_prob[] = { + 254, 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129 +}; +#if CONFIG_VP9_HIGHBITDEPTH +const vpx_prob vp9_cat6_prob_high12[] = { + 255, 255, 255, 255, 254, 254, 254, 252, 249, + 243, 230, 196, 177, 153, 140, 133, 130, 129 +}; +#endif + +const uint8_t vp9_coefband_trans_8x8plus[1024] = { + 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 5, + // beyond MAXBAND_INDEX+1 all values are filled as 5 + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, +}; + +const uint8_t vp9_coefband_trans_4x4[16] = { + 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5, +}; + +const uint8_t vp9_pt_energy_class[ENTROPY_TOKENS] = { + 0, 1, 2, 3, 3, 4, 4, 5, 5, 5, 5, 5 +}; + +// Model obtained from a 2-sided zero-centerd distribuition derived +// from a Pareto distribution. The cdf of the distribution is: +// cdf(x) = 0.5 + 0.5 * sgn(x) * [1 - {alpha/(alpha + |x|)} ^ beta] +// +// For a given beta and a given probablity of the 1-node, the alpha +// is first solved, and then the {alpha, beta} pair is used to generate +// the probabilities for the rest of the nodes. + +// beta = 8 + +// Every odd line in this table can be generated from the even lines +// by averaging : +// vp9_pareto8_full[l][node] = (vp9_pareto8_full[l-1][node] + +// vp9_pareto8_full[l+1][node] ) >> 1; +const vpx_prob vp9_pareto8_full[COEFF_PROB_MODELS][MODEL_NODES] = { + { 3, 86, 128, 6, 86, 23, 88, 29}, + { 6, 86, 128, 11, 87, 42, 91, 52}, + { 9, 86, 129, 17, 88, 61, 94, 76}, + { 12, 86, 129, 22, 88, 77, 97, 93}, + { 15, 87, 129, 28, 89, 93, 100, 110}, + { 17, 87, 129, 33, 90, 105, 103, 123}, + { 20, 88, 130, 38, 91, 118, 106, 136}, + { 23, 88, 130, 43, 91, 128, 108, 146}, + { 26, 89, 131, 48, 92, 139, 111, 156}, + { 28, 89, 131, 53, 93, 147, 114, 163}, + { 31, 90, 131, 58, 94, 156, 117, 171}, + { 34, 90, 131, 62, 94, 163, 119, 177}, + { 37, 90, 132, 66, 95, 171, 122, 184}, + { 39, 90, 132, 70, 96, 177, 124, 189}, + { 42, 91, 132, 75, 97, 183, 127, 194}, + { 44, 91, 132, 79, 97, 188, 129, 198}, + { 47, 92, 133, 83, 98, 193, 132, 202}, + { 49, 92, 133, 86, 99, 197, 134, 205}, + { 52, 93, 133, 90, 100, 201, 137, 208}, + { 54, 93, 133, 94, 100, 204, 139, 211}, + { 57, 94, 134, 98, 101, 208, 142, 214}, + { 59, 94, 134, 101, 102, 211, 144, 216}, + { 62, 94, 135, 105, 103, 214, 146, 218}, + { 64, 94, 135, 108, 103, 216, 148, 220}, + { 66, 95, 135, 111, 104, 219, 151, 222}, + { 68, 95, 135, 114, 105, 221, 153, 223}, + { 71, 96, 136, 117, 106, 224, 155, 225}, + { 73, 96, 136, 120, 106, 225, 157, 226}, + { 76, 97, 136, 123, 107, 227, 159, 228}, + { 78, 97, 136, 126, 108, 229, 160, 229}, + { 80, 98, 137, 129, 109, 231, 162, 231}, + { 82, 98, 137, 131, 109, 232, 164, 232}, + { 84, 98, 138, 134, 110, 234, 166, 233}, + { 86, 98, 138, 137, 111, 235, 168, 234}, + { 89, 99, 138, 140, 112, 236, 170, 235}, + { 91, 99, 138, 142, 112, 237, 171, 235}, + { 93, 100, 139, 145, 113, 238, 173, 236}, + { 95, 100, 139, 147, 114, 239, 174, 237}, + { 97, 101, 140, 149, 115, 240, 176, 238}, + { 99, 101, 140, 151, 115, 241, 177, 238}, + {101, 102, 140, 154, 116, 242, 179, 239}, + {103, 102, 
140, 156, 117, 242, 180, 239}, + {105, 103, 141, 158, 118, 243, 182, 240}, + {107, 103, 141, 160, 118, 243, 183, 240}, + {109, 104, 141, 162, 119, 244, 185, 241}, + {111, 104, 141, 164, 119, 244, 186, 241}, + {113, 104, 142, 166, 120, 245, 187, 242}, + {114, 104, 142, 168, 121, 245, 188, 242}, + {116, 105, 143, 170, 122, 246, 190, 243}, + {118, 105, 143, 171, 122, 246, 191, 243}, + {120, 106, 143, 173, 123, 247, 192, 244}, + {121, 106, 143, 175, 124, 247, 193, 244}, + {123, 107, 144, 177, 125, 248, 195, 244}, + {125, 107, 144, 178, 125, 248, 196, 244}, + {127, 108, 145, 180, 126, 249, 197, 245}, + {128, 108, 145, 181, 127, 249, 198, 245}, + {130, 109, 145, 183, 128, 249, 199, 245}, + {132, 109, 145, 184, 128, 249, 200, 245}, + {134, 110, 146, 186, 129, 250, 201, 246}, + {135, 110, 146, 187, 130, 250, 202, 246}, + {137, 111, 147, 189, 131, 251, 203, 246}, + {138, 111, 147, 190, 131, 251, 204, 246}, + {140, 112, 147, 192, 132, 251, 205, 247}, + {141, 112, 147, 193, 132, 251, 206, 247}, + {143, 113, 148, 194, 133, 251, 207, 247}, + {144, 113, 148, 195, 134, 251, 207, 247}, + {146, 114, 149, 197, 135, 252, 208, 248}, + {147, 114, 149, 198, 135, 252, 209, 248}, + {149, 115, 149, 199, 136, 252, 210, 248}, + {150, 115, 149, 200, 137, 252, 210, 248}, + {152, 115, 150, 201, 138, 252, 211, 248}, + {153, 115, 150, 202, 138, 252, 212, 248}, + {155, 116, 151, 204, 139, 253, 213, 249}, + {156, 116, 151, 205, 139, 253, 213, 249}, + {158, 117, 151, 206, 140, 253, 214, 249}, + {159, 117, 151, 207, 141, 253, 215, 249}, + {161, 118, 152, 208, 142, 253, 216, 249}, + {162, 118, 152, 209, 142, 253, 216, 249}, + {163, 119, 153, 210, 143, 253, 217, 249}, + {164, 119, 153, 211, 143, 253, 217, 249}, + {166, 120, 153, 212, 144, 254, 218, 250}, + {167, 120, 153, 212, 145, 254, 219, 250}, + {168, 121, 154, 213, 146, 254, 220, 250}, + {169, 121, 154, 214, 146, 254, 220, 250}, + {171, 122, 155, 215, 147, 254, 221, 250}, + {172, 122, 155, 216, 147, 254, 221, 250}, + {173, 123, 155, 217, 148, 254, 222, 250}, + {174, 123, 155, 217, 149, 254, 222, 250}, + {176, 124, 156, 218, 150, 254, 223, 250}, + {177, 124, 156, 219, 150, 254, 223, 250}, + {178, 125, 157, 220, 151, 254, 224, 251}, + {179, 125, 157, 220, 151, 254, 224, 251}, + {180, 126, 157, 221, 152, 254, 225, 251}, + {181, 126, 157, 221, 152, 254, 225, 251}, + {183, 127, 158, 222, 153, 254, 226, 251}, + {184, 127, 158, 223, 154, 254, 226, 251}, + {185, 128, 159, 224, 155, 255, 227, 251}, + {186, 128, 159, 224, 155, 255, 227, 251}, + {187, 129, 160, 225, 156, 255, 228, 251}, + {188, 130, 160, 225, 156, 255, 228, 251}, + {189, 131, 160, 226, 157, 255, 228, 251}, + {190, 131, 160, 226, 158, 255, 228, 251}, + {191, 132, 161, 227, 159, 255, 229, 251}, + {192, 132, 161, 227, 159, 255, 229, 251}, + {193, 133, 162, 228, 160, 255, 230, 252}, + {194, 133, 162, 229, 160, 255, 230, 252}, + {195, 134, 163, 230, 161, 255, 231, 252}, + {196, 134, 163, 230, 161, 255, 231, 252}, + {197, 135, 163, 231, 162, 255, 231, 252}, + {198, 135, 163, 231, 162, 255, 231, 252}, + {199, 136, 164, 232, 163, 255, 232, 252}, + {200, 136, 164, 232, 164, 255, 232, 252}, + {201, 137, 165, 233, 165, 255, 233, 252}, + {201, 137, 165, 233, 165, 255, 233, 252}, + {202, 138, 166, 233, 166, 255, 233, 252}, + {203, 138, 166, 233, 166, 255, 233, 252}, + {204, 139, 166, 234, 167, 255, 234, 252}, + {205, 139, 166, 234, 167, 255, 234, 252}, + {206, 140, 167, 235, 168, 255, 235, 252}, + {206, 140, 167, 235, 168, 255, 235, 252}, + {207, 141, 168, 236, 169, 255, 235, 252}, + {208, 141, 168, 236, 170, 255, 235, 252}, + 
{209, 142, 169, 237, 171, 255, 236, 252}, + {209, 143, 169, 237, 171, 255, 236, 252}, + {210, 144, 169, 237, 172, 255, 236, 252}, + {211, 144, 169, 237, 172, 255, 236, 252}, + {212, 145, 170, 238, 173, 255, 237, 252}, + {213, 145, 170, 238, 173, 255, 237, 252}, + {214, 146, 171, 239, 174, 255, 237, 253}, + {214, 146, 171, 239, 174, 255, 237, 253}, + {215, 147, 172, 240, 175, 255, 238, 253}, + {215, 147, 172, 240, 175, 255, 238, 253}, + {216, 148, 173, 240, 176, 255, 238, 253}, + {217, 148, 173, 240, 176, 255, 238, 253}, + {218, 149, 173, 241, 177, 255, 239, 253}, + {218, 149, 173, 241, 178, 255, 239, 253}, + {219, 150, 174, 241, 179, 255, 239, 253}, + {219, 151, 174, 241, 179, 255, 239, 253}, + {220, 152, 175, 242, 180, 255, 240, 253}, + {221, 152, 175, 242, 180, 255, 240, 253}, + {222, 153, 176, 242, 181, 255, 240, 253}, + {222, 153, 176, 242, 181, 255, 240, 253}, + {223, 154, 177, 243, 182, 255, 240, 253}, + {223, 154, 177, 243, 182, 255, 240, 253}, + {224, 155, 178, 244, 183, 255, 241, 253}, + {224, 155, 178, 244, 183, 255, 241, 253}, + {225, 156, 178, 244, 184, 255, 241, 253}, + {225, 157, 178, 244, 184, 255, 241, 253}, + {226, 158, 179, 244, 185, 255, 242, 253}, + {227, 158, 179, 244, 185, 255, 242, 253}, + {228, 159, 180, 245, 186, 255, 242, 253}, + {228, 159, 180, 245, 186, 255, 242, 253}, + {229, 160, 181, 245, 187, 255, 242, 253}, + {229, 160, 181, 245, 187, 255, 242, 253}, + {230, 161, 182, 246, 188, 255, 243, 253}, + {230, 162, 182, 246, 188, 255, 243, 253}, + {231, 163, 183, 246, 189, 255, 243, 253}, + {231, 163, 183, 246, 189, 255, 243, 253}, + {232, 164, 184, 247, 190, 255, 243, 253}, + {232, 164, 184, 247, 190, 255, 243, 253}, + {233, 165, 185, 247, 191, 255, 244, 253}, + {233, 165, 185, 247, 191, 255, 244, 253}, + {234, 166, 185, 247, 192, 255, 244, 253}, + {234, 167, 185, 247, 192, 255, 244, 253}, + {235, 168, 186, 248, 193, 255, 244, 253}, + {235, 168, 186, 248, 193, 255, 244, 253}, + {236, 169, 187, 248, 194, 255, 244, 253}, + {236, 169, 187, 248, 194, 255, 244, 253}, + {236, 170, 188, 248, 195, 255, 245, 253}, + {236, 170, 188, 248, 195, 255, 245, 253}, + {237, 171, 189, 249, 196, 255, 245, 254}, + {237, 172, 189, 249, 196, 255, 245, 254}, + {238, 173, 190, 249, 197, 255, 245, 254}, + {238, 173, 190, 249, 197, 255, 245, 254}, + {239, 174, 191, 249, 198, 255, 245, 254}, + {239, 174, 191, 249, 198, 255, 245, 254}, + {240, 175, 192, 249, 199, 255, 246, 254}, + {240, 176, 192, 249, 199, 255, 246, 254}, + {240, 177, 193, 250, 200, 255, 246, 254}, + {240, 177, 193, 250, 200, 255, 246, 254}, + {241, 178, 194, 250, 201, 255, 246, 254}, + {241, 178, 194, 250, 201, 255, 246, 254}, + {242, 179, 195, 250, 202, 255, 246, 254}, + {242, 180, 195, 250, 202, 255, 246, 254}, + {242, 181, 196, 250, 203, 255, 247, 254}, + {242, 181, 196, 250, 203, 255, 247, 254}, + {243, 182, 197, 251, 204, 255, 247, 254}, + {243, 183, 197, 251, 204, 255, 247, 254}, + {244, 184, 198, 251, 205, 255, 247, 254}, + {244, 184, 198, 251, 205, 255, 247, 254}, + {244, 185, 199, 251, 206, 255, 247, 254}, + {244, 185, 199, 251, 206, 255, 247, 254}, + {245, 186, 200, 251, 207, 255, 247, 254}, + {245, 187, 200, 251, 207, 255, 247, 254}, + {246, 188, 201, 252, 207, 255, 248, 254}, + {246, 188, 201, 252, 207, 255, 248, 254}, + {246, 189, 202, 252, 208, 255, 248, 254}, + {246, 190, 202, 252, 208, 255, 248, 254}, + {247, 191, 203, 252, 209, 255, 248, 254}, + {247, 191, 203, 252, 209, 255, 248, 254}, + {247, 192, 204, 252, 210, 255, 248, 254}, + {247, 193, 204, 252, 210, 255, 248, 254}, + {248, 194, 205, 252, 211, 255, 
248, 254}, + {248, 194, 205, 252, 211, 255, 248, 254}, + {248, 195, 206, 252, 212, 255, 249, 254}, + {248, 196, 206, 252, 212, 255, 249, 254}, + {249, 197, 207, 253, 213, 255, 249, 254}, + {249, 197, 207, 253, 213, 255, 249, 254}, + {249, 198, 208, 253, 214, 255, 249, 254}, + {249, 199, 209, 253, 214, 255, 249, 254}, + {250, 200, 210, 253, 215, 255, 249, 254}, + {250, 200, 210, 253, 215, 255, 249, 254}, + {250, 201, 211, 253, 215, 255, 249, 254}, + {250, 202, 211, 253, 215, 255, 249, 254}, + {250, 203, 212, 253, 216, 255, 249, 254}, + {250, 203, 212, 253, 216, 255, 249, 254}, + {251, 204, 213, 253, 217, 255, 250, 254}, + {251, 205, 213, 253, 217, 255, 250, 254}, + {251, 206, 214, 254, 218, 255, 250, 254}, + {251, 206, 215, 254, 218, 255, 250, 254}, + {252, 207, 216, 254, 219, 255, 250, 254}, + {252, 208, 216, 254, 219, 255, 250, 254}, + {252, 209, 217, 254, 220, 255, 250, 254}, + {252, 210, 217, 254, 220, 255, 250, 254}, + {252, 211, 218, 254, 221, 255, 250, 254}, + {252, 212, 218, 254, 221, 255, 250, 254}, + {253, 213, 219, 254, 222, 255, 250, 254}, + {253, 213, 220, 254, 222, 255, 250, 254}, + {253, 214, 221, 254, 223, 255, 250, 254}, + {253, 215, 221, 254, 223, 255, 250, 254}, + {253, 216, 222, 254, 224, 255, 251, 254}, + {253, 217, 223, 254, 224, 255, 251, 254}, + {253, 218, 224, 254, 225, 255, 251, 254}, + {253, 219, 224, 254, 225, 255, 251, 254}, + {254, 220, 225, 254, 225, 255, 251, 254}, + {254, 221, 226, 254, 225, 255, 251, 254}, + {254, 222, 227, 255, 226, 255, 251, 254}, + {254, 223, 227, 255, 226, 255, 251, 254}, + {254, 224, 228, 255, 227, 255, 251, 254}, + {254, 225, 229, 255, 227, 255, 251, 254}, + {254, 226, 230, 255, 228, 255, 251, 254}, + {254, 227, 230, 255, 229, 255, 251, 254}, + {255, 228, 231, 255, 230, 255, 251, 254}, + {255, 229, 232, 255, 230, 255, 251, 254}, + {255, 230, 233, 255, 231, 255, 252, 254}, + {255, 231, 234, 255, 231, 255, 252, 254}, + {255, 232, 235, 255, 232, 255, 252, 254}, + {255, 233, 236, 255, 232, 255, 252, 254}, + {255, 235, 237, 255, 233, 255, 252, 254}, + {255, 236, 238, 255, 234, 255, 252, 254}, + {255, 238, 240, 255, 235, 255, 252, 255}, + {255, 239, 241, 255, 235, 255, 252, 254}, + {255, 241, 243, 255, 236, 255, 252, 254}, + {255, 243, 245, 255, 237, 255, 252, 254}, + {255, 246, 247, 255, 239, 255, 253, 255}, +}; + +static const vp9_coeff_probs_model default_coef_probs_4x4[PLANE_TYPES] = { + { // Y plane + { // Intra + { // Band 0 + { 195, 29, 183 }, { 84, 49, 136 }, { 8, 42, 71 } + }, { // Band 1 + { 31, 107, 169 }, { 35, 99, 159 }, { 17, 82, 140 }, + { 8, 66, 114 }, { 2, 44, 76 }, { 1, 19, 32 } + }, { // Band 2 + { 40, 132, 201 }, { 29, 114, 187 }, { 13, 91, 157 }, + { 7, 75, 127 }, { 3, 58, 95 }, { 1, 28, 47 } + }, { // Band 3 + { 69, 142, 221 }, { 42, 122, 201 }, { 15, 91, 159 }, + { 6, 67, 121 }, { 1, 42, 77 }, { 1, 17, 31 } + }, { // Band 4 + { 102, 148, 228 }, { 67, 117, 204 }, { 17, 82, 154 }, + { 6, 59, 114 }, { 2, 39, 75 }, { 1, 15, 29 } + }, { // Band 5 + { 156, 57, 233 }, { 119, 57, 212 }, { 58, 48, 163 }, + { 29, 40, 124 }, { 12, 30, 81 }, { 3, 12, 31 } + } + }, { // Inter + { // Band 0 + { 191, 107, 226 }, { 124, 117, 204 }, { 25, 99, 155 } + }, { // Band 1 + { 29, 148, 210 }, { 37, 126, 194 }, { 8, 93, 157 }, + { 2, 68, 118 }, { 1, 39, 69 }, { 1, 17, 33 } + }, { // Band 2 + { 41, 151, 213 }, { 27, 123, 193 }, { 3, 82, 144 }, + { 1, 58, 105 }, { 1, 32, 60 }, { 1, 13, 26 } + }, { // Band 3 + { 59, 159, 220 }, { 23, 126, 198 }, { 4, 88, 151 }, + { 1, 66, 114 }, { 1, 38, 71 }, { 1, 18, 34 } + }, { // Band 4 + { 114, 136, 232 }, { 
51, 114, 207 }, { 11, 83, 155 }, + { 3, 56, 105 }, { 1, 33, 65 }, { 1, 17, 34 } + }, { // Band 5 + { 149, 65, 234 }, { 121, 57, 215 }, { 61, 49, 166 }, + { 28, 36, 114 }, { 12, 25, 76 }, { 3, 16, 42 } + } + } + }, { // UV plane + { // Intra + { // Band 0 + { 214, 49, 220 }, { 132, 63, 188 }, { 42, 65, 137 } + }, { // Band 1 + { 85, 137, 221 }, { 104, 131, 216 }, { 49, 111, 192 }, + { 21, 87, 155 }, { 2, 49, 87 }, { 1, 16, 28 } + }, { // Band 2 + { 89, 163, 230 }, { 90, 137, 220 }, { 29, 100, 183 }, + { 10, 70, 135 }, { 2, 42, 81 }, { 1, 17, 33 } + }, { // Band 3 + { 108, 167, 237 }, { 55, 133, 222 }, { 15, 97, 179 }, + { 4, 72, 135 }, { 1, 45, 85 }, { 1, 19, 38 } + }, { // Band 4 + { 124, 146, 240 }, { 66, 124, 224 }, { 17, 88, 175 }, + { 4, 58, 122 }, { 1, 36, 75 }, { 1, 18, 37 } + }, { // Band 5 + { 141, 79, 241 }, { 126, 70, 227 }, { 66, 58, 182 }, + { 30, 44, 136 }, { 12, 34, 96 }, { 2, 20, 47 } + } + }, { // Inter + { // Band 0 + { 229, 99, 249 }, { 143, 111, 235 }, { 46, 109, 192 } + }, { // Band 1 + { 82, 158, 236 }, { 94, 146, 224 }, { 25, 117, 191 }, + { 9, 87, 149 }, { 3, 56, 99 }, { 1, 33, 57 } + }, { // Band 2 + { 83, 167, 237 }, { 68, 145, 222 }, { 10, 103, 177 }, + { 2, 72, 131 }, { 1, 41, 79 }, { 1, 20, 39 } + }, { // Band 3 + { 99, 167, 239 }, { 47, 141, 224 }, { 10, 104, 178 }, + { 2, 73, 133 }, { 1, 44, 85 }, { 1, 22, 47 } + }, { // Band 4 + { 127, 145, 243 }, { 71, 129, 228 }, { 17, 93, 177 }, + { 3, 61, 124 }, { 1, 41, 84 }, { 1, 21, 52 } + }, { // Band 5 + { 157, 78, 244 }, { 140, 72, 231 }, { 69, 58, 184 }, + { 31, 44, 137 }, { 14, 38, 105 }, { 8, 23, 61 } + } + } + } +}; + +static const vp9_coeff_probs_model default_coef_probs_8x8[PLANE_TYPES] = { + { // Y plane + { // Intra + { // Band 0 + { 125, 34, 187 }, { 52, 41, 133 }, { 6, 31, 56 } + }, { // Band 1 + { 37, 109, 153 }, { 51, 102, 147 }, { 23, 87, 128 }, + { 8, 67, 101 }, { 1, 41, 63 }, { 1, 19, 29 } + }, { // Band 2 + { 31, 154, 185 }, { 17, 127, 175 }, { 6, 96, 145 }, + { 2, 73, 114 }, { 1, 51, 82 }, { 1, 28, 45 } + }, { // Band 3 + { 23, 163, 200 }, { 10, 131, 185 }, { 2, 93, 148 }, + { 1, 67, 111 }, { 1, 41, 69 }, { 1, 14, 24 } + }, { // Band 4 + { 29, 176, 217 }, { 12, 145, 201 }, { 3, 101, 156 }, + { 1, 69, 111 }, { 1, 39, 63 }, { 1, 14, 23 } + }, { // Band 5 + { 57, 192, 233 }, { 25, 154, 215 }, { 6, 109, 167 }, + { 3, 78, 118 }, { 1, 48, 69 }, { 1, 21, 29 } + } + }, { // Inter + { // Band 0 + { 202, 105, 245 }, { 108, 106, 216 }, { 18, 90, 144 } + }, { // Band 1 + { 33, 172, 219 }, { 64, 149, 206 }, { 14, 117, 177 }, + { 5, 90, 141 }, { 2, 61, 95 }, { 1, 37, 57 } + }, { // Band 2 + { 33, 179, 220 }, { 11, 140, 198 }, { 1, 89, 148 }, + { 1, 60, 104 }, { 1, 33, 57 }, { 1, 12, 21 } + }, { // Band 3 + { 30, 181, 221 }, { 8, 141, 198 }, { 1, 87, 145 }, + { 1, 58, 100 }, { 1, 31, 55 }, { 1, 12, 20 } + }, { // Band 4 + { 32, 186, 224 }, { 7, 142, 198 }, { 1, 86, 143 }, + { 1, 58, 100 }, { 1, 31, 55 }, { 1, 12, 22 } + }, { // Band 5 + { 57, 192, 227 }, { 20, 143, 204 }, { 3, 96, 154 }, + { 1, 68, 112 }, { 1, 42, 69 }, { 1, 19, 32 } + } + } + }, { // UV plane + { // Intra + { // Band 0 + { 212, 35, 215 }, { 113, 47, 169 }, { 29, 48, 105 } + }, { // Band 1 + { 74, 129, 203 }, { 106, 120, 203 }, { 49, 107, 178 }, + { 19, 84, 144 }, { 4, 50, 84 }, { 1, 15, 25 } + }, { // Band 2 + { 71, 172, 217 }, { 44, 141, 209 }, { 15, 102, 173 }, + { 6, 76, 133 }, { 2, 51, 89 }, { 1, 24, 42 } + }, { // Band 3 + { 64, 185, 231 }, { 31, 148, 216 }, { 8, 103, 175 }, + { 3, 74, 131 }, { 1, 46, 81 }, { 1, 18, 30 } + }, { // Band 4 + { 
65, 196, 235 }, { 25, 157, 221 }, { 5, 105, 174 }, + { 1, 67, 120 }, { 1, 38, 69 }, { 1, 15, 30 } + }, { // Band 5 + { 65, 204, 238 }, { 30, 156, 224 }, { 7, 107, 177 }, + { 2, 70, 124 }, { 1, 42, 73 }, { 1, 18, 34 } + } + }, { // Inter + { // Band 0 + { 225, 86, 251 }, { 144, 104, 235 }, { 42, 99, 181 } + }, { // Band 1 + { 85, 175, 239 }, { 112, 165, 229 }, { 29, 136, 200 }, + { 12, 103, 162 }, { 6, 77, 123 }, { 2, 53, 84 } + }, { // Band 2 + { 75, 183, 239 }, { 30, 155, 221 }, { 3, 106, 171 }, + { 1, 74, 128 }, { 1, 44, 76 }, { 1, 17, 28 } + }, { // Band 3 + { 73, 185, 240 }, { 27, 159, 222 }, { 2, 107, 172 }, + { 1, 75, 127 }, { 1, 42, 73 }, { 1, 17, 29 } + }, { // Band 4 + { 62, 190, 238 }, { 21, 159, 222 }, { 2, 107, 172 }, + { 1, 72, 122 }, { 1, 40, 71 }, { 1, 18, 32 } + }, { // Band 5 + { 61, 199, 240 }, { 27, 161, 226 }, { 4, 113, 180 }, + { 1, 76, 129 }, { 1, 46, 80 }, { 1, 23, 41 } + } + } + } +}; + +static const vp9_coeff_probs_model default_coef_probs_16x16[PLANE_TYPES] = { + { // Y plane + { // Intra + { // Band 0 + { 7, 27, 153 }, { 5, 30, 95 }, { 1, 16, 30 } + }, { // Band 1 + { 50, 75, 127 }, { 57, 75, 124 }, { 27, 67, 108 }, + { 10, 54, 86 }, { 1, 33, 52 }, { 1, 12, 18 } + }, { // Band 2 + { 43, 125, 151 }, { 26, 108, 148 }, { 7, 83, 122 }, + { 2, 59, 89 }, { 1, 38, 60 }, { 1, 17, 27 } + }, { // Band 3 + { 23, 144, 163 }, { 13, 112, 154 }, { 2, 75, 117 }, + { 1, 50, 81 }, { 1, 31, 51 }, { 1, 14, 23 } + }, { // Band 4 + { 18, 162, 185 }, { 6, 123, 171 }, { 1, 78, 125 }, + { 1, 51, 86 }, { 1, 31, 54 }, { 1, 14, 23 } + }, { // Band 5 + { 15, 199, 227 }, { 3, 150, 204 }, { 1, 91, 146 }, + { 1, 55, 95 }, { 1, 30, 53 }, { 1, 11, 20 } + } + }, { // Inter + { // Band 0 + { 19, 55, 240 }, { 19, 59, 196 }, { 3, 52, 105 } + }, { // Band 1 + { 41, 166, 207 }, { 104, 153, 199 }, { 31, 123, 181 }, + { 14, 101, 152 }, { 5, 72, 106 }, { 1, 36, 52 } + }, { // Band 2 + { 35, 176, 211 }, { 12, 131, 190 }, { 2, 88, 144 }, + { 1, 60, 101 }, { 1, 36, 60 }, { 1, 16, 28 } + }, { // Band 3 + { 28, 183, 213 }, { 8, 134, 191 }, { 1, 86, 142 }, + { 1, 56, 96 }, { 1, 30, 53 }, { 1, 12, 20 } + }, { // Band 4 + { 20, 190, 215 }, { 4, 135, 192 }, { 1, 84, 139 }, + { 1, 53, 91 }, { 1, 28, 49 }, { 1, 11, 20 } + }, { // Band 5 + { 13, 196, 216 }, { 2, 137, 192 }, { 1, 86, 143 }, + { 1, 57, 99 }, { 1, 32, 56 }, { 1, 13, 24 } + } + } + }, { // UV plane + { // Intra + { // Band 0 + { 211, 29, 217 }, { 96, 47, 156 }, { 22, 43, 87 } + }, { // Band 1 + { 78, 120, 193 }, { 111, 116, 186 }, { 46, 102, 164 }, + { 15, 80, 128 }, { 2, 49, 76 }, { 1, 18, 28 } + }, { // Band 2 + { 71, 161, 203 }, { 42, 132, 192 }, { 10, 98, 150 }, + { 3, 69, 109 }, { 1, 44, 70 }, { 1, 18, 29 } + }, { // Band 3 + { 57, 186, 211 }, { 30, 140, 196 }, { 4, 93, 146 }, + { 1, 62, 102 }, { 1, 38, 65 }, { 1, 16, 27 } + }, { // Band 4 + { 47, 199, 217 }, { 14, 145, 196 }, { 1, 88, 142 }, + { 1, 57, 98 }, { 1, 36, 62 }, { 1, 15, 26 } + }, { // Band 5 + { 26, 219, 229 }, { 5, 155, 207 }, { 1, 94, 151 }, + { 1, 60, 104 }, { 1, 36, 62 }, { 1, 16, 28 } + } + }, { // Inter + { // Band 0 + { 233, 29, 248 }, { 146, 47, 220 }, { 43, 52, 140 } + }, { // Band 1 + { 100, 163, 232 }, { 179, 161, 222 }, { 63, 142, 204 }, + { 37, 113, 174 }, { 26, 89, 137 }, { 18, 68, 97 } + }, { // Band 2 + { 85, 181, 230 }, { 32, 146, 209 }, { 7, 100, 164 }, + { 3, 71, 121 }, { 1, 45, 77 }, { 1, 18, 30 } + }, { // Band 3 + { 65, 187, 230 }, { 20, 148, 207 }, { 2, 97, 159 }, + { 1, 68, 116 }, { 1, 40, 70 }, { 1, 14, 29 } + }, { // Band 4 + { 40, 194, 227 }, { 8, 147, 204 }, 
{ 1, 94, 155 }, + { 1, 65, 112 }, { 1, 39, 66 }, { 1, 14, 26 } + }, { // Band 5 + { 16, 208, 228 }, { 3, 151, 207 }, { 1, 98, 160 }, + { 1, 67, 117 }, { 1, 41, 74 }, { 1, 17, 31 } + } + } + } +}; + +static const vp9_coeff_probs_model default_coef_probs_32x32[PLANE_TYPES] = { + { // Y plane + { // Intra + { // Band 0 + { 17, 38, 140 }, { 7, 34, 80 }, { 1, 17, 29 } + }, { // Band 1 + { 37, 75, 128 }, { 41, 76, 128 }, { 26, 66, 116 }, + { 12, 52, 94 }, { 2, 32, 55 }, { 1, 10, 16 } + }, { // Band 2 + { 50, 127, 154 }, { 37, 109, 152 }, { 16, 82, 121 }, + { 5, 59, 85 }, { 1, 35, 54 }, { 1, 13, 20 } + }, { // Band 3 + { 40, 142, 167 }, { 17, 110, 157 }, { 2, 71, 112 }, + { 1, 44, 72 }, { 1, 27, 45 }, { 1, 11, 17 } + }, { // Band 4 + { 30, 175, 188 }, { 9, 124, 169 }, { 1, 74, 116 }, + { 1, 48, 78 }, { 1, 30, 49 }, { 1, 11, 18 } + }, { // Band 5 + { 10, 222, 223 }, { 2, 150, 194 }, { 1, 83, 128 }, + { 1, 48, 79 }, { 1, 27, 45 }, { 1, 11, 17 } + } + }, { // Inter + { // Band 0 + { 36, 41, 235 }, { 29, 36, 193 }, { 10, 27, 111 } + }, { // Band 1 + { 85, 165, 222 }, { 177, 162, 215 }, { 110, 135, 195 }, + { 57, 113, 168 }, { 23, 83, 120 }, { 10, 49, 61 } + }, { // Band 2 + { 85, 190, 223 }, { 36, 139, 200 }, { 5, 90, 146 }, + { 1, 60, 103 }, { 1, 38, 65 }, { 1, 18, 30 } + }, { // Band 3 + { 72, 202, 223 }, { 23, 141, 199 }, { 2, 86, 140 }, + { 1, 56, 97 }, { 1, 36, 61 }, { 1, 16, 27 } + }, { // Band 4 + { 55, 218, 225 }, { 13, 145, 200 }, { 1, 86, 141 }, + { 1, 57, 99 }, { 1, 35, 61 }, { 1, 13, 22 } + }, { // Band 5 + { 15, 235, 212 }, { 1, 132, 184 }, { 1, 84, 139 }, + { 1, 57, 97 }, { 1, 34, 56 }, { 1, 14, 23 } + } + } + }, { // UV plane + { // Intra + { // Band 0 + { 181, 21, 201 }, { 61, 37, 123 }, { 10, 38, 71 } + }, { // Band 1 + { 47, 106, 172 }, { 95, 104, 173 }, { 42, 93, 159 }, + { 18, 77, 131 }, { 4, 50, 81 }, { 1, 17, 23 } + }, { // Band 2 + { 62, 147, 199 }, { 44, 130, 189 }, { 28, 102, 154 }, + { 18, 75, 115 }, { 2, 44, 65 }, { 1, 12, 19 } + }, { // Band 3 + { 55, 153, 210 }, { 24, 130, 194 }, { 3, 93, 146 }, + { 1, 61, 97 }, { 1, 31, 50 }, { 1, 10, 16 } + }, { // Band 4 + { 49, 186, 223 }, { 17, 148, 204 }, { 1, 96, 142 }, + { 1, 53, 83 }, { 1, 26, 44 }, { 1, 11, 17 } + }, { // Band 5 + { 13, 217, 212 }, { 2, 136, 180 }, { 1, 78, 124 }, + { 1, 50, 83 }, { 1, 29, 49 }, { 1, 14, 23 } + } + }, { // Inter + { // Band 0 + { 197, 13, 247 }, { 82, 17, 222 }, { 25, 17, 162 } + }, { // Band 1 + { 126, 186, 247 }, { 234, 191, 243 }, { 176, 177, 234 }, + { 104, 158, 220 }, { 66, 128, 186 }, { 55, 90, 137 } + }, { // Band 2 + { 111, 197, 242 }, { 46, 158, 219 }, { 9, 104, 171 }, + { 2, 65, 125 }, { 1, 44, 80 }, { 1, 17, 91 } + }, { // Band 3 + { 104, 208, 245 }, { 39, 168, 224 }, { 3, 109, 162 }, + { 1, 79, 124 }, { 1, 50, 102 }, { 1, 43, 102 } + }, { // Band 4 + { 84, 220, 246 }, { 31, 177, 231 }, { 2, 115, 180 }, + { 1, 79, 134 }, { 1, 55, 77 }, { 1, 60, 79 } + }, { // Band 5 + { 43, 243, 240 }, { 8, 180, 217 }, { 1, 115, 166 }, + { 1, 84, 121 }, { 1, 51, 67 }, { 1, 16, 6 } + } + } + } +}; + +static void extend_to_full_distribution(vpx_prob *probs, vpx_prob p) { + assert(p != 0); + memcpy(probs, vp9_pareto8_full[p - 1], MODEL_NODES * sizeof(vpx_prob)); +} + +void vp9_model_to_full_probs(const vpx_prob *model, vpx_prob *full) { + if (full != model) + memcpy(full, model, sizeof(vpx_prob) * UNCONSTRAINED_NODES); + extend_to_full_distribution(&full[UNCONSTRAINED_NODES], model[PIVOT_NODE]); +} + +void vp9_default_coef_probs(VP9_COMMON *cm) { + vp9_copy(cm->fc->coef_probs[TX_4X4], 
default_coef_probs_4x4); + vp9_copy(cm->fc->coef_probs[TX_8X8], default_coef_probs_8x8); + vp9_copy(cm->fc->coef_probs[TX_16X16], default_coef_probs_16x16); + vp9_copy(cm->fc->coef_probs[TX_32X32], default_coef_probs_32x32); +} + +#define COEF_COUNT_SAT 24 +#define COEF_MAX_UPDATE_FACTOR 112 +#define COEF_COUNT_SAT_KEY 24 +#define COEF_MAX_UPDATE_FACTOR_KEY 112 +#define COEF_COUNT_SAT_AFTER_KEY 24 +#define COEF_MAX_UPDATE_FACTOR_AFTER_KEY 128 + +static void adapt_coef_probs(VP9_COMMON *cm, TX_SIZE tx_size, + unsigned int count_sat, + unsigned int update_factor) { + const FRAME_CONTEXT *pre_fc = &cm->frame_contexts[cm->frame_context_idx]; + vp9_coeff_probs_model *const probs = cm->fc->coef_probs[tx_size]; + const vp9_coeff_probs_model *const pre_probs = pre_fc->coef_probs[tx_size]; + vp9_coeff_count_model *counts = cm->counts.coef[tx_size]; + unsigned int (*eob_counts)[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS] = + cm->counts.eob_branch[tx_size]; + int i, j, k, l, m; + + for (i = 0; i < PLANE_TYPES; ++i) + for (j = 0; j < REF_TYPES; ++j) + for (k = 0; k < COEF_BANDS; ++k) + for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) { + const int n0 = counts[i][j][k][l][ZERO_TOKEN]; + const int n1 = counts[i][j][k][l][ONE_TOKEN]; + const int n2 = counts[i][j][k][l][TWO_TOKEN]; + const int neob = counts[i][j][k][l][EOB_MODEL_TOKEN]; + const unsigned int branch_ct[UNCONSTRAINED_NODES][2] = { + { neob, eob_counts[i][j][k][l] - neob }, + { n0, n1 + n2 }, + { n1, n2 } + }; + for (m = 0; m < UNCONSTRAINED_NODES; ++m) + probs[i][j][k][l][m] = merge_probs(pre_probs[i][j][k][l][m], + branch_ct[m], + count_sat, update_factor); + } +} + +void vp9_adapt_coef_probs(VP9_COMMON *cm) { + TX_SIZE t; + unsigned int count_sat, update_factor; + + if (frame_is_intra_only(cm)) { + update_factor = COEF_MAX_UPDATE_FACTOR_KEY; + count_sat = COEF_COUNT_SAT_KEY; + } else if (cm->last_frame_type == KEY_FRAME) { + update_factor = COEF_MAX_UPDATE_FACTOR_AFTER_KEY; /* adapt quickly */ + count_sat = COEF_COUNT_SAT_AFTER_KEY; + } else { + update_factor = COEF_MAX_UPDATE_FACTOR; + count_sat = COEF_COUNT_SAT; + } + for (t = TX_4X4; t <= TX_32X32; t++) + adapt_coef_probs(cm, t, count_sat, update_factor); +} diff --git a/thirdparty/libvpx/vp9/common/vp9_entropy.h b/thirdparty/libvpx/vp9/common/vp9_entropy.h new file mode 100644 index 0000000000..63b3bff5d9 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_entropy.h @@ -0,0 +1,200 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef VP9_COMMON_VP9_ENTROPY_H_ +#define VP9_COMMON_VP9_ENTROPY_H_ + +#include "vpx/vpx_integer.h" +#include "vpx_dsp/prob.h" + +#include "vp9/common/vp9_common.h" +#include "vp9/common/vp9_enums.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define DIFF_UPDATE_PROB 252 + +// Coefficient token alphabet +#define ZERO_TOKEN 0 // 0 Extra Bits 0+0 +#define ONE_TOKEN 1 // 1 Extra Bits 0+1 +#define TWO_TOKEN 2 // 2 Extra Bits 0+1 +#define THREE_TOKEN 3 // 3 Extra Bits 0+1 +#define FOUR_TOKEN 4 // 4 Extra Bits 0+1 +#define CATEGORY1_TOKEN 5 // 5-6 Extra Bits 1+1 +#define CATEGORY2_TOKEN 6 // 7-10 Extra Bits 2+1 +#define CATEGORY3_TOKEN 7 // 11-18 Extra Bits 3+1 +#define CATEGORY4_TOKEN 8 // 19-34 Extra Bits 4+1 +#define CATEGORY5_TOKEN 9 // 35-66 Extra Bits 5+1 +#define CATEGORY6_TOKEN 10 // 67+ Extra Bits 14+1 +#define EOB_TOKEN 11 // EOB Extra Bits 0+0 + +#define ENTROPY_TOKENS 12 + +#define ENTROPY_NODES 11 + +DECLARE_ALIGNED(16, extern const uint8_t, vp9_pt_energy_class[ENTROPY_TOKENS]); + +#define CAT1_MIN_VAL 5 +#define CAT2_MIN_VAL 7 +#define CAT3_MIN_VAL 11 +#define CAT4_MIN_VAL 19 +#define CAT5_MIN_VAL 35 +#define CAT6_MIN_VAL 67 + +// Extra bit probabilities. +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat1_prob[1]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat2_prob[2]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat3_prob[3]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat4_prob[4]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat5_prob[5]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat6_prob[14]); + +#if CONFIG_VP9_HIGHBITDEPTH +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat1_prob_high10[1]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat2_prob_high10[2]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat3_prob_high10[3]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat4_prob_high10[4]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat5_prob_high10[5]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat6_prob_high10[16]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat1_prob_high12[1]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat2_prob_high12[2]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat3_prob_high12[3]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat4_prob_high12[4]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat5_prob_high12[5]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat6_prob_high12[18]); +#endif // CONFIG_VP9_HIGHBITDEPTH + +#define EOB_MODEL_TOKEN 3 + +#define DCT_MAX_VALUE 16384 +#if CONFIG_VP9_HIGHBITDEPTH +#define DCT_MAX_VALUE_HIGH10 65536 +#define DCT_MAX_VALUE_HIGH12 262144 +#endif // CONFIG_VP9_HIGHBITDEPTH + +/* Coefficients are predicted via a 3-dimensional probability table. */ + +#define REF_TYPES 2 // intra=0, inter=1 + +/* Middle dimension reflects the coefficient position within the transform. */ +#define COEF_BANDS 6 + +/* The inside dimension is a measure of nearby complexity, reflecting the + extent to which nearby coefficients are nonzero. For the first coefficient + (DC, unless block type is 0), we look at the (already encoded) blocks above + and to the left of the current block. The context index is then the number + (0, 1, or 2) of these blocks having nonzero coefficients. + After decoding a coefficient, the measure is determined by the size of the + most recently decoded coefficient.
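+ (Concretely, for the first coefficient this is what the + combine_entropy_contexts() helper below computes: the above and left + contributions are each 0 or 1, so the context index is their sum, + between 0 and 2.)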
+ Note that the intuitive meaning of this measure changes as coefficients + are decoded, e.g., prior to the first token, a zero means that my neighbors + are empty while, after the first token, because of the use of end-of-block, + a zero means we just decoded a zero and hence guarantees that a non-zero + coefficient will appear later in this block. However, this shift + in meaning is perfectly OK because our context depends also on the + coefficient band (and since zigzag positions 0, 1, and 2 are in + distinct bands). */ + +#define COEFF_CONTEXTS 6 +#define BAND_COEFF_CONTEXTS(band) ((band) == 0 ? 3 : COEFF_CONTEXTS) + +// #define ENTROPY_STATS + +typedef unsigned int vp9_coeff_count[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS] + [ENTROPY_TOKENS]; +typedef unsigned int vp9_coeff_stats[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS] + [ENTROPY_NODES][2]; + +#define SUBEXP_PARAM 4 /* Subexponential code parameter */ +#define MODULUS_PARAM 13 /* Modulus parameter */ + +struct VP9Common; +void vp9_default_coef_probs(struct VP9Common *cm); +void vp9_adapt_coef_probs(struct VP9Common *cm); + +// This is the index in the scan order beyond which all coefficients for +// 8x8 transform and above are in the top band. +// This macro is currently unused but may be used by certain implementations +#define MAXBAND_INDEX 21 + +DECLARE_ALIGNED(16, extern const uint8_t, vp9_coefband_trans_8x8plus[1024]); +DECLARE_ALIGNED(16, extern const uint8_t, vp9_coefband_trans_4x4[16]); + +static INLINE const uint8_t *get_band_translate(TX_SIZE tx_size) { + return tx_size == TX_4X4 ? vp9_coefband_trans_4x4 + : vp9_coefband_trans_8x8plus; +} + +// 128 lists of probabilities are stored for the following ONE node probs: +// 1, 3, 5, 7, ..., 253, 255 +// In between probabilities are interpolated linearly + +#define COEFF_PROB_MODELS 255 + +#define UNCONSTRAINED_NODES 3 + +#define PIVOT_NODE 2 // which node is pivot + +#define MODEL_NODES (ENTROPY_NODES - UNCONSTRAINED_NODES) +extern const vpx_tree_index vp9_coef_con_tree[TREE_SIZE(ENTROPY_TOKENS)]; +extern const vpx_prob vp9_pareto8_full[COEFF_PROB_MODELS][MODEL_NODES]; + +typedef vpx_prob vp9_coeff_probs_model[REF_TYPES][COEF_BANDS] + [COEFF_CONTEXTS][UNCONSTRAINED_NODES]; + +typedef unsigned int vp9_coeff_count_model[REF_TYPES][COEF_BANDS] + [COEFF_CONTEXTS] + [UNCONSTRAINED_NODES + 1]; + +void vp9_model_to_full_probs(const vpx_prob *model, vpx_prob *full); + +typedef char ENTROPY_CONTEXT; + +static INLINE int combine_entropy_contexts(ENTROPY_CONTEXT a, + ENTROPY_CONTEXT b) { + return (a != 0) + (b != 0); +} + +static INLINE int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a, + const ENTROPY_CONTEXT *l) { + ENTROPY_CONTEXT above_ec = 0, left_ec = 0; + + switch (tx_size) { + case TX_4X4: + above_ec = a[0] != 0; + left_ec = l[0] != 0; + break; + case TX_8X8: + above_ec = !!*(const uint16_t *)a; + left_ec = !!*(const uint16_t *)l; + break; + case TX_16X16: + above_ec = !!*(const uint32_t *)a; + left_ec = !!*(const uint32_t *)l; + break; + case TX_32X32: + above_ec = !!*(const uint64_t *)a; + left_ec = !!*(const uint64_t *)l; + break; + default: + assert(0 && "Invalid transform size."); + break; + } + + return combine_entropy_contexts(above_ec, left_ec); +} + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_COMMON_VP9_ENTROPY_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_entropymode.c b/thirdparty/libvpx/vp9/common/vp9_entropymode.c new file mode 100644 index 0000000000..670348bafd --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_entropymode.c @@ -0,0 
+1,469 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "vpx_mem/vpx_mem.h" + +#include "vp9/common/vp9_onyxc_int.h" +#include "vp9/common/vp9_seg_common.h" + +const vpx_prob vp9_kf_y_mode_prob[INTRA_MODES][INTRA_MODES][INTRA_MODES - 1] = { + { // above = dc + { 137, 30, 42, 148, 151, 207, 70, 52, 91 }, // left = dc + { 92, 45, 102, 136, 116, 180, 74, 90, 100 }, // left = v + { 73, 32, 19, 187, 222, 215, 46, 34, 100 }, // left = h + { 91, 30, 32, 116, 121, 186, 93, 86, 94 }, // left = d45 + { 72, 35, 36, 149, 68, 206, 68, 63, 105 }, // left = d135 + { 73, 31, 28, 138, 57, 124, 55, 122, 151 }, // left = d117 + { 67, 23, 21, 140, 126, 197, 40, 37, 171 }, // left = d153 + { 86, 27, 28, 128, 154, 212, 45, 43, 53 }, // left = d207 + { 74, 32, 27, 107, 86, 160, 63, 134, 102 }, // left = d63 + { 59, 67, 44, 140, 161, 202, 78, 67, 119 } // left = tm + }, { // above = v + { 63, 36, 126, 146, 123, 158, 60, 90, 96 }, // left = dc + { 43, 46, 168, 134, 107, 128, 69, 142, 92 }, // left = v + { 44, 29, 68, 159, 201, 177, 50, 57, 77 }, // left = h + { 58, 38, 76, 114, 97, 172, 78, 133, 92 }, // left = d45 + { 46, 41, 76, 140, 63, 184, 69, 112, 57 }, // left = d135 + { 38, 32, 85, 140, 46, 112, 54, 151, 133 }, // left = d117 + { 39, 27, 61, 131, 110, 175, 44, 75, 136 }, // left = d153 + { 52, 30, 74, 113, 130, 175, 51, 64, 58 }, // left = d207 + { 47, 35, 80, 100, 74, 143, 64, 163, 74 }, // left = d63 + { 36, 61, 116, 114, 128, 162, 80, 125, 82 } // left = tm + }, { // above = h + { 82, 26, 26, 171, 208, 204, 44, 32, 105 }, // left = dc + { 55, 44, 68, 166, 179, 192, 57, 57, 108 }, // left = v + { 42, 26, 11, 199, 241, 228, 23, 15, 85 }, // left = h + { 68, 42, 19, 131, 160, 199, 55, 52, 83 }, // left = d45 + { 58, 50, 25, 139, 115, 232, 39, 52, 118 }, // left = d135 + { 50, 35, 33, 153, 104, 162, 64, 59, 131 }, // left = d117 + { 44, 24, 16, 150, 177, 202, 33, 19, 156 }, // left = d153 + { 55, 27, 12, 153, 203, 218, 26, 27, 49 }, // left = d207 + { 53, 49, 21, 110, 116, 168, 59, 80, 76 }, // left = d63 + { 38, 72, 19, 168, 203, 212, 50, 50, 107 } // left = tm + }, { // above = d45 + { 103, 26, 36, 129, 132, 201, 83, 80, 93 }, // left = dc + { 59, 38, 83, 112, 103, 162, 98, 136, 90 }, // left = v + { 62, 30, 23, 158, 200, 207, 59, 57, 50 }, // left = h + { 67, 30, 29, 84, 86, 191, 102, 91, 59 }, // left = d45 + { 60, 32, 33, 112, 71, 220, 64, 89, 104 }, // left = d135 + { 53, 26, 34, 130, 56, 149, 84, 120, 103 }, // left = d117 + { 53, 21, 23, 133, 109, 210, 56, 77, 172 }, // left = d153 + { 77, 19, 29, 112, 142, 228, 55, 66, 36 }, // left = d207 + { 61, 29, 29, 93, 97, 165, 83, 175, 162 }, // left = d63 + { 47, 47, 43, 114, 137, 181, 100, 99, 95 } // left = tm + }, { // above = d135 + { 69, 23, 29, 128, 83, 199, 46, 44, 101 }, // left = dc + { 53, 40, 55, 139, 69, 183, 61, 80, 110 }, // left = v + { 40, 29, 19, 161, 180, 207, 43, 24, 91 }, // left = h + { 60, 34, 19, 105, 61, 198, 53, 64, 89 }, // left = d45 + { 52, 31, 22, 158, 40, 209, 58, 62, 89 }, // left = d135 + { 44, 31, 29, 147, 46, 158, 56, 102, 198 }, // left = d117 + { 35, 19, 12, 135, 87, 209, 41, 45, 167 }, // left = d153 + { 55, 25, 21, 118, 95, 215, 38, 39, 66 }, // 
left = d207 + { 51, 38, 25, 113, 58, 164, 70, 93, 97 }, // left = d63 + { 47, 54, 34, 146, 108, 203, 72, 103, 151 } // left = tm + }, { // above = d117 + { 64, 19, 37, 156, 66, 138, 49, 95, 133 }, // left = dc + { 46, 27, 80, 150, 55, 124, 55, 121, 135 }, // left = v + { 36, 23, 27, 165, 149, 166, 54, 64, 118 }, // left = h + { 53, 21, 36, 131, 63, 163, 60, 109, 81 }, // left = d45 + { 40, 26, 35, 154, 40, 185, 51, 97, 123 }, // left = d135 + { 35, 19, 34, 179, 19, 97, 48, 129, 124 }, // left = d117 + { 36, 20, 26, 136, 62, 164, 33, 77, 154 }, // left = d153 + { 45, 18, 32, 130, 90, 157, 40, 79, 91 }, // left = d207 + { 45, 26, 28, 129, 45, 129, 49, 147, 123 }, // left = d63 + { 38, 44, 51, 136, 74, 162, 57, 97, 121 } // left = tm + }, { // above = d153 + { 75, 17, 22, 136, 138, 185, 32, 34, 166 }, // left = dc + { 56, 39, 58, 133, 117, 173, 48, 53, 187 }, // left = v + { 35, 21, 12, 161, 212, 207, 20, 23, 145 }, // left = h + { 56, 29, 19, 117, 109, 181, 55, 68, 112 }, // left = d45 + { 47, 29, 17, 153, 64, 220, 59, 51, 114 }, // left = d135 + { 46, 16, 24, 136, 76, 147, 41, 64, 172 }, // left = d117 + { 34, 17, 11, 108, 152, 187, 13, 15, 209 }, // left = d153 + { 51, 24, 14, 115, 133, 209, 32, 26, 104 }, // left = d207 + { 55, 30, 18, 122, 79, 179, 44, 88, 116 }, // left = d63 + { 37, 49, 25, 129, 168, 164, 41, 54, 148 } // left = tm + }, { // above = d207 + { 82, 22, 32, 127, 143, 213, 39, 41, 70 }, // left = dc + { 62, 44, 61, 123, 105, 189, 48, 57, 64 }, // left = v + { 47, 25, 17, 175, 222, 220, 24, 30, 86 }, // left = h + { 68, 36, 17, 106, 102, 206, 59, 74, 74 }, // left = d45 + { 57, 39, 23, 151, 68, 216, 55, 63, 58 }, // left = d135 + { 49, 30, 35, 141, 70, 168, 82, 40, 115 }, // left = d117 + { 51, 25, 15, 136, 129, 202, 38, 35, 139 }, // left = d153 + { 68, 26, 16, 111, 141, 215, 29, 28, 28 }, // left = d207 + { 59, 39, 19, 114, 75, 180, 77, 104, 42 }, // left = d63 + { 40, 61, 26, 126, 152, 206, 61, 59, 93 } // left = tm + }, { // above = d63 + { 78, 23, 39, 111, 117, 170, 74, 124, 94 }, // left = dc + { 48, 34, 86, 101, 92, 146, 78, 179, 134 }, // left = v + { 47, 22, 24, 138, 187, 178, 68, 69, 59 }, // left = h + { 56, 25, 33, 105, 112, 187, 95, 177, 129 }, // left = d45 + { 48, 31, 27, 114, 63, 183, 82, 116, 56 }, // left = d135 + { 43, 28, 37, 121, 63, 123, 61, 192, 169 }, // left = d117 + { 42, 17, 24, 109, 97, 177, 56, 76, 122 }, // left = d153 + { 58, 18, 28, 105, 139, 182, 70, 92, 63 }, // left = d207 + { 46, 23, 32, 74, 86, 150, 67, 183, 88 }, // left = d63 + { 36, 38, 48, 92, 122, 165, 88, 137, 91 } // left = tm + }, { // above = tm + { 65, 70, 60, 155, 159, 199, 61, 60, 81 }, // left = dc + { 44, 78, 115, 132, 119, 173, 71, 112, 93 }, // left = v + { 39, 38, 21, 184, 227, 206, 42, 32, 64 }, // left = h + { 58, 47, 36, 124, 137, 193, 80, 82, 78 }, // left = d45 + { 49, 50, 35, 144, 95, 205, 63, 78, 59 }, // left = d135 + { 41, 53, 52, 148, 71, 142, 65, 128, 51 }, // left = d117 + { 40, 36, 28, 143, 143, 202, 40, 55, 137 }, // left = d153 + { 52, 34, 29, 129, 183, 227, 42, 35, 43 }, // left = d207 + { 42, 44, 44, 104, 105, 164, 64, 130, 80 }, // left = d63 + { 43, 81, 53, 140, 169, 204, 68, 84, 72 } // left = tm + } +}; + +const vpx_prob vp9_kf_uv_mode_prob[INTRA_MODES][INTRA_MODES - 1] = { + { 144, 11, 54, 157, 195, 130, 46, 58, 108 }, // y = dc + { 118, 15, 123, 148, 131, 101, 44, 93, 131 }, // y = v + { 113, 12, 23, 188, 226, 142, 26, 32, 125 }, // y = h + { 120, 11, 50, 123, 163, 135, 64, 77, 103 }, // y = d45 + { 113, 9, 36, 155, 111, 157, 32, 44, 161 }, // y = 
d135 + { 116, 9, 55, 176, 76, 96, 37, 61, 149 }, // y = d117 + { 115, 9, 28, 141, 161, 167, 21, 25, 193 }, // y = d153 + { 120, 12, 32, 145, 195, 142, 32, 38, 86 }, // y = d207 + { 116, 12, 64, 120, 140, 125, 49, 115, 121 }, // y = d63 + { 102, 19, 66, 162, 182, 122, 35, 59, 128 } // y = tm +}; + +static const vpx_prob default_if_y_probs[BLOCK_SIZE_GROUPS][INTRA_MODES - 1] = { + { 65, 32, 18, 144, 162, 194, 41, 51, 98 }, // block_size < 8x8 + { 132, 68, 18, 165, 217, 196, 45, 40, 78 }, // block_size < 16x16 + { 173, 80, 19, 176, 240, 193, 64, 35, 46 }, // block_size < 32x32 + { 221, 135, 38, 194, 248, 121, 96, 85, 29 } // block_size >= 32x32 +}; + +static const vpx_prob default_if_uv_probs[INTRA_MODES][INTRA_MODES - 1] = { + { 120, 7, 76, 176, 208, 126, 28, 54, 103 }, // y = dc + { 48, 12, 154, 155, 139, 90, 34, 117, 119 }, // y = v + { 67, 6, 25, 204, 243, 158, 13, 21, 96 }, // y = h + { 97, 5, 44, 131, 176, 139, 48, 68, 97 }, // y = d45 + { 83, 5, 42, 156, 111, 152, 26, 49, 152 }, // y = d135 + { 80, 5, 58, 178, 74, 83, 33, 62, 145 }, // y = d117 + { 86, 5, 32, 154, 192, 168, 14, 22, 163 }, // y = d153 + { 85, 5, 32, 156, 216, 148, 19, 29, 73 }, // y = d207 + { 77, 7, 64, 116, 132, 122, 37, 126, 120 }, // y = d63 + { 101, 21, 107, 181, 192, 103, 19, 67, 125 } // y = tm +}; + +const vpx_prob vp9_kf_partition_probs[PARTITION_CONTEXTS] + [PARTITION_TYPES - 1] = { + // 8x8 -> 4x4 + { 158, 97, 94 }, // a/l both not split + { 93, 24, 99 }, // a split, l not split + { 85, 119, 44 }, // l split, a not split + { 62, 59, 67 }, // a/l both split + // 16x16 -> 8x8 + { 149, 53, 53 }, // a/l both not split + { 94, 20, 48 }, // a split, l not split + { 83, 53, 24 }, // l split, a not split + { 52, 18, 18 }, // a/l both split + // 32x32 -> 16x16 + { 150, 40, 39 }, // a/l both not split + { 78, 12, 26 }, // a split, l not split + { 67, 33, 11 }, // l split, a not split + { 24, 7, 5 }, // a/l both split + // 64x64 -> 32x32 + { 174, 35, 49 }, // a/l both not split + { 68, 11, 27 }, // a split, l not split + { 57, 15, 9 }, // l split, a not split + { 12, 3, 3 }, // a/l both split +}; + +static const vpx_prob default_partition_probs[PARTITION_CONTEXTS] + [PARTITION_TYPES - 1] = { + // 8x8 -> 4x4 + { 199, 122, 141 }, // a/l both not split + { 147, 63, 159 }, // a split, l not split + { 148, 133, 118 }, // l split, a not split + { 121, 104, 114 }, // a/l both split + // 16x16 -> 8x8 + { 174, 73, 87 }, // a/l both not split + { 92, 41, 83 }, // a split, l not split + { 82, 99, 50 }, // l split, a not split + { 53, 39, 39 }, // a/l both split + // 32x32 -> 16x16 + { 177, 58, 59 }, // a/l both not split + { 68, 26, 63 }, // a split, l not split + { 52, 79, 25 }, // l split, a not split + { 17, 14, 12 }, // a/l both split + // 64x64 -> 32x32 + { 222, 34, 30 }, // a/l both not split + { 72, 16, 44 }, // a split, l not split + { 58, 32, 12 }, // l split, a not split + { 10, 7, 6 }, // a/l both split +}; + +static const vpx_prob default_inter_mode_probs[INTER_MODE_CONTEXTS] + [INTER_MODES - 1] = { + {2, 173, 34}, // 0 = both zero mv + {7, 145, 85}, // 1 = one zero mv + one a predicted mv + {7, 166, 63}, // 2 = two predicted mvs + {7, 94, 66}, // 3 = one predicted/zero and one new mv + {8, 64, 46}, // 4 = two new mvs + {17, 81, 31}, // 5 = one intra neighbour + x + {25, 29, 30}, // 6 = two intra neighbours +}; + +/* Array indices are identical to previously-existing INTRAMODECONTEXTNODES. 
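+ A vpx_tree_index array stores a binary tree as consecutive left/right + pairs: a nonnegative entry is the offset of the next pair to visit, + while a negative entry is a leaf holding the negated token value. + Decoding is, roughly: do { i = tree[i + read_bit()]; } while (i > 0); + and the decoded symbol is then -i.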
*/ +const vpx_tree_index vp9_intra_mode_tree[TREE_SIZE(INTRA_MODES)] = { + -DC_PRED, 2, /* 0 = DC_NODE */ + -TM_PRED, 4, /* 1 = TM_NODE */ + -V_PRED, 6, /* 2 = V_NODE */ + 8, 12, /* 3 = COM_NODE */ + -H_PRED, 10, /* 4 = H_NODE */ + -D135_PRED, -D117_PRED, /* 5 = D135_NODE */ + -D45_PRED, 14, /* 6 = D45_NODE */ + -D63_PRED, 16, /* 7 = D63_NODE */ + -D153_PRED, -D207_PRED /* 8 = D153_NODE */ +}; + +const vpx_tree_index vp9_inter_mode_tree[TREE_SIZE(INTER_MODES)] = { + -INTER_OFFSET(ZEROMV), 2, + -INTER_OFFSET(NEARESTMV), 4, + -INTER_OFFSET(NEARMV), -INTER_OFFSET(NEWMV) +}; + +const vpx_tree_index vp9_partition_tree[TREE_SIZE(PARTITION_TYPES)] = { + -PARTITION_NONE, 2, + -PARTITION_HORZ, 4, + -PARTITION_VERT, -PARTITION_SPLIT +}; + +static const vpx_prob default_intra_inter_p[INTRA_INTER_CONTEXTS] = { + 9, 102, 187, 225 +}; + +static const vpx_prob default_comp_inter_p[COMP_INTER_CONTEXTS] = { + 239, 183, 119, 96, 41 +}; + +static const vpx_prob default_comp_ref_p[REF_CONTEXTS] = { + 50, 126, 123, 221, 226 +}; + +static const vpx_prob default_single_ref_p[REF_CONTEXTS][2] = { + { 33, 16 }, + { 77, 74 }, + { 142, 142 }, + { 172, 170 }, + { 238, 247 } +}; + +static const struct tx_probs default_tx_probs = { + { { 3, 136, 37 }, + { 5, 52, 13 } }, + + { { 20, 152 }, + { 15, 101 } }, + + { { 100 }, + { 66 } } +}; + +void tx_counts_to_branch_counts_32x32(const unsigned int *tx_count_32x32p, + unsigned int (*ct_32x32p)[2]) { + ct_32x32p[0][0] = tx_count_32x32p[TX_4X4]; + ct_32x32p[0][1] = tx_count_32x32p[TX_8X8] + + tx_count_32x32p[TX_16X16] + + tx_count_32x32p[TX_32X32]; + ct_32x32p[1][0] = tx_count_32x32p[TX_8X8]; + ct_32x32p[1][1] = tx_count_32x32p[TX_16X16] + + tx_count_32x32p[TX_32X32]; + ct_32x32p[2][0] = tx_count_32x32p[TX_16X16]; + ct_32x32p[2][1] = tx_count_32x32p[TX_32X32]; +} + +void tx_counts_to_branch_counts_16x16(const unsigned int *tx_count_16x16p, + unsigned int (*ct_16x16p)[2]) { + ct_16x16p[0][0] = tx_count_16x16p[TX_4X4]; + ct_16x16p[0][1] = tx_count_16x16p[TX_8X8] + tx_count_16x16p[TX_16X16]; + ct_16x16p[1][0] = tx_count_16x16p[TX_8X8]; + ct_16x16p[1][1] = tx_count_16x16p[TX_16X16]; +} + +void tx_counts_to_branch_counts_8x8(const unsigned int *tx_count_8x8p, + unsigned int (*ct_8x8p)[2]) { + ct_8x8p[0][0] = tx_count_8x8p[TX_4X4]; + ct_8x8p[0][1] = tx_count_8x8p[TX_8X8]; +} + +static const vpx_prob default_skip_probs[SKIP_CONTEXTS] = { + 192, 128, 64 +}; + +static const vpx_prob default_switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS] + [SWITCHABLE_FILTERS - 1] = { + { 235, 162, }, + { 36, 255, }, + { 34, 3, }, + { 149, 144, }, +}; + +static void init_mode_probs(FRAME_CONTEXT *fc) { + vp9_copy(fc->uv_mode_prob, default_if_uv_probs); + vp9_copy(fc->y_mode_prob, default_if_y_probs); + vp9_copy(fc->switchable_interp_prob, default_switchable_interp_prob); + vp9_copy(fc->partition_prob, default_partition_probs); + vp9_copy(fc->intra_inter_prob, default_intra_inter_p); + vp9_copy(fc->comp_inter_prob, default_comp_inter_p); + vp9_copy(fc->comp_ref_prob, default_comp_ref_p); + vp9_copy(fc->single_ref_prob, default_single_ref_p); + fc->tx_probs = default_tx_probs; + vp9_copy(fc->skip_probs, default_skip_probs); + vp9_copy(fc->inter_mode_probs, default_inter_mode_probs); +} + +const vpx_tree_index vp9_switchable_interp_tree + [TREE_SIZE(SWITCHABLE_FILTERS)] = { + -EIGHTTAP, 2, + -EIGHTTAP_SMOOTH, -EIGHTTAP_SHARP +}; + +void vp9_adapt_mode_probs(VP9_COMMON *cm) { + int i, j; + FRAME_CONTEXT *fc = cm->fc; + const FRAME_CONTEXT *pre_fc = &cm->frame_contexts[cm->frame_context_idx]; + const 
FRAME_COUNTS *counts = &cm->counts; + + for (i = 0; i < INTRA_INTER_CONTEXTS; i++) + fc->intra_inter_prob[i] = mode_mv_merge_probs(pre_fc->intra_inter_prob[i], + counts->intra_inter[i]); + for (i = 0; i < COMP_INTER_CONTEXTS; i++) + fc->comp_inter_prob[i] = mode_mv_merge_probs(pre_fc->comp_inter_prob[i], + counts->comp_inter[i]); + for (i = 0; i < REF_CONTEXTS; i++) + fc->comp_ref_prob[i] = mode_mv_merge_probs(pre_fc->comp_ref_prob[i], + counts->comp_ref[i]); + for (i = 0; i < REF_CONTEXTS; i++) + for (j = 0; j < 2; j++) + fc->single_ref_prob[i][j] = mode_mv_merge_probs( + pre_fc->single_ref_prob[i][j], counts->single_ref[i][j]); + + for (i = 0; i < INTER_MODE_CONTEXTS; i++) + vpx_tree_merge_probs(vp9_inter_mode_tree, pre_fc->inter_mode_probs[i], + counts->inter_mode[i], fc->inter_mode_probs[i]); + + for (i = 0; i < BLOCK_SIZE_GROUPS; i++) + vpx_tree_merge_probs(vp9_intra_mode_tree, pre_fc->y_mode_prob[i], + counts->y_mode[i], fc->y_mode_prob[i]); + + for (i = 0; i < INTRA_MODES; ++i) + vpx_tree_merge_probs(vp9_intra_mode_tree, pre_fc->uv_mode_prob[i], + counts->uv_mode[i], fc->uv_mode_prob[i]); + + for (i = 0; i < PARTITION_CONTEXTS; i++) + vpx_tree_merge_probs(vp9_partition_tree, pre_fc->partition_prob[i], + counts->partition[i], fc->partition_prob[i]); + + if (cm->interp_filter == SWITCHABLE) { + for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) + vpx_tree_merge_probs(vp9_switchable_interp_tree, + pre_fc->switchable_interp_prob[i], + counts->switchable_interp[i], + fc->switchable_interp_prob[i]); + } + + if (cm->tx_mode == TX_MODE_SELECT) { + int j; + unsigned int branch_ct_8x8p[TX_SIZES - 3][2]; + unsigned int branch_ct_16x16p[TX_SIZES - 2][2]; + unsigned int branch_ct_32x32p[TX_SIZES - 1][2]; + + for (i = 0; i < TX_SIZE_CONTEXTS; ++i) { + tx_counts_to_branch_counts_8x8(counts->tx.p8x8[i], branch_ct_8x8p); + for (j = 0; j < TX_SIZES - 3; ++j) + fc->tx_probs.p8x8[i][j] = mode_mv_merge_probs( + pre_fc->tx_probs.p8x8[i][j], branch_ct_8x8p[j]); + + tx_counts_to_branch_counts_16x16(counts->tx.p16x16[i], branch_ct_16x16p); + for (j = 0; j < TX_SIZES - 2; ++j) + fc->tx_probs.p16x16[i][j] = mode_mv_merge_probs( + pre_fc->tx_probs.p16x16[i][j], branch_ct_16x16p[j]); + + tx_counts_to_branch_counts_32x32(counts->tx.p32x32[i], branch_ct_32x32p); + for (j = 0; j < TX_SIZES - 1; ++j) + fc->tx_probs.p32x32[i][j] = mode_mv_merge_probs( + pre_fc->tx_probs.p32x32[i][j], branch_ct_32x32p[j]); + } + } + + for (i = 0; i < SKIP_CONTEXTS; ++i) + fc->skip_probs[i] = mode_mv_merge_probs( + pre_fc->skip_probs[i], counts->skip[i]); +} + +static void set_default_lf_deltas(struct loopfilter *lf) { + lf->mode_ref_delta_enabled = 1; + lf->mode_ref_delta_update = 1; + + lf->ref_deltas[INTRA_FRAME] = 1; + lf->ref_deltas[LAST_FRAME] = 0; + lf->ref_deltas[GOLDEN_FRAME] = -1; + lf->ref_deltas[ALTREF_FRAME] = -1; + + lf->mode_deltas[0] = 0; + lf->mode_deltas[1] = 0; +} + +void vp9_setup_past_independence(VP9_COMMON *cm) { + // Reset the segment feature data to the default stats: + // Features disabled, 0, with delta coding (Default state). 
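+ // In addition to segmentation, this resets the loop-filter mode/ref + // deltas, the coefficient, mode and MV probabilities, and, depending + // on the reset mode signalled in the frame header, some or all of the + // stored frame contexts.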
+ struct loopfilter *const lf = &cm->lf; + + int i; + vp9_clearall_segfeatures(&cm->seg); + cm->seg.abs_delta = SEGMENT_DELTADATA; + + if (cm->last_frame_seg_map && !cm->frame_parallel_decode) + memset(cm->last_frame_seg_map, 0, (cm->mi_rows * cm->mi_cols)); + + if (cm->current_frame_seg_map) + memset(cm->current_frame_seg_map, 0, (cm->mi_rows * cm->mi_cols)); + + // Reset the mode ref deltas for loop filter + vp9_zero(lf->last_ref_deltas); + vp9_zero(lf->last_mode_deltas); + set_default_lf_deltas(lf); + + // To force update of the sharpness + lf->last_sharpness_level = -1; + + vp9_default_coef_probs(cm); + init_mode_probs(cm->fc); + vp9_init_mv_probs(cm); + cm->fc->initialized = 1; + + if (cm->frame_type == KEY_FRAME || + cm->error_resilient_mode || cm->reset_frame_context == 3) { + // Reset all frame contexts. + for (i = 0; i < FRAME_CONTEXTS; ++i) + cm->frame_contexts[i] = *cm->fc; + } else if (cm->reset_frame_context == 2) { + // Reset only the frame context specified in the frame header. + cm->frame_contexts[cm->frame_context_idx] = *cm->fc; + } + + // prev_mip will only be allocated in encoder. + if (frame_is_intra_only(cm) && cm->prev_mip && !cm->frame_parallel_decode) + memset(cm->prev_mip, 0, + cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->prev_mip)); + + vp9_zero(cm->ref_frame_sign_bias); + + cm->frame_context_idx = 0; +} diff --git a/thirdparty/libvpx/vp9/common/vp9_entropymode.h b/thirdparty/libvpx/vp9/common/vp9_entropymode.h new file mode 100644 index 0000000000..0285be1557 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_entropymode.h @@ -0,0 +1,107 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef VP9_COMMON_VP9_ENTROPYMODE_H_ +#define VP9_COMMON_VP9_ENTROPYMODE_H_ + +#include "vp9/common/vp9_entropy.h" +#include "vp9/common/vp9_entropymv.h" +#include "vp9/common/vp9_filter.h" +#include "vpx_dsp/vpx_filter.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define BLOCK_SIZE_GROUPS 4 + +#define TX_SIZE_CONTEXTS 2 + +#define INTER_OFFSET(mode) ((mode) - NEARESTMV) + +struct VP9Common; + +struct tx_probs { + vpx_prob p32x32[TX_SIZE_CONTEXTS][TX_SIZES - 1]; + vpx_prob p16x16[TX_SIZE_CONTEXTS][TX_SIZES - 2]; + vpx_prob p8x8[TX_SIZE_CONTEXTS][TX_SIZES - 3]; +}; + +struct tx_counts { + unsigned int p32x32[TX_SIZE_CONTEXTS][TX_SIZES]; + unsigned int p16x16[TX_SIZE_CONTEXTS][TX_SIZES - 1]; + unsigned int p8x8[TX_SIZE_CONTEXTS][TX_SIZES - 2]; + unsigned int tx_totals[TX_SIZES]; +}; + +typedef struct frame_contexts { + vpx_prob y_mode_prob[BLOCK_SIZE_GROUPS][INTRA_MODES - 1]; + vpx_prob uv_mode_prob[INTRA_MODES][INTRA_MODES - 1]; + vpx_prob partition_prob[PARTITION_CONTEXTS][PARTITION_TYPES - 1]; + vp9_coeff_probs_model coef_probs[TX_SIZES][PLANE_TYPES]; + vpx_prob switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS] + [SWITCHABLE_FILTERS - 1]; + vpx_prob inter_mode_probs[INTER_MODE_CONTEXTS][INTER_MODES - 1]; + vpx_prob intra_inter_prob[INTRA_INTER_CONTEXTS]; + vpx_prob comp_inter_prob[COMP_INTER_CONTEXTS]; + vpx_prob single_ref_prob[REF_CONTEXTS][2]; + vpx_prob comp_ref_prob[REF_CONTEXTS]; + struct tx_probs tx_probs; + vpx_prob skip_probs[SKIP_CONTEXTS]; + nmv_context nmvc; + int initialized; +} FRAME_CONTEXT; + +typedef struct FRAME_COUNTS { + unsigned int y_mode[BLOCK_SIZE_GROUPS][INTRA_MODES]; + unsigned int uv_mode[INTRA_MODES][INTRA_MODES]; + unsigned int partition[PARTITION_CONTEXTS][PARTITION_TYPES]; + vp9_coeff_count_model coef[TX_SIZES][PLANE_TYPES]; + unsigned int eob_branch[TX_SIZES][PLANE_TYPES][REF_TYPES] + [COEF_BANDS][COEFF_CONTEXTS]; + unsigned int switchable_interp[SWITCHABLE_FILTER_CONTEXTS] + [SWITCHABLE_FILTERS]; + unsigned int inter_mode[INTER_MODE_CONTEXTS][INTER_MODES]; + unsigned int intra_inter[INTRA_INTER_CONTEXTS][2]; + unsigned int comp_inter[COMP_INTER_CONTEXTS][2]; + unsigned int single_ref[REF_CONTEXTS][2][2]; + unsigned int comp_ref[REF_CONTEXTS][2]; + struct tx_counts tx; + unsigned int skip[SKIP_CONTEXTS][2]; + nmv_context_counts mv; +} FRAME_COUNTS; + +extern const vpx_prob vp9_kf_uv_mode_prob[INTRA_MODES][INTRA_MODES - 1]; +extern const vpx_prob vp9_kf_y_mode_prob[INTRA_MODES][INTRA_MODES] + [INTRA_MODES - 1]; +extern const vpx_prob vp9_kf_partition_probs[PARTITION_CONTEXTS] + [PARTITION_TYPES - 1]; +extern const vpx_tree_index vp9_intra_mode_tree[TREE_SIZE(INTRA_MODES)]; +extern const vpx_tree_index vp9_inter_mode_tree[TREE_SIZE(INTER_MODES)]; +extern const vpx_tree_index vp9_partition_tree[TREE_SIZE(PARTITION_TYPES)]; +extern const vpx_tree_index vp9_switchable_interp_tree + [TREE_SIZE(SWITCHABLE_FILTERS)]; + +void vp9_setup_past_independence(struct VP9Common *cm); + +void vp9_adapt_mode_probs(struct VP9Common *cm); + +void tx_counts_to_branch_counts_32x32(const unsigned int *tx_count_32x32p, + unsigned int (*ct_32x32p)[2]); +void tx_counts_to_branch_counts_16x16(const unsigned int *tx_count_16x16p, + unsigned int (*ct_16x16p)[2]); +void tx_counts_to_branch_counts_8x8(const unsigned int *tx_count_8x8p, + unsigned int (*ct_8x8p)[2]); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_COMMON_VP9_ENTROPYMODE_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_entropymv.c b/thirdparty/libvpx/vp9/common/vp9_entropymv.c new file mode 
100644 index 0000000000..566ae91cf7 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_entropymv.c @@ -0,0 +1,210 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "vp9/common/vp9_onyxc_int.h" +#include "vp9/common/vp9_entropymv.h" + +const vpx_tree_index vp9_mv_joint_tree[TREE_SIZE(MV_JOINTS)] = { + -MV_JOINT_ZERO, 2, + -MV_JOINT_HNZVZ, 4, + -MV_JOINT_HZVNZ, -MV_JOINT_HNZVNZ +}; + +const vpx_tree_index vp9_mv_class_tree[TREE_SIZE(MV_CLASSES)] = { + -MV_CLASS_0, 2, + -MV_CLASS_1, 4, + 6, 8, + -MV_CLASS_2, -MV_CLASS_3, + 10, 12, + -MV_CLASS_4, -MV_CLASS_5, + -MV_CLASS_6, 14, + 16, 18, + -MV_CLASS_7, -MV_CLASS_8, + -MV_CLASS_9, -MV_CLASS_10, +}; + +const vpx_tree_index vp9_mv_class0_tree[TREE_SIZE(CLASS0_SIZE)] = { + -0, -1, +}; + +const vpx_tree_index vp9_mv_fp_tree[TREE_SIZE(MV_FP_SIZE)] = { + -0, 2, + -1, 4, + -2, -3 +}; + +static const nmv_context default_nmv_context = { + {32, 64, 96}, + { + { // Vertical component + 128, // sign + {224, 144, 192, 168, 192, 176, 192, 198, 198, 245}, // class + {216}, // class0 + {136, 140, 148, 160, 176, 192, 224, 234, 234, 240}, // bits + {{128, 128, 64}, {96, 112, 64}}, // class0_fp + {64, 96, 64}, // fp + 160, // class0_hp bit + 128, // hp + }, + { // Horizontal component + 128, // sign + {216, 128, 176, 160, 176, 176, 192, 198, 198, 208}, // class + {208}, // class0 + {136, 140, 148, 160, 176, 192, 224, 234, 234, 240}, // bits + {{128, 128, 64}, {96, 112, 64}}, // class0_fp + {64, 96, 64}, // fp + 160, // class0_hp bit + 128, // hp + } + }, +}; + +static const uint8_t log_in_base_2[] = { + 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, + 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 
9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10 +}; + +static INLINE int mv_class_base(MV_CLASS_TYPE c) { + return c ? CLASS0_SIZE << (c + 2) : 0; +} + +MV_CLASS_TYPE vp9_get_mv_class(int z, int *offset) { + const MV_CLASS_TYPE c = (z >= CLASS0_SIZE * 4096) ? + MV_CLASS_10 : (MV_CLASS_TYPE)log_in_base_2[z >> 3]; + if (offset) + *offset = z - mv_class_base(c); + return c; +} + +static void inc_mv_component(int v, nmv_component_counts *comp_counts, + int incr, int usehp) { + int s, z, c, o, d, e, f; + assert(v != 0); /* should not be zero */ + s = v < 0; + comp_counts->sign[s] += incr; + z = (s ? 
-v : v) - 1; /* magnitude - 1 */ + + c = vp9_get_mv_class(z, &o); + comp_counts->classes[c] += incr; + + d = (o >> 3); /* int mv data */ + f = (o >> 1) & 3; /* fractional pel mv data */ + e = (o & 1); /* high precision mv data */ + + if (c == MV_CLASS_0) { + comp_counts->class0[d] += incr; + comp_counts->class0_fp[d][f] += incr; + comp_counts->class0_hp[e] += usehp * incr; + } else { + int i; + int b = c + CLASS0_BITS - 1; // number of bits + for (i = 0; i < b; ++i) + comp_counts->bits[i][((d >> i) & 1)] += incr; + comp_counts->fp[f] += incr; + comp_counts->hp[e] += usehp * incr; + } +} + +void vp9_inc_mv(const MV *mv, nmv_context_counts *counts) { + if (counts != NULL) { + const MV_JOINT_TYPE j = vp9_get_mv_joint(mv); + ++counts->joints[j]; + + if (mv_joint_vertical(j)) { + inc_mv_component(mv->row, &counts->comps[0], 1, 1); + } + + if (mv_joint_horizontal(j)) { + inc_mv_component(mv->col, &counts->comps[1], 1, 1); + } + } +} + +void vp9_adapt_mv_probs(VP9_COMMON *cm, int allow_hp) { + int i, j; + + nmv_context *fc = &cm->fc->nmvc; + const nmv_context *pre_fc = &cm->frame_contexts[cm->frame_context_idx].nmvc; + const nmv_context_counts *counts = &cm->counts.mv; + + vpx_tree_merge_probs(vp9_mv_joint_tree, pre_fc->joints, counts->joints, + fc->joints); + + for (i = 0; i < 2; ++i) { + nmv_component *comp = &fc->comps[i]; + const nmv_component *pre_comp = &pre_fc->comps[i]; + const nmv_component_counts *c = &counts->comps[i]; + + comp->sign = mode_mv_merge_probs(pre_comp->sign, c->sign); + vpx_tree_merge_probs(vp9_mv_class_tree, pre_comp->classes, c->classes, + comp->classes); + vpx_tree_merge_probs(vp9_mv_class0_tree, pre_comp->class0, c->class0, + comp->class0); + + for (j = 0; j < MV_OFFSET_BITS; ++j) + comp->bits[j] = mode_mv_merge_probs(pre_comp->bits[j], c->bits[j]); + + for (j = 0; j < CLASS0_SIZE; ++j) + vpx_tree_merge_probs(vp9_mv_fp_tree, pre_comp->class0_fp[j], + c->class0_fp[j], comp->class0_fp[j]); + + vpx_tree_merge_probs(vp9_mv_fp_tree, pre_comp->fp, c->fp, comp->fp); + + if (allow_hp) { + comp->class0_hp = mode_mv_merge_probs(pre_comp->class0_hp, c->class0_hp); + comp->hp = mode_mv_merge_probs(pre_comp->hp, c->hp); + } + } +} + +void vp9_init_mv_probs(VP9_COMMON *cm) { + cm->fc->nmvc = default_nmv_context; +} diff --git a/thirdparty/libvpx/vp9/common/vp9_entropymv.h b/thirdparty/libvpx/vp9/common/vp9_entropymv.h new file mode 100644 index 0000000000..2f05ad44b6 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_entropymv.h @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + + +#ifndef VP9_COMMON_VP9_ENTROPYMV_H_ +#define VP9_COMMON_VP9_ENTROPYMV_H_ + +#include "./vpx_config.h" + +#include "vpx_dsp/prob.h" + +#include "vp9/common/vp9_mv.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct VP9Common; + +void vp9_init_mv_probs(struct VP9Common *cm); + +void vp9_adapt_mv_probs(struct VP9Common *cm, int usehp); + +// Integer pel reference mv threshold for use of high-precision 1/8 mv +#define COMPANDED_MVREF_THRESH 8 + +static INLINE int use_mv_hp(const MV *ref) { + return (abs(ref->row) >> 3) < COMPANDED_MVREF_THRESH && + (abs(ref->col) >> 3) < COMPANDED_MVREF_THRESH; +} + +#define MV_UPDATE_PROB 252 + +/* Symbols for coding which components are zero jointly */ +#define MV_JOINTS 4 +typedef enum { + MV_JOINT_ZERO = 0, /* Zero vector */ + MV_JOINT_HNZVZ = 1, /* Vert zero, hor nonzero */ + MV_JOINT_HZVNZ = 2, /* Hor zero, vert nonzero */ + MV_JOINT_HNZVNZ = 3, /* Both components nonzero */ +} MV_JOINT_TYPE; + +static INLINE int mv_joint_vertical(MV_JOINT_TYPE type) { + return type == MV_JOINT_HZVNZ || type == MV_JOINT_HNZVNZ; +} + +static INLINE int mv_joint_horizontal(MV_JOINT_TYPE type) { + return type == MV_JOINT_HNZVZ || type == MV_JOINT_HNZVNZ; +} + +/* Symbols for coding magnitude class of nonzero components */ +#define MV_CLASSES 11 +typedef enum { + MV_CLASS_0 = 0, /* (0, 2] integer pel */ + MV_CLASS_1 = 1, /* (2, 4] integer pel */ + MV_CLASS_2 = 2, /* (4, 8] integer pel */ + MV_CLASS_3 = 3, /* (8, 16] integer pel */ + MV_CLASS_4 = 4, /* (16, 32] integer pel */ + MV_CLASS_5 = 5, /* (32, 64] integer pel */ + MV_CLASS_6 = 6, /* (64, 128] integer pel */ + MV_CLASS_7 = 7, /* (128, 256] integer pel */ + MV_CLASS_8 = 8, /* (256, 512] integer pel */ + MV_CLASS_9 = 9, /* (512, 1024] integer pel */ + MV_CLASS_10 = 10, /* (1024,2048] integer pel */ +} MV_CLASS_TYPE; + +#define CLASS0_BITS 1 /* bits at integer precision for class 0 */ +#define CLASS0_SIZE (1 << CLASS0_BITS) +#define MV_OFFSET_BITS (MV_CLASSES + CLASS0_BITS - 2) +#define MV_FP_SIZE 4 + +#define MV_MAX_BITS (MV_CLASSES + CLASS0_BITS + 2) +#define MV_MAX ((1 << MV_MAX_BITS) - 1) +#define MV_VALS ((MV_MAX << 1) + 1) + +#define MV_IN_USE_BITS 14 +#define MV_UPP ((1 << MV_IN_USE_BITS) - 1) +#define MV_LOW (-(1 << MV_IN_USE_BITS)) + +extern const vpx_tree_index vp9_mv_joint_tree[]; +extern const vpx_tree_index vp9_mv_class_tree[]; +extern const vpx_tree_index vp9_mv_class0_tree[]; +extern const vpx_tree_index vp9_mv_fp_tree[]; + +typedef struct { + vpx_prob sign; + vpx_prob classes[MV_CLASSES - 1]; + vpx_prob class0[CLASS0_SIZE - 1]; + vpx_prob bits[MV_OFFSET_BITS]; + vpx_prob class0_fp[CLASS0_SIZE][MV_FP_SIZE - 1]; + vpx_prob fp[MV_FP_SIZE - 1]; + vpx_prob class0_hp; + vpx_prob hp; +} nmv_component; + +typedef struct { + vpx_prob joints[MV_JOINTS - 1]; + nmv_component comps[2]; +} nmv_context; + +static INLINE MV_JOINT_TYPE vp9_get_mv_joint(const MV *mv) { + if (mv->row == 0) { + return mv->col == 0 ? MV_JOINT_ZERO : MV_JOINT_HNZVZ; + } else { + return mv->col == 0 ? 
MV_JOINT_HZVNZ : MV_JOINT_HNZVNZ; + } +} + +MV_CLASS_TYPE vp9_get_mv_class(int z, int *offset); + +typedef struct { + unsigned int sign[2]; + unsigned int classes[MV_CLASSES]; + unsigned int class0[CLASS0_SIZE]; + unsigned int bits[MV_OFFSET_BITS][2]; + unsigned int class0_fp[CLASS0_SIZE][MV_FP_SIZE]; + unsigned int fp[MV_FP_SIZE]; + unsigned int class0_hp[2]; + unsigned int hp[2]; +} nmv_component_counts; + +typedef struct { + unsigned int joints[MV_JOINTS]; + nmv_component_counts comps[2]; +} nmv_context_counts; + +void vp9_inc_mv(const MV *mv, nmv_context_counts *mvctx); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_COMMON_VP9_ENTROPYMV_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_enums.h b/thirdparty/libvpx/vp9/common/vp9_enums.h new file mode 100644 index 0000000000..d089f23f97 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_enums.h @@ -0,0 +1,147 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_COMMON_VP9_ENUMS_H_ +#define VP9_COMMON_VP9_ENUMS_H_ + +#include "./vpx_config.h" +#include "vpx/vpx_integer.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define MI_SIZE_LOG2 3 +#define MI_BLOCK_SIZE_LOG2 (6 - MI_SIZE_LOG2) // 64 = 2^6 + +#define MI_SIZE (1 << MI_SIZE_LOG2) // pixels per mi-unit +#define MI_BLOCK_SIZE (1 << MI_BLOCK_SIZE_LOG2) // mi-units per max block + +#define MI_MASK (MI_BLOCK_SIZE - 1) + +// Bitstream profiles indicated by 2-3 bits in the uncompressed header. +// 00: Profile 0. 8-bit 4:2:0 only. +// 10: Profile 1. 8-bit 4:4:4, 4:2:2, and 4:4:0. +// 01: Profile 2. 10-bit and 12-bit color only, with 4:2:0 sampling. +// 110: Profile 3. 10-bit and 12-bit color only, with 4:2:2/4:4:4/4:4:0 +// sampling. +// 111: Undefined profile. 
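+//
+// For illustration, the decoder reads these bits low bit first, pulling a
+// third bit only when the first two are both set (a sketch of the logic in
+// the decoder's read_profile() helper):
+//
+//   int profile = vpx_rb_read_bit(rb);
+//   profile |= vpx_rb_read_bit(rb) << 1;
+//   if (profile > 2)
+//     profile += vpx_rb_read_bit(rb);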
+typedef enum BITSTREAM_PROFILE { + PROFILE_0, + PROFILE_1, + PROFILE_2, + PROFILE_3, + MAX_PROFILES +} BITSTREAM_PROFILE; + +#define BLOCK_4X4 0 +#define BLOCK_4X8 1 +#define BLOCK_8X4 2 +#define BLOCK_8X8 3 +#define BLOCK_8X16 4 +#define BLOCK_16X8 5 +#define BLOCK_16X16 6 +#define BLOCK_16X32 7 +#define BLOCK_32X16 8 +#define BLOCK_32X32 9 +#define BLOCK_32X64 10 +#define BLOCK_64X32 11 +#define BLOCK_64X64 12 +#define BLOCK_SIZES 13 +#define BLOCK_INVALID BLOCK_SIZES +typedef uint8_t BLOCK_SIZE; + +typedef enum PARTITION_TYPE { + PARTITION_NONE, + PARTITION_HORZ, + PARTITION_VERT, + PARTITION_SPLIT, + PARTITION_TYPES, + PARTITION_INVALID = PARTITION_TYPES +} PARTITION_TYPE; + +typedef char PARTITION_CONTEXT; +#define PARTITION_PLOFFSET 4 // number of probability models per block size +#define PARTITION_CONTEXTS (4 * PARTITION_PLOFFSET) + +// block transform size +typedef uint8_t TX_SIZE; +#define TX_4X4 ((TX_SIZE)0) // 4x4 transform +#define TX_8X8 ((TX_SIZE)1) // 8x8 transform +#define TX_16X16 ((TX_SIZE)2) // 16x16 transform +#define TX_32X32 ((TX_SIZE)3) // 32x32 transform +#define TX_SIZES ((TX_SIZE)4) + +// frame transform mode +typedef enum { + ONLY_4X4 = 0, // only 4x4 transform used + ALLOW_8X8 = 1, // allow block transform size up to 8x8 + ALLOW_16X16 = 2, // allow block transform size up to 16x16 + ALLOW_32X32 = 3, // allow block transform size up to 32x32 + TX_MODE_SELECT = 4, // transform specified for each block + TX_MODES = 5, +} TX_MODE; + +typedef enum { + DCT_DCT = 0, // DCT in both horizontal and vertical + ADST_DCT = 1, // ADST in vertical, DCT in horizontal + DCT_ADST = 2, // DCT in vertical, ADST in horizontal + ADST_ADST = 3, // ADST in both directions + TX_TYPES = 4 +} TX_TYPE; + +typedef enum { + VP9_LAST_FLAG = 1 << 0, + VP9_GOLD_FLAG = 1 << 1, + VP9_ALT_FLAG = 1 << 2, +} VP9_REFFRAME; + +typedef enum { + PLANE_TYPE_Y = 0, + PLANE_TYPE_UV = 1, + PLANE_TYPES +} PLANE_TYPE; + +#define DC_PRED 0 // Average of above and left pixels +#define V_PRED 1 // Vertical +#define H_PRED 2 // Horizontal +#define D45_PRED 3 // Directional 45 deg = round(arctan(1/1) * 180/pi) +#define D135_PRED 4 // Directional 135 deg = 180 - 45 +#define D117_PRED 5 // Directional 117 deg = 180 - 63 +#define D153_PRED 6 // Directional 153 deg = 180 - 27 +#define D207_PRED 7 // Directional 207 deg = 180 + 27 +#define D63_PRED 8 // Directional 63 deg = round(arctan(2/1) * 180/pi) +#define TM_PRED 9 // True-motion +#define NEARESTMV 10 +#define NEARMV 11 +#define ZEROMV 12 +#define NEWMV 13 +#define MB_MODE_COUNT 14 +typedef uint8_t PREDICTION_MODE; + +#define INTRA_MODES (TM_PRED + 1) + +#define INTER_MODES (1 + NEWMV - NEARESTMV) + +#define SKIP_CONTEXTS 3 +#define INTER_MODE_CONTEXTS 7 + +/* Segment Feature Masks */ +#define MAX_MV_REF_CANDIDATES 2 + +#define INTRA_INTER_CONTEXTS 4 +#define COMP_INTER_CONTEXTS 5 +#define REF_CONTEXTS 5 + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_COMMON_VP9_ENUMS_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_filter.c b/thirdparty/libvpx/vp9/common/vp9_filter.c new file mode 100644 index 0000000000..4b2198fc40 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_filter.c @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <assert.h> + +#include "vp9/common/vp9_filter.h" + +DECLARE_ALIGNED(256, static const InterpKernel, + bilinear_filters[SUBPEL_SHIFTS]) = { + { 0, 0, 0, 128, 0, 0, 0, 0 }, + { 0, 0, 0, 120, 8, 0, 0, 0 }, + { 0, 0, 0, 112, 16, 0, 0, 0 }, + { 0, 0, 0, 104, 24, 0, 0, 0 }, + { 0, 0, 0, 96, 32, 0, 0, 0 }, + { 0, 0, 0, 88, 40, 0, 0, 0 }, + { 0, 0, 0, 80, 48, 0, 0, 0 }, + { 0, 0, 0, 72, 56, 0, 0, 0 }, + { 0, 0, 0, 64, 64, 0, 0, 0 }, + { 0, 0, 0, 56, 72, 0, 0, 0 }, + { 0, 0, 0, 48, 80, 0, 0, 0 }, + { 0, 0, 0, 40, 88, 0, 0, 0 }, + { 0, 0, 0, 32, 96, 0, 0, 0 }, + { 0, 0, 0, 24, 104, 0, 0, 0 }, + { 0, 0, 0, 16, 112, 0, 0, 0 }, + { 0, 0, 0, 8, 120, 0, 0, 0 } +}; + +// Lagrangian interpolation filter +DECLARE_ALIGNED(256, static const InterpKernel, + sub_pel_filters_8[SUBPEL_SHIFTS]) = { + { 0, 0, 0, 128, 0, 0, 0, 0}, + { 0, 1, -5, 126, 8, -3, 1, 0}, + { -1, 3, -10, 122, 18, -6, 2, 0}, + { -1, 4, -13, 118, 27, -9, 3, -1}, + { -1, 4, -16, 112, 37, -11, 4, -1}, + { -1, 5, -18, 105, 48, -14, 4, -1}, + { -1, 5, -19, 97, 58, -16, 5, -1}, + { -1, 6, -19, 88, 68, -18, 5, -1}, + { -1, 6, -19, 78, 78, -19, 6, -1}, + { -1, 5, -18, 68, 88, -19, 6, -1}, + { -1, 5, -16, 58, 97, -19, 5, -1}, + { -1, 4, -14, 48, 105, -18, 5, -1}, + { -1, 4, -11, 37, 112, -16, 4, -1}, + { -1, 3, -9, 27, 118, -13, 4, -1}, + { 0, 2, -6, 18, 122, -10, 3, -1}, + { 0, 1, -3, 8, 126, -5, 1, 0} +}; + +// DCT based filter +DECLARE_ALIGNED(256, static const InterpKernel, + sub_pel_filters_8s[SUBPEL_SHIFTS]) = { + {0, 0, 0, 128, 0, 0, 0, 0}, + {-1, 3, -7, 127, 8, -3, 1, 0}, + {-2, 5, -13, 125, 17, -6, 3, -1}, + {-3, 7, -17, 121, 27, -10, 5, -2}, + {-4, 9, -20, 115, 37, -13, 6, -2}, + {-4, 10, -23, 108, 48, -16, 8, -3}, + {-4, 10, -24, 100, 59, -19, 9, -3}, + {-4, 11, -24, 90, 70, -21, 10, -4}, + {-4, 11, -23, 80, 80, -23, 11, -4}, + {-4, 10, -21, 70, 90, -24, 11, -4}, + {-3, 9, -19, 59, 100, -24, 10, -4}, + {-3, 8, -16, 48, 108, -23, 10, -4}, + {-2, 6, -13, 37, 115, -20, 9, -4}, + {-2, 5, -10, 27, 121, -17, 7, -3}, + {-1, 3, -6, 17, 125, -13, 5, -2}, + {0, 1, -3, 8, 127, -7, 3, -1} +}; + +// freqmultiplier = 0.5 +DECLARE_ALIGNED(256, static const InterpKernel, + sub_pel_filters_8lp[SUBPEL_SHIFTS]) = { + { 0, 0, 0, 128, 0, 0, 0, 0}, + {-3, -1, 32, 64, 38, 1, -3, 0}, + {-2, -2, 29, 63, 41, 2, -3, 0}, + {-2, -2, 26, 63, 43, 4, -4, 0}, + {-2, -3, 24, 62, 46, 5, -4, 0}, + {-2, -3, 21, 60, 49, 7, -4, 0}, + {-1, -4, 18, 59, 51, 9, -4, 0}, + {-1, -4, 16, 57, 53, 12, -4, -1}, + {-1, -4, 14, 55, 55, 14, -4, -1}, + {-1, -4, 12, 53, 57, 16, -4, -1}, + { 0, -4, 9, 51, 59, 18, -4, -1}, + { 0, -4, 7, 49, 60, 21, -3, -2}, + { 0, -4, 5, 46, 62, 24, -3, -2}, + { 0, -4, 4, 43, 63, 26, -2, -2}, + { 0, -3, 2, 41, 63, 29, -2, -2}, + { 0, -3, 1, 38, 64, 32, -1, -3} +}; + + +const InterpKernel *vp9_filter_kernels[4] = { + sub_pel_filters_8, + sub_pel_filters_8lp, + sub_pel_filters_8s, + bilinear_filters +}; diff --git a/thirdparty/libvpx/vp9/common/vp9_filter.h b/thirdparty/libvpx/vp9/common/vp9_filter.h new file mode 100644 index 0000000000..efa24bc67b --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_filter.h @@ -0,0 +1,42 @@ +/* + * Copyright (c) 2011 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_COMMON_VP9_FILTER_H_
+#define VP9_COMMON_VP9_FILTER_H_
+
+#include "./vpx_config.h"
+#include "vpx/vpx_integer.h"
+#include "vpx_dsp/vpx_filter.h"
+#include "vpx_ports/mem.h"
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define EIGHTTAP 0
+#define EIGHTTAP_SMOOTH 1
+#define EIGHTTAP_SHARP 2
+#define SWITCHABLE_FILTERS 3 /* Number of switchable filters */
+#define BILINEAR 3
+// The codec can operate in four possible inter prediction filter modes:
+// 8-tap, 8-tap-smooth, 8-tap-sharp, and switching between the three.
+#define SWITCHABLE_FILTER_CONTEXTS (SWITCHABLE_FILTERS + 1)
+#define SWITCHABLE 4 /* should be the last one */
+
+typedef uint8_t INTERP_FILTER;
+
+extern const InterpKernel *vp9_filter_kernels[4];
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP9_COMMON_VP9_FILTER_H_
diff --git a/thirdparty/libvpx/vp9/common/vp9_frame_buffers.c b/thirdparty/libvpx/vp9/common/vp9_frame_buffers.c
new file mode 100644
index 0000000000..0f41d66985
--- /dev/null
+++ b/thirdparty/libvpx/vp9/common/vp9_frame_buffers.c
@@ -0,0 +1,86 @@
+/*
+ *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+
+#include "vp9/common/vp9_frame_buffers.h"
+#include "vpx_mem/vpx_mem.h"
+
+int vp9_alloc_internal_frame_buffers(InternalFrameBufferList *list) {
+  assert(list != NULL);
+  vp9_free_internal_frame_buffers(list);
+
+  list->num_internal_frame_buffers =
+      VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS;
+  list->int_fb =
+      (InternalFrameBuffer *)vpx_calloc(list->num_internal_frame_buffers,
+                                        sizeof(*list->int_fb));
+  return (list->int_fb == NULL);
+}
+
+void vp9_free_internal_frame_buffers(InternalFrameBufferList *list) {
+  int i;
+
+  assert(list != NULL);
+
+  for (i = 0; i < list->num_internal_frame_buffers; ++i) {
+    vpx_free(list->int_fb[i].data);
+    list->int_fb[i].data = NULL;
+  }
+  vpx_free(list->int_fb);
+  list->int_fb = NULL;
+}
+
+int vp9_get_frame_buffer(void *cb_priv, size_t min_size,
+                         vpx_codec_frame_buffer_t *fb) {
+  int i;
+  InternalFrameBufferList *const int_fb_list =
+      (InternalFrameBufferList *)cb_priv;
+  if (int_fb_list == NULL)
+    return -1;
+
+  // Find a free frame buffer.
+  for (i = 0; i < int_fb_list->num_internal_frame_buffers; ++i) {
+    if (!int_fb_list->int_fb[i].in_use)
+      break;
+  }
+
+  if (i == int_fb_list->num_internal_frame_buffers)
+    return -1;
+
+  if (int_fb_list->int_fb[i].size < min_size) {
+    int_fb_list->int_fb[i].data =
+        (uint8_t *)vpx_realloc(int_fb_list->int_fb[i].data, min_size);
+    if (!int_fb_list->int_fb[i].data)
+      return -1;
+
+    // This memset is needed to fix a valgrind error from the C loop filter
+    // caused by reads of uninitialized memory in the frame border. It could
+    // be removed if the border were removed entirely.
+    memset(int_fb_list->int_fb[i].data, 0, min_size);
+    int_fb_list->int_fb[i].size = min_size;
+  }
+
+  fb->data = int_fb_list->int_fb[i].data;
+  fb->size = int_fb_list->int_fb[i].size;
+  int_fb_list->int_fb[i].in_use = 1;
+
+  // Set the frame buffer's private data to point at the internal frame buffer.
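+  // vp9_release_frame_buffer() casts this pointer back to mark the buffer
+  // free. For illustration, an application wires this callback pair into a
+  // decoder roughly as follows (a sketch; error handling omitted):
+  //
+  //   InternalFrameBufferList list = { 0, NULL };
+  //   vp9_alloc_internal_frame_buffers(&list);
+  //   vpx_codec_set_frame_buffer_functions(&codec, vp9_get_frame_buffer,
+  //                                        vp9_release_frame_buffer, &list);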
+ fb->priv = &int_fb_list->int_fb[i]; + return 0; +} + +int vp9_release_frame_buffer(void *cb_priv, vpx_codec_frame_buffer_t *fb) { + InternalFrameBuffer *const int_fb = (InternalFrameBuffer *)fb->priv; + (void)cb_priv; + if (int_fb) + int_fb->in_use = 0; + return 0; +} diff --git a/thirdparty/libvpx/vp9/common/vp9_frame_buffers.h b/thirdparty/libvpx/vp9/common/vp9_frame_buffers.h new file mode 100644 index 0000000000..e2cfe61b66 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_frame_buffers.h @@ -0,0 +1,53 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_COMMON_VP9_FRAME_BUFFERS_H_ +#define VP9_COMMON_VP9_FRAME_BUFFERS_H_ + +#include "vpx/vpx_frame_buffer.h" +#include "vpx/vpx_integer.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct InternalFrameBuffer { + uint8_t *data; + size_t size; + int in_use; +} InternalFrameBuffer; + +typedef struct InternalFrameBufferList { + int num_internal_frame_buffers; + InternalFrameBuffer *int_fb; +} InternalFrameBufferList; + +// Initializes |list|. Returns 0 on success. +int vp9_alloc_internal_frame_buffers(InternalFrameBufferList *list); + +// Free any data allocated to the frame buffers. +void vp9_free_internal_frame_buffers(InternalFrameBufferList *list); + +// Callback used by libvpx to request an external frame buffer. |cb_priv| +// Callback private data, which points to an InternalFrameBufferList. +// |min_size| is the minimum size in bytes needed to decode the next frame. +// |fb| pointer to the frame buffer. +int vp9_get_frame_buffer(void *cb_priv, size_t min_size, + vpx_codec_frame_buffer_t *fb); + +// Callback used by libvpx when there are no references to the frame buffer. +// |cb_priv| is not used. |fb| pointer to the frame buffer. +int vp9_release_frame_buffer(void *cb_priv, vpx_codec_frame_buffer_t *fb); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_COMMON_VP9_FRAME_BUFFERS_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_idct.c b/thirdparty/libvpx/vp9/common/vp9_idct.c new file mode 100644 index 0000000000..1b420143bb --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_idct.c @@ -0,0 +1,405 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <math.h> + +#include "./vp9_rtcd.h" +#include "./vpx_dsp_rtcd.h" +#include "vp9/common/vp9_blockd.h" +#include "vp9/common/vp9_idct.h" +#include "vpx_dsp/inv_txfm.h" +#include "vpx_ports/mem.h" + +void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride, + int tx_type) { + const transform_2d IHT_4[] = { + { idct4_c, idct4_c }, // DCT_DCT = 0 + { iadst4_c, idct4_c }, // ADST_DCT = 1 + { idct4_c, iadst4_c }, // DCT_ADST = 2 + { iadst4_c, iadst4_c } // ADST_ADST = 3 + }; + + int i, j; + tran_low_t out[4 * 4]; + tran_low_t *outptr = out; + tran_low_t temp_in[4], temp_out[4]; + + // inverse transform row vectors + for (i = 0; i < 4; ++i) { + IHT_4[tx_type].rows(input, outptr); + input += 4; + outptr += 4; + } + + // inverse transform column vectors + for (i = 0; i < 4; ++i) { + for (j = 0; j < 4; ++j) + temp_in[j] = out[j * 4 + i]; + IHT_4[tx_type].cols(temp_in, temp_out); + for (j = 0; j < 4; ++j) { + dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], + ROUND_POWER_OF_TWO(temp_out[j], 4)); + } + } +} + +static const transform_2d IHT_8[] = { + { idct8_c, idct8_c }, // DCT_DCT = 0 + { iadst8_c, idct8_c }, // ADST_DCT = 1 + { idct8_c, iadst8_c }, // DCT_ADST = 2 + { iadst8_c, iadst8_c } // ADST_ADST = 3 +}; + +void vp9_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride, + int tx_type) { + int i, j; + tran_low_t out[8 * 8]; + tran_low_t *outptr = out; + tran_low_t temp_in[8], temp_out[8]; + const transform_2d ht = IHT_8[tx_type]; + + // inverse transform row vectors + for (i = 0; i < 8; ++i) { + ht.rows(input, outptr); + input += 8; + outptr += 8; + } + + // inverse transform column vectors + for (i = 0; i < 8; ++i) { + for (j = 0; j < 8; ++j) + temp_in[j] = out[j * 8 + i]; + ht.cols(temp_in, temp_out); + for (j = 0; j < 8; ++j) { + dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], + ROUND_POWER_OF_TWO(temp_out[j], 5)); + } + } +} + +static const transform_2d IHT_16[] = { + { idct16_c, idct16_c }, // DCT_DCT = 0 + { iadst16_c, idct16_c }, // ADST_DCT = 1 + { idct16_c, iadst16_c }, // DCT_ADST = 2 + { iadst16_c, iadst16_c } // ADST_ADST = 3 +}; + +void vp9_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride, + int tx_type) { + int i, j; + tran_low_t out[16 * 16]; + tran_low_t *outptr = out; + tran_low_t temp_in[16], temp_out[16]; + const transform_2d ht = IHT_16[tx_type]; + + // Rows + for (i = 0; i < 16; ++i) { + ht.rows(input, outptr); + input += 16; + outptr += 16; + } + + // Columns + for (i = 0; i < 16; ++i) { + for (j = 0; j < 16; ++j) + temp_in[j] = out[j * 16 + i]; + ht.cols(temp_in, temp_out); + for (j = 0; j < 16; ++j) { + dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], + ROUND_POWER_OF_TWO(temp_out[j], 6)); + } + } +} + +// idct +void vp9_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride, + int eob) { + if (eob > 1) + vpx_idct4x4_16_add(input, dest, stride); + else + vpx_idct4x4_1_add(input, dest, stride); +} + + +void vp9_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride, + int eob) { + if (eob > 1) + vpx_iwht4x4_16_add(input, dest, stride); + else + vpx_iwht4x4_1_add(input, dest, stride); +} + +void vp9_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride, + int eob) { + // If dc is 1, then input[0] is the reconstructed value, do not need + // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1. + + // The calculation can be simplified if there are not many non-zero dct + // coefficients. Use eobs to decide what to do. 
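+  // For example, eob == 1 means only the DC coefficient is non-zero, so the
+  // DC-only path below suffices, while eob <= 12 confines the non-zero
+  // coefficients to the first 12 positions of the scan, letting
+  // vpx_idct8x8_12_add() skip the row transforms that can only see zeros.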
+ // TODO(yunqingwang): "eobs = 1" case is also handled in vp9_short_idct8x8_c. + // Combine that with code here. + if (eob == 1) + // DC only DCT coefficient + vpx_idct8x8_1_add(input, dest, stride); + else if (eob <= 12) + vpx_idct8x8_12_add(input, dest, stride); + else + vpx_idct8x8_64_add(input, dest, stride); +} + +void vp9_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride, + int eob) { + /* The calculation can be simplified if there are not many non-zero dct + * coefficients. Use eobs to separate different cases. */ + if (eob == 1) + /* DC only DCT coefficient. */ + vpx_idct16x16_1_add(input, dest, stride); + else if (eob <= 10) + vpx_idct16x16_10_add(input, dest, stride); + else + vpx_idct16x16_256_add(input, dest, stride); +} + +void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride, + int eob) { + if (eob == 1) + vpx_idct32x32_1_add(input, dest, stride); + else if (eob <= 34) + // non-zero coeff only in upper-left 8x8 + vpx_idct32x32_34_add(input, dest, stride); + else if (eob <= 135) + // non-zero coeff only in upper-left 16x16 + vpx_idct32x32_135_add(input, dest, stride); + else + vpx_idct32x32_1024_add(input, dest, stride); +} + +// iht +void vp9_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, + int stride, int eob) { + if (tx_type == DCT_DCT) + vp9_idct4x4_add(input, dest, stride, eob); + else + vp9_iht4x4_16_add(input, dest, stride, tx_type); +} + +void vp9_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, + int stride, int eob) { + if (tx_type == DCT_DCT) { + vp9_idct8x8_add(input, dest, stride, eob); + } else { + vp9_iht8x8_64_add(input, dest, stride, tx_type); + } +} + +void vp9_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, + int stride, int eob) { + if (tx_type == DCT_DCT) { + vp9_idct16x16_add(input, dest, stride, eob); + } else { + vp9_iht16x16_256_add(input, dest, stride, tx_type); + } +} + +#if CONFIG_VP9_HIGHBITDEPTH +void vp9_highbd_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest8, + int stride, int tx_type, int bd) { + const highbd_transform_2d IHT_4[] = { + { vpx_highbd_idct4_c, vpx_highbd_idct4_c }, // DCT_DCT = 0 + { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // ADST_DCT = 1 + { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_ADST = 2 + { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c } // ADST_ADST = 3 + }; + uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + + int i, j; + tran_low_t out[4 * 4]; + tran_low_t *outptr = out; + tran_low_t temp_in[4], temp_out[4]; + + // Inverse transform row vectors. + for (i = 0; i < 4; ++i) { + IHT_4[tx_type].rows(input, outptr, bd); + input += 4; + outptr += 4; + } + + // Inverse transform column vectors. 
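+  // (ROUND_POWER_OF_TWO(v, 4) expands to (v + 8) >> 4, i.e. add half and
+  // shift, undoing the scaling of the forward 4x4 transform; the 8x8 and
+  // 16x16 variants below shift by 5 and 6.)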
+ for (i = 0; i < 4; ++i) { + for (j = 0; j < 4; ++j) + temp_in[j] = out[j * 4 + i]; + IHT_4[tx_type].cols(temp_in, temp_out, bd); + for (j = 0; j < 4; ++j) { + dest[j * stride + i] = highbd_clip_pixel_add( + dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd); + } + } +} + +static const highbd_transform_2d HIGH_IHT_8[] = { + { vpx_highbd_idct8_c, vpx_highbd_idct8_c }, // DCT_DCT = 0 + { vpx_highbd_iadst8_c, vpx_highbd_idct8_c }, // ADST_DCT = 1 + { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_ADST = 2 + { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c } // ADST_ADST = 3 +}; + +void vp9_highbd_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest8, + int stride, int tx_type, int bd) { + int i, j; + tran_low_t out[8 * 8]; + tran_low_t *outptr = out; + tran_low_t temp_in[8], temp_out[8]; + const highbd_transform_2d ht = HIGH_IHT_8[tx_type]; + uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + + // Inverse transform row vectors. + for (i = 0; i < 8; ++i) { + ht.rows(input, outptr, bd); + input += 8; + outptr += 8; + } + + // Inverse transform column vectors. + for (i = 0; i < 8; ++i) { + for (j = 0; j < 8; ++j) + temp_in[j] = out[j * 8 + i]; + ht.cols(temp_in, temp_out, bd); + for (j = 0; j < 8; ++j) { + dest[j * stride + i] = highbd_clip_pixel_add( + dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); + } + } +} + +static const highbd_transform_2d HIGH_IHT_16[] = { + { vpx_highbd_idct16_c, vpx_highbd_idct16_c }, // DCT_DCT = 0 + { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // ADST_DCT = 1 + { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_ADST = 2 + { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c } // ADST_ADST = 3 +}; + +void vp9_highbd_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest8, + int stride, int tx_type, int bd) { + int i, j; + tran_low_t out[16 * 16]; + tran_low_t *outptr = out; + tran_low_t temp_in[16], temp_out[16]; + const highbd_transform_2d ht = HIGH_IHT_16[tx_type]; + uint16_t *dest = CONVERT_TO_SHORTPTR(dest8); + + // Rows + for (i = 0; i < 16; ++i) { + ht.rows(input, outptr, bd); + input += 16; + outptr += 16; + } + + // Columns + for (i = 0; i < 16; ++i) { + for (j = 0; j < 16; ++j) + temp_in[j] = out[j * 16 + i]; + ht.cols(temp_in, temp_out, bd); + for (j = 0; j < 16; ++j) { + dest[j * stride + i] = highbd_clip_pixel_add( + dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); + } + } +} + +// idct +void vp9_highbd_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride, + int eob, int bd) { + if (eob > 1) + vpx_highbd_idct4x4_16_add(input, dest, stride, bd); + else + vpx_highbd_idct4x4_1_add(input, dest, stride, bd); +} + + +void vp9_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride, + int eob, int bd) { + if (eob > 1) + vpx_highbd_iwht4x4_16_add(input, dest, stride, bd); + else + vpx_highbd_iwht4x4_1_add(input, dest, stride, bd); +} + +void vp9_highbd_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride, + int eob, int bd) { + // If dc is 1, then input[0] is the reconstructed value, do not need + // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1. + + // The calculation can be simplified if there are not many non-zero dct + // coefficients. Use eobs to decide what to do. + // TODO(yunqingwang): "eobs = 1" case is also handled in vp9_short_idct8x8_c. + // Combine that with code here. 
+ // DC only DCT coefficient + if (eob == 1) { + vpx_highbd_idct8x8_1_add(input, dest, stride, bd); + } else if (eob <= 10) { + vpx_highbd_idct8x8_10_add(input, dest, stride, bd); + } else { + vpx_highbd_idct8x8_64_add(input, dest, stride, bd); + } +} + +void vp9_highbd_idct16x16_add(const tran_low_t *input, uint8_t *dest, + int stride, int eob, int bd) { + // The calculation can be simplified if there are not many non-zero dct + // coefficients. Use eobs to separate different cases. + // DC only DCT coefficient. + if (eob == 1) { + vpx_highbd_idct16x16_1_add(input, dest, stride, bd); + } else if (eob <= 10) { + vpx_highbd_idct16x16_10_add(input, dest, stride, bd); + } else { + vpx_highbd_idct16x16_256_add(input, dest, stride, bd); + } +} + +void vp9_highbd_idct32x32_add(const tran_low_t *input, uint8_t *dest, + int stride, int eob, int bd) { + // Non-zero coeff only in upper-left 8x8 + if (eob == 1) { + vpx_highbd_idct32x32_1_add(input, dest, stride, bd); + } else if (eob <= 34) { + vpx_highbd_idct32x32_34_add(input, dest, stride, bd); + } else { + vpx_highbd_idct32x32_1024_add(input, dest, stride, bd); + } +} + +// iht +void vp9_highbd_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, + uint8_t *dest, int stride, int eob, int bd) { + if (tx_type == DCT_DCT) + vp9_highbd_idct4x4_add(input, dest, stride, eob, bd); + else + vp9_highbd_iht4x4_16_add(input, dest, stride, tx_type, bd); +} + +void vp9_highbd_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, + uint8_t *dest, int stride, int eob, int bd) { + if (tx_type == DCT_DCT) { + vp9_highbd_idct8x8_add(input, dest, stride, eob, bd); + } else { + vp9_highbd_iht8x8_64_add(input, dest, stride, tx_type, bd); + } +} + +void vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, + uint8_t *dest, int stride, int eob, int bd) { + if (tx_type == DCT_DCT) { + vp9_highbd_idct16x16_add(input, dest, stride, eob, bd); + } else { + vp9_highbd_iht16x16_256_add(input, dest, stride, tx_type, bd); + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH diff --git a/thirdparty/libvpx/vp9/common/vp9_idct.h b/thirdparty/libvpx/vp9/common/vp9_idct.h new file mode 100644 index 0000000000..b5a3fbf362 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_idct.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef VP9_COMMON_VP9_IDCT_H_ +#define VP9_COMMON_VP9_IDCT_H_ + +#include <assert.h> + +#include "./vpx_config.h" +#include "vp9/common/vp9_common.h" +#include "vp9/common/vp9_enums.h" +#include "vpx_dsp/inv_txfm.h" +#include "vpx_dsp/txfm_common.h" +#include "vpx_ports/mem.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef void (*transform_1d)(const tran_low_t*, tran_low_t*); + +typedef struct { + transform_1d cols, rows; // vertical and horizontal +} transform_2d; + +#if CONFIG_VP9_HIGHBITDEPTH +typedef void (*highbd_transform_1d)(const tran_low_t*, tran_low_t*, int bd); + +typedef struct { + highbd_transform_1d cols, rows; // vertical and horizontal +} highbd_transform_2d; +#endif // CONFIG_VP9_HIGHBITDEPTH + +void vp9_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride, + int eob); +void vp9_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride, + int eob); +void vp9_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride, + int eob); +void vp9_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride, + int eob); +void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride, + int eob); + +void vp9_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, + int stride, int eob); +void vp9_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, + int stride, int eob); +void vp9_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, + int stride, int eob); + +#if CONFIG_VP9_HIGHBITDEPTH +void vp9_highbd_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride, + int eob, int bd); +void vp9_highbd_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride, + int eob, int bd); +void vp9_highbd_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride, + int eob, int bd); +void vp9_highbd_idct16x16_add(const tran_low_t *input, uint8_t *dest, + int stride, int eob, int bd); +void vp9_highbd_idct32x32_add(const tran_low_t *input, uint8_t *dest, + int stride, int eob, int bd); +void vp9_highbd_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, + uint8_t *dest, int stride, int eob, int bd); +void vp9_highbd_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, + uint8_t *dest, int stride, int eob, int bd); +void vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, + uint8_t *dest, int stride, int eob, int bd); +#endif // CONFIG_VP9_HIGHBITDEPTH +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_COMMON_VP9_IDCT_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_loopfilter.c b/thirdparty/libvpx/vp9/common/vp9_loopfilter.c new file mode 100644 index 0000000000..183dec4e71 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_loopfilter.c @@ -0,0 +1,1697 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "./vpx_config.h" +#include "./vpx_dsp_rtcd.h" +#include "vp9/common/vp9_loopfilter.h" +#include "vp9/common/vp9_onyxc_int.h" +#include "vp9/common/vp9_reconinter.h" +#include "vpx_dsp/vpx_dsp_common.h" +#include "vpx_mem/vpx_mem.h" +#include "vpx_ports/mem.h" + +#include "vp9/common/vp9_seg_common.h" + +// 64 bit masks for left transform size. 
Each 1 represents a position where
+// we should apply a loop filter across the left border of an 8x8 block
+// boundary.
+//
+// In the case of TX_16X16 (low-order byte first) we end up with
+// a mask that looks like this:
+//
+//    10101010
+//    10101010
+//    10101010
+//    10101010
+//    10101010
+//    10101010
+//    10101010
+//    10101010
+//
+// A loopfilter should be applied to every other 8x8 horizontally.
+static const uint64_t left_64x64_txform_mask[TX_SIZES]= {
+  0xffffffffffffffffULL,  // TX_4X4
+  0xffffffffffffffffULL,  // TX_8x8
+  0x5555555555555555ULL,  // TX_16x16
+  0x1111111111111111ULL,  // TX_32x32
+};
+
+// 64 bit masks for above transform size. Each 1 represents a position where
+// we should apply a loop filter across the top border of an 8x8 block
+// boundary.
+//
+// In the case of TX_32x32 (low-order byte first) we end up with
+// a mask that looks like this:
+//
+//    11111111
+//    00000000
+//    00000000
+//    00000000
+//    11111111
+//    00000000
+//    00000000
+//    00000000
+//
+// A loopfilter should be applied to every 4th row vertically.
+static const uint64_t above_64x64_txform_mask[TX_SIZES]= {
+  0xffffffffffffffffULL,  // TX_4X4
+  0xffffffffffffffffULL,  // TX_8x8
+  0x00ff00ff00ff00ffULL,  // TX_16x16
+  0x000000ff000000ffULL,  // TX_32x32
+};
+
+// 64 bit masks for prediction sizes (left). Each 1 represents a position on
+// the left border of an 8x8 block. These are aligned to the right-most
+// appropriate bit, and then shifted into place.
+//
+// In the case of BLOCK_16X32 (low-order byte first) we end up with
+// a mask that looks like this:
+//
+//    10000000
+//    10000000
+//    10000000
+//    10000000
+//    00000000
+//    00000000
+//    00000000
+//    00000000
static const uint64_t left_prediction_mask[BLOCK_SIZES] = {
+  0x0000000000000001ULL,  // BLOCK_4X4,
+  0x0000000000000001ULL,  // BLOCK_4X8,
+  0x0000000000000001ULL,  // BLOCK_8X4,
+  0x0000000000000001ULL,  // BLOCK_8X8,
+  0x0000000000000101ULL,  // BLOCK_8X16,
+  0x0000000000000001ULL,  // BLOCK_16X8,
+  0x0000000000000101ULL,  // BLOCK_16X16,
+  0x0000000001010101ULL,  // BLOCK_16X32,
+  0x0000000000000101ULL,  // BLOCK_32X16,
+  0x0000000001010101ULL,  // BLOCK_32X32,
+  0x0101010101010101ULL,  // BLOCK_32X64,
+  0x0000000001010101ULL,  // BLOCK_64X32,
+  0x0101010101010101ULL,  // BLOCK_64X64
+};
+
+// 64 bit mask to shift and set for each prediction size.
+static const uint64_t above_prediction_mask[BLOCK_SIZES] = {
+  0x0000000000000001ULL,  // BLOCK_4X4
+  0x0000000000000001ULL,  // BLOCK_4X8
+  0x0000000000000001ULL,  // BLOCK_8X4
+  0x0000000000000001ULL,  // BLOCK_8X8
+  0x0000000000000001ULL,  // BLOCK_8X16,
+  0x0000000000000003ULL,  // BLOCK_16X8
+  0x0000000000000003ULL,  // BLOCK_16X16
+  0x0000000000000003ULL,  // BLOCK_16X32,
+  0x000000000000000fULL,  // BLOCK_32X16,
+  0x000000000000000fULL,  // BLOCK_32X32,
+  0x000000000000000fULL,  // BLOCK_32X64,
+  0x00000000000000ffULL,  // BLOCK_64X32,
+  0x00000000000000ffULL,  // BLOCK_64X64
+};
+// 64 bit mask to shift and set for each prediction size. A bit is set for
+// each 8x8 block covered by the given block size when it is anchored at the
+// upper-left of the 64x64 area.
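+//
+// For example, BLOCK_16X16 covers a 2x2 group of 8x8 blocks, so its mask is
+// 0x0303: bits 0 and 1 set in each of the two low-order bytes (rows 0 and 1).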
+static const uint64_t size_mask[BLOCK_SIZES] = {
+  0x0000000000000001ULL,  // BLOCK_4X4
+  0x0000000000000001ULL,  // BLOCK_4X8
+  0x0000000000000001ULL,  // BLOCK_8X4
+  0x0000000000000001ULL,  // BLOCK_8X8
+  0x0000000000000101ULL,  // BLOCK_8X16,
+  0x0000000000000003ULL,  // BLOCK_16X8
+  0x0000000000000303ULL,  // BLOCK_16X16
+  0x0000000003030303ULL,  // BLOCK_16X32,
+  0x0000000000000f0fULL,  // BLOCK_32X16,
+  0x000000000f0f0f0fULL,  // BLOCK_32X32,
+  0x0f0f0f0f0f0f0f0fULL,  // BLOCK_32X64,
+  0x00000000ffffffffULL,  // BLOCK_64X32,
+  0xffffffffffffffffULL,  // BLOCK_64X64
+};
+
+// These are used for masking the left and above borders.
+static const uint64_t left_border = 0x1111111111111111ULL;
+static const uint64_t above_border = 0x000000ff000000ffULL;
+
+// 16 bit masks for uv transform sizes.
+static const uint16_t left_64x64_txform_mask_uv[TX_SIZES]= {
+  0xffff,  // TX_4X4
+  0xffff,  // TX_8x8
+  0x5555,  // TX_16x16
+  0x1111,  // TX_32x32
+};
+
+static const uint16_t above_64x64_txform_mask_uv[TX_SIZES]= {
+  0xffff,  // TX_4X4
+  0xffff,  // TX_8x8
+  0x0f0f,  // TX_16x16
+  0x000f,  // TX_32x32
+};
+
+// 16 bit left mask to shift and set for each uv prediction size.
+static const uint16_t left_prediction_mask_uv[BLOCK_SIZES] = {
+  0x0001,  // BLOCK_4X4,
+  0x0001,  // BLOCK_4X8,
+  0x0001,  // BLOCK_8X4,
+  0x0001,  // BLOCK_8X8,
+  0x0001,  // BLOCK_8X16,
+  0x0001,  // BLOCK_16X8,
+  0x0001,  // BLOCK_16X16,
+  0x0011,  // BLOCK_16X32,
+  0x0001,  // BLOCK_32X16,
+  0x0011,  // BLOCK_32X32,
+  0x1111,  // BLOCK_32X64
+  0x0011,  // BLOCK_64X32,
+  0x1111,  // BLOCK_64X64
+};
+// 16 bit above mask to shift and set for each uv prediction size.
+static const uint16_t above_prediction_mask_uv[BLOCK_SIZES] = {
+  0x0001,  // BLOCK_4X4
+  0x0001,  // BLOCK_4X8
+  0x0001,  // BLOCK_8X4
+  0x0001,  // BLOCK_8X8
+  0x0001,  // BLOCK_8X16,
+  0x0001,  // BLOCK_16X8
+  0x0001,  // BLOCK_16X16
+  0x0001,  // BLOCK_16X32,
+  0x0003,  // BLOCK_32X16,
+  0x0003,  // BLOCK_32X32,
+  0x0003,  // BLOCK_32X64,
+  0x000f,  // BLOCK_64X32,
+  0x000f,  // BLOCK_64X64
+};
+
+// 16 bit mask to shift and set for each uv prediction size.
+static const uint16_t size_mask_uv[BLOCK_SIZES] = {
+  0x0001,  // BLOCK_4X4
+  0x0001,  // BLOCK_4X8
+  0x0001,  // BLOCK_8X4
+  0x0001,  // BLOCK_8X8
+  0x0001,  // BLOCK_8X16,
+  0x0001,  // BLOCK_16X8
+  0x0001,  // BLOCK_16X16
+  0x0011,  // BLOCK_16X32,
+  0x0003,  // BLOCK_32X16,
+  0x0033,  // BLOCK_32X32,
+  0x3333,  // BLOCK_32X64,
+  0x00ff,  // BLOCK_64X32,
+  0xffff,  // BLOCK_64X64
+};
+static const uint16_t left_border_uv = 0x1111;
+static const uint16_t above_border_uv = 0x000f;
+
+static const int mode_lf_lut[MB_MODE_COUNT] = {
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  // INTRA_MODES
+  1, 1, 0, 1                     // INTER_MODES (ZEROMV == 0)
+};
+
+static void update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl) {
+  int lvl;
+
+  // For each possible value for the loop filter fill out limits
+  for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++) {
+    // Set loop filter parameters that control sharpness.
+    int block_inside_limit = lvl >> ((sharpness_lvl > 0) + (sharpness_lvl > 4));
+
+    if (sharpness_lvl > 0) {
+      if (block_inside_limit > (9 - sharpness_lvl))
+        block_inside_limit = (9 - sharpness_lvl);
+    }
+
+    if (block_inside_limit < 1)
+      block_inside_limit = 1;
+
+    memset(lfi->lfthr[lvl].lim, block_inside_limit, SIMD_WIDTH);
+    memset(lfi->lfthr[lvl].mblim, (2 * (lvl + 2) + block_inside_limit),
+           SIMD_WIDTH);
+  }
+}
+
+static uint8_t get_filter_level(const loop_filter_info_n *lfi_n,
+                                const MODE_INFO *mi) {
+  return lfi_n->lvl[mi->segment_id][mi->ref_frame[0]]
+                   [mode_lf_lut[mi->mode]];
+}
+
+void vp9_loop_filter_init(VP9_COMMON *cm) {
+  loop_filter_info_n *lfi = &cm->lf_info;
+  struct loopfilter *lf = &cm->lf;
+  int lvl;
+
+  // init limits for given sharpness
+  update_sharpness(lfi, lf->sharpness_level);
+  lf->last_sharpness_level = lf->sharpness_level;
+
+  // init hev threshold const vectors
+  for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++)
+    memset(lfi->lfthr[lvl].hev_thr, (lvl >> 4), SIMD_WIDTH);
+}
+
+void vp9_loop_filter_frame_init(VP9_COMMON *cm, int default_filt_lvl) {
+  int seg_id;
+  // "scale" is the multiplier for the ref/mode loop filter deltas: 1 when
+  // default_filt_lvl is between 0 and 31, 2 when it is between 32 and 63.
+  const int scale = 1 << (default_filt_lvl >> 5);
+  loop_filter_info_n *const lfi = &cm->lf_info;
+  struct loopfilter *const lf = &cm->lf;
+  const struct segmentation *const seg = &cm->seg;
+
+  // update limits if sharpness has changed
+  if (lf->last_sharpness_level != lf->sharpness_level) {
+    update_sharpness(lfi, lf->sharpness_level);
+    lf->last_sharpness_level = lf->sharpness_level;
+  }
+
+  for (seg_id = 0; seg_id < MAX_SEGMENTS; seg_id++) {
+    int lvl_seg = default_filt_lvl;
+    if (segfeature_active(seg, seg_id, SEG_LVL_ALT_LF)) {
+      const int data = get_segdata(seg, seg_id, SEG_LVL_ALT_LF);
+      lvl_seg = clamp(seg->abs_delta == SEGMENT_ABSDATA ?
+                      data : default_filt_lvl + data,
+                      0, MAX_LOOP_FILTER);
+    }
+
+    if (!lf->mode_ref_delta_enabled) {
+      // we could get rid of this if we assume that deltas are set to
+      // zero when not in use; encoder always uses deltas
+      memset(lfi->lvl[seg_id], lvl_seg, sizeof(lfi->lvl[seg_id]));
+    } else {
+      int ref, mode;
+      const int intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale;
+      lfi->lvl[seg_id][INTRA_FRAME][0] = clamp(intra_lvl, 0, MAX_LOOP_FILTER);
+
+      for (ref = LAST_FRAME; ref < MAX_REF_FRAMES; ++ref) {
+        for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) {
+          const int inter_lvl = lvl_seg + lf->ref_deltas[ref] * scale
+                                        + lf->mode_deltas[mode] * scale;
+          lfi->lvl[seg_id][ref][mode] = clamp(inter_lvl, 0, MAX_LOOP_FILTER);
+        }
+      }
+    }
+  }
+}
+
+static void filter_selectively_vert_row2(int subsampling_factor,
+                                         uint8_t *s, int pitch,
+                                         unsigned int mask_16x16,
+                                         unsigned int mask_8x8,
+                                         unsigned int mask_4x4,
+                                         unsigned int mask_4x4_int,
+                                         const loop_filter_thresh *lfthr,
+                                         const uint8_t *lfl) {
+  const int dual_mask_cutoff = subsampling_factor ? 0xff : 0xffff;
+  const int lfl_forward = subsampling_factor ?
4 : 8; + const unsigned int dual_one = 1 | (1 << lfl_forward); + unsigned int mask; + uint8_t *ss[2]; + ss[0] = s; + + for (mask = + (mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int) & dual_mask_cutoff; + mask; mask = (mask & ~dual_one) >> 1) { + if (mask & dual_one) { + const loop_filter_thresh *lfis[2]; + lfis[0] = lfthr + *lfl; + lfis[1] = lfthr + *(lfl + lfl_forward); + ss[1] = ss[0] + 8 * pitch; + + if (mask_16x16 & dual_one) { + if ((mask_16x16 & dual_one) == dual_one) { + vpx_lpf_vertical_16_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim, + lfis[0]->hev_thr); + } else { + const loop_filter_thresh *lfi = lfis[!(mask_16x16 & 1)]; + vpx_lpf_vertical_16(ss[!(mask_16x16 & 1)], pitch, lfi->mblim, + lfi->lim, lfi->hev_thr); + } + } + + if (mask_8x8 & dual_one) { + if ((mask_8x8 & dual_one) == dual_one) { + vpx_lpf_vertical_8_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim, + lfis[0]->hev_thr, lfis[1]->mblim, + lfis[1]->lim, lfis[1]->hev_thr); + } else { + const loop_filter_thresh *lfi = lfis[!(mask_8x8 & 1)]; + vpx_lpf_vertical_8(ss[!(mask_8x8 & 1)], pitch, lfi->mblim, lfi->lim, + lfi->hev_thr); + } + } + + if (mask_4x4 & dual_one) { + if ((mask_4x4 & dual_one) == dual_one) { + vpx_lpf_vertical_4_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim, + lfis[0]->hev_thr, lfis[1]->mblim, + lfis[1]->lim, lfis[1]->hev_thr); + } else { + const loop_filter_thresh *lfi = lfis[!(mask_4x4 & 1)]; + vpx_lpf_vertical_4(ss[!(mask_4x4 & 1)], pitch, lfi->mblim, lfi->lim, + lfi->hev_thr); + } + } + + if (mask_4x4_int & dual_one) { + if ((mask_4x4_int & dual_one) == dual_one) { + vpx_lpf_vertical_4_dual(ss[0] + 4, pitch, lfis[0]->mblim, + lfis[0]->lim, lfis[0]->hev_thr, + lfis[1]->mblim, lfis[1]->lim, + lfis[1]->hev_thr); + } else { + const loop_filter_thresh *lfi = lfis[!(mask_4x4_int & 1)]; + vpx_lpf_vertical_4(ss[!(mask_4x4_int & 1)] + 4, pitch, lfi->mblim, + lfi->lim, lfi->hev_thr); + } + } + } + + ss[0] += 8; + lfl += 1; + mask_16x16 >>= 1; + mask_8x8 >>= 1; + mask_4x4 >>= 1; + mask_4x4_int >>= 1; + } +} + +#if CONFIG_VP9_HIGHBITDEPTH +static void highbd_filter_selectively_vert_row2(int subsampling_factor, + uint16_t *s, int pitch, + unsigned int mask_16x16, + unsigned int mask_8x8, + unsigned int mask_4x4, + unsigned int mask_4x4_int, + const loop_filter_thresh *lfthr, + const uint8_t *lfl, int bd) { + const int dual_mask_cutoff = subsampling_factor ? 0xff : 0xffff; + const int lfl_forward = subsampling_factor ? 
4 : 8; + const unsigned int dual_one = 1 | (1 << lfl_forward); + unsigned int mask; + uint16_t *ss[2]; + ss[0] = s; + + for (mask = + (mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int) & dual_mask_cutoff; + mask; mask = (mask & ~dual_one) >> 1) { + if (mask & dual_one) { + const loop_filter_thresh *lfis[2]; + lfis[0] = lfthr + *lfl; + lfis[1] = lfthr + *(lfl + lfl_forward); + ss[1] = ss[0] + 8 * pitch; + + if (mask_16x16 & dual_one) { + if ((mask_16x16 & dual_one) == dual_one) { + vpx_highbd_lpf_vertical_16_dual(ss[0], pitch, lfis[0]->mblim, + lfis[0]->lim, lfis[0]->hev_thr, bd); + } else { + const loop_filter_thresh *lfi = lfis[!(mask_16x16 & 1)]; + vpx_highbd_lpf_vertical_16(ss[!(mask_16x16 & 1)], pitch, lfi->mblim, + lfi->lim, lfi->hev_thr, bd); + } + } + + if (mask_8x8 & dual_one) { + if ((mask_8x8 & dual_one) == dual_one) { + vpx_highbd_lpf_vertical_8_dual(ss[0], pitch, lfis[0]->mblim, + lfis[0]->lim, lfis[0]->hev_thr, + lfis[1]->mblim, lfis[1]->lim, + lfis[1]->hev_thr, bd); + } else { + const loop_filter_thresh *lfi = lfis[!(mask_8x8 & 1)]; + vpx_highbd_lpf_vertical_8(ss[!(mask_8x8 & 1)], pitch, lfi->mblim, + lfi->lim, lfi->hev_thr, bd); + } + } + + if (mask_4x4 & dual_one) { + if ((mask_4x4 & dual_one) == dual_one) { + vpx_highbd_lpf_vertical_4_dual(ss[0], pitch, lfis[0]->mblim, + lfis[0]->lim, lfis[0]->hev_thr, + lfis[1]->mblim, lfis[1]->lim, + lfis[1]->hev_thr, bd); + } else { + const loop_filter_thresh *lfi = lfis[!(mask_4x4 & 1)]; + vpx_highbd_lpf_vertical_4(ss[!(mask_4x4 & 1)], pitch, lfi->mblim, + lfi->lim, lfi->hev_thr, bd); + } + } + + if (mask_4x4_int & dual_one) { + if ((mask_4x4_int & dual_one) == dual_one) { + vpx_highbd_lpf_vertical_4_dual(ss[0] + 4, pitch, lfis[0]->mblim, + lfis[0]->lim, lfis[0]->hev_thr, + lfis[1]->mblim, lfis[1]->lim, + lfis[1]->hev_thr, bd); + } else { + const loop_filter_thresh *lfi = lfis[!(mask_4x4_int & 1)]; + vpx_highbd_lpf_vertical_4(ss[!(mask_4x4_int & 1)] + 4, pitch, + lfi->mblim, lfi->lim, lfi->hev_thr, bd); + } + } + } + + ss[0] += 8; + lfl += 1; + mask_16x16 >>= 1; + mask_8x8 >>= 1; + mask_4x4 >>= 1; + mask_4x4_int >>= 1; + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH + +static void filter_selectively_horiz(uint8_t *s, int pitch, + unsigned int mask_16x16, + unsigned int mask_8x8, + unsigned int mask_4x4, + unsigned int mask_4x4_int, + const loop_filter_thresh *lfthr, + const uint8_t *lfl) { + unsigned int mask; + int count; + + for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; + mask; mask >>= count) { + count = 1; + if (mask & 1) { + const loop_filter_thresh *lfi = lfthr + *lfl; + + if (mask_16x16 & 1) { + if ((mask_16x16 & 3) == 3) { + vpx_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr); + count = 2; + } else { + vpx_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr); + } + } else if (mask_8x8 & 1) { + if ((mask_8x8 & 3) == 3) { + // Next block's thresholds. 
+ const loop_filter_thresh *lfin = lfthr + *(lfl + 1); + + vpx_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, lfin->mblim, lfin->lim, + lfin->hev_thr); + + if ((mask_4x4_int & 3) == 3) { + vpx_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim, + lfi->lim, lfi->hev_thr, lfin->mblim, + lfin->lim, lfin->hev_thr); + } else { + if (mask_4x4_int & 1) + vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr); + else if (mask_4x4_int & 2) + vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, + lfin->lim, lfin->hev_thr); + } + count = 2; + } else { + vpx_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); + + if (mask_4x4_int & 1) + vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr); + } + } else if (mask_4x4 & 1) { + if ((mask_4x4 & 3) == 3) { + // Next block's thresholds. + const loop_filter_thresh *lfin = lfthr + *(lfl + 1); + + vpx_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, lfin->mblim, lfin->lim, + lfin->hev_thr); + if ((mask_4x4_int & 3) == 3) { + vpx_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim, + lfi->lim, lfi->hev_thr, lfin->mblim, + lfin->lim, lfin->hev_thr); + } else { + if (mask_4x4_int & 1) + vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr); + else if (mask_4x4_int & 2) + vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, + lfin->lim, lfin->hev_thr); + } + count = 2; + } else { + vpx_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); + + if (mask_4x4_int & 1) + vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr); + } + } else { + vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr); + } + } + s += 8 * count; + lfl += count; + mask_16x16 >>= count; + mask_8x8 >>= count; + mask_4x4 >>= count; + mask_4x4_int >>= count; + } +} + +#if CONFIG_VP9_HIGHBITDEPTH +static void highbd_filter_selectively_horiz(uint16_t *s, int pitch, + unsigned int mask_16x16, + unsigned int mask_8x8, + unsigned int mask_4x4, + unsigned int mask_4x4_int, + const loop_filter_thresh *lfthr, + const uint8_t *lfl, int bd) { + unsigned int mask; + int count; + + for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; + mask; mask >>= count) { + count = 1; + if (mask & 1) { + const loop_filter_thresh *lfi = lfthr + *lfl; + + if (mask_16x16 & 1) { + if ((mask_16x16 & 3) == 3) { + vpx_highbd_lpf_horizontal_edge_16(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, bd); + count = 2; + } else { + vpx_highbd_lpf_horizontal_edge_8(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, bd); + } + } else if (mask_8x8 & 1) { + if ((mask_8x8 & 3) == 3) { + // Next block's thresholds. 
+ const loop_filter_thresh *lfin = lfthr + *(lfl + 1); + + vpx_highbd_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, lfin->mblim, lfin->lim, + lfin->hev_thr, bd); + + if ((mask_4x4_int & 3) == 3) { + vpx_highbd_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim, + lfi->lim, lfi->hev_thr, + lfin->mblim, lfin->lim, + lfin->hev_thr, bd); + } else { + if (mask_4x4_int & 1) { + vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, + lfi->lim, lfi->hev_thr, bd); + } else if (mask_4x4_int & 2) { + vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, + lfin->lim, lfin->hev_thr, bd); + } + } + count = 2; + } else { + vpx_highbd_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, bd); + + if (mask_4x4_int & 1) { + vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, + lfi->lim, lfi->hev_thr, bd); + } + } + } else if (mask_4x4 & 1) { + if ((mask_4x4 & 3) == 3) { + // Next block's thresholds. + const loop_filter_thresh *lfin = lfthr + *(lfl + 1); + + vpx_highbd_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, lfin->mblim, lfin->lim, + lfin->hev_thr, bd); + if ((mask_4x4_int & 3) == 3) { + vpx_highbd_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim, + lfi->lim, lfi->hev_thr, + lfin->mblim, lfin->lim, + lfin->hev_thr, bd); + } else { + if (mask_4x4_int & 1) { + vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, + lfi->lim, lfi->hev_thr, bd); + } else if (mask_4x4_int & 2) { + vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, + lfin->lim, lfin->hev_thr, bd); + } + } + count = 2; + } else { + vpx_highbd_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, bd); + + if (mask_4x4_int & 1) { + vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, + lfi->lim, lfi->hev_thr, bd); + } + } + } else { + vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, bd); + } + } + s += 8 * count; + lfl += count; + mask_16x16 >>= count; + mask_8x8 >>= count; + mask_4x4 >>= count; + mask_4x4_int >>= count; + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH + +// This function ors into the current lfm structure, where to do loop +// filters for the specific mi we are looking at. It uses information +// including the block_size_type (32x16, 32x32, etc.), the transform size, +// whether there were any coefficients encoded, and the loop filter strength +// block we are currently looking at. Shift is used to position the +// 1's we produce. +static void build_masks(const loop_filter_info_n *const lfi_n, + const MODE_INFO *mi, const int shift_y, + const int shift_uv, + LOOP_FILTER_MASK *lfm) { + const BLOCK_SIZE block_size = mi->sb_type; + const TX_SIZE tx_size_y = mi->tx_size; + const TX_SIZE tx_size_uv = get_uv_tx_size_impl(tx_size_y, block_size, 1, 1); + const int filter_level = get_filter_level(lfi_n, mi); + uint64_t *const left_y = &lfm->left_y[tx_size_y]; + uint64_t *const above_y = &lfm->above_y[tx_size_y]; + uint64_t *const int_4x4_y = &lfm->int_4x4_y; + uint16_t *const left_uv = &lfm->left_uv[tx_size_uv]; + uint16_t *const above_uv = &lfm->above_uv[tx_size_uv]; + uint16_t *const int_4x4_uv = &lfm->int_4x4_uv; + int i; + + // If filter level is 0 we don't loop filter. 
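+  // Editor's sketch (assumed example, not from the upstream source): for a
+  // BLOCK_32X16 at shift_y == 0, w == 4 and h == 2, so the loop in the else
+  // branch below writes filter_level into rows 0 and 1, columns 0..3 of the
+  // 8x8 lfl_y grid.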
+  if (!filter_level) {
+    return;
+  } else {
+    const int w = num_8x8_blocks_wide_lookup[block_size];
+    const int h = num_8x8_blocks_high_lookup[block_size];
+    int index = shift_y;
+    for (i = 0; i < h; i++) {
+      memset(&lfm->lfl_y[index], filter_level, w);
+      index += 8;
+    }
+  }
+
+  // These set 1 in the current block size for the block size edges.
+  // For instance if the block size is 32x16, we'll set:
+  //    above =   1111
+  //              0000
+  //    and
+  //    left  =   1000
+  //          =   1000
+  // NOTE: In this example the low bit is the leftmost one, so ( 1000 ) is
+  // stored as 1, not 8.
+  //
+  // U and V set things on a 16 bit scale.
+  //
+  *above_y |= above_prediction_mask[block_size] << shift_y;
+  *above_uv |= above_prediction_mask_uv[block_size] << shift_uv;
+  *left_y |= left_prediction_mask[block_size] << shift_y;
+  *left_uv |= left_prediction_mask_uv[block_size] << shift_uv;
+
+  // If the block has no coefficients and is not intra we skip applying
+  // the loop filter on block edges.
+  if (mi->skip && is_inter_block(mi))
+    return;
+
+  // Here we are adding a mask for the transform size. The transform
+  // size mask is set to be correct for a 64x64 prediction block size. We
+  // mask to match the size of the block we are working on and then shift it
+  // into place.
+  *above_y |= (size_mask[block_size] &
+               above_64x64_txform_mask[tx_size_y]) << shift_y;
+  *above_uv |= (size_mask_uv[block_size] &
+                above_64x64_txform_mask_uv[tx_size_uv]) << shift_uv;
+
+  *left_y |= (size_mask[block_size] &
+              left_64x64_txform_mask[tx_size_y]) << shift_y;
+  *left_uv |= (size_mask_uv[block_size] &
+               left_64x64_txform_mask_uv[tx_size_uv]) << shift_uv;
+
+  // Here we are trying to determine what to do with the internal 4x4 block
+  // boundaries. These differ from the 4x4 boundaries on the outside edge of
+  // an 8x8 in that the internal ones can be skipped and don't depend on
+  // the prediction block size.
+  if (tx_size_y == TX_4X4)
+    *int_4x4_y |= size_mask[block_size] << shift_y;
+
+  if (tx_size_uv == TX_4X4)
+    *int_4x4_uv |= (size_mask_uv[block_size] & 0xffff) << shift_uv;
+}
+
+// This function does the same thing as the one above with the exception that
+// it only affects the y masks. It exists because for blocks < 16x16 in size,
+// we only update u and v masks on the first block.
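+// Editor's note (a sketch of the rationale, not from the upstream source):
+// with 4:2:0 subsampling, two adjacent 8x8 luma blocks share one chroma 8x8,
+// so the uv bits were already set when the first block of the pair was
+// visited and only the y masks need updating here.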
+static void build_y_mask(const loop_filter_info_n *const lfi_n, + const MODE_INFO *mi, const int shift_y, + LOOP_FILTER_MASK *lfm) { + const BLOCK_SIZE block_size = mi->sb_type; + const TX_SIZE tx_size_y = mi->tx_size; + const int filter_level = get_filter_level(lfi_n, mi); + uint64_t *const left_y = &lfm->left_y[tx_size_y]; + uint64_t *const above_y = &lfm->above_y[tx_size_y]; + uint64_t *const int_4x4_y = &lfm->int_4x4_y; + int i; + + if (!filter_level) { + return; + } else { + const int w = num_8x8_blocks_wide_lookup[block_size]; + const int h = num_8x8_blocks_high_lookup[block_size]; + int index = shift_y; + for (i = 0; i < h; i++) { + memset(&lfm->lfl_y[index], filter_level, w); + index += 8; + } + } + + *above_y |= above_prediction_mask[block_size] << shift_y; + *left_y |= left_prediction_mask[block_size] << shift_y; + + if (mi->skip && is_inter_block(mi)) + return; + + *above_y |= (size_mask[block_size] & + above_64x64_txform_mask[tx_size_y]) << shift_y; + + *left_y |= (size_mask[block_size] & + left_64x64_txform_mask[tx_size_y]) << shift_y; + + if (tx_size_y == TX_4X4) + *int_4x4_y |= size_mask[block_size] << shift_y; +} + +void vp9_adjust_mask(VP9_COMMON *const cm, const int mi_row, + const int mi_col, LOOP_FILTER_MASK *lfm) { + int i; + + // The largest loopfilter we have is 16x16 so we use the 16x16 mask + // for 32x32 transforms also. + lfm->left_y[TX_16X16] |= lfm->left_y[TX_32X32]; + lfm->above_y[TX_16X16] |= lfm->above_y[TX_32X32]; + lfm->left_uv[TX_16X16] |= lfm->left_uv[TX_32X32]; + lfm->above_uv[TX_16X16] |= lfm->above_uv[TX_32X32]; + + // We do at least 8 tap filter on every 32x32 even if the transform size + // is 4x4. So if the 4x4 is set on a border pixel add it to the 8x8 and + // remove it from the 4x4. + lfm->left_y[TX_8X8] |= lfm->left_y[TX_4X4] & left_border; + lfm->left_y[TX_4X4] &= ~left_border; + lfm->above_y[TX_8X8] |= lfm->above_y[TX_4X4] & above_border; + lfm->above_y[TX_4X4] &= ~above_border; + lfm->left_uv[TX_8X8] |= lfm->left_uv[TX_4X4] & left_border_uv; + lfm->left_uv[TX_4X4] &= ~left_border_uv; + lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_4X4] & above_border_uv; + lfm->above_uv[TX_4X4] &= ~above_border_uv; + + // We do some special edge handling. + if (mi_row + MI_BLOCK_SIZE > cm->mi_rows) { + const uint64_t rows = cm->mi_rows - mi_row; + + // Each pixel inside the border gets a 1, + const uint64_t mask_y = (((uint64_t) 1 << (rows << 3)) - 1); + const uint16_t mask_uv = (((uint16_t) 1 << (((rows + 1) >> 1) << 2)) - 1); + + // Remove values completely outside our border. + for (i = 0; i < TX_32X32; i++) { + lfm->left_y[i] &= mask_y; + lfm->above_y[i] &= mask_y; + lfm->left_uv[i] &= mask_uv; + lfm->above_uv[i] &= mask_uv; + } + lfm->int_4x4_y &= mask_y; + lfm->int_4x4_uv &= mask_uv; + + // We don't apply a wide loop filter on the last uv block row. If set + // apply the shorter one instead. + if (rows == 1) { + lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_16X16]; + lfm->above_uv[TX_16X16] = 0; + } + if (rows == 5) { + lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_16X16] & 0xff00; + lfm->above_uv[TX_16X16] &= ~(lfm->above_uv[TX_16X16] & 0xff00); + } + } + + if (mi_col + MI_BLOCK_SIZE > cm->mi_cols) { + const uint64_t columns = cm->mi_cols - mi_col; + + // Each pixel inside the border gets a 1, the multiply copies the border + // to where we need it. 
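+    // Editor's worked example (not from the upstream source): for
+    // columns == 3, (1 << 3) - 1 == 0x7 and
+    // 0x7 * 0x0101010101010101ULL == 0x0707070707070707, i.e. the low 3
+    // bits set in each 8-bit row of the mask; mask_uv for the same width
+    // is ((1 << 2) - 1) * 0x1111 == 0x3333.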
+    const uint64_t mask_y = (((1 << columns) - 1)) * 0x0101010101010101ULL;
+    const uint16_t mask_uv = ((1 << ((columns + 1) >> 1)) - 1) * 0x1111;
+
+    // Internal edges are not applied on the last column of the image so
+    // we mask 1 more for the internal edges.
+    const uint16_t mask_uv_int = ((1 << (columns >> 1)) - 1) * 0x1111;
+
+    // Remove the bits outside the image edge.
+    for (i = 0; i < TX_32X32; i++) {
+      lfm->left_y[i] &= mask_y;
+      lfm->above_y[i] &= mask_y;
+      lfm->left_uv[i] &= mask_uv;
+      lfm->above_uv[i] &= mask_uv;
+    }
+    lfm->int_4x4_y &= mask_y;
+    lfm->int_4x4_uv &= mask_uv_int;
+
+    // We don't apply a wide loop filter on the last uv column. If set,
+    // apply the shorter one instead.
+    if (columns == 1) {
+      lfm->left_uv[TX_8X8] |= lfm->left_uv[TX_16X16];
+      lfm->left_uv[TX_16X16] = 0;
+    }
+    if (columns == 5) {
+      lfm->left_uv[TX_8X8] |= (lfm->left_uv[TX_16X16] & 0xcccc);
+      lfm->left_uv[TX_16X16] &= ~(lfm->left_uv[TX_16X16] & 0xcccc);
+    }
+  }
+  // We don't apply a loop filter on the first column in the image, mask that
+  // out.
+  if (mi_col == 0) {
+    for (i = 0; i < TX_32X32; i++) {
+      lfm->left_y[i] &= 0xfefefefefefefefeULL;
+      lfm->left_uv[i] &= 0xeeee;
+    }
+  }
+
+  // Assert if we try to apply 2 different loop filters at the same position.
+  assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_8X8]));
+  assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_4X4]));
+  assert(!(lfm->left_y[TX_8X8] & lfm->left_y[TX_4X4]));
+  assert(!(lfm->int_4x4_y & lfm->left_y[TX_16X16]));
+  assert(!(lfm->left_uv[TX_16X16] & lfm->left_uv[TX_8X8]));
+  assert(!(lfm->left_uv[TX_16X16] & lfm->left_uv[TX_4X4]));
+  assert(!(lfm->left_uv[TX_8X8] & lfm->left_uv[TX_4X4]));
+  assert(!(lfm->int_4x4_uv & lfm->left_uv[TX_16X16]));
+  assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_8X8]));
+  assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_4X4]));
+  assert(!(lfm->above_y[TX_8X8] & lfm->above_y[TX_4X4]));
+  assert(!(lfm->int_4x4_y & lfm->above_y[TX_16X16]));
+  assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_8X8]));
+  assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_4X4]));
+  assert(!(lfm->above_uv[TX_8X8] & lfm->above_uv[TX_4X4]));
+  assert(!(lfm->int_4x4_uv & lfm->above_uv[TX_16X16]));
+}
+
+// This function sets up the bit masks for the entire 64x64 region represented
+// by mi_row, mi_col.
+void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col,
+                    MODE_INFO **mi, const int mode_info_stride,
+                    LOOP_FILTER_MASK *lfm) {
+  int idx_32, idx_16, idx_8;
+  const loop_filter_info_n *const lfi_n = &cm->lf_info;
+  MODE_INFO **mip = mi;
+  MODE_INFO **mip2 = mi;
+
+  // These are offsets to the next mi in the 64x64 block. It is what gets
+  // added to the mi ptr as we go through each loop. It helps us to avoid
+  // setting up special row and column counters for each index. The last step
+  // brings us out back to the starting position.
+  const int offset_32[] = {4, (mode_info_stride << 2) - 4, 4,
+                           -(mode_info_stride << 2) - 4};
+  const int offset_16[] = {2, (mode_info_stride << 1) - 2, 2,
+                           -(mode_info_stride << 1) - 2};
+  const int offset[] = {1, mode_info_stride - 1, 1, -mode_info_stride - 1};
+
+  // Following variables represent shifts to position the current block
+  // mask over the appropriate block. A shift of 36 to the left will move
+  // the bits for the final 32 by 32 block in the 64x64 down 4 rows and
+  // right 4 columns to the appropriate spot.
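+  // Editor's sketch (not from the upstream source): the shifts compose
+  // additively. For example, the 16x16 block in the top-left corner of the
+  // bottom-right 32x32 uses shift_32_y[3] + shift_16_y[0] == 36, i.e. bit
+  // 4 * 8 + 4 of the raster-order 8x8 grid (row 4, column 4).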
+ const int shift_32_y[] = {0, 4, 32, 36}; + const int shift_16_y[] = {0, 2, 16, 18}; + const int shift_8_y[] = {0, 1, 8, 9}; + const int shift_32_uv[] = {0, 2, 8, 10}; + const int shift_16_uv[] = {0, 1, 4, 5}; + const int max_rows = (mi_row + MI_BLOCK_SIZE > cm->mi_rows ? + cm->mi_rows - mi_row : MI_BLOCK_SIZE); + const int max_cols = (mi_col + MI_BLOCK_SIZE > cm->mi_cols ? + cm->mi_cols - mi_col : MI_BLOCK_SIZE); + + vp9_zero(*lfm); + assert(mip[0] != NULL); + + switch (mip[0]->sb_type) { + case BLOCK_64X64: + build_masks(lfi_n, mip[0] , 0, 0, lfm); + break; + case BLOCK_64X32: + build_masks(lfi_n, mip[0], 0, 0, lfm); + mip2 = mip + mode_info_stride * 4; + if (4 >= max_rows) + break; + build_masks(lfi_n, mip2[0], 32, 8, lfm); + break; + case BLOCK_32X64: + build_masks(lfi_n, mip[0], 0, 0, lfm); + mip2 = mip + 4; + if (4 >= max_cols) + break; + build_masks(lfi_n, mip2[0], 4, 2, lfm); + break; + default: + for (idx_32 = 0; idx_32 < 4; mip += offset_32[idx_32], ++idx_32) { + const int shift_y = shift_32_y[idx_32]; + const int shift_uv = shift_32_uv[idx_32]; + const int mi_32_col_offset = ((idx_32 & 1) << 2); + const int mi_32_row_offset = ((idx_32 >> 1) << 2); + if (mi_32_col_offset >= max_cols || mi_32_row_offset >= max_rows) + continue; + switch (mip[0]->sb_type) { + case BLOCK_32X32: + build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); + break; + case BLOCK_32X16: + build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); + if (mi_32_row_offset + 2 >= max_rows) + continue; + mip2 = mip + mode_info_stride * 2; + build_masks(lfi_n, mip2[0], shift_y + 16, shift_uv + 4, lfm); + break; + case BLOCK_16X32: + build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); + if (mi_32_col_offset + 2 >= max_cols) + continue; + mip2 = mip + 2; + build_masks(lfi_n, mip2[0], shift_y + 2, shift_uv + 1, lfm); + break; + default: + for (idx_16 = 0; idx_16 < 4; mip += offset_16[idx_16], ++idx_16) { + const int shift_y = shift_32_y[idx_32] + shift_16_y[idx_16]; + const int shift_uv = shift_32_uv[idx_32] + shift_16_uv[idx_16]; + const int mi_16_col_offset = mi_32_col_offset + + ((idx_16 & 1) << 1); + const int mi_16_row_offset = mi_32_row_offset + + ((idx_16 >> 1) << 1); + + if (mi_16_col_offset >= max_cols || mi_16_row_offset >= max_rows) + continue; + + switch (mip[0]->sb_type) { + case BLOCK_16X16: + build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); + break; + case BLOCK_16X8: + build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); + if (mi_16_row_offset + 1 >= max_rows) + continue; + mip2 = mip + mode_info_stride; + build_y_mask(lfi_n, mip2[0], shift_y+8, lfm); + break; + case BLOCK_8X16: + build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); + if (mi_16_col_offset +1 >= max_cols) + continue; + mip2 = mip + 1; + build_y_mask(lfi_n, mip2[0], shift_y+1, lfm); + break; + default: { + const int shift_y = shift_32_y[idx_32] + + shift_16_y[idx_16] + + shift_8_y[0]; + build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); + mip += offset[0]; + for (idx_8 = 1; idx_8 < 4; mip += offset[idx_8], ++idx_8) { + const int shift_y = shift_32_y[idx_32] + + shift_16_y[idx_16] + + shift_8_y[idx_8]; + const int mi_8_col_offset = mi_16_col_offset + + ((idx_8 & 1)); + const int mi_8_row_offset = mi_16_row_offset + + ((idx_8 >> 1)); + + if (mi_8_col_offset >= max_cols || + mi_8_row_offset >= max_rows) + continue; + build_y_mask(lfi_n, mip[0], shift_y, lfm); + } + break; + } + } + } + break; + } + } + break; + } +} + +static void filter_selectively_vert(uint8_t *s, int pitch, + unsigned int mask_16x16, + unsigned int mask_8x8, + unsigned int 
mask_4x4, + unsigned int mask_4x4_int, + const loop_filter_thresh *lfthr, + const uint8_t *lfl) { + unsigned int mask; + + for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; + mask; mask >>= 1) { + const loop_filter_thresh *lfi = lfthr + *lfl; + + if (mask & 1) { + if (mask_16x16 & 1) { + vpx_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); + } else if (mask_8x8 & 1) { + vpx_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); + } else if (mask_4x4 & 1) { + vpx_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); + } + } + if (mask_4x4_int & 1) + vpx_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); + s += 8; + lfl += 1; + mask_16x16 >>= 1; + mask_8x8 >>= 1; + mask_4x4 >>= 1; + mask_4x4_int >>= 1; + } +} + +#if CONFIG_VP9_HIGHBITDEPTH +static void highbd_filter_selectively_vert(uint16_t *s, int pitch, + unsigned int mask_16x16, + unsigned int mask_8x8, + unsigned int mask_4x4, + unsigned int mask_4x4_int, + const loop_filter_thresh *lfthr, + const uint8_t *lfl, int bd) { + unsigned int mask; + + for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; + mask; mask >>= 1) { + const loop_filter_thresh *lfi = lfthr + *lfl; + + if (mask & 1) { + if (mask_16x16 & 1) { + vpx_highbd_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, bd); + } else if (mask_8x8 & 1) { + vpx_highbd_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, bd); + } else if (mask_4x4 & 1) { + vpx_highbd_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, bd); + } + } + if (mask_4x4_int & 1) + vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, + lfi->hev_thr, bd); + s += 8; + lfl += 1; + mask_16x16 >>= 1; + mask_8x8 >>= 1; + mask_4x4 >>= 1; + mask_4x4_int >>= 1; + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH + +void vp9_filter_block_plane_non420(VP9_COMMON *cm, + struct macroblockd_plane *plane, + MODE_INFO **mi_8x8, + int mi_row, int mi_col) { + const int ss_x = plane->subsampling_x; + const int ss_y = plane->subsampling_y; + const int row_step = 1 << ss_y; + const int col_step = 1 << ss_x; + const int row_step_stride = cm->mi_stride * row_step; + struct buf_2d *const dst = &plane->dst; + uint8_t* const dst0 = dst->buf; + unsigned int mask_16x16[MI_BLOCK_SIZE] = {0}; + unsigned int mask_8x8[MI_BLOCK_SIZE] = {0}; + unsigned int mask_4x4[MI_BLOCK_SIZE] = {0}; + unsigned int mask_4x4_int[MI_BLOCK_SIZE] = {0}; + uint8_t lfl[MI_BLOCK_SIZE * MI_BLOCK_SIZE]; + int r, c; + + for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += row_step) { + unsigned int mask_16x16_c = 0; + unsigned int mask_8x8_c = 0; + unsigned int mask_4x4_c = 0; + unsigned int border_mask; + + // Determine the vertical edges that need filtering + for (c = 0; c < MI_BLOCK_SIZE && mi_col + c < cm->mi_cols; c += col_step) { + const MODE_INFO *mi = mi_8x8[c]; + const BLOCK_SIZE sb_type = mi[0].sb_type; + const int skip_this = mi[0].skip && is_inter_block(mi); + // left edge of current unit is block/partition edge -> no skip + const int block_edge_left = (num_4x4_blocks_wide_lookup[sb_type] > 1) ? + !(c & (num_8x8_blocks_wide_lookup[sb_type] - 1)) : 1; + const int skip_this_c = skip_this && !block_edge_left; + // top edge of current unit is block/partition edge -> no skip + const int block_edge_above = (num_4x4_blocks_high_lookup[sb_type] > 1) ? 
+ !(r & (num_8x8_blocks_high_lookup[sb_type] - 1)) : 1; + const int skip_this_r = skip_this && !block_edge_above; + const TX_SIZE tx_size = get_uv_tx_size(mi, plane); + const int skip_border_4x4_c = ss_x && mi_col + c == cm->mi_cols - 1; + const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1; + + // Filter level can vary per MI + if (!(lfl[(r << 3) + (c >> ss_x)] = + get_filter_level(&cm->lf_info, mi))) + continue; + + // Build masks based on the transform size of each block + if (tx_size == TX_32X32) { + if (!skip_this_c && ((c >> ss_x) & 3) == 0) { + if (!skip_border_4x4_c) + mask_16x16_c |= 1 << (c >> ss_x); + else + mask_8x8_c |= 1 << (c >> ss_x); + } + if (!skip_this_r && ((r >> ss_y) & 3) == 0) { + if (!skip_border_4x4_r) + mask_16x16[r] |= 1 << (c >> ss_x); + else + mask_8x8[r] |= 1 << (c >> ss_x); + } + } else if (tx_size == TX_16X16) { + if (!skip_this_c && ((c >> ss_x) & 1) == 0) { + if (!skip_border_4x4_c) + mask_16x16_c |= 1 << (c >> ss_x); + else + mask_8x8_c |= 1 << (c >> ss_x); + } + if (!skip_this_r && ((r >> ss_y) & 1) == 0) { + if (!skip_border_4x4_r) + mask_16x16[r] |= 1 << (c >> ss_x); + else + mask_8x8[r] |= 1 << (c >> ss_x); + } + } else { + // force 8x8 filtering on 32x32 boundaries + if (!skip_this_c) { + if (tx_size == TX_8X8 || ((c >> ss_x) & 3) == 0) + mask_8x8_c |= 1 << (c >> ss_x); + else + mask_4x4_c |= 1 << (c >> ss_x); + } + + if (!skip_this_r) { + if (tx_size == TX_8X8 || ((r >> ss_y) & 3) == 0) + mask_8x8[r] |= 1 << (c >> ss_x); + else + mask_4x4[r] |= 1 << (c >> ss_x); + } + + if (!skip_this && tx_size < TX_8X8 && !skip_border_4x4_c) + mask_4x4_int[r] |= 1 << (c >> ss_x); + } + } + + // Disable filtering on the leftmost column + border_mask = ~(mi_col == 0); +#if CONFIG_VP9_HIGHBITDEPTH + if (cm->use_highbitdepth) { + highbd_filter_selectively_vert(CONVERT_TO_SHORTPTR(dst->buf), + dst->stride, + mask_16x16_c & border_mask, + mask_8x8_c & border_mask, + mask_4x4_c & border_mask, + mask_4x4_int[r], + cm->lf_info.lfthr, &lfl[r << 3], + (int)cm->bit_depth); + } else { +#endif // CONFIG_VP9_HIGHBITDEPTH + filter_selectively_vert(dst->buf, dst->stride, + mask_16x16_c & border_mask, + mask_8x8_c & border_mask, + mask_4x4_c & border_mask, + mask_4x4_int[r], + cm->lf_info.lfthr, &lfl[r << 3]); +#if CONFIG_VP9_HIGHBITDEPTH + } +#endif // CONFIG_VP9_HIGHBITDEPTH + dst->buf += 8 * dst->stride; + mi_8x8 += row_step_stride; + } + + // Now do horizontal pass + dst->buf = dst0; + for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += row_step) { + const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1; + const unsigned int mask_4x4_int_r = skip_border_4x4_r ? 
0 : mask_4x4_int[r]; + + unsigned int mask_16x16_r; + unsigned int mask_8x8_r; + unsigned int mask_4x4_r; + + if (mi_row + r == 0) { + mask_16x16_r = 0; + mask_8x8_r = 0; + mask_4x4_r = 0; + } else { + mask_16x16_r = mask_16x16[r]; + mask_8x8_r = mask_8x8[r]; + mask_4x4_r = mask_4x4[r]; + } +#if CONFIG_VP9_HIGHBITDEPTH + if (cm->use_highbitdepth) { + highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf), + dst->stride, + mask_16x16_r, + mask_8x8_r, + mask_4x4_r, + mask_4x4_int_r, + cm->lf_info.lfthr, &lfl[r << 3], + (int)cm->bit_depth); + } else { +#endif // CONFIG_VP9_HIGHBITDEPTH + filter_selectively_horiz(dst->buf, dst->stride, + mask_16x16_r, + mask_8x8_r, + mask_4x4_r, + mask_4x4_int_r, + cm->lf_info.lfthr, &lfl[r << 3]); +#if CONFIG_VP9_HIGHBITDEPTH + } +#endif // CONFIG_VP9_HIGHBITDEPTH + dst->buf += 8 * dst->stride; + } +} + +void vp9_filter_block_plane_ss00(VP9_COMMON *const cm, + struct macroblockd_plane *const plane, + int mi_row, + LOOP_FILTER_MASK *lfm) { + struct buf_2d *const dst = &plane->dst; + uint8_t *const dst0 = dst->buf; + int r; + uint64_t mask_16x16 = lfm->left_y[TX_16X16]; + uint64_t mask_8x8 = lfm->left_y[TX_8X8]; + uint64_t mask_4x4 = lfm->left_y[TX_4X4]; + uint64_t mask_4x4_int = lfm->int_4x4_y; + + assert(plane->subsampling_x == 0 && plane->subsampling_y == 0); + + // Vertical pass: do 2 rows at one time + for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) { + // Disable filtering on the leftmost column. +#if CONFIG_VP9_HIGHBITDEPTH + if (cm->use_highbitdepth) { + highbd_filter_selectively_vert_row2(plane->subsampling_x, + CONVERT_TO_SHORTPTR(dst->buf), + dst->stride, + (unsigned int)mask_16x16, + (unsigned int)mask_8x8, + (unsigned int)mask_4x4, + (unsigned int)mask_4x4_int, + cm->lf_info.lfthr, + &lfm->lfl_y[r << 3], + (int)cm->bit_depth); + } else { +#endif // CONFIG_VP9_HIGHBITDEPTH + filter_selectively_vert_row2(plane->subsampling_x, dst->buf, dst->stride, + (unsigned int)mask_16x16, + (unsigned int)mask_8x8, + (unsigned int)mask_4x4, + (unsigned int)mask_4x4_int, + cm->lf_info.lfthr, &lfm->lfl_y[r << 3]); +#if CONFIG_VP9_HIGHBITDEPTH + } +#endif // CONFIG_VP9_HIGHBITDEPTH + dst->buf += 16 * dst->stride; + mask_16x16 >>= 16; + mask_8x8 >>= 16; + mask_4x4 >>= 16; + mask_4x4_int >>= 16; + } + + // Horizontal pass + dst->buf = dst0; + mask_16x16 = lfm->above_y[TX_16X16]; + mask_8x8 = lfm->above_y[TX_8X8]; + mask_4x4 = lfm->above_y[TX_4X4]; + mask_4x4_int = lfm->int_4x4_y; + + for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r++) { + unsigned int mask_16x16_r; + unsigned int mask_8x8_r; + unsigned int mask_4x4_r; + + if (mi_row + r == 0) { + mask_16x16_r = 0; + mask_8x8_r = 0; + mask_4x4_r = 0; + } else { + mask_16x16_r = mask_16x16 & 0xff; + mask_8x8_r = mask_8x8 & 0xff; + mask_4x4_r = mask_4x4 & 0xff; + } + +#if CONFIG_VP9_HIGHBITDEPTH + if (cm->use_highbitdepth) { + highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf), + dst->stride, mask_16x16_r, mask_8x8_r, + mask_4x4_r, mask_4x4_int & 0xff, + cm->lf_info.lfthr, &lfm->lfl_y[r << 3], + (int)cm->bit_depth); + } else { +#endif // CONFIG_VP9_HIGHBITDEPTH + filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, + mask_4x4_r, mask_4x4_int & 0xff, + cm->lf_info.lfthr, &lfm->lfl_y[r << 3]); +#if CONFIG_VP9_HIGHBITDEPTH + } +#endif // CONFIG_VP9_HIGHBITDEPTH + + dst->buf += 8 * dst->stride; + mask_16x16 >>= 8; + mask_8x8 >>= 8; + mask_4x4 >>= 8; + mask_4x4_int >>= 8; + } +} + +void vp9_filter_block_plane_ss11(VP9_COMMON *const cm, + struct 
macroblockd_plane *const plane, + int mi_row, + LOOP_FILTER_MASK *lfm) { + struct buf_2d *const dst = &plane->dst; + uint8_t *const dst0 = dst->buf; + int r, c; + uint8_t lfl_uv[16]; + + uint16_t mask_16x16 = lfm->left_uv[TX_16X16]; + uint16_t mask_8x8 = lfm->left_uv[TX_8X8]; + uint16_t mask_4x4 = lfm->left_uv[TX_4X4]; + uint16_t mask_4x4_int = lfm->int_4x4_uv; + + assert(plane->subsampling_x == 1 && plane->subsampling_y == 1); + + // Vertical pass: do 2 rows at one time + for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 4) { + for (c = 0; c < (MI_BLOCK_SIZE >> 1); c++) { + lfl_uv[(r << 1) + c] = lfm->lfl_y[(r << 3) + (c << 1)]; + lfl_uv[((r + 2) << 1) + c] = lfm->lfl_y[((r + 2) << 3) + (c << 1)]; + } + + // Disable filtering on the leftmost column. +#if CONFIG_VP9_HIGHBITDEPTH + if (cm->use_highbitdepth) { + highbd_filter_selectively_vert_row2(plane->subsampling_x, + CONVERT_TO_SHORTPTR(dst->buf), + dst->stride, + (unsigned int)mask_16x16, + (unsigned int)mask_8x8, + (unsigned int)mask_4x4, + (unsigned int)mask_4x4_int, + cm->lf_info.lfthr, &lfl_uv[r << 1], + (int)cm->bit_depth); + } else { +#endif // CONFIG_VP9_HIGHBITDEPTH + filter_selectively_vert_row2(plane->subsampling_x, dst->buf, dst->stride, + (unsigned int)mask_16x16, + (unsigned int)mask_8x8, + (unsigned int)mask_4x4, + (unsigned int)mask_4x4_int, + cm->lf_info.lfthr, &lfl_uv[r << 1]); +#if CONFIG_VP9_HIGHBITDEPTH + } +#endif // CONFIG_VP9_HIGHBITDEPTH + + dst->buf += 16 * dst->stride; + mask_16x16 >>= 8; + mask_8x8 >>= 8; + mask_4x4 >>= 8; + mask_4x4_int >>= 8; + } + + // Horizontal pass + dst->buf = dst0; + mask_16x16 = lfm->above_uv[TX_16X16]; + mask_8x8 = lfm->above_uv[TX_8X8]; + mask_4x4 = lfm->above_uv[TX_4X4]; + mask_4x4_int = lfm->int_4x4_uv; + + for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) { + const int skip_border_4x4_r = mi_row + r == cm->mi_rows - 1; + const unsigned int mask_4x4_int_r = + skip_border_4x4_r ? 0 : (mask_4x4_int & 0xf); + unsigned int mask_16x16_r; + unsigned int mask_8x8_r; + unsigned int mask_4x4_r; + + if (mi_row + r == 0) { + mask_16x16_r = 0; + mask_8x8_r = 0; + mask_4x4_r = 0; + } else { + mask_16x16_r = mask_16x16 & 0xf; + mask_8x8_r = mask_8x8 & 0xf; + mask_4x4_r = mask_4x4 & 0xf; + } + +#if CONFIG_VP9_HIGHBITDEPTH + if (cm->use_highbitdepth) { + highbd_filter_selectively_horiz(CONVERT_TO_SHORTPTR(dst->buf), + dst->stride, mask_16x16_r, mask_8x8_r, + mask_4x4_r, mask_4x4_int_r, + cm->lf_info.lfthr, &lfl_uv[r << 1], + (int)cm->bit_depth); + } else { +#endif // CONFIG_VP9_HIGHBITDEPTH + filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, + mask_4x4_r, mask_4x4_int_r, cm->lf_info.lfthr, + &lfl_uv[r << 1]); +#if CONFIG_VP9_HIGHBITDEPTH + } +#endif // CONFIG_VP9_HIGHBITDEPTH + + dst->buf += 8 * dst->stride; + mask_16x16 >>= 4; + mask_8x8 >>= 4; + mask_4x4 >>= 4; + mask_4x4_int >>= 4; + } +} + +static void loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, VP9_COMMON *cm, + struct macroblockd_plane planes[MAX_MB_PLANE], + int start, int stop, int y_only) { + const int num_planes = y_only ? 
1 : MAX_MB_PLANE; + enum lf_path path; + int mi_row, mi_col; + + if (y_only) + path = LF_PATH_444; + else if (planes[1].subsampling_y == 1 && planes[1].subsampling_x == 1) + path = LF_PATH_420; + else if (planes[1].subsampling_y == 0 && planes[1].subsampling_x == 0) + path = LF_PATH_444; + else + path = LF_PATH_SLOW; + + for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) { + MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride; + LOOP_FILTER_MASK *lfm = get_lfm(&cm->lf, mi_row, 0); + + for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE, ++lfm) { + int plane; + + vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col); + + // TODO(jimbankoski): For 444 only need to do y mask. + vp9_adjust_mask(cm, mi_row, mi_col, lfm); + + vp9_filter_block_plane_ss00(cm, &planes[0], mi_row, lfm); + for (plane = 1; plane < num_planes; ++plane) { + switch (path) { + case LF_PATH_420: + vp9_filter_block_plane_ss11(cm, &planes[plane], mi_row, lfm); + break; + case LF_PATH_444: + vp9_filter_block_plane_ss00(cm, &planes[plane], mi_row, lfm); + break; + case LF_PATH_SLOW: + vp9_filter_block_plane_non420(cm, &planes[plane], mi + mi_col, + mi_row, mi_col); + break; + } + } + } + } +} + +void vp9_loop_filter_frame(YV12_BUFFER_CONFIG *frame, + VP9_COMMON *cm, MACROBLOCKD *xd, + int frame_filter_level, + int y_only, int partial_frame) { + int start_mi_row, end_mi_row, mi_rows_to_filter; + if (!frame_filter_level) return; + start_mi_row = 0; + mi_rows_to_filter = cm->mi_rows; + if (partial_frame && cm->mi_rows > 8) { + start_mi_row = cm->mi_rows >> 1; + start_mi_row &= 0xfffffff8; + mi_rows_to_filter = VPXMAX(cm->mi_rows / 8, 8); + } + end_mi_row = start_mi_row + mi_rows_to_filter; + loop_filter_rows(frame, cm, xd->plane, start_mi_row, end_mi_row, y_only); +} + +// Used by the encoder to build the loopfilter masks. +// TODO(slavarnway): Do the encoder the same way the decoder does it and +// build the masks in line as part of the encode process. +void vp9_build_mask_frame(VP9_COMMON *cm, int frame_filter_level, + int partial_frame) { + int start_mi_row, end_mi_row, mi_rows_to_filter; + int mi_col, mi_row; + if (!frame_filter_level) return; + start_mi_row = 0; + mi_rows_to_filter = cm->mi_rows; + if (partial_frame && cm->mi_rows > 8) { + start_mi_row = cm->mi_rows >> 1; + start_mi_row &= 0xfffffff8; + mi_rows_to_filter = VPXMAX(cm->mi_rows / 8, 8); + } + end_mi_row = start_mi_row + mi_rows_to_filter; + + vp9_loop_filter_frame_init(cm, frame_filter_level); + + for (mi_row = start_mi_row; mi_row < end_mi_row; mi_row += MI_BLOCK_SIZE) { + MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride; + for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) { + // vp9_setup_mask() zeros lfm + vp9_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, + get_lfm(&cm->lf, mi_row, mi_col)); + } + } +} + +// 8x8 blocks in a superblock. A "1" represents the first block in a 16x16 +// or greater area. +static const uint8_t first_block_in_16x16[8][8] = { + {1, 0, 1, 0, 1, 0, 1, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {1, 0, 1, 0, 1, 0, 1, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {1, 0, 1, 0, 1, 0, 1, 0}, + {0, 0, 0, 0, 0, 0, 0, 0}, + {1, 0, 1, 0, 1, 0, 1, 0}, + {0, 0, 0, 0, 0, 0, 0, 0} +}; + +// This function sets up the bit masks for a block represented +// by mi_row, mi_col in a 64x64 region. +// TODO(SJL): This function only works for yv12. 
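+// Editor's worked example (a sketch, not from the upstream source): for
+// mi_row == 13 and mi_col == 6, row_in_sb == 5 and col_in_sb == 6, giving
+// shift_y == 6 + (5 << 3) == 46 and
+// shift_uv == (6 >> 1) + ((5 >> 1) << 2) == 11; first_block_in_16x16[5][6]
+// is 0, so the uv masks are left untouched for this block.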
+void vp9_build_mask(VP9_COMMON *cm, const MODE_INFO *mi, int mi_row, + int mi_col, int bw, int bh) { + const BLOCK_SIZE block_size = mi->sb_type; + const TX_SIZE tx_size_y = mi->tx_size; + const loop_filter_info_n *const lfi_n = &cm->lf_info; + const int filter_level = get_filter_level(lfi_n, mi); + const TX_SIZE tx_size_uv = get_uv_tx_size_impl(tx_size_y, block_size, 1, 1); + LOOP_FILTER_MASK *const lfm = get_lfm(&cm->lf, mi_row, mi_col); + uint64_t *const left_y = &lfm->left_y[tx_size_y]; + uint64_t *const above_y = &lfm->above_y[tx_size_y]; + uint64_t *const int_4x4_y = &lfm->int_4x4_y; + uint16_t *const left_uv = &lfm->left_uv[tx_size_uv]; + uint16_t *const above_uv = &lfm->above_uv[tx_size_uv]; + uint16_t *const int_4x4_uv = &lfm->int_4x4_uv; + const int row_in_sb = (mi_row & 7); + const int col_in_sb = (mi_col & 7); + const int shift_y = col_in_sb + (row_in_sb << 3); + const int shift_uv = (col_in_sb >> 1) + ((row_in_sb >> 1) << 2); + const int build_uv = first_block_in_16x16[row_in_sb][col_in_sb]; + + if (!filter_level) { + return; + } else { + int index = shift_y; + int i; + for (i = 0; i < bh; i++) { + memset(&lfm->lfl_y[index], filter_level, bw); + index += 8; + } + } + + // These set 1 in the current block size for the block size edges. + // For instance if the block size is 32x16, we'll set: + // above = 1111 + // 0000 + // and + // left = 1000 + // = 1000 + // NOTE : In this example the low bit is left most ( 1000 ) is stored as + // 1, not 8... + // + // U and V set things on a 16 bit scale. + // + *above_y |= above_prediction_mask[block_size] << shift_y; + *left_y |= left_prediction_mask[block_size] << shift_y; + + if (build_uv) { + *above_uv |= above_prediction_mask_uv[block_size] << shift_uv; + *left_uv |= left_prediction_mask_uv[block_size] << shift_uv; + } + + // If the block has no coefficients and is not intra we skip applying + // the loop filter on block edges. + if (mi->skip && is_inter_block(mi)) + return; + + // Add a mask for the transform size. The transform size mask is set to + // be correct for a 64x64 prediction block size. Mask to match the size of + // the block we are working on and then shift it into place. + *above_y |= (size_mask[block_size] & + above_64x64_txform_mask[tx_size_y]) << shift_y; + *left_y |= (size_mask[block_size] & + left_64x64_txform_mask[tx_size_y]) << shift_y; + + if (build_uv) { + *above_uv |= (size_mask_uv[block_size] & + above_64x64_txform_mask_uv[tx_size_uv]) << shift_uv; + + *left_uv |= (size_mask_uv[block_size] & + left_64x64_txform_mask_uv[tx_size_uv]) << shift_uv; + } + + // Try to determine what to do with the internal 4x4 block boundaries. These + // differ from the 4x4 boundaries on the outside edge of an 8x8 in that the + // internal ones can be skipped and don't depend on the prediction block size. 
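+  // Editor's sketch (not from the upstream source): a BLOCK_16X16 coded
+  // with TX_4X4 has size_mask[BLOCK_16X16] == 0x0303, so the statement
+  // below flags all four of its 8x8 luma cells for internal 4x4 edge
+  // filtering; when tx_size_uv is also TX_4X4,
+  // size_mask_uv[BLOCK_16X16] == 0x0001 flags its single chroma 8x8.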
+ if (tx_size_y == TX_4X4) + *int_4x4_y |= size_mask[block_size] << shift_y; + + if (build_uv && tx_size_uv == TX_4X4) + *int_4x4_uv |= (size_mask_uv[block_size] & 0xffff) << shift_uv; +} + +void vp9_loop_filter_data_reset( + LFWorkerData *lf_data, YV12_BUFFER_CONFIG *frame_buffer, + struct VP9Common *cm, const struct macroblockd_plane planes[MAX_MB_PLANE]) { + lf_data->frame_buffer = frame_buffer; + lf_data->cm = cm; + lf_data->start = 0; + lf_data->stop = 0; + lf_data->y_only = 0; + memcpy(lf_data->planes, planes, sizeof(lf_data->planes)); +} + +void vp9_reset_lfm(VP9_COMMON *const cm) { + if (cm->lf.filter_level) { + memset(cm->lf.lfm, 0, + ((cm->mi_rows + (MI_BLOCK_SIZE - 1)) >> 3) * cm->lf.lfm_stride * + sizeof(*cm->lf.lfm)); + } +} + +int vp9_loop_filter_worker(LFWorkerData *const lf_data, void *unused) { + (void)unused; + loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes, + lf_data->start, lf_data->stop, lf_data->y_only); + return 1; +} diff --git a/thirdparty/libvpx/vp9/common/vp9_loopfilter.h b/thirdparty/libvpx/vp9/common/vp9_loopfilter.h new file mode 100644 index 0000000000..fca8830fa1 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_loopfilter.h @@ -0,0 +1,166 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_COMMON_VP9_LOOPFILTER_H_ +#define VP9_COMMON_VP9_LOOPFILTER_H_ + +#include "vpx_ports/mem.h" +#include "./vpx_config.h" + +#include "vp9/common/vp9_blockd.h" +#include "vp9/common/vp9_seg_common.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define MAX_LOOP_FILTER 63 +#define MAX_SHARPNESS 7 + +#define SIMD_WIDTH 16 + +#define MAX_REF_LF_DELTAS 4 +#define MAX_MODE_LF_DELTAS 2 + +enum lf_path { + LF_PATH_420, + LF_PATH_444, + LF_PATH_SLOW, +}; + +// Need to align this structure so when it is declared and +// passed it can be loaded into vector registers. +typedef struct { + DECLARE_ALIGNED(SIMD_WIDTH, uint8_t, mblim[SIMD_WIDTH]); + DECLARE_ALIGNED(SIMD_WIDTH, uint8_t, lim[SIMD_WIDTH]); + DECLARE_ALIGNED(SIMD_WIDTH, uint8_t, hev_thr[SIMD_WIDTH]); +} loop_filter_thresh; + +typedef struct { + loop_filter_thresh lfthr[MAX_LOOP_FILTER + 1]; + uint8_t lvl[MAX_SEGMENTS][MAX_REF_FRAMES][MAX_MODE_LF_DELTAS]; +} loop_filter_info_n; + +// This structure holds bit masks for all 8x8 blocks in a 64x64 region. +// Each 1 bit represents a position in which we want to apply the loop filter. +// Left_ entries refer to whether we apply a filter on the border to the +// left of the block. Above_ entries refer to whether or not to apply a +// filter on the above border. Int_ entries refer to whether or not to +// apply borders on the 4x4 edges within the 8x8 block that each bit +// represents. +// Since each transform is accompanied by a potentially different type of +// loop filter there is a different entry in the array for each transform size. 
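+// Editor's sketch (not from the upstream source): e.g. bit 9 of
+// left_y[TX_16X16] set means "apply the 16x16 filter to the left edge of
+// the 8x8 block at row 1, column 1 of this 64x64 region"
+// (bit index == col + row * 8).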
+typedef struct { + uint64_t left_y[TX_SIZES]; + uint64_t above_y[TX_SIZES]; + uint64_t int_4x4_y; + uint16_t left_uv[TX_SIZES]; + uint16_t above_uv[TX_SIZES]; + uint16_t int_4x4_uv; + uint8_t lfl_y[64]; +} LOOP_FILTER_MASK; + +struct loopfilter { + int filter_level; + int last_filt_level; + + int sharpness_level; + int last_sharpness_level; + + uint8_t mode_ref_delta_enabled; + uint8_t mode_ref_delta_update; + + // 0 = Intra, Last, GF, ARF + signed char ref_deltas[MAX_REF_LF_DELTAS]; + signed char last_ref_deltas[MAX_REF_LF_DELTAS]; + + // 0 = ZERO_MV, MV + signed char mode_deltas[MAX_MODE_LF_DELTAS]; + signed char last_mode_deltas[MAX_MODE_LF_DELTAS]; + + LOOP_FILTER_MASK *lfm; + int lfm_stride; +}; + +/* assorted loopfilter functions which get used elsewhere */ +struct VP9Common; +struct macroblockd; +struct VP9LfSyncData; + +// This function sets up the bit masks for the entire 64x64 region represented +// by mi_row, mi_col. +void vp9_setup_mask(struct VP9Common *const cm, + const int mi_row, const int mi_col, + MODE_INFO **mi_8x8, const int mode_info_stride, + LOOP_FILTER_MASK *lfm); + +void vp9_filter_block_plane_ss00(struct VP9Common *const cm, + struct macroblockd_plane *const plane, + int mi_row, + LOOP_FILTER_MASK *lfm); + +void vp9_filter_block_plane_ss11(struct VP9Common *const cm, + struct macroblockd_plane *const plane, + int mi_row, + LOOP_FILTER_MASK *lfm); + +void vp9_filter_block_plane_non420(struct VP9Common *cm, + struct macroblockd_plane *plane, + MODE_INFO **mi_8x8, + int mi_row, int mi_col); + +void vp9_loop_filter_init(struct VP9Common *cm); + +// Update the loop filter for the current frame. +// This should be called before vp9_loop_filter_frame(), vp9_build_mask_frame() +// calls this function directly. +void vp9_loop_filter_frame_init(struct VP9Common *cm, int default_filt_lvl); + +void vp9_loop_filter_frame(YV12_BUFFER_CONFIG *frame, + struct VP9Common *cm, + struct macroblockd *mbd, + int filter_level, + int y_only, int partial_frame); + +// Get the superblock lfm for a given mi_row, mi_col. +static INLINE LOOP_FILTER_MASK *get_lfm(const struct loopfilter *lf, + const int mi_row, const int mi_col) { + return &lf->lfm[(mi_col >> 3) + ((mi_row >> 3) * lf->lfm_stride)]; +} + +void vp9_build_mask(struct VP9Common *cm, const MODE_INFO *mi, int mi_row, + int mi_col, int bw, int bh); +void vp9_adjust_mask(struct VP9Common *const cm, const int mi_row, + const int mi_col, LOOP_FILTER_MASK *lfm); +void vp9_build_mask_frame(struct VP9Common *cm, int frame_filter_level, + int partial_frame); +void vp9_reset_lfm(struct VP9Common *const cm); + +typedef struct LoopFilterWorkerData { + YV12_BUFFER_CONFIG *frame_buffer; + struct VP9Common *cm; + struct macroblockd_plane planes[MAX_MB_PLANE]; + + int start; + int stop; + int y_only; +} LFWorkerData; + +void vp9_loop_filter_data_reset( + LFWorkerData *lf_data, YV12_BUFFER_CONFIG *frame_buffer, + struct VP9Common *cm, const struct macroblockd_plane planes[MAX_MB_PLANE]); + +// Operates on the rows described by 'lf_data'. +int vp9_loop_filter_worker(LFWorkerData *const lf_data, void *unused); +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_COMMON_VP9_LOOPFILTER_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_mv.h b/thirdparty/libvpx/vp9/common/vp9_mv.h new file mode 100644 index 0000000000..5d89da8c25 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_mv.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_COMMON_VP9_MV_H_ +#define VP9_COMMON_VP9_MV_H_ + +#include "vpx/vpx_integer.h" + +#include "vp9/common/vp9_common.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct mv { + int16_t row; + int16_t col; +} MV; + +typedef union int_mv { + uint32_t as_int; + MV as_mv; +} int_mv; /* facilitates faster equality tests and copies */ + +typedef struct mv32 { + int32_t row; + int32_t col; +} MV32; + +static INLINE int is_zero_mv(const MV *mv) { + return *((const uint32_t *)mv) == 0; +} + +static INLINE int is_equal_mv(const MV *a, const MV *b) { + return *((const uint32_t *)a) == *((const uint32_t *)b); +} + +static INLINE void clamp_mv(MV *mv, int min_col, int max_col, + int min_row, int max_row) { + mv->col = clamp(mv->col, min_col, max_col); + mv->row = clamp(mv->row, min_row, max_row); +} + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_COMMON_VP9_MV_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_mvref_common.c b/thirdparty/libvpx/vp9/common/vp9_mvref_common.c new file mode 100644 index 0000000000..0eb01a51ba --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_mvref_common.c @@ -0,0 +1,201 @@ + +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "vp9/common/vp9_mvref_common.h" + +// This function searches the neighborhood of a given MB/SB +// to try and find candidate reference vectors. +static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd, + MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, + int_mv *mv_ref_list, + int block, int mi_row, int mi_col, + uint8_t *mode_context) { + const int *ref_sign_bias = cm->ref_frame_sign_bias; + int i, refmv_count = 0; + const POSITION *const mv_ref_search = mv_ref_blocks[mi->sb_type]; + int different_ref_found = 0; + int context_counter = 0; + const MV_REF *const prev_frame_mvs = cm->use_prev_frame_mvs ? + cm->prev_frame->mvs + mi_row * cm->mi_cols + mi_col : NULL; + const TileInfo *const tile = &xd->tile; + + // Blank the reference vector list + memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES); + + // The nearest 2 blocks are treated differently + // if the size < 8x8 we get the mv from the bmi substructure, + // and we also need to keep a mode count. + for (i = 0; i < 2; ++i) { + const POSITION *const mv_ref = &mv_ref_search[i]; + if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { + const MODE_INFO *const candidate_mi = xd->mi[mv_ref->col + mv_ref->row * + xd->mi_stride]; + // Keep counts for entropy encoding. 
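+      // Editor's example (sketch, not from the upstream source): neighbors
+      // coded NEWMV and ZEROMV contribute 1 + 3 == 4, and
+      // counter_to_context[4] is NEW_PLUS_NON_INTRA.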
+ context_counter += mode_2_counter[candidate_mi->mode]; + different_ref_found = 1; + + if (candidate_mi->ref_frame[0] == ref_frame) + ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 0, mv_ref->col, block), + refmv_count, mv_ref_list, Done); + else if (candidate_mi->ref_frame[1] == ref_frame) + ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 1, mv_ref->col, block), + refmv_count, mv_ref_list, Done); + } + } + + // Check the rest of the neighbors in much the same way + // as before except we don't need to keep track of sub blocks or + // mode counts. + for (; i < MVREF_NEIGHBOURS; ++i) { + const POSITION *const mv_ref = &mv_ref_search[i]; + if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { + const MODE_INFO *const candidate_mi = + xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; + different_ref_found = 1; + + if (candidate_mi->ref_frame[0] == ref_frame) + ADD_MV_REF_LIST(candidate_mi->mv[0], refmv_count, mv_ref_list, Done); + else if (candidate_mi->ref_frame[1] == ref_frame) + ADD_MV_REF_LIST(candidate_mi->mv[1], refmv_count, mv_ref_list, Done); + } + } + + // Check the last frame's mode and mv info. + if (cm->use_prev_frame_mvs) { + if (prev_frame_mvs->ref_frame[0] == ref_frame) { + ADD_MV_REF_LIST(prev_frame_mvs->mv[0], refmv_count, mv_ref_list, Done); + } else if (prev_frame_mvs->ref_frame[1] == ref_frame) { + ADD_MV_REF_LIST(prev_frame_mvs->mv[1], refmv_count, mv_ref_list, Done); + } + } + + // Since we couldn't find 2 mvs from the same reference frame + // go back through the neighbors and find motion vectors from + // different reference frames. + if (different_ref_found) { + for (i = 0; i < MVREF_NEIGHBOURS; ++i) { + const POSITION *mv_ref = &mv_ref_search[i]; + if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { + const MODE_INFO *const candidate_mi = + xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; + + // If the candidate is INTRA we don't want to consider its mv. + IF_DIFF_REF_FRAME_ADD_MV(candidate_mi, ref_frame, ref_sign_bias, + refmv_count, mv_ref_list, Done); + } + } + } + + // Since we still don't have a candidate we'll try the last frame. 
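+  // Editor's note (sketch, not from the upstream source): the sign flip
+  // below compensates for references on opposite temporal sides; when the
+  // stored vector's reference and ref_frame have differing sign biases,
+  // negating row and col keeps the prediction pointing the right way.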
+ if (cm->use_prev_frame_mvs) { + if (prev_frame_mvs->ref_frame[0] != ref_frame && + prev_frame_mvs->ref_frame[0] > INTRA_FRAME) { + int_mv mv = prev_frame_mvs->mv[0]; + if (ref_sign_bias[prev_frame_mvs->ref_frame[0]] != + ref_sign_bias[ref_frame]) { + mv.as_mv.row *= -1; + mv.as_mv.col *= -1; + } + ADD_MV_REF_LIST(mv, refmv_count, mv_ref_list, Done); + } + + if (prev_frame_mvs->ref_frame[1] > INTRA_FRAME && + prev_frame_mvs->ref_frame[1] != ref_frame && + prev_frame_mvs->mv[1].as_int != prev_frame_mvs->mv[0].as_int) { + int_mv mv = prev_frame_mvs->mv[1]; + if (ref_sign_bias[prev_frame_mvs->ref_frame[1]] != + ref_sign_bias[ref_frame]) { + mv.as_mv.row *= -1; + mv.as_mv.col *= -1; + } + ADD_MV_REF_LIST(mv, refmv_count, mv_ref_list, Done); + } + } + + Done: + + mode_context[ref_frame] = counter_to_context[context_counter]; + + // Clamp vectors + for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) + clamp_mv_ref(&mv_ref_list[i].as_mv, xd); +} + +void vp9_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd, + MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, + int_mv *mv_ref_list, + int mi_row, int mi_col, + uint8_t *mode_context) { + find_mv_refs_idx(cm, xd, mi, ref_frame, mv_ref_list, -1, + mi_row, mi_col, mode_context); +} + +void vp9_find_best_ref_mvs(MACROBLOCKD *xd, int allow_hp, + int_mv *mvlist, int_mv *nearest_mv, + int_mv *near_mv) { + int i; + // Make sure all the candidates are properly clamped etc + for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) { + lower_mv_precision(&mvlist[i].as_mv, allow_hp); + clamp_mv2(&mvlist[i].as_mv, xd); + } + *nearest_mv = mvlist[0]; + *near_mv = mvlist[1]; +} + +void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd, + int block, int ref, int mi_row, int mi_col, + int_mv *nearest_mv, int_mv *near_mv, + uint8_t *mode_context) { + int_mv mv_list[MAX_MV_REF_CANDIDATES]; + MODE_INFO *const mi = xd->mi[0]; + b_mode_info *bmi = mi->bmi; + int n; + + assert(MAX_MV_REF_CANDIDATES == 2); + + find_mv_refs_idx(cm, xd, mi, mi->ref_frame[ref], mv_list, block, + mi_row, mi_col, mode_context); + + near_mv->as_int = 0; + switch (block) { + case 0: + nearest_mv->as_int = mv_list[0].as_int; + near_mv->as_int = mv_list[1].as_int; + break; + case 1: + case 2: + nearest_mv->as_int = bmi[0].as_mv[ref].as_int; + for (n = 0; n < MAX_MV_REF_CANDIDATES; ++n) + if (nearest_mv->as_int != mv_list[n].as_int) { + near_mv->as_int = mv_list[n].as_int; + break; + } + break; + case 3: { + int_mv candidates[2 + MAX_MV_REF_CANDIDATES]; + candidates[0] = bmi[1].as_mv[ref]; + candidates[1] = bmi[0].as_mv[ref]; + candidates[2] = mv_list[0]; + candidates[3] = mv_list[1]; + + nearest_mv->as_int = bmi[2].as_mv[ref].as_int; + for (n = 0; n < 2 + MAX_MV_REF_CANDIDATES; ++n) + if (nearest_mv->as_int != candidates[n].as_int) { + near_mv->as_int = candidates[n].as_int; + break; + } + break; + } + default: + assert(0 && "Invalid block index."); + } +} diff --git a/thirdparty/libvpx/vp9/common/vp9_mvref_common.h b/thirdparty/libvpx/vp9/common/vp9_mvref_common.h new file mode 100644 index 0000000000..4380843e24 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_mvref_common.h @@ -0,0 +1,241 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+#ifndef VP9_COMMON_VP9_MVREF_COMMON_H_
+#define VP9_COMMON_VP9_MVREF_COMMON_H_
+
+#include "vp9/common/vp9_onyxc_int.h"
+#include "vp9/common/vp9_blockd.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define LEFT_TOP_MARGIN ((VP9_ENC_BORDER_IN_PIXELS - VP9_INTERP_EXTEND) << 3)
+#define RIGHT_BOTTOM_MARGIN ((VP9_ENC_BORDER_IN_PIXELS -\
+                                VP9_INTERP_EXTEND) << 3)
+
+#define MVREF_NEIGHBOURS 8
+
+typedef struct position {
+  int row;
+  int col;
+} POSITION;
+
+typedef enum {
+  BOTH_ZERO = 0,
+  ZERO_PLUS_PREDICTED = 1,
+  BOTH_PREDICTED = 2,
+  NEW_PLUS_NON_INTRA = 3,
+  BOTH_NEW = 4,
+  INTRA_PLUS_NON_INTRA = 5,
+  BOTH_INTRA = 6,
+  INVALID_CASE = 9
+} motion_vector_context;
+
+// This is used to figure out a context for the ref blocks. The code flattens
+// an array that would have 3 possible counts (0, 1 & 2) for 3 choices by
+// adding 9 for each intra block, 3 for each zero mv and 1 for each new
+// motion vector. This single number is then converted into a context
+// with a single lookup (counter_to_context).
+static const int mode_2_counter[MB_MODE_COUNT] = {
+  9,  // DC_PRED
+  9,  // V_PRED
+  9,  // H_PRED
+  9,  // D45_PRED
+  9,  // D135_PRED
+  9,  // D117_PRED
+  9,  // D153_PRED
+  9,  // D207_PRED
+  9,  // D63_PRED
+  9,  // TM_PRED
+  0,  // NEARESTMV
+  0,  // NEARMV
+  3,  // ZEROMV
+  1,  // NEWMV
+};
+
+// There are 3^3 different combinations of 3 counts that can be either 0, 1
+// or 2. However, at most two neighboring blocks contribute, so the highest
+// counter we need is 18 (two intra blocks at 9 each). For example, an intra
+// neighbor plus a NEWMV neighbor gives 9 + 1 = 10, which maps to
+// INTRA_PLUS_NON_INTRA. Sums that no pair of modes can produce (5, 7, 8, 11
+// and 13-17) map to INVALID_CASE.
+static const int counter_to_context[19] = {
+  BOTH_PREDICTED,  // 0
+  NEW_PLUS_NON_INTRA,  // 1
+  BOTH_NEW,  // 2
+  ZERO_PLUS_PREDICTED,  // 3
+  NEW_PLUS_NON_INTRA,  // 4
+  INVALID_CASE,  // 5
+  BOTH_ZERO,  // 6
+  INVALID_CASE,  // 7
+  INVALID_CASE,  // 8
+  INTRA_PLUS_NON_INTRA,  // 9
+  INTRA_PLUS_NON_INTRA,  // 10
+  INVALID_CASE,  // 11
+  INTRA_PLUS_NON_INTRA,  // 12
+  INVALID_CASE,  // 13
+  INVALID_CASE,  // 14
+  INVALID_CASE,  // 15
+  INVALID_CASE,  // 16
+  INVALID_CASE,  // 17
+  BOTH_INTRA  // 18
+};
+
+static const POSITION mv_ref_blocks[BLOCK_SIZES][MVREF_NEIGHBOURS] = {
+  // 4X4
+  {{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}},
+  // 4X8
+  {{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}},
+  // 8X4
+  {{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}},
+  // 8X8
+  {{-1, 0}, {0, -1}, {-1, -1}, {-2, 0}, {0, -2}, {-2, -1}, {-1, -2}, {-2, -2}},
+  // 8X16
+  {{0, -1}, {-1, 0}, {1, -1}, {-1, -1}, {0, -2}, {-2, 0}, {-2, -1}, {-1, -2}},
+  // 16X8
+  {{-1, 0}, {0, -1}, {-1, 1}, {-1, -1}, {-2, 0}, {0, -2}, {-1, -2}, {-2, -1}},
+  // 16X16
+  {{-1, 0}, {0, -1}, {-1, 1}, {1, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-3, -3}},
+  // 16X32
+  {{0, -1}, {-1, 0}, {2, -1}, {-1, -1}, {-1, 1}, {0, -3}, {-3, 0}, {-3, -3}},
+  // 32X16
+  {{-1, 0}, {0, -1}, {-1, 2}, {-1, -1}, {1, -1}, {-3, 0}, {0, -3}, {-3, -3}},
+  // 32X32
+  {{-1, 1}, {1, -1}, {-1, 2}, {2, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-3, -3}},
+  // 32X64
+  {{0, -1}, {-1, 0}, {4, -1}, {-1, 2}, {-1, -1}, {0, -3}, {-3, 0}, {2, -1}},
+  // 64X32
+  {{-1, 0}, {0, -1}, {-1, 4}, {2, -1}, {-1, -1}, {-3, 0}, {0, -3}, {-1, 2}},
+  // 64X64
+  {{-1, 3}, {3, -1}, {-1, 4}, {4, -1}, {-1, -1}, {-1, 0}, {0, -1}, {-1, 6}}
+};
+
+static const int idx_n_column_to_subblock[4][2] = {
+  {1, 2},
+  {1, 3},
+  {3, 2},
+  {3, 3}
+};
+
+// clamp_mv_ref
+#define MV_BORDER (16 << 3)  // Allow 16 pels in 1/8th pel units
+
+static INLINE void clamp_mv_ref(MV *mv, const MACROBLOCKD *xd) {
+  clamp_mv(mv, xd->mb_to_left_edge - MV_BORDER,
+               xd->mb_to_right_edge + MV_BORDER,
+               xd->mb_to_top_edge - MV_BORDER,
+               xd->mb_to_bottom_edge + MV_BORDER);
+}
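+
+// For example (illustrative values): with mb_to_left_edge == -64 in 1/8-pel
+// units, mv->col is clamped to no less than -64 - MV_BORDER = -192, so a
+// candidate column of -200 becomes -192.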
+
+// This function returns either the appropriate sub-block mv or the block's
+// mv, depending on whether the candidate's block size is below 8x8 and a
+// valid sub-block index (block_idx >= 0) was passed in.
+static INLINE int_mv get_sub_block_mv(const MODE_INFO *candidate, int which_mv,
+                                      int search_col, int block_idx) {
+  return block_idx >= 0 && candidate->sb_type < BLOCK_8X8
+          ? candidate->bmi[idx_n_column_to_subblock[block_idx][search_col == 0]]
+              .as_mv[which_mv]
+          : candidate->mv[which_mv];
+}
+
+
+// Performs mv sign inversion if indicated by the reference frame combination.
+static INLINE int_mv scale_mv(const MODE_INFO *mi, int ref,
+                              const MV_REFERENCE_FRAME this_ref_frame,
+                              const int *ref_sign_bias) {
+  int_mv mv = mi->mv[ref];
+  if (ref_sign_bias[mi->ref_frame[ref]] != ref_sign_bias[this_ref_frame]) {
+    mv.as_mv.row *= -1;
+    mv.as_mv.col *= -1;
+  }
+  return mv;
+}
+
+// This macro is used to add a motion vector to the mv_ref list if it isn't
+// already in the list. If it's the second motion vector it will also
+// skip all additional processing and jump to Done!
+#define ADD_MV_REF_LIST(mv, refmv_count, mv_ref_list, Done) \
+  do { \
+    if (refmv_count) { \
+      if ((mv).as_int != (mv_ref_list)[0].as_int) { \
+        (mv_ref_list)[(refmv_count)] = (mv); \
+        goto Done; \
+      } \
+    } else { \
+      (mv_ref_list)[(refmv_count)++] = (mv); \
+    } \
+  } while (0)
+
+// If the candidate's reference frame differs from ref_frame, is not INTRA,
+// and (for the second mv) the two mvs differ from each other, scale the mv
+// and add it to our list.
+#define IF_DIFF_REF_FRAME_ADD_MV(mbmi, ref_frame, ref_sign_bias, refmv_count, \
+                                 mv_ref_list, Done) \
+  do { \
+    if (is_inter_block(mbmi)) { \
+      if ((mbmi)->ref_frame[0] != ref_frame) \
+        ADD_MV_REF_LIST(scale_mv((mbmi), 0, ref_frame, ref_sign_bias), \
+                        refmv_count, mv_ref_list, Done); \
+      if (has_second_ref(mbmi) && \
+          (mbmi)->ref_frame[1] != ref_frame && \
+          (mbmi)->mv[1].as_int != (mbmi)->mv[0].as_int) \
+        ADD_MV_REF_LIST(scale_mv((mbmi), 1, ref_frame, ref_sign_bias), \
+                        refmv_count, mv_ref_list, Done); \
+    } \
+  } while (0)
+
+
+// Checks that the given mi_row, mi_col and search point
+// are inside the borders of the tile.
+static INLINE int is_inside(const TileInfo *const tile,
+                            int mi_col, int mi_row, int mi_rows,
+                            const POSITION *mi_pos) {
+  return !(mi_row + mi_pos->row < 0 ||
+           mi_col + mi_pos->col < tile->mi_col_start ||
+           mi_row + mi_pos->row >= mi_rows ||
+           mi_col + mi_pos->col >= tile->mi_col_end);
+}
+
+// TODO(jingning): this mv clamping function should be block size dependent.
+static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) {
+  clamp_mv(mv, xd->mb_to_left_edge - LEFT_TOP_MARGIN,
+               xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN,
+               xd->mb_to_top_edge - LEFT_TOP_MARGIN,
+               xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN);
+}
+
+static INLINE void lower_mv_precision(MV *mv, int allow_hp) {
+  const int use_hp = allow_hp && use_mv_hp(mv);
+  if (!use_hp) {
+    if (mv->row & 1)
+      mv->row += (mv->row > 0 ? -1 : 1);
+    if (mv->col & 1)
+      mv->col += (mv->col > 0 ? -1 : 1);
+  }
+}
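+
+// For example (illustrative values): with allow_hp == 0, an mv of (5, -3)
+// in 1/8-pel units becomes (4, -2); odd components are rounded toward zero.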
+
+typedef void (*find_mv_refs_sync)(void *const data, int mi_row);
+void vp9_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd,
+                      MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
+                      int_mv *mv_ref_list, int mi_row, int mi_col,
+                      uint8_t *mode_context);
+
+// Check a list of motion vectors by SAD score, using a number of rows of
+// pixels above and a number of columns of pixels to the left, to select
+// the one with the best score to use as the reference motion vector.
+void vp9_find_best_ref_mvs(MACROBLOCKD *xd, int allow_hp,
+                           int_mv *mvlist, int_mv *nearest_mv, int_mv *near_mv);
+
+void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd,
+                                   int block, int ref, int mi_row, int mi_col,
+                                   int_mv *nearest_mv, int_mv *near_mv,
+                                   uint8_t *mode_context);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP9_COMMON_VP9_MVREF_COMMON_H_
diff --git a/thirdparty/libvpx/vp9/common/vp9_onyxc_int.h b/thirdparty/libvpx/vp9/common/vp9_onyxc_int.h
new file mode 100644
index 0000000000..3fd935e628
--- /dev/null
+++ b/thirdparty/libvpx/vp9/common/vp9_onyxc_int.h
@@ -0,0 +1,446 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_COMMON_VP9_ONYXC_INT_H_
+#define VP9_COMMON_VP9_ONYXC_INT_H_
+
+#include "./vpx_config.h"
+#include "vpx/internal/vpx_codec_internal.h"
+#include "vpx_util/vpx_thread.h"
+#include "./vp9_rtcd.h"
+#include "vp9/common/vp9_alloccommon.h"
+#include "vp9/common/vp9_loopfilter.h"
+#include "vp9/common/vp9_entropymv.h"
+#include "vp9/common/vp9_entropy.h"
+#include "vp9/common/vp9_entropymode.h"
+#include "vp9/common/vp9_frame_buffers.h"
+#include "vp9/common/vp9_quant_common.h"
+#include "vp9/common/vp9_tile_common.h"
+
+#if CONFIG_VP9_POSTPROC
+#include "vp9/common/vp9_postproc.h"
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define REFS_PER_FRAME 3
+
+#define REF_FRAMES_LOG2 3
+#define REF_FRAMES (1 << REF_FRAMES_LOG2)
+
+// 4 scratch frames for the new frames to support a maximum of 4 cores decoding
+// in parallel, 3 for scaled references on the encoder.
+// TODO(hkuang): Add ondemand frame buffers instead of hardcoding the number
+// of framebuffers.
+// TODO(jkoleszar): These 3 extra references could probably come from the
+// normal reference pool.
+#define FRAME_BUFFERS (REF_FRAMES + 7)
+
+#define FRAME_CONTEXTS_LOG2 2
+#define FRAME_CONTEXTS (1 << FRAME_CONTEXTS_LOG2)
+
+#define NUM_PING_PONG_BUFFERS 2
+
+extern const struct {
+  PARTITION_CONTEXT above;
+  PARTITION_CONTEXT left;
+} partition_context_lookup[BLOCK_SIZES];
+
+
+typedef enum {
+  SINGLE_REFERENCE = 0,
+  COMPOUND_REFERENCE = 1,
+  REFERENCE_MODE_SELECT = 2,
+  REFERENCE_MODES = 3,
+} REFERENCE_MODE;
+
+typedef struct {
+  int_mv mv[2];
+  MV_REFERENCE_FRAME ref_frame[2];
+} MV_REF;
+
+typedef struct {
+  int ref_count;
+  MV_REF *mvs;
+  int mi_rows;
+  int mi_cols;
+  vpx_codec_frame_buffer_t raw_frame_buffer;
+  YV12_BUFFER_CONFIG buf;
+
+  // The following variables will only be used in frame parallel decode.
+
+  // frame_worker_owner indicates which FrameWorker owns this buffer. NULL means
+  // that no FrameWorker owns, or is decoding, this buffer.
+ VPxWorker *frame_worker_owner; + + // row and col indicate which position frame has been decoded to in real + // pixel unit. They are reset to -1 when decoding begins and set to INT_MAX + // when the frame is fully decoded. + int row; + int col; +} RefCntBuffer; + +typedef struct BufferPool { + // Protect BufferPool from being accessed by several FrameWorkers at + // the same time during frame parallel decode. + // TODO(hkuang): Try to use atomic variable instead of locking the whole pool. +#if CONFIG_MULTITHREAD + pthread_mutex_t pool_mutex; +#endif + + // Private data associated with the frame buffer callbacks. + void *cb_priv; + + vpx_get_frame_buffer_cb_fn_t get_fb_cb; + vpx_release_frame_buffer_cb_fn_t release_fb_cb; + + RefCntBuffer frame_bufs[FRAME_BUFFERS]; + + // Frame buffers allocated internally by the codec. + InternalFrameBufferList int_frame_buffers; +} BufferPool; + +typedef struct VP9Common { + struct vpx_internal_error_info error; + vpx_color_space_t color_space; + vpx_color_range_t color_range; + int width; + int height; + int render_width; + int render_height; + int last_width; + int last_height; + + // TODO(jkoleszar): this implies chroma ss right now, but could vary per + // plane. Revisit as part of the future change to YV12_BUFFER_CONFIG to + // support additional planes. + int subsampling_x; + int subsampling_y; + +#if CONFIG_VP9_HIGHBITDEPTH + int use_highbitdepth; // Marks if we need to use 16bit frame buffers. +#endif + + YV12_BUFFER_CONFIG *frame_to_show; + RefCntBuffer *prev_frame; + + // TODO(hkuang): Combine this with cur_buf in macroblockd. + RefCntBuffer *cur_frame; + + int ref_frame_map[REF_FRAMES]; /* maps fb_idx to reference slot */ + + // Prepare ref_frame_map for the next frame. + // Only used in frame parallel decode. + int next_ref_frame_map[REF_FRAMES]; + + // TODO(jkoleszar): could expand active_ref_idx to 4, with 0 as intra, and + // roll new_fb_idx into it. + + // Each frame can reference REFS_PER_FRAME buffers + RefBuffer frame_refs[REFS_PER_FRAME]; + + int new_fb_idx; + +#if CONFIG_VP9_POSTPROC + YV12_BUFFER_CONFIG post_proc_buffer; + YV12_BUFFER_CONFIG post_proc_buffer_int; +#endif + + FRAME_TYPE last_frame_type; /* last frame's frame type for motion search.*/ + FRAME_TYPE frame_type; + + int show_frame; + int last_show_frame; + int show_existing_frame; + + // Flag signaling that the frame is encoded using only INTRA modes. + uint8_t intra_only; + uint8_t last_intra_only; + + int allow_high_precision_mv; + + // Flag signaling that the frame context should be reset to default values. + // 0 or 1 implies don't reset, 2 reset just the context specified in the + // frame header, 3 reset all contexts. + int reset_frame_context; + + // MBs, mb_rows/cols is in 16-pixel units; mi_rows/cols is in + // MODE_INFO (8-pixel) units. + int MBs; + int mb_rows, mi_rows; + int mb_cols, mi_cols; + int mi_stride; + + /* profile settings */ + TX_MODE tx_mode; + + int base_qindex; + int y_dc_delta_q; + int uv_dc_delta_q; + int uv_ac_delta_q; + int16_t y_dequant[MAX_SEGMENTS][2]; + int16_t uv_dequant[MAX_SEGMENTS][2]; + + /* We allocate a MODE_INFO struct for each macroblock, together with + an extra row on top and column on the left to simplify prediction. */ + int mi_alloc_size; + MODE_INFO *mip; /* Base of allocated array */ + MODE_INFO *mi; /* Corresponds to upper left visible macroblock */ + + // TODO(agrange): Move prev_mi into encoder structure. + // prev_mip and prev_mi will only be allocated in VP9 encoder. 
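+  // (As with the mip/mi pair above, prev_mi points into prev_mip, offset to
+  // the upper left visible block.)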
+ MODE_INFO *prev_mip; /* MODE_INFO array 'mip' from last decoded frame */ + MODE_INFO *prev_mi; /* 'mi' from last frame (points into prev_mip) */ + + // Separate mi functions between encoder and decoder. + int (*alloc_mi)(struct VP9Common *cm, int mi_size); + void (*free_mi)(struct VP9Common *cm); + void (*setup_mi)(struct VP9Common *cm); + + // Grid of pointers to 8x8 MODE_INFO structs. Any 8x8 not in the visible + // area will be NULL. + MODE_INFO **mi_grid_base; + MODE_INFO **mi_grid_visible; + MODE_INFO **prev_mi_grid_base; + MODE_INFO **prev_mi_grid_visible; + + // Whether to use previous frame's motion vectors for prediction. + int use_prev_frame_mvs; + + // Persistent mb segment id map used in prediction. + int seg_map_idx; + int prev_seg_map_idx; + + uint8_t *seg_map_array[NUM_PING_PONG_BUFFERS]; + uint8_t *last_frame_seg_map; + uint8_t *current_frame_seg_map; + int seg_map_alloc_size; + + INTERP_FILTER interp_filter; + + loop_filter_info_n lf_info; + + int refresh_frame_context; /* Two state 0 = NO, 1 = YES */ + + int ref_frame_sign_bias[MAX_REF_FRAMES]; /* Two state 0, 1 */ + + struct loopfilter lf; + struct segmentation seg; + + // TODO(hkuang): Remove this as it is the same as frame_parallel_decode + // in pbi. + int frame_parallel_decode; // frame-based threading. + + // Context probabilities for reference frame prediction + MV_REFERENCE_FRAME comp_fixed_ref; + MV_REFERENCE_FRAME comp_var_ref[2]; + REFERENCE_MODE reference_mode; + + FRAME_CONTEXT *fc; /* this frame entropy */ + FRAME_CONTEXT *frame_contexts; // FRAME_CONTEXTS + unsigned int frame_context_idx; /* Context to use/update */ + FRAME_COUNTS counts; + + unsigned int current_video_frame; + BITSTREAM_PROFILE profile; + + // VPX_BITS_8 in profile 0 or 1, VPX_BITS_10 or VPX_BITS_12 in profile 2 or 3. + vpx_bit_depth_t bit_depth; + vpx_bit_depth_t dequant_bit_depth; // bit_depth of current dequantizer + +#if CONFIG_VP9_POSTPROC + struct postproc_state postproc_state; +#endif + + int error_resilient_mode; + int frame_parallel_decoding_mode; + + int log2_tile_cols, log2_tile_rows; + int byte_alignment; + int skip_loop_filter; + + // Private data associated with the frame buffer callbacks. + void *cb_priv; + vpx_get_frame_buffer_cb_fn_t get_fb_cb; + vpx_release_frame_buffer_cb_fn_t release_fb_cb; + + // Handles memory for the codec. + InternalFrameBufferList int_frame_buffers; + + // External BufferPool passed from outside. + BufferPool *buffer_pool; + + PARTITION_CONTEXT *above_seg_context; + ENTROPY_CONTEXT *above_context; + int above_context_alloc_cols; +} VP9_COMMON; + +// TODO(hkuang): Don't need to lock the whole pool after implementing atomic +// frame reference count. 
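+// These acquire and release the pool's mutex in CONFIG_MULTITHREAD builds;
+// get_free_fb() below shows the intended lock/unlock pattern around
+// ref_count updates.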
+void lock_buffer_pool(BufferPool *const pool); +void unlock_buffer_pool(BufferPool *const pool); + +static INLINE YV12_BUFFER_CONFIG *get_ref_frame(VP9_COMMON *cm, int index) { + if (index < 0 || index >= REF_FRAMES) + return NULL; + if (cm->ref_frame_map[index] < 0) + return NULL; + assert(cm->ref_frame_map[index] < FRAME_BUFFERS); + return &cm->buffer_pool->frame_bufs[cm->ref_frame_map[index]].buf; +} + +static INLINE YV12_BUFFER_CONFIG *get_frame_new_buffer(VP9_COMMON *cm) { + return &cm->buffer_pool->frame_bufs[cm->new_fb_idx].buf; +} + +static INLINE int get_free_fb(VP9_COMMON *cm) { + RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; + int i; + + lock_buffer_pool(cm->buffer_pool); + for (i = 0; i < FRAME_BUFFERS; ++i) + if (frame_bufs[i].ref_count == 0) + break; + + if (i != FRAME_BUFFERS) { + frame_bufs[i].ref_count = 1; + } else { + // Reset i to be INVALID_IDX to indicate no free buffer found. + i = INVALID_IDX; + } + + unlock_buffer_pool(cm->buffer_pool); + return i; +} + +static INLINE void ref_cnt_fb(RefCntBuffer *bufs, int *idx, int new_idx) { + const int ref_index = *idx; + + if (ref_index >= 0 && bufs[ref_index].ref_count > 0) + bufs[ref_index].ref_count--; + + *idx = new_idx; + + bufs[new_idx].ref_count++; +} + +static INLINE int mi_cols_aligned_to_sb(int n_mis) { + return ALIGN_POWER_OF_TWO(n_mis, MI_BLOCK_SIZE_LOG2); +} + +static INLINE int frame_is_intra_only(const VP9_COMMON *const cm) { + return cm->frame_type == KEY_FRAME || cm->intra_only; +} + +static INLINE void set_partition_probs(const VP9_COMMON *const cm, + MACROBLOCKD *const xd) { + xd->partition_probs = + frame_is_intra_only(cm) ? + &vp9_kf_partition_probs[0] : + (const vpx_prob (*)[PARTITION_TYPES - 1])cm->fc->partition_prob; +} + +static INLINE void vp9_init_macroblockd(VP9_COMMON *cm, MACROBLOCKD *xd, + tran_low_t *dqcoeff) { + int i; + + for (i = 0; i < MAX_MB_PLANE; ++i) { + xd->plane[i].dqcoeff = dqcoeff; + xd->above_context[i] = cm->above_context + + i * sizeof(*cm->above_context) * 2 * mi_cols_aligned_to_sb(cm->mi_cols); + + if (get_plane_type(i) == PLANE_TYPE_Y) { + memcpy(xd->plane[i].seg_dequant, cm->y_dequant, sizeof(cm->y_dequant)); + } else { + memcpy(xd->plane[i].seg_dequant, cm->uv_dequant, sizeof(cm->uv_dequant)); + } + xd->fc = cm->fc; + } + + xd->above_seg_context = cm->above_seg_context; + xd->mi_stride = cm->mi_stride; + xd->error_info = &cm->error; + + set_partition_probs(cm, xd); +} + +static INLINE const vpx_prob* get_partition_probs(const MACROBLOCKD *xd, + int ctx) { + return xd->partition_probs[ctx]; +} + +static INLINE void set_skip_context(MACROBLOCKD *xd, int mi_row, int mi_col) { + const int above_idx = mi_col * 2; + const int left_idx = (mi_row * 2) & 15; + int i; + for (i = 0; i < MAX_MB_PLANE; ++i) { + struct macroblockd_plane *const pd = &xd->plane[i]; + pd->above_context = &xd->above_context[i][above_idx >> pd->subsampling_x]; + pd->left_context = &xd->left_context[i][left_idx >> pd->subsampling_y]; + } +} + +static INLINE int calc_mi_size(int len) { + // len is in mi units. + return len + MI_BLOCK_SIZE; +} + +static INLINE void set_mi_row_col(MACROBLOCKD *xd, const TileInfo *const tile, + int mi_row, int bh, + int mi_col, int bw, + int mi_rows, int mi_cols) { + xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8); + xd->mb_to_bottom_edge = ((mi_rows - bh - mi_row) * MI_SIZE) * 8; + xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8); + xd->mb_to_right_edge = ((mi_cols - bw - mi_col) * MI_SIZE) * 8; + + // Are edges available for intra prediction? 
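+  // The row above exists whenever mi_row != 0; the column to the left only
+  // exists from the tile's first column onwards, which keeps tiles
+  // independently decodable.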
+  xd->above_mi = (mi_row != 0) ? xd->mi[-xd->mi_stride] : NULL;
+  xd->left_mi = (mi_col > tile->mi_col_start) ? xd->mi[-1] : NULL;
+}
+
+static INLINE void update_partition_context(MACROBLOCKD *xd,
+                                            int mi_row, int mi_col,
+                                            BLOCK_SIZE subsize,
+                                            BLOCK_SIZE bsize) {
+  PARTITION_CONTEXT *const above_ctx = xd->above_seg_context + mi_col;
+  PARTITION_CONTEXT *const left_ctx = xd->left_seg_context + (mi_row & MI_MASK);
+
+  // num_4x4_blocks_wide_lookup[bsize] / 2
+  const int bs = num_8x8_blocks_wide_lookup[bsize];
+
+  // Update the partition context at the end nodes: set the partition bits
+  // of block sizes larger than the current one to one, and the partition
+  // bits of smaller block sizes to zero.
+  memset(above_ctx, partition_context_lookup[subsize].above, bs);
+  memset(left_ctx, partition_context_lookup[subsize].left, bs);
+}
+
+static INLINE int partition_plane_context(const MACROBLOCKD *xd,
+                                          int mi_row, int mi_col,
+                                          BLOCK_SIZE bsize) {
+  const PARTITION_CONTEXT *above_ctx = xd->above_seg_context + mi_col;
+  const PARTITION_CONTEXT *left_ctx = xd->left_seg_context + (mi_row & MI_MASK);
+  const int bsl = mi_width_log2_lookup[bsize];
+  int above = (*above_ctx >> bsl) & 1, left = (*left_ctx >> bsl) & 1;
+
+  assert(b_width_log2_lookup[bsize] == b_height_log2_lookup[bsize]);
+  assert(bsl >= 0);
+
+  return (left * 2 + above) + bsl * PARTITION_PLOFFSET;
+}
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP9_COMMON_VP9_ONYXC_INT_H_
diff --git a/thirdparty/libvpx/vp9/common/vp9_ppflags.h b/thirdparty/libvpx/vp9/common/vp9_ppflags.h
new file mode 100644
index 0000000000..12b989f43a
--- /dev/null
+++ b/thirdparty/libvpx/vp9/common/vp9_ppflags.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_COMMON_VP9_PPFLAGS_H_
+#define VP9_COMMON_VP9_PPFLAGS_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+enum {
+  VP9D_NOFILTERING = 0,
+  VP9D_DEBLOCK = 1 << 0,
+  VP9D_DEMACROBLOCK = 1 << 1,
+  VP9D_ADDNOISE = 1 << 2,
+  VP9D_DEBUG_TXT_FRAME_INFO = 1 << 3,
+  VP9D_DEBUG_TXT_MBLK_MODES = 1 << 4,
+  VP9D_DEBUG_TXT_DC_DIFF = 1 << 5,
+  VP9D_DEBUG_TXT_RATE_INFO = 1 << 6,
+  VP9D_DEBUG_DRAW_MV = 1 << 7,
+  VP9D_DEBUG_CLR_BLK_MODES = 1 << 8,
+  VP9D_DEBUG_CLR_FRM_REF_BLKS = 1 << 9,
+  VP9D_MFQE = 1 << 10
+};
+
+typedef struct {
+  int post_proc_flag;
+  int deblocking_level;
+  int noise_level;
+} vp9_ppflags_t;
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP9_COMMON_VP9_PPFLAGS_H_
diff --git a/thirdparty/libvpx/vp9/common/vp9_pred_common.c b/thirdparty/libvpx/vp9/common/vp9_pred_common.c
new file mode 100644
index 0000000000..8f90e70e73
--- /dev/null
+++ b/thirdparty/libvpx/vp9/common/vp9_pred_common.c
@@ -0,0 +1,314 @@
+
+/*
+ * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "vp9/common/vp9_common.h" +#include "vp9/common/vp9_pred_common.h" +#include "vp9/common/vp9_seg_common.h" + +// Returns a context number for the given MB prediction signal +int vp9_get_pred_context_switchable_interp(const MACROBLOCKD *xd) { + // Note: + // The mode info data structure has a one element border above and to the + // left of the entries corresponding to real macroblocks. + // The prediction flags in these dummy entries are initialized to 0. + const MODE_INFO *const left_mi = xd->left_mi; + const int left_type = left_mi && is_inter_block(left_mi) ? + left_mi->interp_filter : SWITCHABLE_FILTERS; + const MODE_INFO *const above_mi = xd->above_mi; + const int above_type = above_mi && is_inter_block(above_mi) ? + above_mi->interp_filter : SWITCHABLE_FILTERS; + + if (left_type == above_type) + return left_type; + else if (left_type == SWITCHABLE_FILTERS) + return above_type; + else if (above_type == SWITCHABLE_FILTERS) + return left_type; + else + return SWITCHABLE_FILTERS; +} + +int vp9_get_reference_mode_context(const VP9_COMMON *cm, + const MACROBLOCKD *xd) { + int ctx; + const MODE_INFO *const above_mi = xd->above_mi; + const MODE_INFO *const left_mi = xd->left_mi; + const int has_above = !!above_mi; + const int has_left = !!left_mi; + // Note: + // The mode info data structure has a one element border above and to the + // left of the entries corresponding to real macroblocks. + // The prediction flags in these dummy entries are initialized to 0. + if (has_above && has_left) { // both edges available + if (!has_second_ref(above_mi) && !has_second_ref(left_mi)) + // neither edge uses comp pred (0/1) + ctx = (above_mi->ref_frame[0] == cm->comp_fixed_ref) ^ + (left_mi->ref_frame[0] == cm->comp_fixed_ref); + else if (!has_second_ref(above_mi)) + // one of two edges uses comp pred (2/3) + ctx = 2 + (above_mi->ref_frame[0] == cm->comp_fixed_ref || + !is_inter_block(above_mi)); + else if (!has_second_ref(left_mi)) + // one of two edges uses comp pred (2/3) + ctx = 2 + (left_mi->ref_frame[0] == cm->comp_fixed_ref || + !is_inter_block(left_mi)); + else // both edges use comp pred (4) + ctx = 4; + } else if (has_above || has_left) { // one edge available + const MODE_INFO *edge_mi = has_above ? above_mi : left_mi; + + if (!has_second_ref(edge_mi)) + // edge does not use comp pred (0/1) + ctx = edge_mi->ref_frame[0] == cm->comp_fixed_ref; + else + // edge uses comp pred (3) + ctx = 3; + } else { // no edges available (1) + ctx = 1; + } + assert(ctx >= 0 && ctx < COMP_INTER_CONTEXTS); + return ctx; +} + +// Returns a context number for the given MB prediction signal +int vp9_get_pred_context_comp_ref_p(const VP9_COMMON *cm, + const MACROBLOCKD *xd) { + int pred_context; + const MODE_INFO *const above_mi = xd->above_mi; + const MODE_INFO *const left_mi = xd->left_mi; + const int above_in_image = !!above_mi; + const int left_in_image = !!left_mi; + + // Note: + // The mode info data structure has a one element border above and to the + // left of the entries corresponding to real macroblocks. + // The prediction flags in these dummy entries are initialized to 0. 
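+  // The sign bias of the fixed reference gives its slot in ref_frame[], so
+  // var_ref_idx below selects the slot that holds the variable reference in
+  // compound blocks.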
+ const int fix_ref_idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref]; + const int var_ref_idx = !fix_ref_idx; + + if (above_in_image && left_in_image) { // both edges available + const int above_intra = !is_inter_block(above_mi); + const int left_intra = !is_inter_block(left_mi); + + if (above_intra && left_intra) { // intra/intra (2) + pred_context = 2; + } else if (above_intra || left_intra) { // intra/inter + const MODE_INFO *edge_mi = above_intra ? left_mi : above_mi; + + if (!has_second_ref(edge_mi)) // single pred (1/3) + pred_context = 1 + 2 * (edge_mi->ref_frame[0] != cm->comp_var_ref[1]); + else // comp pred (1/3) + pred_context = 1 + 2 * (edge_mi->ref_frame[var_ref_idx] + != cm->comp_var_ref[1]); + } else { // inter/inter + const int l_sg = !has_second_ref(left_mi); + const int a_sg = !has_second_ref(above_mi); + const MV_REFERENCE_FRAME vrfa = a_sg ? above_mi->ref_frame[0] + : above_mi->ref_frame[var_ref_idx]; + const MV_REFERENCE_FRAME vrfl = l_sg ? left_mi->ref_frame[0] + : left_mi->ref_frame[var_ref_idx]; + + if (vrfa == vrfl && cm->comp_var_ref[1] == vrfa) { + pred_context = 0; + } else if (l_sg && a_sg) { // single/single + if ((vrfa == cm->comp_fixed_ref && vrfl == cm->comp_var_ref[0]) || + (vrfl == cm->comp_fixed_ref && vrfa == cm->comp_var_ref[0])) + pred_context = 4; + else if (vrfa == vrfl) + pred_context = 3; + else + pred_context = 1; + } else if (l_sg || a_sg) { // single/comp + const MV_REFERENCE_FRAME vrfc = l_sg ? vrfa : vrfl; + const MV_REFERENCE_FRAME rfs = a_sg ? vrfa : vrfl; + if (vrfc == cm->comp_var_ref[1] && rfs != cm->comp_var_ref[1]) + pred_context = 1; + else if (rfs == cm->comp_var_ref[1] && vrfc != cm->comp_var_ref[1]) + pred_context = 2; + else + pred_context = 4; + } else if (vrfa == vrfl) { // comp/comp + pred_context = 4; + } else { + pred_context = 2; + } + } + } else if (above_in_image || left_in_image) { // one edge available + const MODE_INFO *edge_mi = above_in_image ? above_mi : left_mi; + + if (!is_inter_block(edge_mi)) { + pred_context = 2; + } else { + if (has_second_ref(edge_mi)) + pred_context = 4 * (edge_mi->ref_frame[var_ref_idx] + != cm->comp_var_ref[1]); + else + pred_context = 3 * (edge_mi->ref_frame[0] != cm->comp_var_ref[1]); + } + } else { // no edges available (2) + pred_context = 2; + } + assert(pred_context >= 0 && pred_context < REF_CONTEXTS); + + return pred_context; +} + +int vp9_get_pred_context_single_ref_p1(const MACROBLOCKD *xd) { + int pred_context; + const MODE_INFO *const above_mi = xd->above_mi; + const MODE_INFO *const left_mi = xd->left_mi; + const int has_above = !!above_mi; + const int has_left = !!left_mi; + // Note: + // The mode info data structure has a one element border above and to the + // left of the entries corresponding to real macroblocks. + // The prediction flags in these dummy entries are initialized to 0. + if (has_above && has_left) { // both edges available + const int above_intra = !is_inter_block(above_mi); + const int left_intra = !is_inter_block(left_mi); + + if (above_intra && left_intra) { // intra/intra + pred_context = 2; + } else if (above_intra || left_intra) { // intra/inter or inter/intra + const MODE_INFO *edge_mi = above_intra ? 
left_mi : above_mi; + if (!has_second_ref(edge_mi)) + pred_context = 4 * (edge_mi->ref_frame[0] == LAST_FRAME); + else + pred_context = 1 + (edge_mi->ref_frame[0] == LAST_FRAME || + edge_mi->ref_frame[1] == LAST_FRAME); + } else { // inter/inter + const int above_has_second = has_second_ref(above_mi); + const int left_has_second = has_second_ref(left_mi); + const MV_REFERENCE_FRAME above0 = above_mi->ref_frame[0]; + const MV_REFERENCE_FRAME above1 = above_mi->ref_frame[1]; + const MV_REFERENCE_FRAME left0 = left_mi->ref_frame[0]; + const MV_REFERENCE_FRAME left1 = left_mi->ref_frame[1]; + + if (above_has_second && left_has_second) { + pred_context = 1 + (above0 == LAST_FRAME || above1 == LAST_FRAME || + left0 == LAST_FRAME || left1 == LAST_FRAME); + } else if (above_has_second || left_has_second) { + const MV_REFERENCE_FRAME rfs = !above_has_second ? above0 : left0; + const MV_REFERENCE_FRAME crf1 = above_has_second ? above0 : left0; + const MV_REFERENCE_FRAME crf2 = above_has_second ? above1 : left1; + + if (rfs == LAST_FRAME) + pred_context = 3 + (crf1 == LAST_FRAME || crf2 == LAST_FRAME); + else + pred_context = (crf1 == LAST_FRAME || crf2 == LAST_FRAME); + } else { + pred_context = 2 * (above0 == LAST_FRAME) + 2 * (left0 == LAST_FRAME); + } + } + } else if (has_above || has_left) { // one edge available + const MODE_INFO *edge_mi = has_above ? above_mi : left_mi; + if (!is_inter_block(edge_mi)) { // intra + pred_context = 2; + } else { // inter + if (!has_second_ref(edge_mi)) + pred_context = 4 * (edge_mi->ref_frame[0] == LAST_FRAME); + else + pred_context = 1 + (edge_mi->ref_frame[0] == LAST_FRAME || + edge_mi->ref_frame[1] == LAST_FRAME); + } + } else { // no edges available + pred_context = 2; + } + + assert(pred_context >= 0 && pred_context < REF_CONTEXTS); + return pred_context; +} + +int vp9_get_pred_context_single_ref_p2(const MACROBLOCKD *xd) { + int pred_context; + const MODE_INFO *const above_mi = xd->above_mi; + const MODE_INFO *const left_mi = xd->left_mi; + const int has_above = !!above_mi; + const int has_left = !!left_mi; + + // Note: + // The mode info data structure has a one element border above and to the + // left of the entries corresponding to real macroblocks. + // The prediction flags in these dummy entries are initialized to 0. + if (has_above && has_left) { // both edges available + const int above_intra = !is_inter_block(above_mi); + const int left_intra = !is_inter_block(left_mi); + + if (above_intra && left_intra) { // intra/intra + pred_context = 2; + } else if (above_intra || left_intra) { // intra/inter or inter/intra + const MODE_INFO *edge_mi = above_intra ? 
left_mi : above_mi; + if (!has_second_ref(edge_mi)) { + if (edge_mi->ref_frame[0] == LAST_FRAME) + pred_context = 3; + else + pred_context = 4 * (edge_mi->ref_frame[0] == GOLDEN_FRAME); + } else { + pred_context = 1 + 2 * (edge_mi->ref_frame[0] == GOLDEN_FRAME || + edge_mi->ref_frame[1] == GOLDEN_FRAME); + } + } else { // inter/inter + const int above_has_second = has_second_ref(above_mi); + const int left_has_second = has_second_ref(left_mi); + const MV_REFERENCE_FRAME above0 = above_mi->ref_frame[0]; + const MV_REFERENCE_FRAME above1 = above_mi->ref_frame[1]; + const MV_REFERENCE_FRAME left0 = left_mi->ref_frame[0]; + const MV_REFERENCE_FRAME left1 = left_mi->ref_frame[1]; + + if (above_has_second && left_has_second) { + if (above0 == left0 && above1 == left1) + pred_context = 3 * (above0 == GOLDEN_FRAME || + above1 == GOLDEN_FRAME || + left0 == GOLDEN_FRAME || + left1 == GOLDEN_FRAME); + else + pred_context = 2; + } else if (above_has_second || left_has_second) { + const MV_REFERENCE_FRAME rfs = !above_has_second ? above0 : left0; + const MV_REFERENCE_FRAME crf1 = above_has_second ? above0 : left0; + const MV_REFERENCE_FRAME crf2 = above_has_second ? above1 : left1; + + if (rfs == GOLDEN_FRAME) + pred_context = 3 + (crf1 == GOLDEN_FRAME || crf2 == GOLDEN_FRAME); + else if (rfs == ALTREF_FRAME) + pred_context = crf1 == GOLDEN_FRAME || crf2 == GOLDEN_FRAME; + else + pred_context = 1 + 2 * (crf1 == GOLDEN_FRAME || crf2 == GOLDEN_FRAME); + } else { + if (above0 == LAST_FRAME && left0 == LAST_FRAME) { + pred_context = 3; + } else if (above0 == LAST_FRAME || left0 == LAST_FRAME) { + const MV_REFERENCE_FRAME edge0 = (above0 == LAST_FRAME) ? left0 + : above0; + pred_context = 4 * (edge0 == GOLDEN_FRAME); + } else { + pred_context = 2 * (above0 == GOLDEN_FRAME) + + 2 * (left0 == GOLDEN_FRAME); + } + } + } + } else if (has_above || has_left) { // one edge available + const MODE_INFO *edge_mi = has_above ? above_mi : left_mi; + + if (!is_inter_block(edge_mi) || + (edge_mi->ref_frame[0] == LAST_FRAME && !has_second_ref(edge_mi))) + pred_context = 2; + else if (!has_second_ref(edge_mi)) + pred_context = 4 * (edge_mi->ref_frame[0] == GOLDEN_FRAME); + else + pred_context = 3 * (edge_mi->ref_frame[0] == GOLDEN_FRAME || + edge_mi->ref_frame[1] == GOLDEN_FRAME); + } else { // no edges available (2) + pred_context = 2; + } + assert(pred_context >= 0 && pred_context < REF_CONTEXTS); + return pred_context; +} diff --git a/thirdparty/libvpx/vp9/common/vp9_pred_common.h b/thirdparty/libvpx/vp9/common/vp9_pred_common.h new file mode 100644 index 0000000000..f3c676e953 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_pred_common.h @@ -0,0 +1,192 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef VP9_COMMON_VP9_PRED_COMMON_H_ +#define VP9_COMMON_VP9_PRED_COMMON_H_ + +#include "vp9/common/vp9_blockd.h" +#include "vp9/common/vp9_onyxc_int.h" +#include "vpx_dsp/vpx_dsp_common.h" + +#ifdef __cplusplus +extern "C" { +#endif + +static INLINE int get_segment_id(const VP9_COMMON *cm, + const uint8_t *segment_ids, + BLOCK_SIZE bsize, int mi_row, int mi_col) { + const int mi_offset = mi_row * cm->mi_cols + mi_col; + const int bw = num_8x8_blocks_wide_lookup[bsize]; + const int bh = num_8x8_blocks_high_lookup[bsize]; + const int xmis = VPXMIN(cm->mi_cols - mi_col, bw); + const int ymis = VPXMIN(cm->mi_rows - mi_row, bh); + int x, y, segment_id = MAX_SEGMENTS; + + for (y = 0; y < ymis; ++y) + for (x = 0; x < xmis; ++x) + segment_id = + VPXMIN(segment_id, segment_ids[mi_offset + y * cm->mi_cols + x]); + + assert(segment_id >= 0 && segment_id < MAX_SEGMENTS); + return segment_id; +} + +static INLINE int vp9_get_pred_context_seg_id(const MACROBLOCKD *xd) { + const MODE_INFO *const above_mi = xd->above_mi; + const MODE_INFO *const left_mi = xd->left_mi; + const int above_sip = (above_mi != NULL) ? + above_mi->seg_id_predicted : 0; + const int left_sip = (left_mi != NULL) ? left_mi->seg_id_predicted : 0; + + return above_sip + left_sip; +} + +static INLINE vpx_prob vp9_get_pred_prob_seg_id(const struct segmentation *seg, + const MACROBLOCKD *xd) { + return seg->pred_probs[vp9_get_pred_context_seg_id(xd)]; +} + +static INLINE int vp9_get_skip_context(const MACROBLOCKD *xd) { + const MODE_INFO *const above_mi = xd->above_mi; + const MODE_INFO *const left_mi = xd->left_mi; + const int above_skip = (above_mi != NULL) ? above_mi->skip : 0; + const int left_skip = (left_mi != NULL) ? left_mi->skip : 0; + return above_skip + left_skip; +} + +static INLINE vpx_prob vp9_get_skip_prob(const VP9_COMMON *cm, + const MACROBLOCKD *xd) { + return cm->fc->skip_probs[vp9_get_skip_context(xd)]; +} + +int vp9_get_pred_context_switchable_interp(const MACROBLOCKD *xd); + +// The mode info data structure has a one element border above and to the +// left of the entries corresponding to real macroblocks. +// The prediction flags in these dummy entries are initialized to 0. +// 0 - inter/inter, inter/--, --/inter, --/-- +// 1 - intra/inter, inter/intra +// 2 - intra/--, --/intra +// 3 - intra/intra +static INLINE int get_intra_inter_context(const MACROBLOCKD *xd) { + const MODE_INFO *const above_mi = xd->above_mi; + const MODE_INFO *const left_mi = xd->left_mi; + const int has_above = !!above_mi; + const int has_left = !!left_mi; + + if (has_above && has_left) { // both edges available + const int above_intra = !is_inter_block(above_mi); + const int left_intra = !is_inter_block(left_mi); + return left_intra && above_intra ? 3 : left_intra || above_intra; + } else if (has_above || has_left) { // one edge available + return 2 * !is_inter_block(has_above ? 
above_mi : left_mi); + } + return 0; +} + +static INLINE vpx_prob vp9_get_intra_inter_prob(const VP9_COMMON *cm, + const MACROBLOCKD *xd) { + return cm->fc->intra_inter_prob[get_intra_inter_context(xd)]; +} + +int vp9_get_reference_mode_context(const VP9_COMMON *cm, const MACROBLOCKD *xd); + +static INLINE vpx_prob vp9_get_reference_mode_prob(const VP9_COMMON *cm, + const MACROBLOCKD *xd) { + return cm->fc->comp_inter_prob[vp9_get_reference_mode_context(cm, xd)]; +} + +int vp9_get_pred_context_comp_ref_p(const VP9_COMMON *cm, + const MACROBLOCKD *xd); + +static INLINE vpx_prob vp9_get_pred_prob_comp_ref_p(const VP9_COMMON *cm, + const MACROBLOCKD *xd) { + const int pred_context = vp9_get_pred_context_comp_ref_p(cm, xd); + return cm->fc->comp_ref_prob[pred_context]; +} + +int vp9_get_pred_context_single_ref_p1(const MACROBLOCKD *xd); + +static INLINE vpx_prob vp9_get_pred_prob_single_ref_p1(const VP9_COMMON *cm, + const MACROBLOCKD *xd) { + return cm->fc->single_ref_prob[vp9_get_pred_context_single_ref_p1(xd)][0]; +} + +int vp9_get_pred_context_single_ref_p2(const MACROBLOCKD *xd); + +static INLINE vpx_prob vp9_get_pred_prob_single_ref_p2(const VP9_COMMON *cm, + const MACROBLOCKD *xd) { + return cm->fc->single_ref_prob[vp9_get_pred_context_single_ref_p2(xd)][1]; +} + +// Returns a context number for the given MB prediction signal +// The mode info data structure has a one element border above and to the +// left of the entries corresponding to real blocks. +// The prediction flags in these dummy entries are initialized to 0. +static INLINE int get_tx_size_context(const MACROBLOCKD *xd) { + const int max_tx_size = max_txsize_lookup[xd->mi[0]->sb_type]; + const MODE_INFO *const above_mi = xd->above_mi; + const MODE_INFO *const left_mi = xd->left_mi; + const int has_above = !!above_mi; + const int has_left = !!left_mi; + int above_ctx = (has_above && !above_mi->skip) ? (int)above_mi->tx_size + : max_tx_size; + int left_ctx = (has_left && !left_mi->skip) ? (int)left_mi->tx_size + : max_tx_size; + if (!has_left) + left_ctx = above_ctx; + + if (!has_above) + above_ctx = left_ctx; + + return (above_ctx + left_ctx) > max_tx_size; +} + +static INLINE const vpx_prob *get_tx_probs(TX_SIZE max_tx_size, int ctx, + const struct tx_probs *tx_probs) { + switch (max_tx_size) { + case TX_8X8: + return tx_probs->p8x8[ctx]; + case TX_16X16: + return tx_probs->p16x16[ctx]; + case TX_32X32: + return tx_probs->p32x32[ctx]; + default: + assert(0 && "Invalid max_tx_size."); + return NULL; + } +} + +static INLINE const vpx_prob *get_tx_probs2(TX_SIZE max_tx_size, + const MACROBLOCKD *xd, + const struct tx_probs *tx_probs) { + return get_tx_probs(max_tx_size, get_tx_size_context(xd), tx_probs); +} + +static INLINE unsigned int *get_tx_counts(TX_SIZE max_tx_size, int ctx, + struct tx_counts *tx_counts) { + switch (max_tx_size) { + case TX_8X8: + return tx_counts->p8x8[ctx]; + case TX_16X16: + return tx_counts->p16x16[ctx]; + case TX_32X32: + return tx_counts->p32x32[ctx]; + default: + assert(0 && "Invalid max_tx_size."); + return NULL; + } +} + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_COMMON_VP9_PRED_COMMON_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_quant_common.c b/thirdparty/libvpx/vp9/common/vp9_quant_common.c new file mode 100644 index 0000000000..d83f3c1a2f --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_quant_common.c @@ -0,0 +1,278 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "vp9/common/vp9_common.h" +#include "vp9/common/vp9_quant_common.h" +#include "vp9/common/vp9_seg_common.h" + +static const int16_t dc_qlookup[QINDEX_RANGE] = { + 4, 8, 8, 9, 10, 11, 12, 12, + 13, 14, 15, 16, 17, 18, 19, 19, + 20, 21, 22, 23, 24, 25, 26, 26, + 27, 28, 29, 30, 31, 32, 32, 33, + 34, 35, 36, 37, 38, 38, 39, 40, + 41, 42, 43, 43, 44, 45, 46, 47, + 48, 48, 49, 50, 51, 52, 53, 53, + 54, 55, 56, 57, 57, 58, 59, 60, + 61, 62, 62, 63, 64, 65, 66, 66, + 67, 68, 69, 70, 70, 71, 72, 73, + 74, 74, 75, 76, 77, 78, 78, 79, + 80, 81, 81, 82, 83, 84, 85, 85, + 87, 88, 90, 92, 93, 95, 96, 98, + 99, 101, 102, 104, 105, 107, 108, 110, + 111, 113, 114, 116, 117, 118, 120, 121, + 123, 125, 127, 129, 131, 134, 136, 138, + 140, 142, 144, 146, 148, 150, 152, 154, + 156, 158, 161, 164, 166, 169, 172, 174, + 177, 180, 182, 185, 187, 190, 192, 195, + 199, 202, 205, 208, 211, 214, 217, 220, + 223, 226, 230, 233, 237, 240, 243, 247, + 250, 253, 257, 261, 265, 269, 272, 276, + 280, 284, 288, 292, 296, 300, 304, 309, + 313, 317, 322, 326, 330, 335, 340, 344, + 349, 354, 359, 364, 369, 374, 379, 384, + 389, 395, 400, 406, 411, 417, 423, 429, + 435, 441, 447, 454, 461, 467, 475, 482, + 489, 497, 505, 513, 522, 530, 539, 549, + 559, 569, 579, 590, 602, 614, 626, 640, + 654, 668, 684, 700, 717, 736, 755, 775, + 796, 819, 843, 869, 896, 925, 955, 988, + 1022, 1058, 1098, 1139, 1184, 1232, 1282, 1336, +}; + +#if CONFIG_VP9_HIGHBITDEPTH +static const int16_t dc_qlookup_10[QINDEX_RANGE] = { + 4, 9, 10, 13, 15, 17, 20, 22, + 25, 28, 31, 34, 37, 40, 43, 47, + 50, 53, 57, 60, 64, 68, 71, 75, + 78, 82, 86, 90, 93, 97, 101, 105, + 109, 113, 116, 120, 124, 128, 132, 136, + 140, 143, 147, 151, 155, 159, 163, 166, + 170, 174, 178, 182, 185, 189, 193, 197, + 200, 204, 208, 212, 215, 219, 223, 226, + 230, 233, 237, 241, 244, 248, 251, 255, + 259, 262, 266, 269, 273, 276, 280, 283, + 287, 290, 293, 297, 300, 304, 307, 310, + 314, 317, 321, 324, 327, 331, 334, 337, + 343, 350, 356, 362, 369, 375, 381, 387, + 394, 400, 406, 412, 418, 424, 430, 436, + 442, 448, 454, 460, 466, 472, 478, 484, + 490, 499, 507, 516, 525, 533, 542, 550, + 559, 567, 576, 584, 592, 601, 609, 617, + 625, 634, 644, 655, 666, 676, 687, 698, + 708, 718, 729, 739, 749, 759, 770, 782, + 795, 807, 819, 831, 844, 856, 868, 880, + 891, 906, 920, 933, 947, 961, 975, 988, + 1001, 1015, 1030, 1045, 1061, 1076, 1090, 1105, + 1120, 1137, 1153, 1170, 1186, 1202, 1218, 1236, + 1253, 1271, 1288, 1306, 1323, 1342, 1361, 1379, + 1398, 1416, 1436, 1456, 1476, 1496, 1516, 1537, + 1559, 1580, 1601, 1624, 1647, 1670, 1692, 1717, + 1741, 1766, 1791, 1817, 1844, 1871, 1900, 1929, + 1958, 1990, 2021, 2054, 2088, 2123, 2159, 2197, + 2236, 2276, 2319, 2363, 2410, 2458, 2508, 2561, + 2616, 2675, 2737, 2802, 2871, 2944, 3020, 3102, + 3188, 3280, 3375, 3478, 3586, 3702, 3823, 3953, + 4089, 4236, 4394, 4559, 4737, 4929, 5130, 5347, +}; + +static const int16_t dc_qlookup_12[QINDEX_RANGE] = { + 4, 12, 18, 25, 33, 41, 50, 60, + 70, 80, 91, 103, 115, 127, 140, 153, + 166, 180, 194, 208, 222, 237, 251, 266, + 281, 296, 312, 327, 343, 358, 374, 390, + 405, 421, 437, 453, 469, 484, 500, 516, + 532, 548, 564, 580, 596, 611, 627, 643, + 659, 
674, 690, 706, 721, 737, 752, 768, + 783, 798, 814, 829, 844, 859, 874, 889, + 904, 919, 934, 949, 964, 978, 993, 1008, + 1022, 1037, 1051, 1065, 1080, 1094, 1108, 1122, + 1136, 1151, 1165, 1179, 1192, 1206, 1220, 1234, + 1248, 1261, 1275, 1288, 1302, 1315, 1329, 1342, + 1368, 1393, 1419, 1444, 1469, 1494, 1519, 1544, + 1569, 1594, 1618, 1643, 1668, 1692, 1717, 1741, + 1765, 1789, 1814, 1838, 1862, 1885, 1909, 1933, + 1957, 1992, 2027, 2061, 2096, 2130, 2165, 2199, + 2233, 2267, 2300, 2334, 2367, 2400, 2434, 2467, + 2499, 2532, 2575, 2618, 2661, 2704, 2746, 2788, + 2830, 2872, 2913, 2954, 2995, 3036, 3076, 3127, + 3177, 3226, 3275, 3324, 3373, 3421, 3469, 3517, + 3565, 3621, 3677, 3733, 3788, 3843, 3897, 3951, + 4005, 4058, 4119, 4181, 4241, 4301, 4361, 4420, + 4479, 4546, 4612, 4677, 4742, 4807, 4871, 4942, + 5013, 5083, 5153, 5222, 5291, 5367, 5442, 5517, + 5591, 5665, 5745, 5825, 5905, 5984, 6063, 6149, + 6234, 6319, 6404, 6495, 6587, 6678, 6769, 6867, + 6966, 7064, 7163, 7269, 7376, 7483, 7599, 7715, + 7832, 7958, 8085, 8214, 8352, 8492, 8635, 8788, + 8945, 9104, 9275, 9450, 9639, 9832, 10031, 10245, + 10465, 10702, 10946, 11210, 11482, 11776, 12081, 12409, + 12750, 13118, 13501, 13913, 14343, 14807, 15290, 15812, + 16356, 16943, 17575, 18237, 18949, 19718, 20521, 21387, +}; +#endif + +static const int16_t ac_qlookup[QINDEX_RANGE] = { + 4, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, + 23, 24, 25, 26, 27, 28, 29, 30, + 31, 32, 33, 34, 35, 36, 37, 38, + 39, 40, 41, 42, 43, 44, 45, 46, + 47, 48, 49, 50, 51, 52, 53, 54, + 55, 56, 57, 58, 59, 60, 61, 62, + 63, 64, 65, 66, 67, 68, 69, 70, + 71, 72, 73, 74, 75, 76, 77, 78, + 79, 80, 81, 82, 83, 84, 85, 86, + 87, 88, 89, 90, 91, 92, 93, 94, + 95, 96, 97, 98, 99, 100, 101, 102, + 104, 106, 108, 110, 112, 114, 116, 118, + 120, 122, 124, 126, 128, 130, 132, 134, + 136, 138, 140, 142, 144, 146, 148, 150, + 152, 155, 158, 161, 164, 167, 170, 173, + 176, 179, 182, 185, 188, 191, 194, 197, + 200, 203, 207, 211, 215, 219, 223, 227, + 231, 235, 239, 243, 247, 251, 255, 260, + 265, 270, 275, 280, 285, 290, 295, 300, + 305, 311, 317, 323, 329, 335, 341, 347, + 353, 359, 366, 373, 380, 387, 394, 401, + 408, 416, 424, 432, 440, 448, 456, 465, + 474, 483, 492, 501, 510, 520, 530, 540, + 550, 560, 571, 582, 593, 604, 615, 627, + 639, 651, 663, 676, 689, 702, 715, 729, + 743, 757, 771, 786, 801, 816, 832, 848, + 864, 881, 898, 915, 933, 951, 969, 988, + 1007, 1026, 1046, 1066, 1087, 1108, 1129, 1151, + 1173, 1196, 1219, 1243, 1267, 1292, 1317, 1343, + 1369, 1396, 1423, 1451, 1479, 1508, 1537, 1567, + 1597, 1628, 1660, 1692, 1725, 1759, 1793, 1828, +}; + +#if CONFIG_VP9_HIGHBITDEPTH +static const int16_t ac_qlookup_10[QINDEX_RANGE] = { + 4, 9, 11, 13, 16, 18, 21, 24, + 27, 30, 33, 37, 40, 44, 48, 51, + 55, 59, 63, 67, 71, 75, 79, 83, + 88, 92, 96, 100, 105, 109, 114, 118, + 122, 127, 131, 136, 140, 145, 149, 154, + 158, 163, 168, 172, 177, 181, 186, 190, + 195, 199, 204, 208, 213, 217, 222, 226, + 231, 235, 240, 244, 249, 253, 258, 262, + 267, 271, 275, 280, 284, 289, 293, 297, + 302, 306, 311, 315, 319, 324, 328, 332, + 337, 341, 345, 349, 354, 358, 362, 367, + 371, 375, 379, 384, 388, 392, 396, 401, + 409, 417, 425, 433, 441, 449, 458, 466, + 474, 482, 490, 498, 506, 514, 523, 531, + 539, 547, 555, 563, 571, 579, 588, 596, + 604, 616, 628, 640, 652, 664, 676, 688, + 700, 713, 725, 737, 749, 761, 773, 785, + 797, 809, 825, 841, 857, 873, 889, 905, + 922, 938, 954, 970, 986, 1002, 1018, 1038, + 1058, 1078, 1098, 1118, 1138, 1158, 1178, 1198, + 
1218, 1242, 1266, 1290, 1314, 1338, 1362, 1386, + 1411, 1435, 1463, 1491, 1519, 1547, 1575, 1603, + 1631, 1663, 1695, 1727, 1759, 1791, 1823, 1859, + 1895, 1931, 1967, 2003, 2039, 2079, 2119, 2159, + 2199, 2239, 2283, 2327, 2371, 2415, 2459, 2507, + 2555, 2603, 2651, 2703, 2755, 2807, 2859, 2915, + 2971, 3027, 3083, 3143, 3203, 3263, 3327, 3391, + 3455, 3523, 3591, 3659, 3731, 3803, 3876, 3952, + 4028, 4104, 4184, 4264, 4348, 4432, 4516, 4604, + 4692, 4784, 4876, 4972, 5068, 5168, 5268, 5372, + 5476, 5584, 5692, 5804, 5916, 6032, 6148, 6268, + 6388, 6512, 6640, 6768, 6900, 7036, 7172, 7312, +}; + +static const int16_t ac_qlookup_12[QINDEX_RANGE] = { + 4, 13, 19, 27, 35, 44, 54, 64, + 75, 87, 99, 112, 126, 139, 154, 168, + 183, 199, 214, 230, 247, 263, 280, 297, + 314, 331, 349, 366, 384, 402, 420, 438, + 456, 475, 493, 511, 530, 548, 567, 586, + 604, 623, 642, 660, 679, 698, 716, 735, + 753, 772, 791, 809, 828, 846, 865, 884, + 902, 920, 939, 957, 976, 994, 1012, 1030, + 1049, 1067, 1085, 1103, 1121, 1139, 1157, 1175, + 1193, 1211, 1229, 1246, 1264, 1282, 1299, 1317, + 1335, 1352, 1370, 1387, 1405, 1422, 1440, 1457, + 1474, 1491, 1509, 1526, 1543, 1560, 1577, 1595, + 1627, 1660, 1693, 1725, 1758, 1791, 1824, 1856, + 1889, 1922, 1954, 1987, 2020, 2052, 2085, 2118, + 2150, 2183, 2216, 2248, 2281, 2313, 2346, 2378, + 2411, 2459, 2508, 2556, 2605, 2653, 2701, 2750, + 2798, 2847, 2895, 2943, 2992, 3040, 3088, 3137, + 3185, 3234, 3298, 3362, 3426, 3491, 3555, 3619, + 3684, 3748, 3812, 3876, 3941, 4005, 4069, 4149, + 4230, 4310, 4390, 4470, 4550, 4631, 4711, 4791, + 4871, 4967, 5064, 5160, 5256, 5352, 5448, 5544, + 5641, 5737, 5849, 5961, 6073, 6185, 6297, 6410, + 6522, 6650, 6778, 6906, 7034, 7162, 7290, 7435, + 7579, 7723, 7867, 8011, 8155, 8315, 8475, 8635, + 8795, 8956, 9132, 9308, 9484, 9660, 9836, 10028, + 10220, 10412, 10604, 10812, 11020, 11228, 11437, 11661, + 11885, 12109, 12333, 12573, 12813, 13053, 13309, 13565, + 13821, 14093, 14365, 14637, 14925, 15213, 15502, 15806, + 16110, 16414, 16734, 17054, 17390, 17726, 18062, 18414, + 18766, 19134, 19502, 19886, 20270, 20670, 21070, 21486, + 21902, 22334, 22766, 23214, 23662, 24126, 24590, 25070, + 25551, 26047, 26559, 27071, 27599, 28143, 28687, 29247, +}; +#endif + +int16_t vp9_dc_quant(int qindex, int delta, vpx_bit_depth_t bit_depth) { +#if CONFIG_VP9_HIGHBITDEPTH + switch (bit_depth) { + case VPX_BITS_8: + return dc_qlookup[clamp(qindex + delta, 0, MAXQ)]; + case VPX_BITS_10: + return dc_qlookup_10[clamp(qindex + delta, 0, MAXQ)]; + case VPX_BITS_12: + return dc_qlookup_12[clamp(qindex + delta, 0, MAXQ)]; + default: + assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12"); + return -1; + } +#else + (void) bit_depth; + return dc_qlookup[clamp(qindex + delta, 0, MAXQ)]; +#endif +} + +int16_t vp9_ac_quant(int qindex, int delta, vpx_bit_depth_t bit_depth) { +#if CONFIG_VP9_HIGHBITDEPTH + switch (bit_depth) { + case VPX_BITS_8: + return ac_qlookup[clamp(qindex + delta, 0, MAXQ)]; + case VPX_BITS_10: + return ac_qlookup_10[clamp(qindex + delta, 0, MAXQ)]; + case VPX_BITS_12: + return ac_qlookup_12[clamp(qindex + delta, 0, MAXQ)]; + default: + assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12"); + return -1; + } +#else + (void) bit_depth; + return ac_qlookup[clamp(qindex + delta, 0, MAXQ)]; +#endif +} + +int vp9_get_qindex(const struct segmentation *seg, int segment_id, + int base_qindex) { + if (segfeature_active(seg, segment_id, SEG_LVL_ALT_Q)) { + const int data = get_segdata(seg, segment_id, 
SEG_LVL_ALT_Q); + const int seg_qindex = seg->abs_delta == SEGMENT_ABSDATA ? + data : base_qindex + data; + return clamp(seg_qindex, 0, MAXQ); + } else { + return base_qindex; + } +} + diff --git a/thirdparty/libvpx/vp9/common/vp9_quant_common.h b/thirdparty/libvpx/vp9/common/vp9_quant_common.h new file mode 100644 index 0000000000..4bae4a8967 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_quant_common.h @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_COMMON_VP9_QUANT_COMMON_H_ +#define VP9_COMMON_VP9_QUANT_COMMON_H_ + +#include "vpx/vpx_codec.h" +#include "vp9/common/vp9_seg_common.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define MINQ 0 +#define MAXQ 255 +#define QINDEX_RANGE (MAXQ - MINQ + 1) +#define QINDEX_BITS 8 + +int16_t vp9_dc_quant(int qindex, int delta, vpx_bit_depth_t bit_depth); +int16_t vp9_ac_quant(int qindex, int delta, vpx_bit_depth_t bit_depth); + +int vp9_get_qindex(const struct segmentation *seg, int segment_id, + int base_qindex); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_COMMON_VP9_QUANT_COMMON_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_reconinter.c b/thirdparty/libvpx/vp9/common/vp9_reconinter.c new file mode 100644 index 0000000000..84718e9703 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_reconinter.c @@ -0,0 +1,309 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <assert.h> + +#include "./vpx_scale_rtcd.h" +#include "./vpx_config.h" + +#include "vpx/vpx_integer.h" + +#include "vp9/common/vp9_blockd.h" +#include "vp9/common/vp9_reconinter.h" +#include "vp9/common/vp9_reconintra.h" + +#if CONFIG_VP9_HIGHBITDEPTH +void vp9_highbd_build_inter_predictor(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const MV *src_mv, + const struct scale_factors *sf, + int w, int h, int ref, + const InterpKernel *kernel, + enum mv_precision precision, + int x, int y, int bd) { + const int is_q4 = precision == MV_PRECISION_Q4; + const MV mv_q4 = { is_q4 ? src_mv->row : src_mv->row * 2, + is_q4 ? src_mv->col : src_mv->col * 2 }; + MV32 mv = vp9_scale_mv(&mv_q4, x, y, sf); + const int subpel_x = mv.col & SUBPEL_MASK; + const int subpel_y = mv.row & SUBPEL_MASK; + + src += (mv.row >> SUBPEL_BITS) * src_stride + (mv.col >> SUBPEL_BITS); + + highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y, + sf, w, h, ref, kernel, sf->x_step_q4, sf->y_step_q4, + bd); +} +#endif // CONFIG_VP9_HIGHBITDEPTH + +void vp9_build_inter_predictor(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const MV *src_mv, + const struct scale_factors *sf, + int w, int h, int ref, + const InterpKernel *kernel, + enum mv_precision precision, + int x, int y) { + const int is_q4 = precision == MV_PRECISION_Q4; + const MV mv_q4 = { is_q4 ? 
src_mv->row : src_mv->row * 2, + is_q4 ? src_mv->col : src_mv->col * 2 }; + MV32 mv = vp9_scale_mv(&mv_q4, x, y, sf); + const int subpel_x = mv.col & SUBPEL_MASK; + const int subpel_y = mv.row & SUBPEL_MASK; + + src += (mv.row >> SUBPEL_BITS) * src_stride + (mv.col >> SUBPEL_BITS); + + inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y, + sf, w, h, ref, kernel, sf->x_step_q4, sf->y_step_q4); +} + +static INLINE int round_mv_comp_q4(int value) { + return (value < 0 ? value - 2 : value + 2) / 4; +} + +static MV mi_mv_pred_q4(const MODE_INFO *mi, int idx) { + MV res = { round_mv_comp_q4(mi->bmi[0].as_mv[idx].as_mv.row + + mi->bmi[1].as_mv[idx].as_mv.row + + mi->bmi[2].as_mv[idx].as_mv.row + + mi->bmi[3].as_mv[idx].as_mv.row), + round_mv_comp_q4(mi->bmi[0].as_mv[idx].as_mv.col + + mi->bmi[1].as_mv[idx].as_mv.col + + mi->bmi[2].as_mv[idx].as_mv.col + + mi->bmi[3].as_mv[idx].as_mv.col) }; + return res; +} + +static INLINE int round_mv_comp_q2(int value) { + return (value < 0 ? value - 1 : value + 1) / 2; +} + +static MV mi_mv_pred_q2(const MODE_INFO *mi, int idx, int block0, int block1) { + MV res = { round_mv_comp_q2(mi->bmi[block0].as_mv[idx].as_mv.row + + mi->bmi[block1].as_mv[idx].as_mv.row), + round_mv_comp_q2(mi->bmi[block0].as_mv[idx].as_mv.col + + mi->bmi[block1].as_mv[idx].as_mv.col) }; + return res; +} + +// TODO(jkoleszar): yet another mv clamping function :-( +MV clamp_mv_to_umv_border_sb(const MACROBLOCKD *xd, const MV *src_mv, + int bw, int bh, int ss_x, int ss_y) { + // If the MV points so far into the UMV border that no visible pixels + // are used for reconstruction, the subpel part of the MV can be + // discarded and the MV limited to 16 pixels with equivalent results. + const int spel_left = (VP9_INTERP_EXTEND + bw) << SUBPEL_BITS; + const int spel_right = spel_left - SUBPEL_SHIFTS; + const int spel_top = (VP9_INTERP_EXTEND + bh) << SUBPEL_BITS; + const int spel_bottom = spel_top - SUBPEL_SHIFTS; + MV clamped_mv = { + src_mv->row * (1 << (1 - ss_y)), + src_mv->col * (1 << (1 - ss_x)) + }; + assert(ss_x <= 1); + assert(ss_y <= 1); + + clamp_mv(&clamped_mv, + xd->mb_to_left_edge * (1 << (1 - ss_x)) - spel_left, + xd->mb_to_right_edge * (1 << (1 - ss_x)) + spel_right, + xd->mb_to_top_edge * (1 << (1 - ss_y)) - spel_top, + xd->mb_to_bottom_edge * (1 << (1 - ss_y)) + spel_bottom); + + return clamped_mv; +} + +MV average_split_mvs(const struct macroblockd_plane *pd, + const MODE_INFO *mi, int ref, int block) { + const int ss_idx = ((pd->subsampling_x > 0) << 1) | (pd->subsampling_y > 0); + MV res = {0, 0}; + switch (ss_idx) { + case 0: + res = mi->bmi[block].as_mv[ref].as_mv; + break; + case 1: + res = mi_mv_pred_q2(mi, ref, block, block + 2); + break; + case 2: + res = mi_mv_pred_q2(mi, ref, block, block + 1); + break; + case 3: + res = mi_mv_pred_q4(mi, ref); + break; + default: + assert(ss_idx <= 3 && ss_idx >= 0); + } + return res; +} + +static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, + int bw, int bh, + int x, int y, int w, int h, + int mi_x, int mi_y) { + struct macroblockd_plane *const pd = &xd->plane[plane]; + const MODE_INFO *mi = xd->mi[0]; + const int is_compound = has_second_ref(mi); + const InterpKernel *kernel = vp9_filter_kernels[mi->interp_filter]; + int ref; + + for (ref = 0; ref < 1 + is_compound; ++ref) { + const struct scale_factors *const sf = &xd->block_refs[ref]->sf; + struct buf_2d *const pre_buf = &pd->pre[ref]; + struct buf_2d *const dst_buf = &pd->dst; + uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + 
x; + const MV mv = mi->sb_type < BLOCK_8X8 + ? average_split_mvs(pd, mi, ref, block) + : mi->mv[ref].as_mv; + + // TODO(jkoleszar): This clamping is done in the incorrect place for the + // scaling case. It needs to be done on the scaled MV, not the pre-scaling + // MV. Note however that it performs the subsampling aware scaling so + // that the result is always q4. + // mv_precision precision is MV_PRECISION_Q4. + const MV mv_q4 = clamp_mv_to_umv_border_sb(xd, &mv, bw, bh, + pd->subsampling_x, + pd->subsampling_y); + + uint8_t *pre; + MV32 scaled_mv; + int xs, ys, subpel_x, subpel_y; + const int is_scaled = vp9_is_scaled(sf); + + if (is_scaled) { + // Co-ordinate of containing block to pixel precision. + const int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)); + const int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)); +#if CONFIG_BETTER_HW_COMPATIBILITY + assert(xd->mi[0]->sb_type != BLOCK_4X8 && + xd->mi[0]->sb_type != BLOCK_8X4); + assert(mv_q4.row == mv.row * (1 << (1 - pd->subsampling_y)) && + mv_q4.col == mv.col * (1 << (1 - pd->subsampling_x))); +#endif + if (plane == 0) + pre_buf->buf = xd->block_refs[ref]->buf->y_buffer; + else if (plane == 1) + pre_buf->buf = xd->block_refs[ref]->buf->u_buffer; + else + pre_buf->buf = xd->block_refs[ref]->buf->v_buffer; + + pre_buf->buf += scaled_buffer_offset(x_start + x, y_start + y, + pre_buf->stride, sf); + pre = pre_buf->buf; + scaled_mv = vp9_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf); + xs = sf->x_step_q4; + ys = sf->y_step_q4; + } else { + pre = pre_buf->buf + (y * pre_buf->stride + x); + scaled_mv.row = mv_q4.row; + scaled_mv.col = mv_q4.col; + xs = ys = 16; + } + subpel_x = scaled_mv.col & SUBPEL_MASK; + subpel_y = scaled_mv.row & SUBPEL_MASK; + pre += (scaled_mv.row >> SUBPEL_BITS) * pre_buf->stride + + (scaled_mv.col >> SUBPEL_BITS); + +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + highbd_inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, + subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys, + xd->bd); + } else { + inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, + subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys); + } +#else + inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, + subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys); +#endif // CONFIG_VP9_HIGHBITDEPTH + } +} + +static void build_inter_predictors_for_planes(MACROBLOCKD *xd, BLOCK_SIZE bsize, + int mi_row, int mi_col, + int plane_from, int plane_to) { + int plane; + const int mi_x = mi_col * MI_SIZE; + const int mi_y = mi_row * MI_SIZE; + for (plane = plane_from; plane <= plane_to; ++plane) { + const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, + &xd->plane[plane]); + const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; + const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; + const int bw = 4 * num_4x4_w; + const int bh = 4 * num_4x4_h; + + if (xd->mi[0]->sb_type < BLOCK_8X8) { + int i = 0, x, y; + assert(bsize == BLOCK_8X8); + for (y = 0; y < num_4x4_h; ++y) + for (x = 0; x < num_4x4_w; ++x) + build_inter_predictors(xd, plane, i++, bw, bh, + 4 * x, 4 * y, 4, 4, mi_x, mi_y); + } else { + build_inter_predictors(xd, plane, 0, bw, bh, + 0, 0, bw, bh, mi_x, mi_y); + } + } +} + +void vp9_build_inter_predictors_sby(MACROBLOCKD *xd, int mi_row, int mi_col, + BLOCK_SIZE bsize) { + build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, 0, 0); +} + +void vp9_build_inter_predictors_sbp(MACROBLOCKD *xd, int mi_row, int mi_col, + BLOCK_SIZE bsize, int plane) { + 
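+  // Single-plane variant: plane 0 is Y (luma), planes 1 and 2 are U and V
+  // (chroma). Like the _sby/_sbuv/_sb entry points around it, this wrapper
+  // only fixes the plane range handed to the shared per-plane builder.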
build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, plane, plane); +} + +void vp9_build_inter_predictors_sbuv(MACROBLOCKD *xd, int mi_row, int mi_col, + BLOCK_SIZE bsize) { + build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, 1, + MAX_MB_PLANE - 1); +} + +void vp9_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col, + BLOCK_SIZE bsize) { + build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, 0, + MAX_MB_PLANE - 1); +} + +void vp9_setup_dst_planes(struct macroblockd_plane planes[MAX_MB_PLANE], + const YV12_BUFFER_CONFIG *src, + int mi_row, int mi_col) { + uint8_t *const buffers[MAX_MB_PLANE] = { src->y_buffer, src->u_buffer, + src->v_buffer}; + const int strides[MAX_MB_PLANE] = { src->y_stride, src->uv_stride, + src->uv_stride}; + int i; + + for (i = 0; i < MAX_MB_PLANE; ++i) { + struct macroblockd_plane *const pd = &planes[i]; + setup_pred_plane(&pd->dst, buffers[i], strides[i], mi_row, mi_col, NULL, + pd->subsampling_x, pd->subsampling_y); + } +} + +void vp9_setup_pre_planes(MACROBLOCKD *xd, int idx, + const YV12_BUFFER_CONFIG *src, + int mi_row, int mi_col, + const struct scale_factors *sf) { + if (src != NULL) { + int i; + uint8_t *const buffers[MAX_MB_PLANE] = { src->y_buffer, src->u_buffer, + src->v_buffer}; + const int strides[MAX_MB_PLANE] = { src->y_stride, src->uv_stride, + src->uv_stride}; + for (i = 0; i < MAX_MB_PLANE; ++i) { + struct macroblockd_plane *const pd = &xd->plane[i]; + setup_pred_plane(&pd->pre[idx], buffers[i], strides[i], mi_row, mi_col, + sf, pd->subsampling_x, pd->subsampling_y); + } + } +} diff --git a/thirdparty/libvpx/vp9/common/vp9_reconinter.h b/thirdparty/libvpx/vp9/common/vp9_reconinter.h new file mode 100644 index 0000000000..07745e3aaa --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_reconinter.h @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef VP9_COMMON_VP9_RECONINTER_H_ +#define VP9_COMMON_VP9_RECONINTER_H_ + +#include "vp9/common/vp9_filter.h" +#include "vp9/common/vp9_onyxc_int.h" +#include "vpx/vpx_integer.h" +#include "vpx_dsp/vpx_filter.h" + +#ifdef __cplusplus +extern "C" { +#endif + +static INLINE void inter_predictor(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int subpel_x, + const int subpel_y, + const struct scale_factors *sf, + int w, int h, int ref, + const InterpKernel *kernel, + int xs, int ys) { + sf->predict[subpel_x != 0][subpel_y != 0][ref]( + src, src_stride, dst, dst_stride, + kernel[subpel_x], xs, kernel[subpel_y], ys, w, h); +} + +#if CONFIG_VP9_HIGHBITDEPTH +static INLINE void highbd_inter_predictor(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const int subpel_x, + const int subpel_y, + const struct scale_factors *sf, + int w, int h, int ref, + const InterpKernel *kernel, + int xs, int ys, int bd) { + sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref]( + src, src_stride, dst, dst_stride, + kernel[subpel_x], xs, kernel[subpel_y], ys, w, h, bd); +} +#endif // CONFIG_VP9_HIGHBITDEPTH + +MV average_split_mvs(const struct macroblockd_plane *pd, const MODE_INFO *mi, + int ref, int block); + +MV clamp_mv_to_umv_border_sb(const MACROBLOCKD *xd, const MV *src_mv, + int bw, int bh, int ss_x, int ss_y); + +void vp9_build_inter_predictors_sby(MACROBLOCKD *xd, int mi_row, int mi_col, + BLOCK_SIZE bsize); + +void vp9_build_inter_predictors_sbp(MACROBLOCKD *xd, int mi_row, int mi_col, + BLOCK_SIZE bsize, int plane); + +void vp9_build_inter_predictors_sbuv(MACROBLOCKD *xd, int mi_row, int mi_col, + BLOCK_SIZE bsize); + +void vp9_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col, + BLOCK_SIZE bsize); + +void vp9_build_inter_predictor(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const MV *mv_q3, + const struct scale_factors *sf, + int w, int h, int do_avg, + const InterpKernel *kernel, + enum mv_precision precision, + int x, int y); + +#if CONFIG_VP9_HIGHBITDEPTH +void vp9_highbd_build_inter_predictor(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + const MV *mv_q3, + const struct scale_factors *sf, + int w, int h, int do_avg, + const InterpKernel *kernel, + enum mv_precision precision, + int x, int y, int bd); +#endif + +static INLINE int scaled_buffer_offset(int x_offset, int y_offset, int stride, + const struct scale_factors *sf) { + const int x = sf ? sf->scale_value_x(x_offset, sf) : x_offset; + const int y = sf ? 
sf->scale_value_y(y_offset, sf) : y_offset; + return y * stride + x; +} + +static INLINE void setup_pred_plane(struct buf_2d *dst, + uint8_t *src, int stride, + int mi_row, int mi_col, + const struct scale_factors *scale, + int subsampling_x, int subsampling_y) { + const int x = (MI_SIZE * mi_col) >> subsampling_x; + const int y = (MI_SIZE * mi_row) >> subsampling_y; + dst->buf = src + scaled_buffer_offset(x, y, stride, scale); + dst->stride = stride; +} + +void vp9_setup_dst_planes(struct macroblockd_plane planes[MAX_MB_PLANE], + const YV12_BUFFER_CONFIG *src, + int mi_row, int mi_col); + +void vp9_setup_pre_planes(MACROBLOCKD *xd, int idx, + const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col, + const struct scale_factors *sf); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_COMMON_VP9_RECONINTER_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_reconintra.c b/thirdparty/libvpx/vp9/common/vp9_reconintra.c new file mode 100644 index 0000000000..445785835a --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_reconintra.c @@ -0,0 +1,445 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "./vpx_config.h" +#include "./vpx_dsp_rtcd.h" + +#if CONFIG_VP9_HIGHBITDEPTH +#include "vpx_dsp/vpx_dsp_common.h" +#endif // CONFIG_VP9_HIGHBITDEPTH +#include "vpx_mem/vpx_mem.h" +#include "vpx_ports/mem.h" +#include "vpx_ports/vpx_once.h" + +#include "vp9/common/vp9_reconintra.h" +#include "vp9/common/vp9_onyxc_int.h" + +const TX_TYPE intra_mode_to_tx_type_lookup[INTRA_MODES] = { + DCT_DCT, // DC + ADST_DCT, // V + DCT_ADST, // H + DCT_DCT, // D45 + ADST_ADST, // D135 + ADST_DCT, // D117 + DCT_ADST, // D153 + DCT_ADST, // D207 + ADST_DCT, // D63 + ADST_ADST, // TM +}; + +enum { + NEED_LEFT = 1 << 1, + NEED_ABOVE = 1 << 2, + NEED_ABOVERIGHT = 1 << 3, +}; + +static const uint8_t extend_modes[INTRA_MODES] = { + NEED_ABOVE | NEED_LEFT, // DC + NEED_ABOVE, // V + NEED_LEFT, // H + NEED_ABOVERIGHT, // D45 + NEED_LEFT | NEED_ABOVE, // D135 + NEED_LEFT | NEED_ABOVE, // D117 + NEED_LEFT | NEED_ABOVE, // D153 + NEED_LEFT, // D207 + NEED_ABOVERIGHT, // D63 + NEED_LEFT | NEED_ABOVE, // TM +}; + +typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride, + const uint8_t *above, const uint8_t *left); + +static intra_pred_fn pred[INTRA_MODES][TX_SIZES]; +static intra_pred_fn dc_pred[2][2][TX_SIZES]; + +#if CONFIG_VP9_HIGHBITDEPTH +typedef void (*intra_high_pred_fn)(uint16_t *dst, ptrdiff_t stride, + const uint16_t *above, const uint16_t *left, + int bd); +static intra_high_pred_fn pred_high[INTRA_MODES][4]; +static intra_high_pred_fn dc_pred_high[2][2][4]; +#endif // CONFIG_VP9_HIGHBITDEPTH + +static void vp9_init_intra_predictors_internal(void) { +#define INIT_ALL_SIZES(p, type) \ + p[TX_4X4] = vpx_##type##_predictor_4x4; \ + p[TX_8X8] = vpx_##type##_predictor_8x8; \ + p[TX_16X16] = vpx_##type##_predictor_16x16; \ + p[TX_32X32] = vpx_##type##_predictor_32x32 + + INIT_ALL_SIZES(pred[V_PRED], v); + INIT_ALL_SIZES(pred[H_PRED], h); + INIT_ALL_SIZES(pred[D207_PRED], d207); + INIT_ALL_SIZES(pred[D45_PRED], d45); + INIT_ALL_SIZES(pred[D63_PRED], d63); + INIT_ALL_SIZES(pred[D117_PRED], d117); + INIT_ALL_SIZES(pred[D135_PRED], d135); + 
+  INIT_ALL_SIZES(pred[D153_PRED], d153);
+  INIT_ALL_SIZES(pred[TM_PRED], tm);
+
+  INIT_ALL_SIZES(dc_pred[0][0], dc_128);
+  INIT_ALL_SIZES(dc_pred[0][1], dc_top);
+  INIT_ALL_SIZES(dc_pred[1][0], dc_left);
+  INIT_ALL_SIZES(dc_pred[1][1], dc);
+
+#if CONFIG_VP9_HIGHBITDEPTH
+  INIT_ALL_SIZES(pred_high[V_PRED], highbd_v);
+  INIT_ALL_SIZES(pred_high[H_PRED], highbd_h);
+  INIT_ALL_SIZES(pred_high[D207_PRED], highbd_d207);
+  INIT_ALL_SIZES(pred_high[D45_PRED], highbd_d45);
+  INIT_ALL_SIZES(pred_high[D63_PRED], highbd_d63);
+  INIT_ALL_SIZES(pred_high[D117_PRED], highbd_d117);
+  INIT_ALL_SIZES(pred_high[D135_PRED], highbd_d135);
+  INIT_ALL_SIZES(pred_high[D153_PRED], highbd_d153);
+  INIT_ALL_SIZES(pred_high[TM_PRED], highbd_tm);
+
+  INIT_ALL_SIZES(dc_pred_high[0][0], highbd_dc_128);
+  INIT_ALL_SIZES(dc_pred_high[0][1], highbd_dc_top);
+  INIT_ALL_SIZES(dc_pred_high[1][0], highbd_dc_left);
+  INIT_ALL_SIZES(dc_pred_high[1][1], highbd_dc);
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+
+#undef INIT_ALL_SIZES
+}
+
+#if CONFIG_VP9_HIGHBITDEPTH
+static void build_intra_predictors_high(const MACROBLOCKD *xd,
+                                        const uint8_t *ref8,
+                                        int ref_stride,
+                                        uint8_t *dst8,
+                                        int dst_stride,
+                                        PREDICTION_MODE mode,
+                                        TX_SIZE tx_size,
+                                        int up_available,
+                                        int left_available,
+                                        int right_available,
+                                        int x, int y,
+                                        int plane, int bd) {
+  int i;
+  uint16_t *dst = CONVERT_TO_SHORTPTR(dst8);
+  uint16_t *ref = CONVERT_TO_SHORTPTR(ref8);
+  DECLARE_ALIGNED(16, uint16_t, left_col[32]);
+  DECLARE_ALIGNED(16, uint16_t, above_data[64 + 16]);
+  uint16_t *above_row = above_data + 16;
+  const uint16_t *const_above_row = above_row;
+  const int bs = 4 << tx_size;
+  int frame_width, frame_height;
+  int x0, y0;
+  const struct macroblockd_plane *const pd = &xd->plane[plane];
+  const int need_left = extend_modes[mode] & NEED_LEFT;
+  const int need_above = extend_modes[mode] & NEED_ABOVE;
+  const int need_aboveright = extend_modes[mode] & NEED_ABOVERIGHT;
+  int base = 128 << (bd - 8);
+  // 127 127 127 .. 127 127 127 127 127 127
+  // 129  A   B  ..  Y   Z
+  // 129  C   D  ..  W   X
+  // 129  E   F  ..  U   V
+  // 129  G   H  ..  S   T   T   T   T   T
+  // For 10 bit and 12 bit, 127 and 129 are replaced by base - 1 and base + 1.
+
+  // Get current frame pointer, width and height.
+  if (plane == 0) {
+    frame_width = xd->cur_buf->y_width;
+    frame_height = xd->cur_buf->y_height;
+  } else {
+    frame_width = xd->cur_buf->uv_width;
+    frame_height = xd->cur_buf->uv_height;
+  }
+
+  // Get block position in current frame.
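+  // (The mb_to_*_edge offsets are signed distances in 1/8-pel units, so
+  // shifting right by 3 converts them to luma pixels and the extra
+  // subsampling shift converts to this plane's resolution; x and y then add
+  // the transform block's offset within the current block.)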
+ x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x; + y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y; + + // NEED_LEFT + if (need_left) { + if (left_available) { + if (xd->mb_to_bottom_edge < 0) { + /* slower path if the block needs border extension */ + if (y0 + bs <= frame_height) { + for (i = 0; i < bs; ++i) + left_col[i] = ref[i * ref_stride - 1]; + } else { + const int extend_bottom = frame_height - y0; + for (i = 0; i < extend_bottom; ++i) + left_col[i] = ref[i * ref_stride - 1]; + for (; i < bs; ++i) + left_col[i] = ref[(extend_bottom - 1) * ref_stride - 1]; + } + } else { + /* faster path if the block does not need extension */ + for (i = 0; i < bs; ++i) + left_col[i] = ref[i * ref_stride - 1]; + } + } else { + vpx_memset16(left_col, base + 1, bs); + } + } + + // NEED_ABOVE + if (need_above) { + if (up_available) { + const uint16_t *above_ref = ref - ref_stride; + if (xd->mb_to_right_edge < 0) { + /* slower path if the block needs border extension */ + if (x0 + bs <= frame_width) { + memcpy(above_row, above_ref, bs * sizeof(above_row[0])); + } else if (x0 <= frame_width) { + const int r = frame_width - x0; + memcpy(above_row, above_ref, r * sizeof(above_row[0])); + vpx_memset16(above_row + r, above_row[r - 1], x0 + bs - frame_width); + } + } else { + /* faster path if the block does not need extension */ + if (bs == 4 && right_available && left_available) { + const_above_row = above_ref; + } else { + memcpy(above_row, above_ref, bs * sizeof(above_row[0])); + } + } + above_row[-1] = left_available ? above_ref[-1] : (base + 1); + } else { + vpx_memset16(above_row, base - 1, bs); + above_row[-1] = base - 1; + } + } + + // NEED_ABOVERIGHT + if (need_aboveright) { + if (up_available) { + const uint16_t *above_ref = ref - ref_stride; + if (xd->mb_to_right_edge < 0) { + /* slower path if the block needs border extension */ + if (x0 + 2 * bs <= frame_width) { + if (right_available && bs == 4) { + memcpy(above_row, above_ref, 2 * bs * sizeof(above_row[0])); + } else { + memcpy(above_row, above_ref, bs * sizeof(above_row[0])); + vpx_memset16(above_row + bs, above_row[bs - 1], bs); + } + } else if (x0 + bs <= frame_width) { + const int r = frame_width - x0; + if (right_available && bs == 4) { + memcpy(above_row, above_ref, r * sizeof(above_row[0])); + vpx_memset16(above_row + r, above_row[r - 1], + x0 + 2 * bs - frame_width); + } else { + memcpy(above_row, above_ref, bs * sizeof(above_row[0])); + vpx_memset16(above_row + bs, above_row[bs - 1], bs); + } + } else if (x0 <= frame_width) { + const int r = frame_width - x0; + memcpy(above_row, above_ref, r * sizeof(above_row[0])); + vpx_memset16(above_row + r, above_row[r - 1], + x0 + 2 * bs - frame_width); + } + above_row[-1] = left_available ? above_ref[-1] : (base + 1); + } else { + /* faster path if the block does not need extension */ + if (bs == 4 && right_available && left_available) { + const_above_row = above_ref; + } else { + memcpy(above_row, above_ref, bs * sizeof(above_row[0])); + if (bs == 4 && right_available) + memcpy(above_row + bs, above_ref + bs, bs * sizeof(above_row[0])); + else + vpx_memset16(above_row + bs, above_row[bs - 1], bs); + above_row[-1] = left_available ? 
above_ref[-1] : (base + 1); + } + } + } else { + vpx_memset16(above_row, base - 1, bs * 2); + above_row[-1] = base - 1; + } + } + + // predict + if (mode == DC_PRED) { + dc_pred_high[left_available][up_available][tx_size](dst, dst_stride, + const_above_row, + left_col, xd->bd); + } else { + pred_high[mode][tx_size](dst, dst_stride, const_above_row, left_col, + xd->bd); + } +} +#endif // CONFIG_VP9_HIGHBITDEPTH + +static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref, + int ref_stride, uint8_t *dst, int dst_stride, + PREDICTION_MODE mode, TX_SIZE tx_size, + int up_available, int left_available, + int right_available, int x, int y, + int plane) { + int i; + DECLARE_ALIGNED(16, uint8_t, left_col[32]); + DECLARE_ALIGNED(16, uint8_t, above_data[64 + 16]); + uint8_t *above_row = above_data + 16; + const uint8_t *const_above_row = above_row; + const int bs = 4 << tx_size; + int frame_width, frame_height; + int x0, y0; + const struct macroblockd_plane *const pd = &xd->plane[plane]; + + // 127 127 127 .. 127 127 127 127 127 127 + // 129 A B .. Y Z + // 129 C D .. W X + // 129 E F .. U V + // 129 G H .. S T T T T T + // .. + + // Get current frame pointer, width and height. + if (plane == 0) { + frame_width = xd->cur_buf->y_width; + frame_height = xd->cur_buf->y_height; + } else { + frame_width = xd->cur_buf->uv_width; + frame_height = xd->cur_buf->uv_height; + } + + // Get block position in current frame. + x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x; + y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y; + + // NEED_LEFT + if (extend_modes[mode] & NEED_LEFT) { + if (left_available) { + if (xd->mb_to_bottom_edge < 0) { + /* slower path if the block needs border extension */ + if (y0 + bs <= frame_height) { + for (i = 0; i < bs; ++i) + left_col[i] = ref[i * ref_stride - 1]; + } else { + const int extend_bottom = frame_height - y0; + for (i = 0; i < extend_bottom; ++i) + left_col[i] = ref[i * ref_stride - 1]; + for (; i < bs; ++i) + left_col[i] = ref[(extend_bottom - 1) * ref_stride - 1]; + } + } else { + /* faster path if the block does not need extension */ + for (i = 0; i < bs; ++i) + left_col[i] = ref[i * ref_stride - 1]; + } + } else { + memset(left_col, 129, bs); + } + } + + // NEED_ABOVE + if (extend_modes[mode] & NEED_ABOVE) { + if (up_available) { + const uint8_t *above_ref = ref - ref_stride; + if (xd->mb_to_right_edge < 0) { + /* slower path if the block needs border extension */ + if (x0 + bs <= frame_width) { + memcpy(above_row, above_ref, bs); + } else if (x0 <= frame_width) { + const int r = frame_width - x0; + memcpy(above_row, above_ref, r); + memset(above_row + r, above_row[r - 1], x0 + bs - frame_width); + } + } else { + /* faster path if the block does not need extension */ + if (bs == 4 && right_available && left_available) { + const_above_row = above_ref; + } else { + memcpy(above_row, above_ref, bs); + } + } + above_row[-1] = left_available ? 
above_ref[-1] : 129; + } else { + memset(above_row, 127, bs); + above_row[-1] = 127; + } + } + + // NEED_ABOVERIGHT + if (extend_modes[mode] & NEED_ABOVERIGHT) { + if (up_available) { + const uint8_t *above_ref = ref - ref_stride; + if (xd->mb_to_right_edge < 0) { + /* slower path if the block needs border extension */ + if (x0 + 2 * bs <= frame_width) { + if (right_available && bs == 4) { + memcpy(above_row, above_ref, 2 * bs); + } else { + memcpy(above_row, above_ref, bs); + memset(above_row + bs, above_row[bs - 1], bs); + } + } else if (x0 + bs <= frame_width) { + const int r = frame_width - x0; + if (right_available && bs == 4) { + memcpy(above_row, above_ref, r); + memset(above_row + r, above_row[r - 1], x0 + 2 * bs - frame_width); + } else { + memcpy(above_row, above_ref, bs); + memset(above_row + bs, above_row[bs - 1], bs); + } + } else if (x0 <= frame_width) { + const int r = frame_width - x0; + memcpy(above_row, above_ref, r); + memset(above_row + r, above_row[r - 1], x0 + 2 * bs - frame_width); + } + } else { + /* faster path if the block does not need extension */ + if (bs == 4 && right_available && left_available) { + const_above_row = above_ref; + } else { + memcpy(above_row, above_ref, bs); + if (bs == 4 && right_available) + memcpy(above_row + bs, above_ref + bs, bs); + else + memset(above_row + bs, above_row[bs - 1], bs); + } + } + above_row[-1] = left_available ? above_ref[-1] : 129; + } else { + memset(above_row, 127, bs * 2); + above_row[-1] = 127; + } + } + + // predict + if (mode == DC_PRED) { + dc_pred[left_available][up_available][tx_size](dst, dst_stride, + const_above_row, left_col); + } else { + pred[mode][tx_size](dst, dst_stride, const_above_row, left_col); + } +} + +void vp9_predict_intra_block(const MACROBLOCKD *xd, int bwl_in, + TX_SIZE tx_size, PREDICTION_MODE mode, + const uint8_t *ref, int ref_stride, + uint8_t *dst, int dst_stride, + int aoff, int loff, int plane) { + const int bw = (1 << bwl_in); + const int txw = (1 << tx_size); + const int have_top = loff || (xd->above_mi != NULL); + const int have_left = aoff || (xd->left_mi != NULL); + const int have_right = (aoff + txw) < bw; + const int x = aoff * 4; + const int y = loff * 4; + +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + build_intra_predictors_high(xd, ref, ref_stride, dst, dst_stride, mode, + tx_size, have_top, have_left, have_right, + x, y, plane, xd->bd); + return; + } +#endif + build_intra_predictors(xd, ref, ref_stride, dst, dst_stride, mode, tx_size, + have_top, have_left, have_right, x, y, plane); +} + +void vp9_init_intra_predictors(void) { + once(vp9_init_intra_predictors_internal); +} diff --git a/thirdparty/libvpx/vp9/common/vp9_reconintra.h b/thirdparty/libvpx/vp9/common/vp9_reconintra.h new file mode 100644 index 0000000000..de453808b7 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_reconintra.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#ifndef VP9_COMMON_VP9_RECONINTRA_H_
+#define VP9_COMMON_VP9_RECONINTRA_H_
+
+#include "vpx/vpx_integer.h"
+#include "vp9/common/vp9_blockd.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void vp9_init_intra_predictors(void);
+
+void vp9_predict_intra_block(const MACROBLOCKD *xd, int bwl_in,
+                             TX_SIZE tx_size, PREDICTION_MODE mode,
+                             const uint8_t *ref, int ref_stride,
+                             uint8_t *dst, int dst_stride,
+                             int aoff, int loff, int plane);
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP9_COMMON_VP9_RECONINTRA_H_
diff --git a/thirdparty/libvpx/vp9/common/vp9_rtcd.c b/thirdparty/libvpx/vp9/common/vp9_rtcd.c
new file mode 100644
index 0000000000..2dfa09f50e
--- /dev/null
+++ b/thirdparty/libvpx/vp9/common/vp9_rtcd.c
@@ -0,0 +1,19 @@
+/*
+ * Copyright (c) 2011 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+#include "./vpx_config.h"
+#define RTCD_C
+#include "./vp9_rtcd.h"
+#include "vpx_ports/vpx_once.h"
+
+void vp9_rtcd() {
+  // TODO(JBB): Remove this function once both the encoder and decoder
+  // setup functions are protected by once().
+  once(setup_rtcd_internal);
+}
diff --git a/thirdparty/libvpx/vp9/common/vp9_scale.c b/thirdparty/libvpx/vp9/common/vp9_scale.c
new file mode 100644
index 0000000000..b763b925b3
--- /dev/null
+++ b/thirdparty/libvpx/vp9/common/vp9_scale.c
@@ -0,0 +1,175 @@
+/*
+ * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vpx_dsp_rtcd.h"
+#include "vp9/common/vp9_filter.h"
+#include "vp9/common/vp9_scale.h"
+#include "vpx_dsp/vpx_filter.h"
+
+static INLINE int scaled_x(int val, const struct scale_factors *sf) {
+  return (int)((int64_t)val * sf->x_scale_fp >> REF_SCALE_SHIFT);
+}
+
+static INLINE int scaled_y(int val, const struct scale_factors *sf) {
+  return (int)((int64_t)val * sf->y_scale_fp >> REF_SCALE_SHIFT);
+}
+
+static int unscaled_value(int val, const struct scale_factors *sf) {
+  (void) sf;
+  return val;
+}
+
+static int get_fixed_point_scale_factor(int other_size, int this_size) {
+  // Calculate scaling factor once for each reference frame
+  // and use fixed point scaling factors in decoding and encoding routines.
+  // Hardware implementations can calculate scale factor in device driver
+  // and use multiplication and shifting on hardware instead of division.
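+  // Worked example: REF_SCALE_SHIFT is 14 (see vp9_scale.h), so a 3840-wide
+  // reference scaled to a 1920-wide frame yields
+  // (3840 << 14) / 1920 = 32768, i.e. 2.0 in Q14 fixed point; scaled_x()
+  // above then applies it as (val * x_scale_fp) >> REF_SCALE_SHIFT.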
+ return (other_size << REF_SCALE_SHIFT) / this_size; +} + +MV32 vp9_scale_mv(const MV *mv, int x, int y, const struct scale_factors *sf) { + const int x_off_q4 = scaled_x(x << SUBPEL_BITS, sf) & SUBPEL_MASK; + const int y_off_q4 = scaled_y(y << SUBPEL_BITS, sf) & SUBPEL_MASK; + const MV32 res = { + scaled_y(mv->row, sf) + y_off_q4, + scaled_x(mv->col, sf) + x_off_q4 + }; + return res; +} + +#if CONFIG_VP9_HIGHBITDEPTH +void vp9_setup_scale_factors_for_frame(struct scale_factors *sf, + int other_w, int other_h, + int this_w, int this_h, + int use_highbd) { +#else +void vp9_setup_scale_factors_for_frame(struct scale_factors *sf, + int other_w, int other_h, + int this_w, int this_h) { +#endif + if (!valid_ref_frame_size(other_w, other_h, this_w, this_h)) { + sf->x_scale_fp = REF_INVALID_SCALE; + sf->y_scale_fp = REF_INVALID_SCALE; + return; + } + + sf->x_scale_fp = get_fixed_point_scale_factor(other_w, this_w); + sf->y_scale_fp = get_fixed_point_scale_factor(other_h, this_h); + sf->x_step_q4 = scaled_x(16, sf); + sf->y_step_q4 = scaled_y(16, sf); + + if (vp9_is_scaled(sf)) { + sf->scale_value_x = scaled_x; + sf->scale_value_y = scaled_y; + } else { + sf->scale_value_x = unscaled_value; + sf->scale_value_y = unscaled_value; + } + + // TODO(agrange): Investigate the best choice of functions to use here + // for EIGHTTAP_SMOOTH. Since it is not interpolating, need to choose what + // to do at full-pel offsets. The current selection, where the filter is + // applied in one direction only, and not at all for 0,0, seems to give the + // best quality, but it may be worth trying an additional mode that does + // do the filtering on full-pel. + + if (sf->x_step_q4 == 16) { + if (sf->y_step_q4 == 16) { + // No scaling in either direction. + sf->predict[0][0][0] = vpx_convolve_copy; + sf->predict[0][0][1] = vpx_convolve_avg; + sf->predict[0][1][0] = vpx_convolve8_vert; + sf->predict[0][1][1] = vpx_convolve8_avg_vert; + sf->predict[1][0][0] = vpx_convolve8_horiz; + sf->predict[1][0][1] = vpx_convolve8_avg_horiz; + } else { + // No scaling in x direction. Must always scale in the y direction. + sf->predict[0][0][0] = vpx_scaled_vert; + sf->predict[0][0][1] = vpx_scaled_avg_vert; + sf->predict[0][1][0] = vpx_scaled_vert; + sf->predict[0][1][1] = vpx_scaled_avg_vert; + sf->predict[1][0][0] = vpx_scaled_2d; + sf->predict[1][0][1] = vpx_scaled_avg_2d; + } + } else { + if (sf->y_step_q4 == 16) { + // No scaling in the y direction. Must always scale in the x direction. + sf->predict[0][0][0] = vpx_scaled_horiz; + sf->predict[0][0][1] = vpx_scaled_avg_horiz; + sf->predict[0][1][0] = vpx_scaled_2d; + sf->predict[0][1][1] = vpx_scaled_avg_2d; + sf->predict[1][0][0] = vpx_scaled_horiz; + sf->predict[1][0][1] = vpx_scaled_avg_horiz; + } else { + // Must always scale in both directions. + sf->predict[0][0][0] = vpx_scaled_2d; + sf->predict[0][0][1] = vpx_scaled_avg_2d; + sf->predict[0][1][0] = vpx_scaled_2d; + sf->predict[0][1][1] = vpx_scaled_avg_2d; + sf->predict[1][0][0] = vpx_scaled_2d; + sf->predict[1][0][1] = vpx_scaled_avg_2d; + } + } + + // 2D subpel motion always gets filtered in both directions + + if ((sf->x_step_q4 != 16) || (sf->y_step_q4 != 16)) { + sf->predict[1][1][0] = vpx_scaled_2d; + sf->predict[1][1][1] = vpx_scaled_avg_2d; + } else { + sf->predict[1][1][0] = vpx_convolve8; + sf->predict[1][1][1] = vpx_convolve8_avg; + } + +#if CONFIG_VP9_HIGHBITDEPTH + if (use_highbd) { + if (sf->x_step_q4 == 16) { + if (sf->y_step_q4 == 16) { + // No scaling in either direction. 
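+        // (Both predict tables are indexed [subpel_x != 0][subpel_y != 0]
+        // [avg]: [0][0] is a plain copy or average, [0][1] filters
+        // vertically only, [1][0] horizontally only, [1][1] both ways.)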
+ sf->highbd_predict[0][0][0] = vpx_highbd_convolve_copy; + sf->highbd_predict[0][0][1] = vpx_highbd_convolve_avg; + sf->highbd_predict[0][1][0] = vpx_highbd_convolve8_vert; + sf->highbd_predict[0][1][1] = vpx_highbd_convolve8_avg_vert; + sf->highbd_predict[1][0][0] = vpx_highbd_convolve8_horiz; + sf->highbd_predict[1][0][1] = vpx_highbd_convolve8_avg_horiz; + } else { + // No scaling in x direction. Must always scale in the y direction. + sf->highbd_predict[0][0][0] = vpx_highbd_convolve8_vert; + sf->highbd_predict[0][0][1] = vpx_highbd_convolve8_avg_vert; + sf->highbd_predict[0][1][0] = vpx_highbd_convolve8_vert; + sf->highbd_predict[0][1][1] = vpx_highbd_convolve8_avg_vert; + sf->highbd_predict[1][0][0] = vpx_highbd_convolve8; + sf->highbd_predict[1][0][1] = vpx_highbd_convolve8_avg; + } + } else { + if (sf->y_step_q4 == 16) { + // No scaling in the y direction. Must always scale in the x direction. + sf->highbd_predict[0][0][0] = vpx_highbd_convolve8_horiz; + sf->highbd_predict[0][0][1] = vpx_highbd_convolve8_avg_horiz; + sf->highbd_predict[0][1][0] = vpx_highbd_convolve8; + sf->highbd_predict[0][1][1] = vpx_highbd_convolve8_avg; + sf->highbd_predict[1][0][0] = vpx_highbd_convolve8_horiz; + sf->highbd_predict[1][0][1] = vpx_highbd_convolve8_avg_horiz; + } else { + // Must always scale in both directions. + sf->highbd_predict[0][0][0] = vpx_highbd_convolve8; + sf->highbd_predict[0][0][1] = vpx_highbd_convolve8_avg; + sf->highbd_predict[0][1][0] = vpx_highbd_convolve8; + sf->highbd_predict[0][1][1] = vpx_highbd_convolve8_avg; + sf->highbd_predict[1][0][0] = vpx_highbd_convolve8; + sf->highbd_predict[1][0][1] = vpx_highbd_convolve8_avg; + } + } + // 2D subpel motion always gets filtered in both directions. + sf->highbd_predict[1][1][0] = vpx_highbd_convolve8; + sf->highbd_predict[1][1][1] = vpx_highbd_convolve8_avg; + } +#endif +} diff --git a/thirdparty/libvpx/vp9/common/vp9_scale.h b/thirdparty/libvpx/vp9/common/vp9_scale.h new file mode 100644 index 0000000000..5e91041079 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_scale.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2013 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef VP9_COMMON_VP9_SCALE_H_ +#define VP9_COMMON_VP9_SCALE_H_ + +#include "vp9/common/vp9_mv.h" +#include "vpx_dsp/vpx_convolve.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define REF_SCALE_SHIFT 14 +#define REF_NO_SCALE (1 << REF_SCALE_SHIFT) +#define REF_INVALID_SCALE -1 + +struct scale_factors { + int x_scale_fp; // horizontal fixed point scale factor + int y_scale_fp; // vertical fixed point scale factor + int x_step_q4; + int y_step_q4; + + int (*scale_value_x)(int val, const struct scale_factors *sf); + int (*scale_value_y)(int val, const struct scale_factors *sf); + + convolve_fn_t predict[2][2][2]; // horiz, vert, avg +#if CONFIG_VP9_HIGHBITDEPTH + highbd_convolve_fn_t highbd_predict[2][2][2]; // horiz, vert, avg +#endif +}; + +MV32 vp9_scale_mv(const MV *mv, int x, int y, const struct scale_factors *sf); + +#if CONFIG_VP9_HIGHBITDEPTH +void vp9_setup_scale_factors_for_frame(struct scale_factors *sf, + int other_w, int other_h, + int this_w, int this_h, + int use_high); +#else +void vp9_setup_scale_factors_for_frame(struct scale_factors *sf, + int other_w, int other_h, + int this_w, int this_h); +#endif + +static INLINE int vp9_is_valid_scale(const struct scale_factors *sf) { + return sf->x_scale_fp != REF_INVALID_SCALE && + sf->y_scale_fp != REF_INVALID_SCALE; +} + +static INLINE int vp9_is_scaled(const struct scale_factors *sf) { + return vp9_is_valid_scale(sf) && + (sf->x_scale_fp != REF_NO_SCALE || sf->y_scale_fp != REF_NO_SCALE); +} + +static INLINE int valid_ref_frame_size(int ref_width, int ref_height, + int this_width, int this_height) { + return 2 * this_width >= ref_width && + 2 * this_height >= ref_height && + this_width <= 16 * ref_width && + this_height <= 16 * ref_height; +} + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_COMMON_VP9_SCALE_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_scan.c b/thirdparty/libvpx/vp9/common/vp9_scan.c new file mode 100644 index 0000000000..8b8b09f4a3 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_scan.c @@ -0,0 +1,725 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <assert.h> + +#include "vp9/common/vp9_scan.h" + +DECLARE_ALIGNED(16, static const int16_t, default_scan_4x4[16]) = { + 0, 4, 1, 5, + 8, 2, 12, 9, + 3, 6, 13, 10, + 7, 14, 11, 15, +}; + +DECLARE_ALIGNED(16, static const int16_t, col_scan_4x4[16]) = { + 0, 4, 8, 1, + 12, 5, 9, 2, + 13, 6, 10, 3, + 7, 14, 11, 15, +}; + +DECLARE_ALIGNED(16, static const int16_t, row_scan_4x4[16]) = { + 0, 1, 4, 2, + 5, 3, 6, 8, + 9, 7, 12, 10, + 13, 11, 14, 15, +}; + +DECLARE_ALIGNED(16, static const int16_t, default_scan_8x8[64]) = { + 0, 8, 1, 16, 9, 2, 17, 24, + 10, 3, 18, 25, 32, 11, 4, 26, + 33, 19, 40, 12, 34, 27, 5, 41, + 20, 48, 13, 35, 42, 28, 21, 6, + 49, 56, 36, 43, 29, 7, 14, 50, + 57, 44, 22, 37, 15, 51, 58, 30, + 45, 23, 52, 59, 38, 31, 60, 53, + 46, 39, 61, 54, 47, 62, 55, 63, +}; + +DECLARE_ALIGNED(16, static const int16_t, col_scan_8x8[64]) = { + 0, 8, 16, 1, 24, 9, 32, 17, + 2, 40, 25, 10, 33, 18, 48, 3, + 26, 41, 11, 56, 19, 34, 4, 49, + 27, 42, 12, 35, 20, 57, 50, 28, + 5, 43, 13, 36, 58, 51, 21, 44, + 6, 29, 59, 37, 14, 52, 22, 7, + 45, 60, 30, 15, 38, 53, 23, 46, + 31, 61, 39, 54, 47, 62, 55, 63, +}; + +DECLARE_ALIGNED(16, static const int16_t, row_scan_8x8[64]) = { + 0, 1, 2, 8, 9, 3, 16, 10, + 4, 17, 11, 24, 5, 18, 25, 12, + 19, 26, 32, 6, 13, 20, 33, 27, + 7, 34, 40, 21, 28, 41, 14, 35, + 48, 42, 29, 36, 49, 22, 43, 15, + 56, 37, 50, 44, 30, 57, 23, 51, + 58, 45, 38, 52, 31, 59, 53, 46, + 60, 39, 61, 47, 54, 55, 62, 63, +}; + +DECLARE_ALIGNED(16, static const int16_t, default_scan_16x16[256]) = { + 0, 16, 1, 32, 17, 2, 48, 33, 18, 3, 64, 34, 49, 19, 65, 80, + 50, 4, 35, 66, 20, 81, 96, 51, 5, 36, 82, 97, 67, 112, 21, 52, + 98, 37, 83, 113, 6, 68, 128, 53, 22, 99, 114, 84, 7, 129, 38, 69, + 100, 115, 144, 130, 85, 54, 23, 8, 145, 39, 70, 116, 101, 131, 160, 146, + 55, 86, 24, 71, 132, 117, 161, 40, 9, 102, 147, 176, 162, 87, 56, 25, + 133, 118, 177, 148, 72, 103, 41, 163, 10, 192, 178, 88, 57, 134, 149, 119, + 26, 164, 73, 104, 193, 42, 179, 208, 11, 135, 89, 165, 120, 150, 58, 194, + 180, 27, 74, 209, 105, 151, 136, 43, 90, 224, 166, 195, 181, 121, 210, 59, + 12, 152, 106, 167, 196, 75, 137, 225, 211, 240, 182, 122, 91, 28, 197, 13, + 226, 168, 183, 153, 44, 212, 138, 107, 241, 60, 29, 123, 198, 184, 227, 169, + 242, 76, 213, 154, 45, 92, 14, 199, 139, 61, 228, 214, 170, 185, 243, 108, + 77, 155, 30, 15, 200, 229, 124, 215, 244, 93, 46, 186, 171, 201, 109, 140, + 230, 62, 216, 245, 31, 125, 78, 156, 231, 47, 187, 202, 217, 94, 246, 141, + 63, 232, 172, 110, 247, 157, 79, 218, 203, 126, 233, 188, 248, 95, 173, 142, + 219, 111, 249, 234, 158, 127, 189, 204, 250, 235, 143, 174, 220, 205, 159, + 251, + 190, 221, 175, 236, 237, 191, 206, 252, 222, 253, 207, 238, 223, 254, 239, + 255, +}; + +DECLARE_ALIGNED(16, static const int16_t, col_scan_16x16[256]) = { + 0, 16, 32, 48, 1, 64, 17, 80, 33, 96, 49, 2, 65, 112, 18, 81, + 34, 128, 50, 97, 3, 66, 144, 19, 113, 35, 82, 160, 98, 51, 129, 4, + 67, 176, 20, 114, 145, 83, 36, 99, 130, 52, 192, 5, 161, 68, 115, 21, + 146, 84, 208, 177, 37, 131, 100, 53, 162, 224, 69, 6, 116, 193, 147, 85, + 22, 240, 132, 38, 178, 101, 163, 54, 209, 117, 70, 7, 148, 194, 86, 179, + 225, 23, 133, 39, 164, 8, 102, 210, 241, 55, 195, 118, 149, 71, 180, 24, + 87, 226, 134, 165, 211, 40, 103, 56, 72, 150, 196, 242, 119, 9, 181, 227, + 88, 166, 25, 135, 41, 104, 212, 57, 151, 197, 120, 73, 243, 182, 136, 167, + 213, 89, 10, 228, 105, 152, 198, 26, 42, 121, 183, 244, 168, 58, 137, 229, + 74, 214, 90, 153, 199, 184, 11, 106, 245, 27, 122, 230, 169, 43, 
215, 59, + 200, 138, 185, 246, 75, 12, 91, 154, 216, 231, 107, 28, 44, 201, 123, 170, + 60, 247, 232, 76, 139, 13, 92, 217, 186, 248, 155, 108, 29, 124, 45, 202, + 233, 171, 61, 14, 77, 140, 15, 249, 93, 30, 187, 156, 218, 46, 109, 125, + 62, 172, 78, 203, 31, 141, 234, 94, 47, 188, 63, 157, 110, 250, 219, 79, + 126, 204, 173, 142, 95, 189, 111, 235, 158, 220, 251, 127, 174, 143, 205, + 236, + 159, 190, 221, 252, 175, 206, 237, 191, 253, 222, 238, 207, 254, 223, 239, + 255, +}; + +DECLARE_ALIGNED(16, static const int16_t, row_scan_16x16[256]) = { + 0, 1, 2, 16, 3, 17, 4, 18, 32, 5, 33, 19, 6, 34, 48, 20, + 49, 7, 35, 21, 50, 64, 8, 36, 65, 22, 51, 37, 80, 9, 66, 52, + 23, 38, 81, 67, 10, 53, 24, 82, 68, 96, 39, 11, 54, 83, 97, 69, + 25, 98, 84, 40, 112, 55, 12, 70, 99, 113, 85, 26, 41, 56, 114, 100, + 13, 71, 128, 86, 27, 115, 101, 129, 42, 57, 72, 116, 14, 87, 130, 102, + 144, 73, 131, 117, 28, 58, 15, 88, 43, 145, 103, 132, 146, 118, 74, 160, + 89, 133, 104, 29, 59, 147, 119, 44, 161, 148, 90, 105, 134, 162, 120, 176, + 75, 135, 149, 30, 60, 163, 177, 45, 121, 91, 106, 164, 178, 150, 192, 136, + 165, 179, 31, 151, 193, 76, 122, 61, 137, 194, 107, 152, 180, 208, 46, 166, + 167, 195, 92, 181, 138, 209, 123, 153, 224, 196, 77, 168, 210, 182, 240, 108, + 197, 62, 154, 225, 183, 169, 211, 47, 139, 93, 184, 226, 212, 241, 198, 170, + 124, 155, 199, 78, 213, 185, 109, 227, 200, 63, 228, 242, 140, 214, 171, 186, + 156, 229, 243, 125, 94, 201, 244, 215, 216, 230, 141, 187, 202, 79, 172, 110, + 157, 245, 217, 231, 95, 246, 232, 126, 203, 247, 233, 173, 218, 142, 111, + 158, + 188, 248, 127, 234, 219, 249, 189, 204, 143, 174, 159, 250, 235, 205, 220, + 175, + 190, 251, 221, 191, 206, 236, 207, 237, 252, 222, 253, 223, 238, 239, 254, + 255, +}; + +DECLARE_ALIGNED(16, static const int16_t, default_scan_32x32[1024]) = { + 0, 32, 1, 64, 33, 2, 96, 65, 34, 128, 3, 97, 66, 160, + 129, 35, 98, 4, 67, 130, 161, 192, 36, 99, 224, 5, 162, 193, + 68, 131, 37, 100, + 225, 194, 256, 163, 69, 132, 6, 226, 257, 288, 195, 101, 164, 38, + 258, 7, 227, 289, 133, 320, 70, 196, 165, 290, 259, 228, 39, 321, + 102, 352, 8, 197, + 71, 134, 322, 291, 260, 353, 384, 229, 166, 103, 40, 354, 323, 292, + 135, 385, 198, 261, 72, 9, 416, 167, 386, 355, 230, 324, 104, 293, + 41, 417, 199, 136, + 262, 387, 448, 325, 356, 10, 73, 418, 231, 168, 449, 294, 388, 105, + 419, 263, 42, 200, 357, 450, 137, 480, 74, 326, 232, 11, 389, 169, + 295, 420, 106, 451, + 481, 358, 264, 327, 201, 43, 138, 512, 482, 390, 296, 233, 170, 421, + 75, 452, 359, 12, 513, 265, 483, 328, 107, 202, 514, 544, 422, 391, + 453, 139, 44, 234, + 484, 297, 360, 171, 76, 515, 545, 266, 329, 454, 13, 423, 203, 108, + 546, 485, 576, 298, 235, 140, 361, 330, 172, 547, 45, 455, 267, 577, + 486, 77, 204, 362, + 608, 14, 299, 578, 109, 236, 487, 609, 331, 141, 579, 46, 15, 173, + 610, 363, 78, 205, 16, 110, 237, 611, 142, 47, 174, 79, 206, 17, + 111, 238, 48, 143, + 80, 175, 112, 207, 49, 18, 239, 81, 113, 19, 50, 82, 114, 51, + 83, 115, 640, 516, 392, 268, 144, 20, 672, 641, 548, 517, 424, + 393, 300, 269, 176, 145, + 52, 21, 704, 673, 642, 580, 549, 518, 456, 425, 394, 332, 301, + 270, 208, 177, 146, 84, 53, 22, 736, 705, 674, 643, 612, 581, + 550, 519, 488, 457, 426, 395, + 364, 333, 302, 271, 240, 209, 178, 147, 116, 85, 54, 23, 737, + 706, 675, 613, 582, 551, 489, 458, 427, 365, 334, 303, 241, + 210, 179, 117, 86, 55, 738, 707, + 614, 583, 490, 459, 366, 335, 242, 211, 118, 87, 739, 615, 491, + 367, 243, 119, 768, 644, 520, 396, 272, 148, 24, 800, 769, 676, + 645, 
552, 521, 428, 397, 304, + 273, 180, 149, 56, 25, 832, 801, 770, 708, 677, 646, 584, 553, + 522, 460, 429, 398, 336, 305, 274, 212, 181, 150, 88, 57, 26, + 864, 833, 802, 771, 740, 709, + 678, 647, 616, 585, 554, 523, 492, 461, 430, 399, 368, 337, 306, + 275, 244, 213, 182, 151, 120, 89, 58, 27, 865, 834, 803, 741, + 710, 679, 617, 586, 555, 493, + 462, 431, 369, 338, 307, 245, 214, 183, 121, 90, 59, 866, 835, + 742, 711, 618, 587, 494, 463, 370, 339, 246, 215, 122, 91, 867, + 743, 619, 495, 371, 247, 123, + 896, 772, 648, 524, 400, 276, 152, 28, 928, 897, 804, 773, 680, + 649, 556, 525, 432, 401, 308, 277, 184, 153, 60, 29, 960, 929, + 898, 836, 805, 774, 712, 681, + 650, 588, 557, 526, 464, 433, 402, 340, 309, 278, 216, 185, 154, + 92, 61, 30, 992, 961, 930, 899, 868, 837, 806, 775, 744, 713, 682, + 651, 620, 589, 558, 527, + 496, 465, 434, 403, 372, 341, 310, 279, 248, 217, 186, 155, 124, + 93, 62, 31, 993, 962, 931, 869, 838, 807, 745, 714, 683, 621, 590, + 559, 497, 466, 435, 373, + 342, 311, 249, 218, 187, 125, 94, 63, 994, 963, 870, 839, 746, 715, + 622, 591, 498, 467, 374, 343, 250, 219, 126, 95, 995, 871, 747, 623, + 499, 375, 251, 127, + 900, 776, 652, 528, 404, 280, 156, 932, 901, 808, 777, 684, 653, 560, + 529, 436, 405, 312, 281, 188, 157, 964, 933, 902, 840, 809, 778, 716, + 685, 654, 592, 561, + 530, 468, 437, 406, 344, 313, 282, 220, 189, 158, 996, 965, 934, 903, + 872, 841, 810, 779, 748, 717, 686, 655, 624, 593, 562, 531, 500, 469, + 438, 407, 376, 345, + 314, 283, 252, 221, 190, 159, 997, 966, 935, 873, 842, 811, 749, 718, + 687, 625, 594, 563, 501, 470, 439, 377, 346, 315, 253, 222, 191, 998, + 967, 874, 843, 750, + 719, 626, 595, 502, 471, 378, 347, 254, 223, 999, 875, 751, 627, 503, + 379, 255, 904, 780, 656, 532, 408, 284, 936, 905, 812, 781, 688, 657, + 564, 533, 440, 409, + 316, 285, 968, 937, 906, 844, 813, 782, 720, 689, 658, 596, 565, 534, + 472, 441, 410, 348, 317, 286, 1000, 969, 938, 907, 876, 845, 814, 783, + 752, 721, 690, 659, + 628, 597, 566, 535, 504, 473, 442, 411, 380, 349, 318, 287, 1001, 970, + 939, 877, 846, 815, 753, 722, 691, 629, 598, 567, 505, 474, 443, 381, + 350, 319, 1002, 971, + 878, 847, 754, 723, 630, 599, 506, 475, 382, 351, 1003, 879, 755, 631, + 507, 383, 908, 784, 660, 536, 412, 940, 909, 816, 785, 692, 661, 568, + 537, 444, 413, 972, + 941, 910, 848, 817, 786, 724, 693, 662, 600, 569, 538, 476, 445, 414, + 1004, 973, 942, 911, 880, 849, 818, 787, 756, 725, 694, 663, 632, 601, + 570, 539, 508, 477, + 446, 415, 1005, 974, 943, 881, 850, 819, 757, 726, 695, 633, 602, 571, + 509, 478, 447, 1006, 975, 882, 851, 758, 727, 634, 603, 510, 479, + 1007, 883, 759, 635, 511, + 912, 788, 664, 540, 944, 913, 820, 789, 696, 665, 572, 541, 976, 945, + 914, 852, 821, 790, 728, 697, 666, 604, 573, 542, 1008, 977, 946, 915, + 884, 853, 822, 791, + 760, 729, 698, 667, 636, 605, 574, 543, 1009, 978, 947, 885, 854, 823, + 761, 730, 699, 637, 606, 575, 1010, 979, 886, 855, 762, 731, 638, 607, + 1011, 887, 763, 639, + 916, 792, 668, 948, 917, 824, 793, 700, 669, 980, 949, 918, 856, 825, + 794, 732, 701, 670, 1012, 981, 950, 919, 888, 857, 826, 795, 764, 733, + 702, 671, 1013, 982, + 951, 889, 858, 827, 765, 734, 703, 1014, 983, 890, 859, 766, 735, 1015, + 891, 767, 920, 796, 952, 921, 828, 797, 984, 953, 922, 860, 829, 798, + 1016, 985, 954, 923, + 892, 861, 830, 799, 1017, 986, 955, 893, 862, 831, 1018, 987, 894, 863, + 1019, 895, 924, 956, 925, 988, 957, 926, 1020, 989, 958, 927, 1021, + 990, 959, 1022, 991, 1023, +}; + +// Neighborhood 2-tuples for 
various scans and blocksizes, +// in {top, left} order for each position in corresponding scan order. +DECLARE_ALIGNED(16, static const int16_t, + default_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = { + 0, 0, 0, 0, 0, 0, 1, 4, 4, 4, 1, 1, 8, 8, 5, 8, 2, 2, 2, 5, 9, 12, 6, 9, + 3, 6, 10, 13, 7, 10, 11, 14, 0, 0, +}; + +DECLARE_ALIGNED(16, static const int16_t, + col_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = { + 0, 0, 0, 0, 4, 4, 0, 0, 8, 8, 1, 1, 5, 5, 1, 1, 9, 9, 2, 2, 6, 6, 2, 2, 3, + 3, 10, 10, 7, 7, 11, 11, 0, 0, +}; + +DECLARE_ALIGNED(16, static const int16_t, + row_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = { + 0, 0, 0, 0, 0, 0, 1, 1, 4, 4, 2, 2, 5, 5, 4, 4, 8, 8, 6, 6, 8, 8, 9, 9, 12, + 12, 10, 10, 13, 13, 14, 14, 0, 0, +}; + +DECLARE_ALIGNED(16, static const int16_t, + col_scan_8x8_neighbors[65 * MAX_NEIGHBORS]) = { + 0, 0, 0, 0, 8, 8, 0, 0, 16, 16, 1, 1, 24, 24, 9, 9, 1, 1, 32, 32, 17, 17, 2, + 2, 25, 25, 10, 10, 40, 40, 2, 2, 18, 18, 33, 33, 3, 3, 48, 48, 11, 11, 26, + 26, 3, 3, 41, 41, 19, 19, 34, 34, 4, 4, 27, 27, 12, 12, 49, 49, 42, 42, 20, + 20, 4, 4, 35, 35, 5, 5, 28, 28, 50, 50, 43, 43, 13, 13, 36, 36, 5, 5, 21, 21, + 51, 51, 29, 29, 6, 6, 44, 44, 14, 14, 6, 6, 37, 37, 52, 52, 22, 22, 7, 7, 30, + 30, 45, 45, 15, 15, 38, 38, 23, 23, 53, 53, 31, 31, 46, 46, 39, 39, 54, 54, + 47, 47, 55, 55, 0, 0, +}; + +DECLARE_ALIGNED(16, static const int16_t, + row_scan_8x8_neighbors[65 * MAX_NEIGHBORS]) = { + 0, 0, 0, 0, 1, 1, 0, 0, 8, 8, 2, 2, 8, 8, 9, 9, 3, 3, 16, 16, 10, 10, 16, 16, + 4, 4, 17, 17, 24, 24, 11, 11, 18, 18, 25, 25, 24, 24, 5, 5, 12, 12, 19, 19, + 32, 32, 26, 26, 6, 6, 33, 33, 32, 32, 20, 20, 27, 27, 40, 40, 13, 13, 34, 34, + 40, 40, 41, 41, 28, 28, 35, 35, 48, 48, 21, 21, 42, 42, 14, 14, 48, 48, 36, + 36, 49, 49, 43, 43, 29, 29, 56, 56, 22, 22, 50, 50, 57, 57, 44, 44, 37, 37, + 51, 51, 30, 30, 58, 58, 52, 52, 45, 45, 59, 59, 38, 38, 60, 60, 46, 46, 53, + 53, 54, 54, 61, 61, 62, 62, 0, 0, +}; + +DECLARE_ALIGNED(16, static const int16_t, + default_scan_8x8_neighbors[65 * MAX_NEIGHBORS]) = { + 0, 0, 0, 0, 0, 0, 8, 8, 1, 8, 1, 1, 9, 16, 16, 16, 2, 9, 2, 2, 10, 17, 17, + 24, 24, 24, 3, 10, 3, 3, 18, 25, 25, 32, 11, 18, 32, 32, 4, 11, 26, 33, 19, + 26, 4, 4, 33, 40, 12, 19, 40, 40, 5, 12, 27, 34, 34, 41, 20, 27, 13, 20, 5, + 5, 41, 48, 48, 48, 28, 35, 35, 42, 21, 28, 6, 6, 6, 13, 42, 49, 49, 56, 36, + 43, 14, 21, 29, 36, 7, 14, 43, 50, 50, 57, 22, 29, 37, 44, 15, 22, 44, 51, + 51, 58, 30, 37, 23, 30, 52, 59, 45, 52, 38, 45, 31, 38, 53, 60, 46, 53, 39, + 46, 54, 61, 47, 54, 55, 62, 0, 0, +}; + +DECLARE_ALIGNED(16, static const int16_t, + col_scan_16x16_neighbors[257 * MAX_NEIGHBORS]) = { + 0, 0, 0, 0, 16, 16, 32, 32, 0, 0, 48, 48, 1, 1, 64, 64, + 17, 17, 80, 80, 33, 33, 1, 1, 49, 49, 96, 96, 2, 2, 65, 65, + 18, 18, 112, 112, 34, 34, 81, 81, 2, 2, 50, 50, 128, 128, 3, 3, + 97, 97, 19, 19, 66, 66, 144, 144, 82, 82, 35, 35, 113, 113, 3, 3, + 51, 51, 160, 160, 4, 4, 98, 98, 129, 129, 67, 67, 20, 20, 83, 83, + 114, 114, 36, 36, 176, 176, 4, 4, 145, 145, 52, 52, 99, 99, 5, 5, + 130, 130, 68, 68, 192, 192, 161, 161, 21, 21, 115, 115, 84, 84, 37, 37, + 146, 146, 208, 208, 53, 53, 5, 5, 100, 100, 177, 177, 131, 131, 69, 69, + 6, 6, 224, 224, 116, 116, 22, 22, 162, 162, 85, 85, 147, 147, 38, 38, + 193, 193, 101, 101, 54, 54, 6, 6, 132, 132, 178, 178, 70, 70, 163, 163, + 209, 209, 7, 7, 117, 117, 23, 23, 148, 148, 7, 7, 86, 86, 194, 194, + 225, 225, 39, 39, 179, 179, 102, 102, 133, 133, 55, 55, 164, 164, 8, 8, + 71, 71, 210, 210, 118, 118, 149, 149, 195, 195, 24, 24, 87, 87, 40, 40, + 56, 56, 
134, 134, 180, 180, 226, 226, 103, 103, 8, 8, 165, 165, 211, 211, + 72, 72, 150, 150, 9, 9, 119, 119, 25, 25, 88, 88, 196, 196, 41, 41, + 135, 135, 181, 181, 104, 104, 57, 57, 227, 227, 166, 166, 120, 120, 151, 151, + 197, 197, 73, 73, 9, 9, 212, 212, 89, 89, 136, 136, 182, 182, 10, 10, + 26, 26, 105, 105, 167, 167, 228, 228, 152, 152, 42, 42, 121, 121, 213, 213, + 58, 58, 198, 198, 74, 74, 137, 137, 183, 183, 168, 168, 10, 10, 90, 90, + 229, 229, 11, 11, 106, 106, 214, 214, 153, 153, 27, 27, 199, 199, 43, 43, + 184, 184, 122, 122, 169, 169, 230, 230, 59, 59, 11, 11, 75, 75, 138, 138, + 200, 200, 215, 215, 91, 91, 12, 12, 28, 28, 185, 185, 107, 107, 154, 154, + 44, 44, 231, 231, 216, 216, 60, 60, 123, 123, 12, 12, 76, 76, 201, 201, + 170, 170, 232, 232, 139, 139, 92, 92, 13, 13, 108, 108, 29, 29, 186, 186, + 217, 217, 155, 155, 45, 45, 13, 13, 61, 61, 124, 124, 14, 14, 233, 233, + 77, 77, 14, 14, 171, 171, 140, 140, 202, 202, 30, 30, 93, 93, 109, 109, + 46, 46, 156, 156, 62, 62, 187, 187, 15, 15, 125, 125, 218, 218, 78, 78, + 31, 31, 172, 172, 47, 47, 141, 141, 94, 94, 234, 234, 203, 203, 63, 63, + 110, 110, 188, 188, 157, 157, 126, 126, 79, 79, 173, 173, 95, 95, 219, 219, + 142, 142, 204, 204, 235, 235, 111, 111, 158, 158, 127, 127, 189, 189, 220, + 220, 143, 143, 174, 174, 205, 205, 236, 236, 159, 159, 190, 190, 221, 221, + 175, 175, 237, 237, 206, 206, 222, 222, 191, 191, 238, 238, 207, 207, 223, + 223, 239, 239, 0, 0, +}; + +DECLARE_ALIGNED(16, static const int16_t, + row_scan_16x16_neighbors[257 * MAX_NEIGHBORS]) = { + 0, 0, 0, 0, 1, 1, 0, 0, 2, 2, 16, 16, 3, 3, 17, 17, + 16, 16, 4, 4, 32, 32, 18, 18, 5, 5, 33, 33, 32, 32, 19, 19, + 48, 48, 6, 6, 34, 34, 20, 20, 49, 49, 48, 48, 7, 7, 35, 35, + 64, 64, 21, 21, 50, 50, 36, 36, 64, 64, 8, 8, 65, 65, 51, 51, + 22, 22, 37, 37, 80, 80, 66, 66, 9, 9, 52, 52, 23, 23, 81, 81, + 67, 67, 80, 80, 38, 38, 10, 10, 53, 53, 82, 82, 96, 96, 68, 68, + 24, 24, 97, 97, 83, 83, 39, 39, 96, 96, 54, 54, 11, 11, 69, 69, + 98, 98, 112, 112, 84, 84, 25, 25, 40, 40, 55, 55, 113, 113, 99, 99, + 12, 12, 70, 70, 112, 112, 85, 85, 26, 26, 114, 114, 100, 100, 128, 128, + 41, 41, 56, 56, 71, 71, 115, 115, 13, 13, 86, 86, 129, 129, 101, 101, + 128, 128, 72, 72, 130, 130, 116, 116, 27, 27, 57, 57, 14, 14, 87, 87, + 42, 42, 144, 144, 102, 102, 131, 131, 145, 145, 117, 117, 73, 73, 144, 144, + 88, 88, 132, 132, 103, 103, 28, 28, 58, 58, 146, 146, 118, 118, 43, 43, + 160, 160, 147, 147, 89, 89, 104, 104, 133, 133, 161, 161, 119, 119, 160, 160, + 74, 74, 134, 134, 148, 148, 29, 29, 59, 59, 162, 162, 176, 176, 44, 44, + 120, 120, 90, 90, 105, 105, 163, 163, 177, 177, 149, 149, 176, 176, 135, 135, + 164, 164, 178, 178, 30, 30, 150, 150, 192, 192, 75, 75, 121, 121, 60, 60, + 136, 136, 193, 193, 106, 106, 151, 151, 179, 179, 192, 192, 45, 45, 165, 165, + 166, 166, 194, 194, 91, 91, 180, 180, 137, 137, 208, 208, 122, 122, 152, 152, + 208, 208, 195, 195, 76, 76, 167, 167, 209, 209, 181, 181, 224, 224, 107, 107, + 196, 196, 61, 61, 153, 153, 224, 224, 182, 182, 168, 168, 210, 210, 46, 46, + 138, 138, 92, 92, 183, 183, 225, 225, 211, 211, 240, 240, 197, 197, 169, 169, + 123, 123, 154, 154, 198, 198, 77, 77, 212, 212, 184, 184, 108, 108, 226, 226, + 199, 199, 62, 62, 227, 227, 241, 241, 139, 139, 213, 213, 170, 170, 185, 185, + 155, 155, 228, 228, 242, 242, 124, 124, 93, 93, 200, 200, 243, 243, 214, 214, + 215, 215, 229, 229, 140, 140, 186, 186, 201, 201, 78, 78, 171, 171, 109, 109, + 156, 156, 244, 244, 216, 216, 230, 230, 94, 94, 245, 245, 231, 231, 125, 125, + 202, 202, 246, 
246, 232, 232, 172, 172, 217, 217, 141, 141, 110, 110, 157, + 157, 187, 187, 247, 247, 126, 126, 233, 233, 218, 218, 248, 248, 188, 188, + 203, 203, 142, 142, 173, 173, 158, 158, 249, 249, 234, 234, 204, 204, 219, + 219, 174, 174, 189, 189, 250, 250, 220, 220, 190, 190, 205, 205, 235, 235, + 206, 206, 236, 236, 251, 251, 221, 221, 252, 252, 222, 222, 237, 237, 238, + 238, 253, 253, 254, 254, 0, 0, +}; + +DECLARE_ALIGNED(16, static const int16_t, + default_scan_16x16_neighbors[257 * MAX_NEIGHBORS]) = { + 0, 0, 0, 0, 0, 0, 16, 16, 1, 16, 1, 1, 32, 32, 17, 32, + 2, 17, 2, 2, 48, 48, 18, 33, 33, 48, 3, 18, 49, 64, 64, 64, + 34, 49, 3, 3, 19, 34, 50, 65, 4, 19, 65, 80, 80, 80, 35, 50, + 4, 4, 20, 35, 66, 81, 81, 96, 51, 66, 96, 96, 5, 20, 36, 51, + 82, 97, 21, 36, 67, 82, 97, 112, 5, 5, 52, 67, 112, 112, 37, 52, + 6, 21, 83, 98, 98, 113, 68, 83, 6, 6, 113, 128, 22, 37, 53, 68, + 84, 99, 99, 114, 128, 128, 114, 129, 69, 84, 38, 53, 7, 22, 7, 7, + 129, 144, 23, 38, 54, 69, 100, 115, 85, 100, 115, 130, 144, 144, 130, 145, + 39, 54, 70, 85, 8, 23, 55, 70, 116, 131, 101, 116, 145, 160, 24, 39, + 8, 8, 86, 101, 131, 146, 160, 160, 146, 161, 71, 86, 40, 55, 9, 24, + 117, 132, 102, 117, 161, 176, 132, 147, 56, 71, 87, 102, 25, 40, 147, 162, + 9, 9, 176, 176, 162, 177, 72, 87, 41, 56, 118, 133, 133, 148, 103, 118, + 10, 25, 148, 163, 57, 72, 88, 103, 177, 192, 26, 41, 163, 178, 192, 192, + 10, 10, 119, 134, 73, 88, 149, 164, 104, 119, 134, 149, 42, 57, 178, 193, + 164, 179, 11, 26, 58, 73, 193, 208, 89, 104, 135, 150, 120, 135, 27, 42, + 74, 89, 208, 208, 150, 165, 179, 194, 165, 180, 105, 120, 194, 209, 43, 58, + 11, 11, 136, 151, 90, 105, 151, 166, 180, 195, 59, 74, 121, 136, 209, 224, + 195, 210, 224, 224, 166, 181, 106, 121, 75, 90, 12, 27, 181, 196, 12, 12, + 210, 225, 152, 167, 167, 182, 137, 152, 28, 43, 196, 211, 122, 137, 91, 106, + 225, 240, 44, 59, 13, 28, 107, 122, 182, 197, 168, 183, 211, 226, 153, 168, + 226, 241, 60, 75, 197, 212, 138, 153, 29, 44, 76, 91, 13, 13, 183, 198, + 123, 138, 45, 60, 212, 227, 198, 213, 154, 169, 169, 184, 227, 242, 92, 107, + 61, 76, 139, 154, 14, 29, 14, 14, 184, 199, 213, 228, 108, 123, 199, 214, + 228, 243, 77, 92, 30, 45, 170, 185, 155, 170, 185, 200, 93, 108, 124, 139, + 214, 229, 46, 61, 200, 215, 229, 244, 15, 30, 109, 124, 62, 77, 140, 155, + 215, 230, 31, 46, 171, 186, 186, 201, 201, 216, 78, 93, 230, 245, 125, 140, + 47, 62, 216, 231, 156, 171, 94, 109, 231, 246, 141, 156, 63, 78, 202, 217, + 187, 202, 110, 125, 217, 232, 172, 187, 232, 247, 79, 94, 157, 172, 126, 141, + 203, 218, 95, 110, 233, 248, 218, 233, 142, 157, 111, 126, 173, 188, 188, 203, + 234, 249, 219, 234, 127, 142, 158, 173, 204, 219, 189, 204, 143, 158, 235, + 250, 174, 189, 205, 220, 159, 174, 220, 235, 221, 236, 175, 190, 190, 205, + 236, 251, 206, 221, 237, 252, 191, 206, 222, 237, 207, 222, 238, 253, 223, + 238, 239, 254, 0, 0, +}; + +DECLARE_ALIGNED(16, static const int16_t, + default_scan_32x32_neighbors[1025 * MAX_NEIGHBORS]) = { + 0, 0, 0, 0, 0, 0, 32, 32, 1, 32, 1, 1, 64, 64, 33, 64, + 2, 33, 96, 96, 2, 2, 65, 96, 34, 65, 128, 128, 97, 128, 3, 34, + 66, 97, 3, 3, 35, 66, 98, 129, 129, 160, 160, 160, 4, 35, 67, 98, + 192, 192, 4, 4, 130, 161, 161, 192, 36, 67, 99, 130, 5, 36, 68, 99, + 193, 224, 162, 193, 224, 224, 131, 162, 37, 68, 100, 131, 5, 5, 194, 225, + 225, 256, 256, 256, 163, 194, 69, 100, 132, 163, 6, 37, 226, 257, 6, 6, + 195, 226, 257, 288, 101, 132, 288, 288, 38, 69, 164, 195, 133, 164, 258, 289, + 227, 258, 196, 227, 7, 38, 289, 320, 70, 101, 320, 320, 7, 7, 165, 
196, + 39, 70, 102, 133, 290, 321, 259, 290, 228, 259, 321, 352, 352, 352, 197, 228, + 134, 165, 71, 102, 8, 39, 322, 353, 291, 322, 260, 291, 103, 134, 353, 384, + 166, 197, 229, 260, 40, 71, 8, 8, 384, 384, 135, 166, 354, 385, 323, 354, + 198, 229, 292, 323, 72, 103, 261, 292, 9, 40, 385, 416, 167, 198, 104, 135, + 230, 261, 355, 386, 416, 416, 293, 324, 324, 355, 9, 9, 41, 72, 386, 417, + 199, 230, 136, 167, 417, 448, 262, 293, 356, 387, 73, 104, 387, 418, 231, 262, + 10, 41, 168, 199, 325, 356, 418, 449, 105, 136, 448, 448, 42, 73, 294, 325, + 200, 231, 10, 10, 357, 388, 137, 168, 263, 294, 388, 419, 74, 105, 419, 450, + 449, 480, 326, 357, 232, 263, 295, 326, 169, 200, 11, 42, 106, 137, 480, 480, + 450, 481, 358, 389, 264, 295, 201, 232, 138, 169, 389, 420, 43, 74, 420, 451, + 327, 358, 11, 11, 481, 512, 233, 264, 451, 482, 296, 327, 75, 106, 170, 201, + 482, 513, 512, 512, 390, 421, 359, 390, 421, 452, 107, 138, 12, 43, 202, 233, + 452, 483, 265, 296, 328, 359, 139, 170, 44, 75, 483, 514, 513, 544, 234, 265, + 297, 328, 422, 453, 12, 12, 391, 422, 171, 202, 76, 107, 514, 545, 453, 484, + 544, 544, 266, 297, 203, 234, 108, 139, 329, 360, 298, 329, 140, 171, 515, + 546, 13, 44, 423, 454, 235, 266, 545, 576, 454, 485, 45, 76, 172, 203, 330, + 361, 576, 576, 13, 13, 267, 298, 546, 577, 77, 108, 204, 235, 455, 486, 577, + 608, 299, 330, 109, 140, 547, 578, 14, 45, 14, 14, 141, 172, 578, 609, 331, + 362, 46, 77, 173, 204, 15, 15, 78, 109, 205, 236, 579, 610, 110, 141, 15, 46, + 142, 173, 47, 78, 174, 205, 16, 16, 79, 110, 206, 237, 16, 47, 111, 142, + 48, 79, 143, 174, 80, 111, 175, 206, 17, 48, 17, 17, 207, 238, 49, 80, + 81, 112, 18, 18, 18, 49, 50, 81, 82, 113, 19, 50, 51, 82, 83, 114, 608, 608, + 484, 515, 360, 391, 236, 267, 112, 143, 19, 19, 640, 640, 609, 640, 516, 547, + 485, 516, 392, 423, 361, 392, 268, 299, 237, 268, 144, 175, 113, 144, 20, 51, + 20, 20, 672, 672, 641, 672, 610, 641, 548, 579, 517, 548, 486, 517, 424, 455, + 393, 424, 362, 393, 300, 331, 269, 300, 238, 269, 176, 207, 145, 176, 114, + 145, 52, 83, 21, 52, 21, 21, 704, 704, 673, 704, 642, 673, 611, 642, 580, + 611, 549, 580, 518, 549, 487, 518, 456, 487, 425, 456, 394, 425, 363, 394, + 332, 363, 301, 332, 270, 301, 239, 270, 208, 239, 177, 208, 146, 177, 115, + 146, 84, 115, 53, 84, 22, 53, 22, 22, 705, 736, 674, 705, 643, 674, 581, 612, + 550, 581, 519, 550, 457, 488, 426, 457, 395, 426, 333, 364, 302, 333, 271, + 302, 209, 240, 178, 209, 147, 178, 85, 116, 54, 85, 23, 54, 706, 737, 675, + 706, 582, 613, 551, 582, 458, 489, 427, 458, 334, 365, 303, 334, 210, 241, + 179, 210, 86, 117, 55, 86, 707, 738, 583, 614, 459, 490, 335, 366, 211, 242, + 87, 118, 736, 736, 612, 643, 488, 519, 364, 395, 240, 271, 116, 147, 23, 23, + 768, 768, 737, 768, 644, 675, 613, 644, 520, 551, 489, 520, 396, 427, 365, + 396, 272, 303, 241, 272, 148, 179, 117, 148, 24, 55, 24, 24, 800, 800, 769, + 800, 738, 769, 676, 707, 645, 676, 614, 645, 552, 583, 521, 552, 490, 521, + 428, 459, 397, 428, 366, 397, 304, 335, 273, 304, 242, 273, 180, 211, 149, + 180, 118, 149, 56, 87, 25, 56, 25, 25, 832, 832, 801, 832, 770, 801, 739, + 770, 708, 739, 677, 708, 646, 677, 615, 646, 584, 615, 553, 584, 522, 553, + 491, 522, 460, 491, 429, 460, 398, 429, 367, 398, 336, 367, 305, 336, 274, + 305, 243, 274, 212, 243, 181, 212, 150, 181, 119, 150, 88, 119, 57, 88, 26, + 57, 26, 26, 833, 864, 802, 833, 771, 802, 709, 740, 678, 709, 647, 678, 585, + 616, 554, 585, 523, 554, 461, 492, 430, 461, 399, 430, 337, 368, 306, 337, + 275, 306, 213, 244, 182, 213, 151, 
182, 89, 120, 58, 89, 27, 58, 834, 865, + 803, 834, 710, 741, 679, 710, 586, 617, 555, 586, 462, 493, 431, 462, 338, + 369, 307, 338, 214, 245, 183, 214, 90, 121, 59, 90, 835, 866, 711, 742, 587, + 618, 463, 494, 339, 370, 215, 246, 91, 122, 864, 864, 740, 771, 616, 647, + 492, 523, 368, 399, 244, 275, 120, 151, 27, 27, 896, 896, 865, 896, 772, 803, + 741, 772, 648, 679, 617, 648, 524, 555, 493, 524, 400, 431, 369, 400, 276, + 307, 245, 276, 152, 183, 121, 152, 28, 59, 28, 28, 928, 928, 897, 928, 866, + 897, 804, 835, 773, 804, 742, 773, 680, 711, 649, 680, 618, 649, 556, 587, + 525, 556, 494, 525, 432, 463, 401, 432, 370, 401, 308, 339, 277, 308, 246, + 277, 184, 215, 153, 184, 122, 153, 60, 91, 29, 60, 29, 29, 960, 960, 929, + 960, 898, 929, 867, 898, 836, 867, 805, 836, 774, 805, 743, 774, 712, 743, + 681, 712, 650, 681, 619, 650, 588, 619, 557, 588, 526, 557, 495, 526, 464, + 495, 433, 464, 402, 433, 371, 402, 340, 371, 309, 340, 278, 309, 247, 278, + 216, 247, 185, 216, 154, 185, 123, 154, 92, 123, 61, 92, 30, 61, 30, 30, + 961, 992, 930, 961, 899, 930, 837, 868, 806, 837, 775, 806, 713, 744, 682, + 713, 651, 682, 589, 620, 558, 589, 527, 558, 465, 496, 434, 465, 403, 434, + 341, 372, 310, 341, 279, 310, 217, 248, 186, 217, 155, 186, 93, 124, 62, 93, + 31, 62, 962, 993, 931, 962, 838, 869, 807, 838, 714, 745, 683, 714, 590, 621, + 559, 590, 466, 497, 435, 466, 342, 373, 311, 342, 218, 249, 187, 218, 94, + 125, 63, 94, 963, 994, 839, 870, 715, 746, 591, 622, 467, 498, 343, 374, 219, + 250, 95, 126, 868, 899, 744, 775, 620, 651, 496, 527, 372, 403, 248, 279, + 124, 155, 900, 931, 869, 900, 776, 807, 745, 776, 652, 683, 621, 652, 528, + 559, 497, 528, 404, 435, 373, 404, 280, 311, 249, 280, 156, 187, 125, 156, + 932, 963, 901, 932, 870, 901, 808, 839, 777, 808, 746, 777, 684, 715, 653, + 684, 622, 653, 560, 591, 529, 560, 498, 529, 436, 467, 405, 436, 374, 405, + 312, 343, 281, 312, 250, 281, 188, 219, 157, 188, 126, 157, 964, 995, 933, + 964, 902, 933, 871, 902, 840, 871, 809, 840, 778, 809, 747, 778, 716, 747, + 685, 716, 654, 685, 623, 654, 592, 623, 561, 592, 530, 561, 499, 530, 468, + 499, 437, 468, 406, 437, 375, 406, 344, 375, 313, 344, 282, 313, 251, 282, + 220, 251, 189, 220, 158, 189, 127, 158, 965, 996, 934, 965, 903, 934, 841, + 872, 810, 841, 779, 810, 717, 748, 686, 717, 655, 686, 593, 624, 562, 593, + 531, 562, 469, 500, 438, 469, 407, 438, 345, 376, 314, 345, 283, 314, 221, + 252, 190, 221, 159, 190, 966, 997, 935, 966, 842, 873, 811, 842, 718, 749, + 687, 718, 594, 625, 563, 594, 470, 501, 439, 470, 346, 377, 315, 346, 222, + 253, 191, 222, 967, 998, 843, 874, 719, 750, 595, 626, 471, 502, 347, 378, + 223, 254, 872, 903, 748, 779, 624, 655, 500, 531, 376, 407, 252, 283, 904, + 935, 873, 904, 780, 811, 749, 780, 656, 687, 625, 656, 532, 563, 501, 532, + 408, 439, 377, 408, 284, 315, 253, 284, 936, 967, 905, 936, 874, 905, 812, + 843, 781, 812, 750, 781, 688, 719, 657, 688, 626, 657, 564, 595, 533, 564, + 502, 533, 440, 471, 409, 440, 378, 409, 316, 347, 285, 316, 254, 285, 968, + 999, 937, 968, 906, 937, 875, 906, 844, 875, 813, 844, 782, 813, 751, 782, + 720, 751, 689, 720, 658, 689, 627, 658, 596, 627, 565, 596, 534, 565, 503, + 534, 472, 503, 441, 472, 410, 441, 379, 410, 348, 379, 317, 348, 286, 317, + 255, 286, 969, 1000, 938, 969, 907, 938, 845, 876, 814, 845, 783, 814, 721, + 752, 690, 721, 659, 690, 597, 628, 566, 597, 535, 566, 473, 504, 442, 473, + 411, 442, 349, 380, 318, 349, 287, 318, 970, 1001, 939, 970, 846, 877, 815, + 846, 722, 753, 691, 722, 598, 629, 
567, 598, 474, 505, 443, 474, 350, 381, + 319, 350, 971, 1002, 847, 878, 723, 754, 599, 630, 475, 506, 351, 382, 876, + 907, 752, 783, 628, 659, 504, 535, 380, 411, 908, 939, 877, 908, 784, 815, + 753, 784, 660, 691, 629, 660, 536, 567, 505, 536, 412, 443, 381, 412, 940, + 971, 909, 940, 878, 909, 816, 847, 785, 816, 754, 785, 692, 723, 661, 692, + 630, 661, 568, 599, 537, 568, 506, 537, 444, 475, 413, 444, 382, 413, 972, + 1003, 941, 972, 910, 941, 879, 910, 848, 879, 817, 848, 786, 817, 755, 786, + 724, 755, 693, 724, 662, 693, 631, 662, 600, 631, 569, 600, 538, 569, 507, + 538, 476, 507, 445, 476, 414, 445, 383, 414, 973, 1004, 942, 973, 911, 942, + 849, 880, 818, 849, 787, 818, 725, 756, 694, 725, 663, 694, 601, 632, 570, + 601, 539, 570, 477, 508, 446, 477, 415, 446, 974, 1005, 943, 974, 850, 881, + 819, 850, 726, 757, 695, 726, 602, 633, 571, 602, 478, 509, 447, 478, 975, + 1006, 851, 882, 727, 758, 603, 634, 479, 510, 880, 911, 756, 787, 632, 663, + 508, 539, 912, 943, 881, 912, 788, 819, 757, 788, 664, 695, 633, 664, 540, + 571, 509, 540, 944, 975, 913, 944, 882, 913, 820, 851, 789, 820, 758, 789, + 696, 727, 665, 696, 634, 665, 572, 603, 541, 572, 510, 541, 976, 1007, 945, + 976, 914, 945, 883, 914, 852, 883, 821, 852, 790, 821, 759, 790, 728, 759, + 697, 728, 666, 697, 635, 666, 604, 635, 573, 604, 542, 573, 511, 542, 977, + 1008, 946, 977, 915, 946, 853, 884, 822, 853, 791, 822, 729, 760, 698, 729, + 667, 698, 605, 636, 574, 605, 543, 574, 978, 1009, 947, 978, 854, 885, 823, + 854, 730, 761, 699, 730, 606, 637, 575, 606, 979, 1010, 855, 886, 731, 762, + 607, 638, 884, 915, 760, 791, 636, 667, 916, 947, 885, 916, 792, 823, 761, + 792, 668, 699, 637, 668, 948, 979, 917, 948, 886, 917, 824, 855, 793, 824, + 762, 793, 700, 731, 669, 700, 638, 669, 980, 1011, 949, 980, 918, 949, 887, + 918, 856, 887, 825, 856, 794, 825, 763, 794, 732, 763, 701, 732, 670, 701, + 639, 670, 981, 1012, 950, 981, 919, 950, 857, 888, 826, 857, 795, 826, 733, + 764, 702, 733, 671, 702, 982, 1013, 951, 982, 858, 889, 827, 858, 734, 765, + 703, 734, 983, 1014, 859, 890, 735, 766, 888, 919, 764, 795, 920, 951, 889, + 920, 796, 827, 765, 796, 952, 983, 921, 952, 890, 921, 828, 859, 797, 828, + 766, 797, 984, 1015, 953, 984, 922, 953, 891, 922, 860, 891, 829, 860, 798, + 829, 767, 798, 985, 1016, 954, 985, 923, 954, 861, 892, 830, 861, 799, 830, + 986, 1017, 955, 986, 862, 893, 831, 862, 987, 1018, 863, 894, 892, 923, 924, + 955, 893, 924, 956, 987, 925, 956, 894, 925, 988, 1019, 957, 988, 926, 957, + 895, 926, 989, 1020, 958, 989, 927, 958, 990, 1021, 959, 990, 991, 1022, 0, 0, +}; + +DECLARE_ALIGNED(16, static const int16_t, vp9_default_iscan_4x4[16]) = { + 0, 2, 5, 8, 1, 3, 9, 12, 4, 7, 11, 14, 6, 10, 13, 15, +}; + +DECLARE_ALIGNED(16, static const int16_t, vp9_col_iscan_4x4[16]) = { + 0, 3, 7, 11, 1, 5, 9, 12, 2, 6, 10, 14, 4, 8, 13, 15, +}; + +DECLARE_ALIGNED(16, static const int16_t, vp9_row_iscan_4x4[16]) = { + 0, 1, 3, 5, 2, 4, 6, 9, 7, 8, 11, 13, 10, 12, 14, 15, +}; + +DECLARE_ALIGNED(16, static const int16_t, vp9_col_iscan_8x8[64]) = { + 0, 3, 8, 15, 22, 32, 40, 47, 1, 5, 11, 18, 26, 34, 44, 51, + 2, 7, 13, 20, 28, 38, 46, 54, 4, 10, 16, 24, 31, 41, 50, 56, + 6, 12, 21, 27, 35, 43, 52, 58, 9, 17, 25, 33, 39, 48, 55, 60, + 14, 23, 30, 37, 45, 53, 59, 62, 19, 29, 36, 42, 49, 57, 61, 63, +}; + +DECLARE_ALIGNED(16, static const int16_t, vp9_row_iscan_8x8[64]) = { + 0, 1, 2, 5, 8, 12, 19, 24, 3, 4, 7, 10, 15, 20, 30, 39, + 6, 9, 13, 16, 21, 27, 37, 46, 11, 14, 17, 23, 28, 34, 44, 52, + 18, 22, 25, 31, 35, 
41, 50, 57, 26, 29, 33, 38, 43, 49, 55, 59, + 32, 36, 42, 47, 51, 54, 60, 61, 40, 45, 48, 53, 56, 58, 62, 63, +}; + +DECLARE_ALIGNED(16, static const int16_t, vp9_default_iscan_8x8[64]) = { + 0, 2, 5, 9, 14, 22, 31, 37, 1, 4, 8, 13, 19, 26, 38, 44, + 3, 6, 10, 17, 24, 30, 42, 49, 7, 11, 15, 21, 29, 36, 47, 53, + 12, 16, 20, 27, 34, 43, 52, 57, 18, 23, 28, 35, 41, 48, 56, 60, + 25, 32, 39, 45, 50, 55, 59, 62, 33, 40, 46, 51, 54, 58, 61, 63, +}; + +DECLARE_ALIGNED(16, static const int16_t, vp9_col_iscan_16x16[256]) = { + 0, 4, 11, 20, 31, 43, 59, 75, 85, 109, 130, 150, 165, 181, 195, 198, + 1, 6, 14, 23, 34, 47, 64, 81, 95, 114, 135, 153, 171, 188, 201, 212, + 2, 8, 16, 25, 38, 52, 67, 83, 101, 116, 136, 157, 172, 190, 205, 216, + 3, 10, 18, 29, 41, 55, 71, 89, 103, 119, 141, 159, 176, 194, 208, 218, + 5, 12, 21, 32, 45, 58, 74, 93, 104, 123, 144, 164, 179, 196, 210, 223, + 7, 15, 26, 37, 49, 63, 78, 96, 112, 129, 146, 166, 182, 200, 215, 228, + 9, 19, 28, 39, 54, 69, 86, 102, 117, 132, 151, 170, 187, 206, 220, 230, + 13, 24, 35, 46, 60, 73, 91, 108, 122, 137, 154, 174, 189, 207, 224, 235, + 17, 30, 40, 53, 66, 82, 98, 115, 126, 142, 161, 180, 197, 213, 227, 237, + 22, 36, 48, 62, 76, 92, 105, 120, 133, 147, 167, 186, 203, 219, 232, 240, + 27, 44, 56, 70, 84, 99, 113, 127, 140, 156, 175, 193, 209, 226, 236, 244, + 33, 51, 68, 79, 94, 110, 125, 138, 149, 162, 184, 202, 217, 229, 241, 247, + 42, 61, 77, 90, 106, 121, 134, 148, 160, 173, 191, 211, 225, 238, 245, 251, + 50, 72, 87, 100, 118, 128, 145, 158, 168, 183, 204, 222, 233, 242, 249, 253, + 57, 80, 97, 111, 131, 143, 155, 169, 178, 192, 214, 231, 239, 246, 250, 254, + 65, 88, 107, 124, 139, 152, 163, 177, 185, 199, 221, 234, 243, 248, 252, 255, +}; + +DECLARE_ALIGNED(16, static const int16_t, vp9_row_iscan_16x16[256]) = { + 0, 1, 2, 4, 6, 9, 12, 17, 22, 29, 36, 43, 54, 64, 76, 86, + 3, 5, 7, 11, 15, 19, 25, 32, 38, 48, 59, 68, 84, 99, 115, 130, + 8, 10, 13, 18, 23, 27, 33, 42, 51, 60, 72, 88, 103, 119, 142, 167, + 14, 16, 20, 26, 31, 37, 44, 53, 61, 73, 85, 100, 116, 135, 161, 185, + 21, 24, 30, 35, 40, 47, 55, 65, 74, 81, 94, 112, 133, 154, 179, 205, + 28, 34, 39, 45, 50, 58, 67, 77, 87, 96, 106, 121, 146, 169, 196, 212, + 41, 46, 49, 56, 63, 70, 79, 90, 98, 107, 122, 138, 159, 182, 207, 222, + 52, 57, 62, 69, 75, 83, 93, 102, 110, 120, 134, 150, 176, 195, 215, 226, + 66, 71, 78, 82, 91, 97, 108, 113, 127, 136, 148, 168, 188, 202, 221, 232, + 80, 89, 92, 101, 105, 114, 125, 131, 139, 151, 162, 177, 192, 208, 223, 234, + 95, 104, 109, 117, 123, 128, 143, 144, 155, 165, 175, 190, 206, 219, 233, 239, + 111, 118, 124, 129, 140, 147, 157, 164, 170, 181, 191, 203, 224, 230, 240, + 243, 126, 132, 137, 145, 153, 160, 174, 178, 184, 197, 204, 216, 231, 237, + 244, 246, 141, 149, 156, 166, 172, 180, 189, 199, 200, 210, 220, 228, 238, + 242, 249, 251, 152, 163, 171, 183, 186, 193, 201, 211, 214, 218, 227, 236, + 245, 247, 252, 253, 158, 173, 187, 194, 198, 209, 213, 217, 225, 229, 235, + 241, 248, 250, 254, 255, +}; + +DECLARE_ALIGNED(16, static const int16_t, vp9_default_iscan_16x16[256]) = { + 0, 2, 5, 9, 17, 24, 36, 44, 55, 72, 88, 104, 128, 143, 166, 179, + 1, 4, 8, 13, 20, 30, 40, 54, 66, 79, 96, 113, 141, 154, 178, 196, + 3, 7, 11, 18, 25, 33, 46, 57, 71, 86, 101, 119, 148, 164, 186, 201, + 6, 12, 16, 23, 31, 39, 53, 64, 78, 92, 110, 127, 153, 169, 193, 208, + 10, 14, 19, 28, 37, 47, 58, 67, 84, 98, 114, 133, 161, 176, 198, 214, + 15, 21, 26, 34, 43, 52, 65, 77, 91, 106, 120, 140, 165, 185, 205, 221, + 22, 27, 32, 41, 48, 60, 73, 85, 99, 116, 
130, 151, 175, 190, 211, 225, + 29, 35, 42, 49, 59, 69, 81, 95, 108, 125, 139, 155, 182, 197, 217, 229, + 38, 45, 51, 61, 68, 80, 93, 105, 118, 134, 150, 168, 191, 207, 223, 234, + 50, 56, 63, 74, 83, 94, 109, 117, 129, 147, 163, 177, 199, 213, 228, 238, + 62, 70, 76, 87, 97, 107, 122, 131, 145, 159, 172, 188, 210, 222, 235, 242, + 75, 82, 90, 102, 112, 124, 138, 146, 157, 173, 187, 202, 219, 230, 240, 245, + 89, 100, 111, 123, 132, 142, 156, 167, 180, 189, 203, 216, 231, 237, 246, 250, + 103, 115, 126, 136, 149, 162, 171, 183, 194, 204, 215, 224, 236, 241, 248, + 252, 121, 135, 144, 158, 170, 181, 192, 200, 209, 218, 227, 233, 243, 244, + 251, 254, 137, 152, 160, 174, 184, 195, 206, 212, 220, 226, 232, 239, 247, + 249, 253, 255, +}; + +DECLARE_ALIGNED(16, static const int16_t, vp9_default_iscan_32x32[1024]) = { + 0, 2, 5, 10, 17, 25, 38, 47, 62, 83, 101, 121, 145, 170, 193, 204, + 210, 219, 229, 233, 245, 257, 275, 299, 342, 356, 377, 405, 455, 471, 495, + 527, 1, 4, 8, 15, 22, 30, 45, 58, 74, 92, 112, 133, 158, 184, 203, 215, 222, + 228, 234, 237, 256, 274, 298, 317, 355, 376, 404, 426, 470, 494, 526, 551, + 3, 7, 12, 18, 28, 36, 52, 64, 82, 102, 118, 142, 164, 189, 208, 217, 224, + 231, 235, 238, 273, 297, 316, 329, 375, 403, 425, 440, 493, 525, 550, 567, + 6, 11, 16, 23, 31, 43, 60, 73, 90, 109, 126, 150, 173, 196, 211, 220, 226, + 232, 236, 239, 296, 315, 328, 335, 402, 424, 439, 447, 524, 549, 566, 575, + 9, 14, 19, 29, 37, 50, 65, 78, 95, 116, 134, 157, 179, 201, 214, 223, 244, + 255, 272, 295, 341, 354, 374, 401, 454, 469, 492, 523, 582, 596, 617, 645, + 13, 20, 26, 35, 44, 54, 72, 85, 105, 123, 140, 163, 182, 205, 216, 225, + 254, 271, 294, 314, 353, 373, 400, 423, 468, 491, 522, 548, 595, 616, 644, + 666, 21, 27, 33, 42, 53, 63, 80, 94, 113, 132, 151, 172, 190, 209, 218, 227, + 270, 293, 313, 327, 372, 399, 422, 438, 490, 521, 547, 565, 615, 643, 665, + 680, 24, 32, 39, 48, 57, 71, 88, 104, 120, 139, 159, 178, 197, 212, 221, 230, + 292, 312, 326, 334, 398, 421, 437, 446, 520, 546, 564, 574, 642, 664, 679, + 687, 34, 40, 46, 56, 68, 81, 96, 111, 130, 147, 167, 186, 243, 253, 269, 291, + 340, 352, 371, 397, 453, 467, 489, 519, 581, 594, 614, 641, 693, 705, 723, + 747, 41, 49, 55, 67, 77, 91, 107, 124, 138, 161, 177, 194, 252, 268, 290, + 311, 351, 370, 396, 420, 466, 488, 518, 545, 593, 613, 640, 663, 704, 722, + 746, 765, 51, 59, 66, 76, 89, 99, 119, 131, 149, 168, 181, 200, 267, 289, + 310, 325, 369, 395, 419, 436, 487, 517, 544, 563, 612, 639, 662, 678, 721, + 745, 764, 777, 61, 69, 75, 87, 100, 114, 129, 144, 162, 180, 191, 207, 288, + 309, 324, 333, 394, 418, 435, 445, 516, 543, 562, 573, 638, 661, 677, 686, + 744, 763, 776, 783, 70, 79, 86, 97, 108, 122, 137, 155, 242, 251, 266, 287, + 339, 350, 368, 393, 452, 465, 486, 515, 580, 592, 611, 637, 692, 703, 720, + 743, 788, 798, 813, 833, 84, 93, 103, 110, 125, 141, 154, 171, 250, 265, 286, + 308, 349, 367, 392, 417, 464, 485, 514, 542, 591, 610, 636, 660, 702, 719, + 742, 762, 797, 812, 832, 848, 98, 106, 115, 127, 143, 156, 169, 185, 264, + 285, 307, 323, 366, 391, 416, 434, 484, 513, 541, 561, 609, 635, 659, 676, + 718, 741, 761, 775, 811, 831, 847, 858, 117, 128, 136, 148, 160, 175, 188, + 198, 284, 306, 322, 332, 390, 415, 433, 444, 512, 540, 560, 572, 634, 658, + 675, 685, 740, 760, 774, 782, 830, 846, 857, 863, 135, 146, 152, 165, 241, + 249, 263, 283, 338, 348, 365, 389, 451, 463, 483, 511, 579, 590, 608, 633, + 691, 701, 717, 739, 787, 796, 810, 829, 867, 875, 887, 903, 153, 166, 174, + 183, 248, 262, 282, 305, 347, 
364, 388, 414, 462, 482, 510, 539, 589, 607, + 632, 657, 700, 716, 738, 759, 795, 809, 828, 845, 874, 886, 902, 915, 176, + 187, 195, 202, 261, 281, 304, 321, 363, 387, 413, 432, 481, 509, 538, 559, + 606, 631, 656, 674, 715, 737, 758, 773, 808, 827, 844, 856, 885, 901, 914, + 923, 192, 199, 206, 213, 280, 303, 320, 331, 386, 412, 431, 443, 508, 537, + 558, 571, 630, 655, 673, 684, 736, 757, 772, 781, 826, 843, 855, 862, 900, + 913, 922, 927, 240, 247, 260, 279, 337, 346, 362, 385, 450, 461, 480, 507, + 578, 588, 605, 629, 690, 699, 714, 735, 786, 794, 807, 825, 866, 873, 884, + 899, 930, 936, 945, 957, 246, 259, 278, 302, 345, 361, 384, 411, 460, 479, + 506, 536, 587, 604, 628, 654, 698, 713, 734, 756, 793, 806, 824, 842, 872, + 883, 898, 912, 935, 944, 956, 966, 258, 277, 301, 319, 360, 383, 410, 430, + 478, 505, 535, 557, 603, 627, 653, 672, 712, 733, 755, 771, 805, 823, 841, + 854, 882, 897, 911, 921, 943, 955, 965, 972, 276, 300, 318, 330, 382, 409, + 429, 442, 504, 534, 556, 570, 626, 652, 671, 683, 732, 754, 770, 780, 822, + 840, 853, 861, 896, 910, 920, 926, 954, 964, 971, 975, 336, 344, 359, 381, + 449, 459, 477, 503, 577, 586, 602, 625, 689, 697, 711, 731, 785, 792, 804, + 821, 865, 871, 881, 895, 929, 934, 942, 953, 977, 981, 987, 995, 343, 358, + 380, 408, 458, 476, 502, 533, 585, 601, 624, 651, 696, 710, 730, 753, 791, + 803, 820, 839, 870, 880, 894, 909, 933, 941, 952, 963, 980, 986, 994, 1001, + 357, 379, 407, 428, 475, 501, 532, 555, 600, 623, 650, 670, 709, 729, 752, + 769, 802, 819, 838, 852, 879, 893, 908, 919, 940, 951, 962, 970, 985, 993, + 1000, 1005, 378, 406, 427, 441, 500, 531, 554, 569, 622, 649, 669, 682, 728, + 751, 768, 779, 818, 837, 851, 860, 892, 907, 918, 925, 950, 961, 969, 974, + 992, 999, 1004, 1007, 448, 457, 474, 499, 576, 584, 599, 621, 688, 695, 708, + 727, 784, 790, 801, 817, 864, 869, 878, 891, 928, 932, 939, 949, 976, 979, + 984, 991, 1008, 1010, 1013, 1017, 456, 473, 498, 530, 583, 598, 620, 648, + 694, 707, 726, 750, 789, 800, 816, 836, 868, 877, 890, 906, 931, 938, 948, + 960, 978, 983, 990, 998, 1009, 1012, 1016, 1020, 472, 497, 529, 553, 597, + 619, 647, 668, 706, 725, 749, 767, 799, 815, 835, 850, 876, 889, 905, 917, + 937, 947, 959, 968, 982, 989, 997, 1003, 1011, 1015, 1019, 1022, 496, 528, + 552, 568, 618, 646, 667, 681, 724, 748, 766, 778, 814, 834, 849, 859, 888, + 904, 916, 924, 946, 958, 967, 973, 988, 996, 1002, 1006, 1014, 1018, 1021, + 1023, +}; + +const scan_order vp9_default_scan_orders[TX_SIZES] = { + {default_scan_4x4, vp9_default_iscan_4x4, default_scan_4x4_neighbors}, + {default_scan_8x8, vp9_default_iscan_8x8, default_scan_8x8_neighbors}, + {default_scan_16x16, vp9_default_iscan_16x16, default_scan_16x16_neighbors}, + {default_scan_32x32, vp9_default_iscan_32x32, default_scan_32x32_neighbors}, +}; + +const scan_order vp9_scan_orders[TX_SIZES][TX_TYPES] = { + { // TX_4X4 + {default_scan_4x4, vp9_default_iscan_4x4, default_scan_4x4_neighbors}, + {row_scan_4x4, vp9_row_iscan_4x4, row_scan_4x4_neighbors}, + {col_scan_4x4, vp9_col_iscan_4x4, col_scan_4x4_neighbors}, + {default_scan_4x4, vp9_default_iscan_4x4, default_scan_4x4_neighbors} + }, { // TX_8X8 + {default_scan_8x8, vp9_default_iscan_8x8, default_scan_8x8_neighbors}, + {row_scan_8x8, vp9_row_iscan_8x8, row_scan_8x8_neighbors}, + {col_scan_8x8, vp9_col_iscan_8x8, col_scan_8x8_neighbors}, + {default_scan_8x8, vp9_default_iscan_8x8, default_scan_8x8_neighbors} + }, { // TX_16X16 + {default_scan_16x16, vp9_default_iscan_16x16, default_scan_16x16_neighbors}, + {row_scan_16x16, 
vp9_row_iscan_16x16, row_scan_16x16_neighbors}, + {col_scan_16x16, vp9_col_iscan_16x16, col_scan_16x16_neighbors}, + {default_scan_16x16, vp9_default_iscan_16x16, default_scan_16x16_neighbors} + }, { // TX_32X32 + {default_scan_32x32, vp9_default_iscan_32x32, default_scan_32x32_neighbors}, + {default_scan_32x32, vp9_default_iscan_32x32, default_scan_32x32_neighbors}, + {default_scan_32x32, vp9_default_iscan_32x32, default_scan_32x32_neighbors}, + {default_scan_32x32, vp9_default_iscan_32x32, default_scan_32x32_neighbors}, + } +}; diff --git a/thirdparty/libvpx/vp9/common/vp9_scan.h b/thirdparty/libvpx/vp9/common/vp9_scan.h new file mode 100644 index 0000000000..4c1ee8107c --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_scan.h @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2013 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_COMMON_VP9_SCAN_H_ +#define VP9_COMMON_VP9_SCAN_H_ + +#include "vpx/vpx_integer.h" +#include "vpx_ports/mem.h" + +#include "vp9/common/vp9_enums.h" +#include "vp9/common/vp9_blockd.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#define MAX_NEIGHBORS 2 + +typedef struct { + const int16_t *scan; + const int16_t *iscan; + const int16_t *neighbors; +} scan_order; + +extern const scan_order vp9_default_scan_orders[TX_SIZES]; +extern const scan_order vp9_scan_orders[TX_SIZES][TX_TYPES]; + +static INLINE int get_coef_context(const int16_t *neighbors, + const uint8_t *token_cache, int c) { + return (1 + token_cache[neighbors[MAX_NEIGHBORS * c + 0]] + + token_cache[neighbors[MAX_NEIGHBORS * c + 1]]) >> 1; +} + +static INLINE const scan_order *get_scan(const MACROBLOCKD *xd, TX_SIZE tx_size, + PLANE_TYPE type, int block_idx) { + const MODE_INFO *const mi = xd->mi[0]; + + if (is_inter_block(mi) || type != PLANE_TYPE_Y || xd->lossless) { + return &vp9_default_scan_orders[tx_size]; + } else { + const PREDICTION_MODE mode = get_y_mode(mi, block_idx); + return &vp9_scan_orders[tx_size][intra_mode_to_tx_type_lookup[mode]]; + } +} + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_COMMON_VP9_SCAN_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_seg_common.c b/thirdparty/libvpx/vp9/common/vp9_seg_common.c new file mode 100644 index 0000000000..7af61629a0 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_seg_common.c @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <assert.h> + +#include "vp9/common/vp9_blockd.h" +#include "vp9/common/vp9_loopfilter.h" +#include "vp9/common/vp9_seg_common.h" +#include "vp9/common/vp9_quant_common.h" + +static const int seg_feature_data_signed[SEG_LVL_MAX] = { 1, 1, 0, 0 }; + +static const int seg_feature_data_max[SEG_LVL_MAX] = { + MAXQ, MAX_LOOP_FILTER, 3, 0 }; + +// These functions provide access to new segment level features. 
+// Eventually these functions may be "optimized out" but for the moment,
+// the coding mechanism is still subject to change so these provide a
+// convenient single point of change.
+
+void vp9_clearall_segfeatures(struct segmentation *seg) {
+  vp9_zero(seg->feature_data);
+  vp9_zero(seg->feature_mask);
+  seg->aq_av_offset = 0;
+}
+
+void vp9_enable_segfeature(struct segmentation *seg, int segment_id,
+                           SEG_LVL_FEATURES feature_id) {
+  seg->feature_mask[segment_id] |= 1 << feature_id;
+}
+
+int vp9_seg_feature_data_max(SEG_LVL_FEATURES feature_id) {
+  return seg_feature_data_max[feature_id];
+}
+
+int vp9_is_segfeature_signed(SEG_LVL_FEATURES feature_id) {
+  return seg_feature_data_signed[feature_id];
+}
+
+void vp9_set_segdata(struct segmentation *seg, int segment_id,
+                     SEG_LVL_FEATURES feature_id, int seg_data) {
+  assert(seg_data <= seg_feature_data_max[feature_id]);
+  if (seg_data < 0) {
+    assert(seg_feature_data_signed[feature_id]);
+    assert(-seg_data <= seg_feature_data_max[feature_id]);
+  }
+
+  seg->feature_data[segment_id][feature_id] = seg_data;
+}
+
+const vpx_tree_index vp9_segment_tree[TREE_SIZE(MAX_SEGMENTS)] = {
+  2, 4, 6, 8, 10, 12,
+  0, -1, -2, -3, -4, -5, -6, -7
+};
+
+
+// TBD? Functions to read and write segment data with range / validity checking
diff --git a/thirdparty/libvpx/vp9/common/vp9_seg_common.h b/thirdparty/libvpx/vp9/common/vp9_seg_common.h
new file mode 100644
index 0000000000..99a9440c17
--- /dev/null
+++ b/thirdparty/libvpx/vp9/common/vp9_seg_common.h
@@ -0,0 +1,87 @@
+/*
+ *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef VP9_COMMON_VP9_SEG_COMMON_H_
+#define VP9_COMMON_VP9_SEG_COMMON_H_
+
+#include "vpx_dsp/prob.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define SEGMENT_DELTADATA 0
+#define SEGMENT_ABSDATA 1
+
+#define MAX_SEGMENTS 8
+#define SEG_TREE_PROBS (MAX_SEGMENTS-1)
+
+#define PREDICTION_PROBS 3
+
+// Segment level features.
+typedef enum {
+  SEG_LVL_ALT_Q = 0,   // Use alternate quantizer
+  SEG_LVL_ALT_LF = 1,  // Use alternate loop filter value
+ SEG_LVL_REF_FRAME = 2, // Optional Segment reference frame + SEG_LVL_SKIP = 3, // Optional Segment (0,0) + skip mode + SEG_LVL_MAX = 4 // Number of features supported +} SEG_LVL_FEATURES; + + +struct segmentation { + uint8_t enabled; + uint8_t update_map; + uint8_t update_data; + uint8_t abs_delta; + uint8_t temporal_update; + + vpx_prob tree_probs[SEG_TREE_PROBS]; + vpx_prob pred_probs[PREDICTION_PROBS]; + + int16_t feature_data[MAX_SEGMENTS][SEG_LVL_MAX]; + uint32_t feature_mask[MAX_SEGMENTS]; + int aq_av_offset; +}; + +static INLINE int segfeature_active(const struct segmentation *seg, + int segment_id, + SEG_LVL_FEATURES feature_id) { + return seg->enabled && + (seg->feature_mask[segment_id] & (1 << feature_id)); +} + +void vp9_clearall_segfeatures(struct segmentation *seg); + +void vp9_enable_segfeature(struct segmentation *seg, + int segment_id, + SEG_LVL_FEATURES feature_id); + +int vp9_seg_feature_data_max(SEG_LVL_FEATURES feature_id); + +int vp9_is_segfeature_signed(SEG_LVL_FEATURES feature_id); + +void vp9_set_segdata(struct segmentation *seg, + int segment_id, + SEG_LVL_FEATURES feature_id, + int seg_data); + +static INLINE int get_segdata(const struct segmentation *seg, int segment_id, + SEG_LVL_FEATURES feature_id) { + return seg->feature_data[segment_id][feature_id]; +} + +extern const vpx_tree_index vp9_segment_tree[TREE_SIZE(MAX_SEGMENTS)]; + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_COMMON_VP9_SEG_COMMON_H_ + diff --git a/thirdparty/libvpx/vp9/common/vp9_thread_common.c b/thirdparty/libvpx/vp9/common/vp9_thread_common.c new file mode 100644 index 0000000000..db78d6be89 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_thread_common.c @@ -0,0 +1,435 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "./vpx_config.h" +#include "vpx_dsp/vpx_dsp_common.h" +#include "vpx_mem/vpx_mem.h" +#include "vp9/common/vp9_entropymode.h" +#include "vp9/common/vp9_thread_common.h" +#include "vp9/common/vp9_reconinter.h" +#include "vp9/common/vp9_loopfilter.h" + +#if CONFIG_MULTITHREAD +static INLINE void mutex_lock(pthread_mutex_t *const mutex) { + const int kMaxTryLocks = 4000; + int locked = 0; + int i; + + for (i = 0; i < kMaxTryLocks; ++i) { + if (!pthread_mutex_trylock(mutex)) { + locked = 1; + break; + } + } + + if (!locked) + pthread_mutex_lock(mutex); +} +#endif // CONFIG_MULTITHREAD + +static INLINE void sync_read(VP9LfSync *const lf_sync, int r, int c) { +#if CONFIG_MULTITHREAD + const int nsync = lf_sync->sync_range; + + if (r && !(c & (nsync - 1))) { + pthread_mutex_t *const mutex = &lf_sync->mutex_[r - 1]; + mutex_lock(mutex); + + while (c > lf_sync->cur_sb_col[r - 1] - nsync) { + pthread_cond_wait(&lf_sync->cond_[r - 1], mutex); + } + pthread_mutex_unlock(mutex); + } +#else + (void)lf_sync; + (void)r; + (void)c; +#endif // CONFIG_MULTITHREAD +} + +static INLINE void sync_write(VP9LfSync *const lf_sync, int r, int c, + const int sb_cols) { +#if CONFIG_MULTITHREAD + const int nsync = lf_sync->sync_range; + int cur; + // Only signal when there are enough filtered SB for next row to run. 
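+  // Progress is published only once per sync_range columns, which keeps
+  // mutex/condition traffic low. At the last superblock of a row,
+  // cur_sb_col is pushed past sb_cols (to sb_cols + nsync) so that any
+  // reader still waiting near the right edge of this row is released
+  // unconditionally.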
+ int sig = 1; + + if (c < sb_cols - 1) { + cur = c; + if (c % nsync) + sig = 0; + } else { + cur = sb_cols + nsync; + } + + if (sig) { + mutex_lock(&lf_sync->mutex_[r]); + + lf_sync->cur_sb_col[r] = cur; + + pthread_cond_signal(&lf_sync->cond_[r]); + pthread_mutex_unlock(&lf_sync->mutex_[r]); + } +#else + (void)lf_sync; + (void)r; + (void)c; + (void)sb_cols; +#endif // CONFIG_MULTITHREAD +} + +// Implement row loopfiltering for each thread. +static INLINE +void thread_loop_filter_rows(const YV12_BUFFER_CONFIG *const frame_buffer, + VP9_COMMON *const cm, + struct macroblockd_plane planes[MAX_MB_PLANE], + int start, int stop, int y_only, + VP9LfSync *const lf_sync) { + const int num_planes = y_only ? 1 : MAX_MB_PLANE; + const int sb_cols = mi_cols_aligned_to_sb(cm->mi_cols) >> MI_BLOCK_SIZE_LOG2; + int mi_row, mi_col; + enum lf_path path; + if (y_only) + path = LF_PATH_444; + else if (planes[1].subsampling_y == 1 && planes[1].subsampling_x == 1) + path = LF_PATH_420; + else if (planes[1].subsampling_y == 0 && planes[1].subsampling_x == 0) + path = LF_PATH_444; + else + path = LF_PATH_SLOW; + + for (mi_row = start; mi_row < stop; + mi_row += lf_sync->num_workers * MI_BLOCK_SIZE) { + MODE_INFO **const mi = cm->mi_grid_visible + mi_row * cm->mi_stride; + LOOP_FILTER_MASK *lfm = get_lfm(&cm->lf, mi_row, 0); + + for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE, ++lfm) { + const int r = mi_row >> MI_BLOCK_SIZE_LOG2; + const int c = mi_col >> MI_BLOCK_SIZE_LOG2; + int plane; + + sync_read(lf_sync, r, c); + + vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col); + + vp9_adjust_mask(cm, mi_row, mi_col, lfm); + + vp9_filter_block_plane_ss00(cm, &planes[0], mi_row, lfm); + for (plane = 1; plane < num_planes; ++plane) { + switch (path) { + case LF_PATH_420: + vp9_filter_block_plane_ss11(cm, &planes[plane], mi_row, lfm); + break; + case LF_PATH_444: + vp9_filter_block_plane_ss00(cm, &planes[plane], mi_row, lfm); + break; + case LF_PATH_SLOW: + vp9_filter_block_plane_non420(cm, &planes[plane], mi + mi_col, + mi_row, mi_col); + break; + } + } + + sync_write(lf_sync, r, c, sb_cols); + } + } +} + +// Row-based multi-threaded loopfilter hook +static int loop_filter_row_worker(VP9LfSync *const lf_sync, + LFWorkerData *const lf_data) { + thread_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes, + lf_data->start, lf_data->stop, lf_data->y_only, + lf_sync); + return 1; +} + +static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, + VP9_COMMON *cm, + struct macroblockd_plane planes[MAX_MB_PLANE], + int start, int stop, int y_only, + VPxWorker *workers, int nworkers, + VP9LfSync *lf_sync) { + const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); + // Number of superblock rows and cols + const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2; + // Decoder may allocate more threads than number of tiles based on user's + // input. + const int tile_cols = 1 << cm->log2_tile_cols; + const int num_workers = VPXMIN(nworkers, tile_cols); + int i; + + if (!lf_sync->sync_range || sb_rows != lf_sync->rows || + num_workers > lf_sync->num_workers) { + vp9_loop_filter_dealloc(lf_sync); + vp9_loop_filter_alloc(lf_sync, cm, sb_rows, cm->width, num_workers); + } + + // Initialize cur_sb_col to -1 for all SB rows. + memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows); + + // Set up loopfilter thread data. 
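+  // Superblock rows are interleaved across workers: worker i starts at
+  // mi row (start + i * MI_BLOCK_SIZE), and thread_loop_filter_rows()
+  // then advances by num_workers * MI_BLOCK_SIZE rows per iteration.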
+ // The decoder is capping num_workers because it has been observed that using + // more threads on the loopfilter than there are cores will hurt performance + // on Android. This is because the system will only schedule the tile decode + // workers on cores equal to the number of tile columns. Then if the decoder + // tries to use more threads for the loopfilter, it will hurt performance + // because of contention. If the multithreading code changes in the future + // then the number of workers used by the loopfilter should be revisited. + for (i = 0; i < num_workers; ++i) { + VPxWorker *const worker = &workers[i]; + LFWorkerData *const lf_data = &lf_sync->lfdata[i]; + + worker->hook = (VPxWorkerHook)loop_filter_row_worker; + worker->data1 = lf_sync; + worker->data2 = lf_data; + + // Loopfilter data + vp9_loop_filter_data_reset(lf_data, frame, cm, planes); + lf_data->start = start + i * MI_BLOCK_SIZE; + lf_data->stop = stop; + lf_data->y_only = y_only; + + // Start loopfiltering + if (i == num_workers - 1) { + winterface->execute(worker); + } else { + winterface->launch(worker); + } + } + + // Wait till all rows are finished + for (i = 0; i < num_workers; ++i) { + winterface->sync(&workers[i]); + } +} + +void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, + VP9_COMMON *cm, + struct macroblockd_plane planes[MAX_MB_PLANE], + int frame_filter_level, + int y_only, int partial_frame, + VPxWorker *workers, int num_workers, + VP9LfSync *lf_sync) { + int start_mi_row, end_mi_row, mi_rows_to_filter; + + if (!frame_filter_level) return; + + start_mi_row = 0; + mi_rows_to_filter = cm->mi_rows; + if (partial_frame && cm->mi_rows > 8) { + start_mi_row = cm->mi_rows >> 1; + start_mi_row &= 0xfffffff8; + mi_rows_to_filter = VPXMAX(cm->mi_rows / 8, 8); + } + end_mi_row = start_mi_row + mi_rows_to_filter; + vp9_loop_filter_frame_init(cm, frame_filter_level); + + loop_filter_rows_mt(frame, cm, planes, start_mi_row, end_mi_row, + y_only, workers, num_workers, lf_sync); +} + +// Set up nsync by width. +static INLINE int get_sync_range(int width) { + // nsync numbers are picked by testing. For example, for 4k + // video, using 4 gives best performance. + if (width < 640) + return 1; + else if (width <= 1280) + return 2; + else if (width <= 4096) + return 4; + else + return 8; +} + +// Allocate memory for lf row synchronization +void vp9_loop_filter_alloc(VP9LfSync *lf_sync, VP9_COMMON *cm, int rows, + int width, int num_workers) { + lf_sync->rows = rows; +#if CONFIG_MULTITHREAD + { + int i; + + CHECK_MEM_ERROR(cm, lf_sync->mutex_, + vpx_malloc(sizeof(*lf_sync->mutex_) * rows)); + if (lf_sync->mutex_) { + for (i = 0; i < rows; ++i) { + pthread_mutex_init(&lf_sync->mutex_[i], NULL); + } + } + + CHECK_MEM_ERROR(cm, lf_sync->cond_, + vpx_malloc(sizeof(*lf_sync->cond_) * rows)); + if (lf_sync->cond_) { + for (i = 0; i < rows; ++i) { + pthread_cond_init(&lf_sync->cond_[i], NULL); + } + } + } +#endif // CONFIG_MULTITHREAD + + CHECK_MEM_ERROR(cm, lf_sync->lfdata, + vpx_malloc(num_workers * sizeof(*lf_sync->lfdata))); + lf_sync->num_workers = num_workers; + + CHECK_MEM_ERROR(cm, lf_sync->cur_sb_col, + vpx_malloc(sizeof(*lf_sync->cur_sb_col) * rows)); + + // Set up nsync. 
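+  // Note that sync_range must remain a power of two: sync_read() masks the
+  // superblock column with (nsync - 1) when deciding whether to take the
+  // row mutex.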
+  lf_sync->sync_range = get_sync_range(width);
+}
+
+// Deallocate lf row synchronization mutexes and data.
+void vp9_loop_filter_dealloc(VP9LfSync *lf_sync) {
+  if (lf_sync != NULL) {
+#if CONFIG_MULTITHREAD
+    int i;
+
+    if (lf_sync->mutex_ != NULL) {
+      for (i = 0; i < lf_sync->rows; ++i) {
+        pthread_mutex_destroy(&lf_sync->mutex_[i]);
+      }
+      vpx_free(lf_sync->mutex_);
+    }
+    if (lf_sync->cond_ != NULL) {
+      for (i = 0; i < lf_sync->rows; ++i) {
+        pthread_cond_destroy(&lf_sync->cond_[i]);
+      }
+      vpx_free(lf_sync->cond_);
+    }
+#endif  // CONFIG_MULTITHREAD
+    vpx_free(lf_sync->lfdata);
+    vpx_free(lf_sync->cur_sb_col);
+    // Clear the structure, as the source of this call may be a resize, in
+    // which case this call will be followed by an _alloc() which may fail.
+    vp9_zero(*lf_sync);
+  }
+}
+
+// Accumulate frame counts.
+void vp9_accumulate_frame_counts(FRAME_COUNTS *accum,
+                                 const FRAME_COUNTS *counts, int is_dec) {
+  int i, j, k, l, m;
+
+  for (i = 0; i < BLOCK_SIZE_GROUPS; i++)
+    for (j = 0; j < INTRA_MODES; j++)
+      accum->y_mode[i][j] += counts->y_mode[i][j];
+
+  for (i = 0; i < INTRA_MODES; i++)
+    for (j = 0; j < INTRA_MODES; j++)
+      accum->uv_mode[i][j] += counts->uv_mode[i][j];
+
+  for (i = 0; i < PARTITION_CONTEXTS; i++)
+    for (j = 0; j < PARTITION_TYPES; j++)
+      accum->partition[i][j] += counts->partition[i][j];
+
+  if (is_dec) {
+    int n;
+    for (i = 0; i < TX_SIZES; i++)
+      for (j = 0; j < PLANE_TYPES; j++)
+        for (k = 0; k < REF_TYPES; k++)
+          for (l = 0; l < COEF_BANDS; l++)
+            for (m = 0; m < COEFF_CONTEXTS; m++) {
+              accum->eob_branch[i][j][k][l][m] +=
+                  counts->eob_branch[i][j][k][l][m];
+              for (n = 0; n < UNCONSTRAINED_NODES + 1; n++)
+                accum->coef[i][j][k][l][m][n] +=
+                    counts->coef[i][j][k][l][m][n];
+            }
+  } else {
+    for (i = 0; i < TX_SIZES; i++)
+      for (j = 0; j < PLANE_TYPES; j++)
+        for (k = 0; k < REF_TYPES; k++)
+          for (l = 0; l < COEF_BANDS; l++)
+            for (m = 0; m < COEFF_CONTEXTS; m++)
+              accum->eob_branch[i][j][k][l][m] +=
+                  counts->eob_branch[i][j][k][l][m];
+    // In the encoder, coef is only updated at frame
+    // level, so there is no need to accumulate it here.
+ // for (n = 0; n < UNCONSTRAINED_NODES + 1; n++) + // accum->coef[i][j][k][l][m][n] += + // counts->coef[i][j][k][l][m][n]; + } + + for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) + for (j = 0; j < SWITCHABLE_FILTERS; j++) + accum->switchable_interp[i][j] += counts->switchable_interp[i][j]; + + for (i = 0; i < INTER_MODE_CONTEXTS; i++) + for (j = 0; j < INTER_MODES; j++) + accum->inter_mode[i][j] += counts->inter_mode[i][j]; + + for (i = 0; i < INTRA_INTER_CONTEXTS; i++) + for (j = 0; j < 2; j++) + accum->intra_inter[i][j] += counts->intra_inter[i][j]; + + for (i = 0; i < COMP_INTER_CONTEXTS; i++) + for (j = 0; j < 2; j++) + accum->comp_inter[i][j] += counts->comp_inter[i][j]; + + for (i = 0; i < REF_CONTEXTS; i++) + for (j = 0; j < 2; j++) + for (k = 0; k < 2; k++) + accum->single_ref[i][j][k] += counts->single_ref[i][j][k]; + + for (i = 0; i < REF_CONTEXTS; i++) + for (j = 0; j < 2; j++) + accum->comp_ref[i][j] += counts->comp_ref[i][j]; + + for (i = 0; i < TX_SIZE_CONTEXTS; i++) { + for (j = 0; j < TX_SIZES; j++) + accum->tx.p32x32[i][j] += counts->tx.p32x32[i][j]; + + for (j = 0; j < TX_SIZES - 1; j++) + accum->tx.p16x16[i][j] += counts->tx.p16x16[i][j]; + + for (j = 0; j < TX_SIZES - 2; j++) + accum->tx.p8x8[i][j] += counts->tx.p8x8[i][j]; + } + + for (i = 0; i < TX_SIZES; i++) + accum->tx.tx_totals[i] += counts->tx.tx_totals[i]; + + for (i = 0; i < SKIP_CONTEXTS; i++) + for (j = 0; j < 2; j++) + accum->skip[i][j] += counts->skip[i][j]; + + for (i = 0; i < MV_JOINTS; i++) + accum->mv.joints[i] += counts->mv.joints[i]; + + for (k = 0; k < 2; k++) { + nmv_component_counts *const comps = &accum->mv.comps[k]; + const nmv_component_counts *const comps_t = &counts->mv.comps[k]; + + for (i = 0; i < 2; i++) { + comps->sign[i] += comps_t->sign[i]; + comps->class0_hp[i] += comps_t->class0_hp[i]; + comps->hp[i] += comps_t->hp[i]; + } + + for (i = 0; i < MV_CLASSES; i++) + comps->classes[i] += comps_t->classes[i]; + + for (i = 0; i < CLASS0_SIZE; i++) { + comps->class0[i] += comps_t->class0[i]; + for (j = 0; j < MV_FP_SIZE; j++) + comps->class0_fp[i][j] += comps_t->class0_fp[i][j]; + } + + for (i = 0; i < MV_OFFSET_BITS; i++) + for (j = 0; j < 2; j++) + comps->bits[i][j] += comps_t->bits[i][j]; + + for (i = 0; i < MV_FP_SIZE; i++) + comps->fp[i] += comps_t->fp[i]; + } +} diff --git a/thirdparty/libvpx/vp9/common/vp9_thread_common.h b/thirdparty/libvpx/vp9/common/vp9_thread_common.h new file mode 100644 index 0000000000..b3b60c253f --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_thread_common.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_COMMON_VP9_THREAD_COMMON_H_ +#define VP9_COMMON_VP9_THREAD_COMMON_H_ +#include "./vpx_config.h" +#include "vp9/common/vp9_loopfilter.h" +#include "vpx_util/vpx_thread.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct VP9Common; +struct FRAME_COUNTS; + +// Loopfilter row synchronization +typedef struct VP9LfSyncData { +#if CONFIG_MULTITHREAD + pthread_mutex_t *mutex_; + pthread_cond_t *cond_; +#endif + // Allocate memory to store the loop-filtered superblock index in each row. 
+ int *cur_sb_col; + // The optimal sync_range for different resolution and platform should be + // determined by testing. Currently, it is chosen to be a power-of-2 number. + int sync_range; + int rows; + + // Row-based parallel loopfilter data + LFWorkerData *lfdata; + int num_workers; +} VP9LfSync; + +// Allocate memory for loopfilter row synchronization. +void vp9_loop_filter_alloc(VP9LfSync *lf_sync, struct VP9Common *cm, int rows, + int width, int num_workers); + +// Deallocate loopfilter synchronization related mutex and data. +void vp9_loop_filter_dealloc(VP9LfSync *lf_sync); + +// Multi-threaded loopfilter that uses the tile threads. +void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, + struct VP9Common *cm, + struct macroblockd_plane planes[MAX_MB_PLANE], + int frame_filter_level, + int y_only, int partial_frame, + VPxWorker *workers, int num_workers, + VP9LfSync *lf_sync); + +void vp9_accumulate_frame_counts(struct FRAME_COUNTS *accum, + const struct FRAME_COUNTS *counts, int is_dec); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_COMMON_VP9_THREAD_COMMON_H_ diff --git a/thirdparty/libvpx/vp9/common/vp9_tile_common.c b/thirdparty/libvpx/vp9/common/vp9_tile_common.c new file mode 100644 index 0000000000..9fcb97c854 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_tile_common.c @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "vp9/common/vp9_tile_common.h" +#include "vp9/common/vp9_onyxc_int.h" +#include "vpx_dsp/vpx_dsp_common.h" + +#define MIN_TILE_WIDTH_B64 4 +#define MAX_TILE_WIDTH_B64 64 + +static int get_tile_offset(int idx, int mis, int log2) { + const int sb_cols = mi_cols_aligned_to_sb(mis) >> MI_BLOCK_SIZE_LOG2; + const int offset = ((idx * sb_cols) >> log2) << MI_BLOCK_SIZE_LOG2; + return VPXMIN(offset, mis); +} + +void vp9_tile_set_row(TileInfo *tile, const VP9_COMMON *cm, int row) { + tile->mi_row_start = get_tile_offset(row, cm->mi_rows, cm->log2_tile_rows); + tile->mi_row_end = get_tile_offset(row + 1, cm->mi_rows, cm->log2_tile_rows); +} + +void vp9_tile_set_col(TileInfo *tile, const VP9_COMMON *cm, int col) { + tile->mi_col_start = get_tile_offset(col, cm->mi_cols, cm->log2_tile_cols); + tile->mi_col_end = get_tile_offset(col + 1, cm->mi_cols, cm->log2_tile_cols); +} + +void vp9_tile_init(TileInfo *tile, const VP9_COMMON *cm, int row, int col) { + vp9_tile_set_row(tile, cm, row); + vp9_tile_set_col(tile, cm, col); +} + +static int get_min_log2_tile_cols(const int sb64_cols) { + int min_log2 = 0; + while ((MAX_TILE_WIDTH_B64 << min_log2) < sb64_cols) + ++min_log2; + return min_log2; +} + +static int get_max_log2_tile_cols(const int sb64_cols) { + int max_log2 = 1; + while ((sb64_cols >> max_log2) >= MIN_TILE_WIDTH_B64) + ++max_log2; + return max_log2 - 1; +} + +void vp9_get_tile_n_bits(int mi_cols, + int *min_log2_tile_cols, int *max_log2_tile_cols) { + const int sb64_cols = mi_cols_aligned_to_sb(mi_cols) >> MI_BLOCK_SIZE_LOG2; + *min_log2_tile_cols = get_min_log2_tile_cols(sb64_cols); + *max_log2_tile_cols = get_max_log2_tile_cols(sb64_cols); + assert(*min_log2_tile_cols <= *max_log2_tile_cols); +} diff --git 
a/thirdparty/libvpx/vp9/common/vp9_tile_common.h b/thirdparty/libvpx/vp9/common/vp9_tile_common.h new file mode 100644 index 0000000000..ae58805de1 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/vp9_tile_common.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_COMMON_VP9_TILE_COMMON_H_ +#define VP9_COMMON_VP9_TILE_COMMON_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +struct VP9Common; + +typedef struct TileInfo { + int mi_row_start, mi_row_end; + int mi_col_start, mi_col_end; +} TileInfo; + +// initializes 'tile->mi_(row|col)_(start|end)' for (row, col) based on +// 'cm->log2_tile_(rows|cols)' & 'cm->mi_(rows|cols)' +void vp9_tile_init(TileInfo *tile, const struct VP9Common *cm, + int row, int col); + +void vp9_tile_set_row(TileInfo *tile, const struct VP9Common *cm, int row); +void vp9_tile_set_col(TileInfo *tile, const struct VP9Common *cm, int col); + +void vp9_get_tile_n_bits(int mi_cols, + int *min_log2_tile_cols, int *max_log2_tile_cols); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_COMMON_VP9_TILE_COMMON_H_ diff --git a/thirdparty/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c b/thirdparty/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c new file mode 100644 index 0000000000..1c77b57ff1 --- /dev/null +++ b/thirdparty/libvpx/vp9/common/x86/vp9_idct_intrin_sse2.c @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2012 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "./vp9_rtcd.h" +#include "vpx_dsp/x86/inv_txfm_sse2.h" +#include "vpx_dsp/x86/txfm_common_sse2.h" +#include "vpx_ports/mem.h" + +void vp9_iht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int stride, + int tx_type) { + __m128i in[2]; + const __m128i zero = _mm_setzero_si128(); + const __m128i eight = _mm_set1_epi16(8); + + in[0] = load_input_data(input); + in[1] = load_input_data(input + 8); + + switch (tx_type) { + case 0: // DCT_DCT + idct4_sse2(in); + idct4_sse2(in); + break; + case 1: // ADST_DCT + idct4_sse2(in); + iadst4_sse2(in); + break; + case 2: // DCT_ADST + iadst4_sse2(in); + idct4_sse2(in); + break; + case 3: // ADST_ADST + iadst4_sse2(in); + iadst4_sse2(in); + break; + default: + assert(0); + break; + } + + // Final round and shift + in[0] = _mm_add_epi16(in[0], eight); + in[1] = _mm_add_epi16(in[1], eight); + + in[0] = _mm_srai_epi16(in[0], 4); + in[1] = _mm_srai_epi16(in[1], 4); + + // Reconstruction and Store + { + __m128i d0 = _mm_cvtsi32_si128(*(const int *)(dest)); + __m128i d2 = _mm_cvtsi32_si128(*(const int *)(dest + stride * 2)); + d0 = _mm_unpacklo_epi32(d0, + _mm_cvtsi32_si128(*(const int *)(dest + stride))); + d2 = _mm_unpacklo_epi32( + d2, _mm_cvtsi32_si128(*(const int *)(dest + stride * 3))); + d0 = _mm_unpacklo_epi8(d0, zero); + d2 = _mm_unpacklo_epi8(d2, zero); + d0 = _mm_add_epi16(d0, in[0]); + d2 = _mm_add_epi16(d2, in[1]); + d0 = _mm_packus_epi16(d0, d2); + // store result[0] + *(int *)dest = _mm_cvtsi128_si32(d0); + // store result[1] + d0 = _mm_srli_si128(d0, 4); + *(int *)(dest + stride) = _mm_cvtsi128_si32(d0); + // store result[2] + d0 = _mm_srli_si128(d0, 4); + *(int *)(dest + stride * 2) = _mm_cvtsi128_si32(d0); + // store result[3] + d0 = _mm_srli_si128(d0, 4); + *(int *)(dest + stride * 3) = _mm_cvtsi128_si32(d0); + } +} + +void vp9_iht8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, int stride, + int tx_type) { + __m128i in[8]; + const __m128i zero = _mm_setzero_si128(); + const __m128i final_rounding = _mm_set1_epi16(1 << 4); + + // load input data + in[0] = load_input_data(input); + in[1] = load_input_data(input + 8 * 1); + in[2] = load_input_data(input + 8 * 2); + in[3] = load_input_data(input + 8 * 3); + in[4] = load_input_data(input + 8 * 4); + in[5] = load_input_data(input + 8 * 5); + in[6] = load_input_data(input + 8 * 6); + in[7] = load_input_data(input + 8 * 7); + + switch (tx_type) { + case 0: // DCT_DCT + idct8_sse2(in); + idct8_sse2(in); + break; + case 1: // ADST_DCT + idct8_sse2(in); + iadst8_sse2(in); + break; + case 2: // DCT_ADST + iadst8_sse2(in); + idct8_sse2(in); + break; + case 3: // ADST_ADST + iadst8_sse2(in); + iadst8_sse2(in); + break; + default: + assert(0); + break; + } + + // Final rounding and shift + in[0] = _mm_adds_epi16(in[0], final_rounding); + in[1] = _mm_adds_epi16(in[1], final_rounding); + in[2] = _mm_adds_epi16(in[2], final_rounding); + in[3] = _mm_adds_epi16(in[3], final_rounding); + in[4] = _mm_adds_epi16(in[4], final_rounding); + in[5] = _mm_adds_epi16(in[5], final_rounding); + in[6] = _mm_adds_epi16(in[6], final_rounding); + in[7] = _mm_adds_epi16(in[7], final_rounding); + + in[0] = _mm_srai_epi16(in[0], 5); + in[1] = _mm_srai_epi16(in[1], 5); + in[2] = _mm_srai_epi16(in[2], 5); + in[3] = _mm_srai_epi16(in[3], 5); + in[4] = _mm_srai_epi16(in[4], 5); + in[5] = _mm_srai_epi16(in[5], 5); + in[6] = _mm_srai_epi16(in[6], 5); + in[7] = _mm_srai_epi16(in[7], 5); + + RECON_AND_STORE(dest + 0 * stride, in[0]); + RECON_AND_STORE(dest + 1 * stride, in[1]); + RECON_AND_STORE(dest + 2 * 
stride, in[2]); + RECON_AND_STORE(dest + 3 * stride, in[3]); + RECON_AND_STORE(dest + 4 * stride, in[4]); + RECON_AND_STORE(dest + 5 * stride, in[5]); + RECON_AND_STORE(dest + 6 * stride, in[6]); + RECON_AND_STORE(dest + 7 * stride, in[7]); +} + +void vp9_iht16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest, + int stride, int tx_type) { + __m128i in0[16], in1[16]; + + load_buffer_8x16(input, in0); + input += 8; + load_buffer_8x16(input, in1); + + switch (tx_type) { + case 0: // DCT_DCT + idct16_sse2(in0, in1); + idct16_sse2(in0, in1); + break; + case 1: // ADST_DCT + idct16_sse2(in0, in1); + iadst16_sse2(in0, in1); + break; + case 2: // DCT_ADST + iadst16_sse2(in0, in1); + idct16_sse2(in0, in1); + break; + case 3: // ADST_ADST + iadst16_sse2(in0, in1); + iadst16_sse2(in0, in1); + break; + default: + assert(0); + break; + } + + write_buffer_8x16(dest, in0, stride); + dest += 8; + write_buffer_8x16(dest, in1, stride); +} diff --git a/thirdparty/libvpx/vp9/decoder/vp9_decodeframe.c b/thirdparty/libvpx/vp9/decoder/vp9_decodeframe.c new file mode 100644 index 0000000000..d63912932c --- /dev/null +++ b/thirdparty/libvpx/vp9/decoder/vp9_decodeframe.c @@ -0,0 +1,2271 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <assert.h> +#include <stdlib.h> // qsort() + +#include "./vp9_rtcd.h" +#include "./vpx_dsp_rtcd.h" +#include "./vpx_scale_rtcd.h" + +#include "vpx_dsp/bitreader_buffer.h" +#include "vpx_dsp/bitreader.h" +#include "vpx_dsp/vpx_dsp_common.h" +#include "vpx_mem/vpx_mem.h" +#include "vpx_ports/mem.h" +#include "vpx_ports/mem_ops.h" +#include "vpx_scale/vpx_scale.h" +#include "vpx_util/vpx_thread.h" + +#include "vp9/common/vp9_alloccommon.h" +#include "vp9/common/vp9_common.h" +#include "vp9/common/vp9_entropy.h" +#include "vp9/common/vp9_entropymode.h" +#include "vp9/common/vp9_idct.h" +#include "vp9/common/vp9_thread_common.h" +#include "vp9/common/vp9_pred_common.h" +#include "vp9/common/vp9_quant_common.h" +#include "vp9/common/vp9_reconintra.h" +#include "vp9/common/vp9_reconinter.h" +#include "vp9/common/vp9_seg_common.h" +#include "vp9/common/vp9_tile_common.h" + +#include "vp9/decoder/vp9_decodeframe.h" +#include "vp9/decoder/vp9_detokenize.h" +#include "vp9/decoder/vp9_decodemv.h" +#include "vp9/decoder/vp9_decoder.h" +#include "vp9/decoder/vp9_dsubexp.h" + +#define MAX_VP9_HEADER_SIZE 80 + +static int is_compound_reference_allowed(const VP9_COMMON *cm) { + int i; + for (i = 1; i < REFS_PER_FRAME; ++i) + if (cm->ref_frame_sign_bias[i + 1] != cm->ref_frame_sign_bias[1]) + return 1; + + return 0; +} + +static void setup_compound_reference_mode(VP9_COMMON *cm) { + if (cm->ref_frame_sign_bias[LAST_FRAME] == + cm->ref_frame_sign_bias[GOLDEN_FRAME]) { + cm->comp_fixed_ref = ALTREF_FRAME; + cm->comp_var_ref[0] = LAST_FRAME; + cm->comp_var_ref[1] = GOLDEN_FRAME; + } else if (cm->ref_frame_sign_bias[LAST_FRAME] == + cm->ref_frame_sign_bias[ALTREF_FRAME]) { + cm->comp_fixed_ref = GOLDEN_FRAME; + cm->comp_var_ref[0] = LAST_FRAME; + cm->comp_var_ref[1] = ALTREF_FRAME; + } else { + cm->comp_fixed_ref = LAST_FRAME; + cm->comp_var_ref[0] = GOLDEN_FRAME; + cm->comp_var_ref[1] = ALTREF_FRAME; + } +} + +static int 
read_is_valid(const uint8_t *start, size_t len, const uint8_t *end) { + return len != 0 && len <= (size_t)(end - start); +} + +static int decode_unsigned_max(struct vpx_read_bit_buffer *rb, int max) { + const int data = vpx_rb_read_literal(rb, get_unsigned_bits(max)); + return data > max ? max : data; +} + +static TX_MODE read_tx_mode(vpx_reader *r) { + TX_MODE tx_mode = vpx_read_literal(r, 2); + if (tx_mode == ALLOW_32X32) + tx_mode += vpx_read_bit(r); + return tx_mode; +} + +static void read_tx_mode_probs(struct tx_probs *tx_probs, vpx_reader *r) { + int i, j; + + for (i = 0; i < TX_SIZE_CONTEXTS; ++i) + for (j = 0; j < TX_SIZES - 3; ++j) + vp9_diff_update_prob(r, &tx_probs->p8x8[i][j]); + + for (i = 0; i < TX_SIZE_CONTEXTS; ++i) + for (j = 0; j < TX_SIZES - 2; ++j) + vp9_diff_update_prob(r, &tx_probs->p16x16[i][j]); + + for (i = 0; i < TX_SIZE_CONTEXTS; ++i) + for (j = 0; j < TX_SIZES - 1; ++j) + vp9_diff_update_prob(r, &tx_probs->p32x32[i][j]); +} + +static void read_switchable_interp_probs(FRAME_CONTEXT *fc, vpx_reader *r) { + int i, j; + for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; ++j) + for (i = 0; i < SWITCHABLE_FILTERS - 1; ++i) + vp9_diff_update_prob(r, &fc->switchable_interp_prob[j][i]); +} + +static void read_inter_mode_probs(FRAME_CONTEXT *fc, vpx_reader *r) { + int i, j; + for (i = 0; i < INTER_MODE_CONTEXTS; ++i) + for (j = 0; j < INTER_MODES - 1; ++j) + vp9_diff_update_prob(r, &fc->inter_mode_probs[i][j]); +} + +static REFERENCE_MODE read_frame_reference_mode(const VP9_COMMON *cm, + vpx_reader *r) { + if (is_compound_reference_allowed(cm)) { + return vpx_read_bit(r) ? (vpx_read_bit(r) ? REFERENCE_MODE_SELECT + : COMPOUND_REFERENCE) + : SINGLE_REFERENCE; + } else { + return SINGLE_REFERENCE; + } +} + +static void read_frame_reference_mode_probs(VP9_COMMON *cm, vpx_reader *r) { + FRAME_CONTEXT *const fc = cm->fc; + int i; + + if (cm->reference_mode == REFERENCE_MODE_SELECT) + for (i = 0; i < COMP_INTER_CONTEXTS; ++i) + vp9_diff_update_prob(r, &fc->comp_inter_prob[i]); + + if (cm->reference_mode != COMPOUND_REFERENCE) + for (i = 0; i < REF_CONTEXTS; ++i) { + vp9_diff_update_prob(r, &fc->single_ref_prob[i][0]); + vp9_diff_update_prob(r, &fc->single_ref_prob[i][1]); + } + + if (cm->reference_mode != SINGLE_REFERENCE) + for (i = 0; i < REF_CONTEXTS; ++i) + vp9_diff_update_prob(r, &fc->comp_ref_prob[i]); +} + +static void update_mv_probs(vpx_prob *p, int n, vpx_reader *r) { + int i; + for (i = 0; i < n; ++i) + if (vpx_read(r, MV_UPDATE_PROB)) + p[i] = (vpx_read_literal(r, 7) << 1) | 1; +} + +static void read_mv_probs(nmv_context *ctx, int allow_hp, vpx_reader *r) { + int i, j; + + update_mv_probs(ctx->joints, MV_JOINTS - 1, r); + + for (i = 0; i < 2; ++i) { + nmv_component *const comp_ctx = &ctx->comps[i]; + update_mv_probs(&comp_ctx->sign, 1, r); + update_mv_probs(comp_ctx->classes, MV_CLASSES - 1, r); + update_mv_probs(comp_ctx->class0, CLASS0_SIZE - 1, r); + update_mv_probs(comp_ctx->bits, MV_OFFSET_BITS, r); + } + + for (i = 0; i < 2; ++i) { + nmv_component *const comp_ctx = &ctx->comps[i]; + for (j = 0; j < CLASS0_SIZE; ++j) + update_mv_probs(comp_ctx->class0_fp[j], MV_FP_SIZE - 1, r); + update_mv_probs(comp_ctx->fp, 3, r); + } + + if (allow_hp) { + for (i = 0; i < 2; ++i) { + nmv_component *const comp_ctx = &ctx->comps[i]; + update_mv_probs(&comp_ctx->class0_hp, 1, r); + update_mv_probs(&comp_ctx->hp, 1, r); + } + } +} + +static void inverse_transform_block_inter(MACROBLOCKD* xd, int plane, + const TX_SIZE tx_size, + uint8_t *dst, int stride, + int eob) { + struct 
macroblockd_plane *const pd = &xd->plane[plane]; + tran_low_t *const dqcoeff = pd->dqcoeff; + assert(eob > 0); +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + if (xd->lossless) { + vp9_highbd_iwht4x4_add(dqcoeff, dst, stride, eob, xd->bd); + } else { + switch (tx_size) { + case TX_4X4: + vp9_highbd_idct4x4_add(dqcoeff, dst, stride, eob, xd->bd); + break; + case TX_8X8: + vp9_highbd_idct8x8_add(dqcoeff, dst, stride, eob, xd->bd); + break; + case TX_16X16: + vp9_highbd_idct16x16_add(dqcoeff, dst, stride, eob, xd->bd); + break; + case TX_32X32: + vp9_highbd_idct32x32_add(dqcoeff, dst, stride, eob, xd->bd); + break; + default: + assert(0 && "Invalid transform size"); + } + } + } else { + if (xd->lossless) { + vp9_iwht4x4_add(dqcoeff, dst, stride, eob); + } else { + switch (tx_size) { + case TX_4X4: + vp9_idct4x4_add(dqcoeff, dst, stride, eob); + break; + case TX_8X8: + vp9_idct8x8_add(dqcoeff, dst, stride, eob); + break; + case TX_16X16: + vp9_idct16x16_add(dqcoeff, dst, stride, eob); + break; + case TX_32X32: + vp9_idct32x32_add(dqcoeff, dst, stride, eob); + break; + default: + assert(0 && "Invalid transform size"); + return; + } + } + } +#else + if (xd->lossless) { + vp9_iwht4x4_add(dqcoeff, dst, stride, eob); + } else { + switch (tx_size) { + case TX_4X4: + vp9_idct4x4_add(dqcoeff, dst, stride, eob); + break; + case TX_8X8: + vp9_idct8x8_add(dqcoeff, dst, stride, eob); + break; + case TX_16X16: + vp9_idct16x16_add(dqcoeff, dst, stride, eob); + break; + case TX_32X32: + vp9_idct32x32_add(dqcoeff, dst, stride, eob); + break; + default: + assert(0 && "Invalid transform size"); + return; + } + } +#endif // CONFIG_VP9_HIGHBITDEPTH + + if (eob == 1) { + dqcoeff[0] = 0; + } else { + if (tx_size <= TX_16X16 && eob <= 10) + memset(dqcoeff, 0, 4 * (4 << tx_size) * sizeof(dqcoeff[0])); + else if (tx_size == TX_32X32 && eob <= 34) + memset(dqcoeff, 0, 256 * sizeof(dqcoeff[0])); + else + memset(dqcoeff, 0, (16 << (tx_size << 1)) * sizeof(dqcoeff[0])); + } +} + +static void inverse_transform_block_intra(MACROBLOCKD* xd, int plane, + const TX_TYPE tx_type, + const TX_SIZE tx_size, + uint8_t *dst, int stride, + int eob) { + struct macroblockd_plane *const pd = &xd->plane[plane]; + tran_low_t *const dqcoeff = pd->dqcoeff; + assert(eob > 0); +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + if (xd->lossless) { + vp9_highbd_iwht4x4_add(dqcoeff, dst, stride, eob, xd->bd); + } else { + switch (tx_size) { + case TX_4X4: + vp9_highbd_iht4x4_add(tx_type, dqcoeff, dst, stride, eob, xd->bd); + break; + case TX_8X8: + vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst, stride, eob, xd->bd); + break; + case TX_16X16: + vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst, stride, eob, xd->bd); + break; + case TX_32X32: + vp9_highbd_idct32x32_add(dqcoeff, dst, stride, eob, xd->bd); + break; + default: + assert(0 && "Invalid transform size"); + } + } + } else { + if (xd->lossless) { + vp9_iwht4x4_add(dqcoeff, dst, stride, eob); + } else { + switch (tx_size) { + case TX_4X4: + vp9_iht4x4_add(tx_type, dqcoeff, dst, stride, eob); + break; + case TX_8X8: + vp9_iht8x8_add(tx_type, dqcoeff, dst, stride, eob); + break; + case TX_16X16: + vp9_iht16x16_add(tx_type, dqcoeff, dst, stride, eob); + break; + case TX_32X32: + vp9_idct32x32_add(dqcoeff, dst, stride, eob); + break; + default: + assert(0 && "Invalid transform size"); + return; + } + } + } +#else + if (xd->lossless) { + vp9_iwht4x4_add(dqcoeff, dst, stride, eob); + } else { + switch (tx_size) { + case TX_4X4: + 
vp9_iht4x4_add(tx_type, dqcoeff, dst, stride, eob); + break; + case TX_8X8: + vp9_iht8x8_add(tx_type, dqcoeff, dst, stride, eob); + break; + case TX_16X16: + vp9_iht16x16_add(tx_type, dqcoeff, dst, stride, eob); + break; + case TX_32X32: + vp9_idct32x32_add(dqcoeff, dst, stride, eob); + break; + default: + assert(0 && "Invalid transform size"); + return; + } + } +#endif // CONFIG_VP9_HIGHBITDEPTH + + if (eob == 1) { + dqcoeff[0] = 0; + } else { + if (tx_type == DCT_DCT && tx_size <= TX_16X16 && eob <= 10) + memset(dqcoeff, 0, 4 * (4 << tx_size) * sizeof(dqcoeff[0])); + else if (tx_size == TX_32X32 && eob <= 34) + memset(dqcoeff, 0, 256 * sizeof(dqcoeff[0])); + else + memset(dqcoeff, 0, (16 << (tx_size << 1)) * sizeof(dqcoeff[0])); + } +} + +static void predict_and_reconstruct_intra_block(MACROBLOCKD *const xd, + vpx_reader *r, + MODE_INFO *const mi, + int plane, + int row, int col, + TX_SIZE tx_size) { + struct macroblockd_plane *const pd = &xd->plane[plane]; + PREDICTION_MODE mode = (plane == 0) ? mi->mode : mi->uv_mode; + uint8_t *dst; + dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col]; + + if (mi->sb_type < BLOCK_8X8) + if (plane == 0) + mode = xd->mi[0]->bmi[(row << 1) + col].as_mode; + + vp9_predict_intra_block(xd, pd->n4_wl, tx_size, mode, + dst, pd->dst.stride, dst, pd->dst.stride, + col, row, plane); + + if (!mi->skip) { + const TX_TYPE tx_type = (plane || xd->lossless) ? + DCT_DCT : intra_mode_to_tx_type_lookup[mode]; + const scan_order *sc = (plane || xd->lossless) ? + &vp9_default_scan_orders[tx_size] : &vp9_scan_orders[tx_size][tx_type]; + const int eob = vp9_decode_block_tokens(xd, plane, sc, col, row, tx_size, + r, mi->segment_id); + if (eob > 0) { + inverse_transform_block_intra(xd, plane, tx_type, tx_size, + dst, pd->dst.stride, eob); + } + } +} + +static int reconstruct_inter_block(MACROBLOCKD *const xd, vpx_reader *r, + MODE_INFO *const mi, int plane, + int row, int col, TX_SIZE tx_size) { + struct macroblockd_plane *const pd = &xd->plane[plane]; + const scan_order *sc = &vp9_default_scan_orders[tx_size]; + const int eob = vp9_decode_block_tokens(xd, plane, sc, col, row, tx_size, r, + mi->segment_id); + + if (eob > 0) { + inverse_transform_block_inter( + xd, plane, tx_size, &pd->dst.buf[4 * row * pd->dst.stride + 4 * col], + pd->dst.stride, eob); + } + return eob; +} + +static void build_mc_border(const uint8_t *src, int src_stride, + uint8_t *dst, int dst_stride, + int x, int y, int b_w, int b_h, int w, int h) { + // Get a pointer to the start of the real data for this row. + const uint8_t *ref_row = src - x - y * src_stride; + + if (y >= h) + ref_row += (h - 1) * src_stride; + else if (y > 0) + ref_row += y * src_stride; + + do { + int right = 0, copy; + int left = x < 0 ? -x : 0; + + if (left > b_w) + left = b_w; + + if (x + b_w > w) + right = x + b_w - w; + + if (right > b_w) + right = b_w; + + copy = b_w - left - right; + + if (left) + memset(dst, ref_row[0], left); + + if (copy) + memcpy(dst + left, ref_row + x + left, copy); + + if (right) + memset(dst + left + copy, ref_row[w - 1], right); + + dst += dst_stride; + ++y; + + if (y > 0 && y < h) + ref_row += src_stride; + } while (--b_h); +} + +#if CONFIG_VP9_HIGHBITDEPTH +static void high_build_mc_border(const uint8_t *src8, int src_stride, + uint16_t *dst, int dst_stride, + int x, int y, int b_w, int b_h, + int w, int h) { + // Get a pointer to the start of the real data for this row. 
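+  // As in build_mc_border() above, each output row is assembled from up to
+  // three spans: 'left' pixels replicated from the first in-frame sample,
+  // 'copy' pixels copied verbatim, and 'right' pixels replicated from the
+  // last in-frame sample; this variant uses 16-bit stores.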
+ const uint16_t *src = CONVERT_TO_SHORTPTR(src8); + const uint16_t *ref_row = src - x - y * src_stride; + + if (y >= h) + ref_row += (h - 1) * src_stride; + else if (y > 0) + ref_row += y * src_stride; + + do { + int right = 0, copy; + int left = x < 0 ? -x : 0; + + if (left > b_w) + left = b_w; + + if (x + b_w > w) + right = x + b_w - w; + + if (right > b_w) + right = b_w; + + copy = b_w - left - right; + + if (left) + vpx_memset16(dst, ref_row[0], left); + + if (copy) + memcpy(dst + left, ref_row + x + left, copy * sizeof(uint16_t)); + + if (right) + vpx_memset16(dst + left + copy, ref_row[w - 1], right); + + dst += dst_stride; + ++y; + + if (y > 0 && y < h) + ref_row += src_stride; + } while (--b_h); +} +#endif // CONFIG_VP9_HIGHBITDEPTH + +#if CONFIG_VP9_HIGHBITDEPTH +static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride, + int x0, int y0, int b_w, int b_h, + int frame_width, int frame_height, + int border_offset, + uint8_t *const dst, int dst_buf_stride, + int subpel_x, int subpel_y, + const InterpKernel *kernel, + const struct scale_factors *sf, + MACROBLOCKD *xd, + int w, int h, int ref, int xs, int ys) { + DECLARE_ALIGNED(16, uint16_t, mc_buf_high[80 * 2 * 80 * 2]); + const uint8_t *buf_ptr; + + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + high_build_mc_border(buf_ptr1, pre_buf_stride, mc_buf_high, b_w, + x0, y0, b_w, b_h, frame_width, frame_height); + buf_ptr = CONVERT_TO_BYTEPTR(mc_buf_high) + border_offset; + } else { + build_mc_border(buf_ptr1, pre_buf_stride, (uint8_t *)mc_buf_high, b_w, + x0, y0, b_w, b_h, frame_width, frame_height); + buf_ptr = ((uint8_t *)mc_buf_high) + border_offset; + } + + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + highbd_inter_predictor(buf_ptr, b_w, dst, dst_buf_stride, subpel_x, + subpel_y, sf, w, h, ref, kernel, xs, ys, xd->bd); + } else { + inter_predictor(buf_ptr, b_w, dst, dst_buf_stride, subpel_x, + subpel_y, sf, w, h, ref, kernel, xs, ys); + } +} +#else +static void extend_and_predict(const uint8_t *buf_ptr1, int pre_buf_stride, + int x0, int y0, int b_w, int b_h, + int frame_width, int frame_height, + int border_offset, + uint8_t *const dst, int dst_buf_stride, + int subpel_x, int subpel_y, + const InterpKernel *kernel, + const struct scale_factors *sf, + int w, int h, int ref, int xs, int ys) { + DECLARE_ALIGNED(16, uint8_t, mc_buf[80 * 2 * 80 * 2]); + const uint8_t *buf_ptr; + + build_mc_border(buf_ptr1, pre_buf_stride, mc_buf, b_w, + x0, y0, b_w, b_h, frame_width, frame_height); + buf_ptr = mc_buf + border_offset; + + inter_predictor(buf_ptr, b_w, dst, dst_buf_stride, subpel_x, + subpel_y, sf, w, h, ref, kernel, xs, ys); +} +#endif // CONFIG_VP9_HIGHBITDEPTH + +static void dec_build_inter_predictors(VPxWorker *const worker, MACROBLOCKD *xd, + int plane, int bw, int bh, int x, + int y, int w, int h, int mi_x, int mi_y, + const InterpKernel *kernel, + const struct scale_factors *sf, + struct buf_2d *pre_buf, + struct buf_2d *dst_buf, const MV* mv, + RefCntBuffer *ref_frame_buf, + int is_scaled, int ref) { + struct macroblockd_plane *const pd = &xd->plane[plane]; + uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x; + MV32 scaled_mv; + int xs, ys, x0, y0, x0_16, y0_16, frame_width, frame_height, + buf_stride, subpel_x, subpel_y; + uint8_t *ref_frame, *buf_ptr; + + // Get reference frame pointer, width and height. 
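+  // Plane 0 reads the luma buffer; planes 1 and 2 read the chroma U and V
+  // buffers, whose crop dimensions already account for subsampling.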
+ if (plane == 0) { + frame_width = ref_frame_buf->buf.y_crop_width; + frame_height = ref_frame_buf->buf.y_crop_height; + ref_frame = ref_frame_buf->buf.y_buffer; + } else { + frame_width = ref_frame_buf->buf.uv_crop_width; + frame_height = ref_frame_buf->buf.uv_crop_height; + ref_frame = plane == 1 ? ref_frame_buf->buf.u_buffer + : ref_frame_buf->buf.v_buffer; + } + + if (is_scaled) { + const MV mv_q4 = clamp_mv_to_umv_border_sb(xd, mv, bw, bh, + pd->subsampling_x, + pd->subsampling_y); + // Co-ordinate of containing block to pixel precision. + int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)); + int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)); +#if CONFIG_BETTER_HW_COMPATIBILITY + assert(xd->mi[0]->sb_type != BLOCK_4X8 && + xd->mi[0]->sb_type != BLOCK_8X4); + assert(mv_q4.row == mv->row * (1 << (1 - pd->subsampling_y)) && + mv_q4.col == mv->col * (1 << (1 - pd->subsampling_x))); +#endif + // Co-ordinate of the block to 1/16th pixel precision. + x0_16 = (x_start + x) << SUBPEL_BITS; + y0_16 = (y_start + y) << SUBPEL_BITS; + + // Co-ordinate of current block in reference frame + // to 1/16th pixel precision. + x0_16 = sf->scale_value_x(x0_16, sf); + y0_16 = sf->scale_value_y(y0_16, sf); + + // Map the top left corner of the block into the reference frame. + x0 = sf->scale_value_x(x_start + x, sf); + y0 = sf->scale_value_y(y_start + y, sf); + + // Scale the MV and incorporate the sub-pixel offset of the block + // in the reference frame. + scaled_mv = vp9_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf); + xs = sf->x_step_q4; + ys = sf->y_step_q4; + } else { + // Co-ordinate of containing block to pixel precision. + x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x; + y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y; + + // Co-ordinate of the block to 1/16th pixel precision. + x0_16 = x0 << SUBPEL_BITS; + y0_16 = y0 << SUBPEL_BITS; + + scaled_mv.row = mv->row * (1 << (1 - pd->subsampling_y)); + scaled_mv.col = mv->col * (1 << (1 - pd->subsampling_x)); + xs = ys = 16; + } + subpel_x = scaled_mv.col & SUBPEL_MASK; + subpel_y = scaled_mv.row & SUBPEL_MASK; + + // Calculate the top left corner of the best matching block in the + // reference frame. + x0 += scaled_mv.col >> SUBPEL_BITS; + y0 += scaled_mv.row >> SUBPEL_BITS; + x0_16 += scaled_mv.col; + y0_16 += scaled_mv.row; + + // Get reference block pointer. + buf_ptr = ref_frame + y0 * pre_buf->stride + x0; + buf_stride = pre_buf->stride; + + // Do border extension if there is motion or the + // width/height is not a multiple of 8 pixels. + if (is_scaled || scaled_mv.col || scaled_mv.row || + (frame_width & 0x7) || (frame_height & 0x7)) { + int y1 = ((y0_16 + (h - 1) * ys) >> SUBPEL_BITS) + 1; + + // Get reference block bottom right horizontal coordinate. + int x1 = ((x0_16 + (w - 1) * xs) >> SUBPEL_BITS) + 1; + int x_pad = 0, y_pad = 0; + + if (subpel_x || (sf->x_step_q4 != SUBPEL_SHIFTS)) { + x0 -= VP9_INTERP_EXTEND - 1; + x1 += VP9_INTERP_EXTEND; + x_pad = 1; + } + + if (subpel_y || (sf->y_step_q4 != SUBPEL_SHIFTS)) { + y0 -= VP9_INTERP_EXTEND - 1; + y1 += VP9_INTERP_EXTEND; + y_pad = 1; + } + + // Wait until reference block is ready. Pad 7 more pixels as last 7 + // pixels of each superblock row can be changed by next superblock row. + if (worker != NULL) + vp9_frameworker_wait(worker, ref_frame_buf, + VPXMAX(0, (y1 + 7)) << (plane == 0 ? 0 : 1)); + + // Skip border extension if block is inside the frame. 
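+    // The footprint [x0, x1] x [y0, y1] computed above already includes the
+    // extra VP9_INTERP_EXTEND rows/columns required by the subpel filter
+    // taps, so extension is needed only when it crosses a frame edge.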
+ if (x0 < 0 || x0 > frame_width - 1 || x1 < 0 || x1 > frame_width - 1 || + y0 < 0 || y0 > frame_height - 1 || y1 < 0 || y1 > frame_height - 1) { + // Extend the border. + const uint8_t *const buf_ptr1 = ref_frame + y0 * buf_stride + x0; + const int b_w = x1 - x0 + 1; + const int b_h = y1 - y0 + 1; + const int border_offset = y_pad * 3 * b_w + x_pad * 3; + + extend_and_predict(buf_ptr1, buf_stride, x0, y0, b_w, b_h, + frame_width, frame_height, border_offset, + dst, dst_buf->stride, + subpel_x, subpel_y, + kernel, sf, +#if CONFIG_VP9_HIGHBITDEPTH + xd, +#endif + w, h, ref, xs, ys); + return; + } + } else { + // Wait until reference block is ready. Pad 7 more pixels as last 7 + // pixels of each superblock row can be changed by next superblock row. + if (worker != NULL) { + const int y1 = (y0_16 + (h - 1) * ys) >> SUBPEL_BITS; + vp9_frameworker_wait(worker, ref_frame_buf, + VPXMAX(0, (y1 + 7)) << (plane == 0 ? 0 : 1)); + } + } +#if CONFIG_VP9_HIGHBITDEPTH + if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { + highbd_inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, + subpel_y, sf, w, h, ref, kernel, xs, ys, xd->bd); + } else { + inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, + subpel_y, sf, w, h, ref, kernel, xs, ys); + } +#else + inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, + subpel_y, sf, w, h, ref, kernel, xs, ys); +#endif // CONFIG_VP9_HIGHBITDEPTH +} + +static void dec_build_inter_predictors_sb(VP9Decoder *const pbi, + MACROBLOCKD *xd, + int mi_row, int mi_col) { + int plane; + const int mi_x = mi_col * MI_SIZE; + const int mi_y = mi_row * MI_SIZE; + const MODE_INFO *mi = xd->mi[0]; + const InterpKernel *kernel = vp9_filter_kernels[mi->interp_filter]; + const BLOCK_SIZE sb_type = mi->sb_type; + const int is_compound = has_second_ref(mi); + int ref; + int is_scaled; + VPxWorker *const fwo = pbi->frame_parallel_decode ? + pbi->frame_worker_owner : NULL; + + for (ref = 0; ref < 1 + is_compound; ++ref) { + const MV_REFERENCE_FRAME frame = mi->ref_frame[ref]; + RefBuffer *ref_buf = &pbi->common.frame_refs[frame - LAST_FRAME]; + const struct scale_factors *const sf = &ref_buf->sf; + const int idx = ref_buf->idx; + BufferPool *const pool = pbi->common.buffer_pool; + RefCntBuffer *const ref_frame_buf = &pool->frame_bufs[idx]; + + if (!vp9_is_valid_scale(sf)) + vpx_internal_error(xd->error_info, VPX_CODEC_UNSUP_BITSTREAM, + "Reference frame has invalid dimensions"); + + is_scaled = vp9_is_scaled(sf); + vp9_setup_pre_planes(xd, ref, ref_buf->buf, mi_row, mi_col, + is_scaled ? 
sf : NULL); + xd->block_refs[ref] = ref_buf; + + if (sb_type < BLOCK_8X8) { + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + struct macroblockd_plane *const pd = &xd->plane[plane]; + struct buf_2d *const dst_buf = &pd->dst; + const int num_4x4_w = pd->n4_w; + const int num_4x4_h = pd->n4_h; + const int n4w_x4 = 4 * num_4x4_w; + const int n4h_x4 = 4 * num_4x4_h; + struct buf_2d *const pre_buf = &pd->pre[ref]; + int i = 0, x, y; + for (y = 0; y < num_4x4_h; ++y) { + for (x = 0; x < num_4x4_w; ++x) { + const MV mv = average_split_mvs(pd, mi, ref, i++); + dec_build_inter_predictors(fwo, xd, plane, n4w_x4, n4h_x4, + 4 * x, 4 * y, 4, 4, mi_x, mi_y, kernel, + sf, pre_buf, dst_buf, &mv, + ref_frame_buf, is_scaled, ref); + } + } + } + } else { + const MV mv = mi->mv[ref].as_mv; + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + struct macroblockd_plane *const pd = &xd->plane[plane]; + struct buf_2d *const dst_buf = &pd->dst; + const int num_4x4_w = pd->n4_w; + const int num_4x4_h = pd->n4_h; + const int n4w_x4 = 4 * num_4x4_w; + const int n4h_x4 = 4 * num_4x4_h; + struct buf_2d *const pre_buf = &pd->pre[ref]; + dec_build_inter_predictors(fwo, xd, plane, n4w_x4, n4h_x4, + 0, 0, n4w_x4, n4h_x4, mi_x, mi_y, kernel, + sf, pre_buf, dst_buf, &mv, + ref_frame_buf, is_scaled, ref); + } + } + } +} + +static INLINE TX_SIZE dec_get_uv_tx_size(const MODE_INFO *mi, + int n4_wl, int n4_hl) { + // get minimum log2 num4x4s dimension + const int x = VPXMIN(n4_wl, n4_hl); + return VPXMIN(mi->tx_size, x); +} + +static INLINE void dec_reset_skip_context(MACROBLOCKD *xd) { + int i; + for (i = 0; i < MAX_MB_PLANE; i++) { + struct macroblockd_plane *const pd = &xd->plane[i]; + memset(pd->above_context, 0, sizeof(ENTROPY_CONTEXT) * pd->n4_w); + memset(pd->left_context, 0, sizeof(ENTROPY_CONTEXT) * pd->n4_h); + } +} + +static void set_plane_n4(MACROBLOCKD *const xd, int bw, int bh, int bwl, + int bhl) { + int i; + for (i = 0; i < MAX_MB_PLANE; i++) { + xd->plane[i].n4_w = (bw << 1) >> xd->plane[i].subsampling_x; + xd->plane[i].n4_h = (bh << 1) >> xd->plane[i].subsampling_y; + xd->plane[i].n4_wl = bwl - xd->plane[i].subsampling_x; + xd->plane[i].n4_hl = bhl - xd->plane[i].subsampling_y; + } +} + +static MODE_INFO *set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd, + BLOCK_SIZE bsize, int mi_row, int mi_col, + int bw, int bh, int x_mis, int y_mis, + int bwl, int bhl) { + const int offset = mi_row * cm->mi_stride + mi_col; + int x, y; + const TileInfo *const tile = &xd->tile; + + xd->mi = cm->mi_grid_visible + offset; + xd->mi[0] = &cm->mi[offset]; + // TODO(slavarnway): Generate sb_type based on bwl and bhl, instead of + // passing bsize from decode_partition(). + xd->mi[0]->sb_type = bsize; + for (y = 0; y < y_mis; ++y) + for (x = !y; x < x_mis; ++x) { + xd->mi[y * cm->mi_stride + x] = xd->mi[0]; + } + + set_plane_n4(xd, bw, bh, bwl, bhl); + + set_skip_context(xd, mi_row, mi_col); + + // Distance of Mb to the various image edges. 
These are specified to 8th pel + // as they are always compared to values that are in 1/8th pel units + set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols); + + vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col); + return xd->mi[0]; +} + +static void decode_block(VP9Decoder *const pbi, MACROBLOCKD *const xd, + int mi_row, int mi_col, + vpx_reader *r, BLOCK_SIZE bsize, + int bwl, int bhl) { + VP9_COMMON *const cm = &pbi->common; + const int less8x8 = bsize < BLOCK_8X8; + const int bw = 1 << (bwl - 1); + const int bh = 1 << (bhl - 1); + const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col); + const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row); + + MODE_INFO *mi = set_offsets(cm, xd, bsize, mi_row, mi_col, + bw, bh, x_mis, y_mis, bwl, bhl); + + if (bsize >= BLOCK_8X8 && (cm->subsampling_x || cm->subsampling_y)) { + const BLOCK_SIZE uv_subsize = + ss_size_lookup[bsize][cm->subsampling_x][cm->subsampling_y]; + if (uv_subsize == BLOCK_INVALID) + vpx_internal_error(xd->error_info, + VPX_CODEC_CORRUPT_FRAME, "Invalid block size."); + } + + vp9_read_mode_info(pbi, xd, mi_row, mi_col, r, x_mis, y_mis); + + if (mi->skip) { + dec_reset_skip_context(xd); + } + + if (!is_inter_block(mi)) { + int plane; + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + const struct macroblockd_plane *const pd = &xd->plane[plane]; + const TX_SIZE tx_size = + plane ? dec_get_uv_tx_size(mi, pd->n4_wl, pd->n4_hl) + : mi->tx_size; + const int num_4x4_w = pd->n4_w; + const int num_4x4_h = pd->n4_h; + const int step = (1 << tx_size); + int row, col; + const int max_blocks_wide = num_4x4_w + (xd->mb_to_right_edge >= 0 ? + 0 : xd->mb_to_right_edge >> (5 + pd->subsampling_x)); + const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ? + 0 : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); + + xd->max_blocks_wide = xd->mb_to_right_edge >= 0 ? 0 : max_blocks_wide; + xd->max_blocks_high = xd->mb_to_bottom_edge >= 0 ? 0 : max_blocks_high; + + for (row = 0; row < max_blocks_high; row += step) + for (col = 0; col < max_blocks_wide; col += step) + predict_and_reconstruct_intra_block(xd, r, mi, plane, + row, col, tx_size); + } + } else { + // Prediction + dec_build_inter_predictors_sb(pbi, xd, mi_row, mi_col); + + // Reconstruction + if (!mi->skip) { + int eobtotal = 0; + int plane; + + for (plane = 0; plane < MAX_MB_PLANE; ++plane) { + const struct macroblockd_plane *const pd = &xd->plane[plane]; + const TX_SIZE tx_size = + plane ? dec_get_uv_tx_size(mi, pd->n4_wl, pd->n4_hl) + : mi->tx_size; + const int num_4x4_w = pd->n4_w; + const int num_4x4_h = pd->n4_h; + const int step = (1 << tx_size); + int row, col; + const int max_blocks_wide = num_4x4_w + (xd->mb_to_right_edge >= 0 ? + 0 : xd->mb_to_right_edge >> (5 + pd->subsampling_x)); + const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ? + 0 : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); + + xd->max_blocks_wide = xd->mb_to_right_edge >= 0 ? 0 : max_blocks_wide; + xd->max_blocks_high = xd->mb_to_bottom_edge >= 0 ? 
0 : max_blocks_high; + + for (row = 0; row < max_blocks_high; row += step) + for (col = 0; col < max_blocks_wide; col += step) + eobtotal += reconstruct_inter_block(xd, r, mi, plane, row, col, + tx_size); + } + + if (!less8x8 && eobtotal == 0) + mi->skip = 1; // skip loopfilter + } + } + + xd->corrupted |= vpx_reader_has_error(r); + + if (cm->lf.filter_level) { + vp9_build_mask(cm, mi, mi_row, mi_col, bw, bh); + } +} + +static INLINE int dec_partition_plane_context(const MACROBLOCKD *xd, + int mi_row, int mi_col, + int bsl) { + const PARTITION_CONTEXT *above_ctx = xd->above_seg_context + mi_col; + const PARTITION_CONTEXT *left_ctx = xd->left_seg_context + (mi_row & MI_MASK); + int above = (*above_ctx >> bsl) & 1, left = (*left_ctx >> bsl) & 1; + +// assert(bsl >= 0); + + return (left * 2 + above) + bsl * PARTITION_PLOFFSET; +} + +static INLINE void dec_update_partition_context(MACROBLOCKD *xd, + int mi_row, int mi_col, + BLOCK_SIZE subsize, + int bw) { + PARTITION_CONTEXT *const above_ctx = xd->above_seg_context + mi_col; + PARTITION_CONTEXT *const left_ctx = xd->left_seg_context + (mi_row & MI_MASK); + + // Update the partition context at the end. Set partition bits + // of block sizes larger than the current one to one, and partition + // bits of smaller block sizes to zero. + memset(above_ctx, partition_context_lookup[subsize].above, bw); + memset(left_ctx, partition_context_lookup[subsize].left, bw); +} + +static PARTITION_TYPE read_partition(MACROBLOCKD *xd, int mi_row, int mi_col, + vpx_reader *r, + int has_rows, int has_cols, int bsl) { + const int ctx = dec_partition_plane_context(xd, mi_row, mi_col, bsl); + const vpx_prob *const probs = get_partition_probs(xd, ctx); + FRAME_COUNTS *counts = xd->counts; + PARTITION_TYPE p; + + if (has_rows && has_cols) + p = (PARTITION_TYPE)vpx_read_tree(r, vp9_partition_tree, probs); + else if (!has_rows && has_cols) + p = vpx_read(r, probs[1]) ? PARTITION_SPLIT : PARTITION_HORZ; + else if (has_rows && !has_cols) + p = vpx_read(r, probs[2]) ?
PARTITION_SPLIT : PARTITION_VERT; + else + p = PARTITION_SPLIT; + + if (counts) + ++counts->partition[ctx][p]; + + return p; +} + +// TODO(slavarnway): eliminate bsize and subsize in future commits +static void decode_partition(VP9Decoder *const pbi, MACROBLOCKD *const xd, + int mi_row, int mi_col, + vpx_reader* r, BLOCK_SIZE bsize, int n4x4_l2) { + VP9_COMMON *const cm = &pbi->common; + const int n8x8_l2 = n4x4_l2 - 1; + const int num_8x8_wh = 1 << n8x8_l2; + const int hbs = num_8x8_wh >> 1; + PARTITION_TYPE partition; + BLOCK_SIZE subsize; + const int has_rows = (mi_row + hbs) < cm->mi_rows; + const int has_cols = (mi_col + hbs) < cm->mi_cols; + + if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) + return; + + partition = read_partition(xd, mi_row, mi_col, r, has_rows, has_cols, + n8x8_l2); + subsize = subsize_lookup[partition][bsize]; // get_subsize(bsize, partition); + if (!hbs) { + // calculate bmode block dimensions (log 2) + xd->bmode_blocks_wl = 1 >> !!(partition & PARTITION_VERT); + xd->bmode_blocks_hl = 1 >> !!(partition & PARTITION_HORZ); + decode_block(pbi, xd, mi_row, mi_col, r, subsize, 1, 1); + } else { + switch (partition) { + case PARTITION_NONE: + decode_block(pbi, xd, mi_row, mi_col, r, subsize, n4x4_l2, n4x4_l2); + break; + case PARTITION_HORZ: + decode_block(pbi, xd, mi_row, mi_col, r, subsize, n4x4_l2, n8x8_l2); + if (has_rows) + decode_block(pbi, xd, mi_row + hbs, mi_col, r, subsize, n4x4_l2, + n8x8_l2); + break; + case PARTITION_VERT: + decode_block(pbi, xd, mi_row, mi_col, r, subsize, n8x8_l2, n4x4_l2); + if (has_cols) + decode_block(pbi, xd, mi_row, mi_col + hbs, r, subsize, n8x8_l2, + n4x4_l2); + break; + case PARTITION_SPLIT: + decode_partition(pbi, xd, mi_row, mi_col, r, subsize, n8x8_l2); + decode_partition(pbi, xd, mi_row, mi_col + hbs, r, subsize, n8x8_l2); + decode_partition(pbi, xd, mi_row + hbs, mi_col, r, subsize, n8x8_l2); + decode_partition(pbi, xd, mi_row + hbs, mi_col + hbs, r, subsize, + n8x8_l2); + break; + default: + assert(0 && "Invalid partition type"); + } + } + + // update partition context + if (bsize >= BLOCK_8X8 && + (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT)) + dec_update_partition_context(xd, mi_row, mi_col, subsize, num_8x8_wh); +} + +static void setup_token_decoder(const uint8_t *data, + const uint8_t *data_end, + size_t read_size, + struct vpx_internal_error_info *error_info, + vpx_reader *r, + vpx_decrypt_cb decrypt_cb, + void *decrypt_state) { + // Validate the calculated partition length. If the buffer + // described by the partition can't be fully read, then restrict + // it to the portion that can be (for EC mode) or throw an error. 
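+  // (This implementation takes the error path: a truncated partition is
+  // reported as VPX_CODEC_CORRUPT_FRAME rather than being shortened.)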
+ if (!read_is_valid(data, read_size, data_end)) + vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME, + "Truncated packet or corrupt tile length"); + + if (vpx_reader_init(r, data, read_size, decrypt_cb, decrypt_state)) + vpx_internal_error(error_info, VPX_CODEC_MEM_ERROR, + "Failed to allocate bool decoder %d", 1); +} + +static void read_coef_probs_common(vp9_coeff_probs_model *coef_probs, + vpx_reader *r) { + int i, j, k, l, m; + + if (vpx_read_bit(r)) + for (i = 0; i < PLANE_TYPES; ++i) + for (j = 0; j < REF_TYPES; ++j) + for (k = 0; k < COEF_BANDS; ++k) + for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) + for (m = 0; m < UNCONSTRAINED_NODES; ++m) + vp9_diff_update_prob(r, &coef_probs[i][j][k][l][m]); +} + +static void read_coef_probs(FRAME_CONTEXT *fc, TX_MODE tx_mode, + vpx_reader *r) { + const TX_SIZE max_tx_size = tx_mode_to_biggest_tx_size[tx_mode]; + TX_SIZE tx_size; + for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size) + read_coef_probs_common(fc->coef_probs[tx_size], r); +} + +static void setup_segmentation(struct segmentation *seg, + struct vpx_read_bit_buffer *rb) { + int i, j; + + seg->update_map = 0; + seg->update_data = 0; + + seg->enabled = vpx_rb_read_bit(rb); + if (!seg->enabled) + return; + + // Segmentation map update + seg->update_map = vpx_rb_read_bit(rb); + if (seg->update_map) { + for (i = 0; i < SEG_TREE_PROBS; i++) + seg->tree_probs[i] = vpx_rb_read_bit(rb) ? vpx_rb_read_literal(rb, 8) + : MAX_PROB; + + seg->temporal_update = vpx_rb_read_bit(rb); + if (seg->temporal_update) { + for (i = 0; i < PREDICTION_PROBS; i++) + seg->pred_probs[i] = vpx_rb_read_bit(rb) ? vpx_rb_read_literal(rb, 8) + : MAX_PROB; + } else { + for (i = 0; i < PREDICTION_PROBS; i++) + seg->pred_probs[i] = MAX_PROB; + } + } + + // Segmentation data update + seg->update_data = vpx_rb_read_bit(rb); + if (seg->update_data) { + seg->abs_delta = vpx_rb_read_bit(rb); + + vp9_clearall_segfeatures(seg); + + for (i = 0; i < MAX_SEGMENTS; i++) { + for (j = 0; j < SEG_LVL_MAX; j++) { + int data = 0; + const int feature_enabled = vpx_rb_read_bit(rb); + if (feature_enabled) { + vp9_enable_segfeature(seg, i, j); + data = decode_unsigned_max(rb, vp9_seg_feature_data_max(j)); + if (vp9_is_segfeature_signed(j)) + data = vpx_rb_read_bit(rb) ? -data : data; + } + vp9_set_segdata(seg, i, j, data); + } + } + } +} + +static void setup_loopfilter(struct loopfilter *lf, + struct vpx_read_bit_buffer *rb) { + lf->filter_level = vpx_rb_read_literal(rb, 6); + lf->sharpness_level = vpx_rb_read_literal(rb, 3); + + // Read in loop filter deltas applied at the MB level based on mode or ref + // frame. + lf->mode_ref_delta_update = 0; + + lf->mode_ref_delta_enabled = vpx_rb_read_bit(rb); + if (lf->mode_ref_delta_enabled) { + lf->mode_ref_delta_update = vpx_rb_read_bit(rb); + if (lf->mode_ref_delta_update) { + int i; + + for (i = 0; i < MAX_REF_LF_DELTAS; i++) + if (vpx_rb_read_bit(rb)) + lf->ref_deltas[i] = vpx_rb_read_signed_literal(rb, 6); + + for (i = 0; i < MAX_MODE_LF_DELTAS; i++) + if (vpx_rb_read_bit(rb)) + lf->mode_deltas[i] = vpx_rb_read_signed_literal(rb, 6); + } + } +} + +static INLINE int read_delta_q(struct vpx_read_bit_buffer *rb) { + return vpx_rb_read_bit(rb) ? 
vpx_rb_read_signed_literal(rb, 4) : 0; +} + +static void setup_quantization(VP9_COMMON *const cm, MACROBLOCKD *const xd, + struct vpx_read_bit_buffer *rb) { + cm->base_qindex = vpx_rb_read_literal(rb, QINDEX_BITS); + cm->y_dc_delta_q = read_delta_q(rb); + cm->uv_dc_delta_q = read_delta_q(rb); + cm->uv_ac_delta_q = read_delta_q(rb); + cm->dequant_bit_depth = cm->bit_depth; + xd->lossless = cm->base_qindex == 0 && + cm->y_dc_delta_q == 0 && + cm->uv_dc_delta_q == 0 && + cm->uv_ac_delta_q == 0; + +#if CONFIG_VP9_HIGHBITDEPTH + xd->bd = (int)cm->bit_depth; +#endif +} + +static void setup_segmentation_dequant(VP9_COMMON *const cm) { + // Build y/uv dequant values based on segmentation. + if (cm->seg.enabled) { + int i; + for (i = 0; i < MAX_SEGMENTS; ++i) { + const int qindex = vp9_get_qindex(&cm->seg, i, cm->base_qindex); + cm->y_dequant[i][0] = vp9_dc_quant(qindex, cm->y_dc_delta_q, + cm->bit_depth); + cm->y_dequant[i][1] = vp9_ac_quant(qindex, 0, cm->bit_depth); + cm->uv_dequant[i][0] = vp9_dc_quant(qindex, cm->uv_dc_delta_q, + cm->bit_depth); + cm->uv_dequant[i][1] = vp9_ac_quant(qindex, cm->uv_ac_delta_q, + cm->bit_depth); + } + } else { + const int qindex = cm->base_qindex; + // When segmentation is disabled, only the first value is used. The + // remaining are don't cares. + cm->y_dequant[0][0] = vp9_dc_quant(qindex, cm->y_dc_delta_q, cm->bit_depth); + cm->y_dequant[0][1] = vp9_ac_quant(qindex, 0, cm->bit_depth); + cm->uv_dequant[0][0] = vp9_dc_quant(qindex, cm->uv_dc_delta_q, + cm->bit_depth); + cm->uv_dequant[0][1] = vp9_ac_quant(qindex, cm->uv_ac_delta_q, + cm->bit_depth); + } +} + +static INTERP_FILTER read_interp_filter(struct vpx_read_bit_buffer *rb) { + const INTERP_FILTER literal_to_filter[] = { EIGHTTAP_SMOOTH, + EIGHTTAP, + EIGHTTAP_SHARP, + BILINEAR }; + return vpx_rb_read_bit(rb) ? SWITCHABLE + : literal_to_filter[vpx_rb_read_literal(rb, 2)]; +} + +static void setup_render_size(VP9_COMMON *cm, struct vpx_read_bit_buffer *rb) { + cm->render_width = cm->width; + cm->render_height = cm->height; + if (vpx_rb_read_bit(rb)) + vp9_read_frame_size(rb, &cm->render_width, &cm->render_height); +} + +static void resize_mv_buffer(VP9_COMMON *cm) { + vpx_free(cm->cur_frame->mvs); + cm->cur_frame->mi_rows = cm->mi_rows; + cm->cur_frame->mi_cols = cm->mi_cols; + CHECK_MEM_ERROR(cm, cm->cur_frame->mvs, + (MV_REF *)vpx_calloc(cm->mi_rows * cm->mi_cols, + sizeof(*cm->cur_frame->mvs))); +} + +static void resize_context_buffers(VP9_COMMON *cm, int width, int height) { +#if CONFIG_SIZE_LIMIT + if (width > DECODE_WIDTH_LIMIT || height > DECODE_HEIGHT_LIMIT) + vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, + "Dimensions of %dx%d beyond allowed size of %dx%d.", + width, height, DECODE_WIDTH_LIMIT, DECODE_HEIGHT_LIMIT); +#endif + if (cm->width != width || cm->height != height) { + const int new_mi_rows = + ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2) >> MI_SIZE_LOG2; + const int new_mi_cols = + ALIGN_POWER_OF_TWO(width, MI_SIZE_LOG2) >> MI_SIZE_LOG2; + + // Allocations in vp9_alloc_context_buffers() depend on individual + // dimensions as well as the overall size. 
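+    // Reallocate only when either mi dimension grows; shrinking keeps the
+    // existing buffers and merely updates the mb/mi bookkeeping.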
+ if (new_mi_cols > cm->mi_cols || new_mi_rows > cm->mi_rows) { + if (vp9_alloc_context_buffers(cm, width, height)) + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, + "Failed to allocate context buffers"); + } else { + vp9_set_mb_mi(cm, width, height); + } + vp9_init_context_buffers(cm); + cm->width = width; + cm->height = height; + } + if (cm->cur_frame->mvs == NULL || cm->mi_rows > cm->cur_frame->mi_rows || + cm->mi_cols > cm->cur_frame->mi_cols) { + resize_mv_buffer(cm); + } +} + +static void setup_frame_size(VP9_COMMON *cm, struct vpx_read_bit_buffer *rb) { + int width, height; + BufferPool *const pool = cm->buffer_pool; + vp9_read_frame_size(rb, &width, &height); + resize_context_buffers(cm, width, height); + setup_render_size(cm, rb); + + lock_buffer_pool(pool); + if (vpx_realloc_frame_buffer( + get_frame_new_buffer(cm), cm->width, cm->height, + cm->subsampling_x, cm->subsampling_y, +#if CONFIG_VP9_HIGHBITDEPTH + cm->use_highbitdepth, +#endif + VP9_DEC_BORDER_IN_PIXELS, + cm->byte_alignment, + &pool->frame_bufs[cm->new_fb_idx].raw_frame_buffer, pool->get_fb_cb, + pool->cb_priv)) { + unlock_buffer_pool(pool); + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, + "Failed to allocate frame buffer"); + } + unlock_buffer_pool(pool); + + pool->frame_bufs[cm->new_fb_idx].buf.subsampling_x = cm->subsampling_x; + pool->frame_bufs[cm->new_fb_idx].buf.subsampling_y = cm->subsampling_y; + pool->frame_bufs[cm->new_fb_idx].buf.bit_depth = (unsigned int)cm->bit_depth; + pool->frame_bufs[cm->new_fb_idx].buf.color_space = cm->color_space; + pool->frame_bufs[cm->new_fb_idx].buf.color_range = cm->color_range; + pool->frame_bufs[cm->new_fb_idx].buf.render_width = cm->render_width; + pool->frame_bufs[cm->new_fb_idx].buf.render_height = cm->render_height; +} + +static INLINE int valid_ref_frame_img_fmt(vpx_bit_depth_t ref_bit_depth, + int ref_xss, int ref_yss, + vpx_bit_depth_t this_bit_depth, + int this_xss, int this_yss) { + return ref_bit_depth == this_bit_depth && ref_xss == this_xss && + ref_yss == this_yss; +} + +static void setup_frame_size_with_refs(VP9_COMMON *cm, + struct vpx_read_bit_buffer *rb) { + int width, height; + int found = 0, i; + int has_valid_ref_frame = 0; + BufferPool *const pool = cm->buffer_pool; + for (i = 0; i < REFS_PER_FRAME; ++i) { + if (vpx_rb_read_bit(rb)) { + if (cm->frame_refs[i].idx != INVALID_IDX) { + YV12_BUFFER_CONFIG *const buf = cm->frame_refs[i].buf; + width = buf->y_crop_width; + height = buf->y_crop_height; + found = 1; + break; + } else { + vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, + "Failed to decode frame size"); + } + } + } + + if (!found) + vp9_read_frame_size(rb, &width, &height); + + if (width <= 0 || height <= 0) + vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, + "Invalid frame size"); + + // Check to make sure at least one of frames that this frame references + // has valid dimensions. 
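+  // A reference is usable when its crop dimensions fall within the scaling
+  // range that valid_ref_frame_size() permits for the new frame size.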
+ for (i = 0; i < REFS_PER_FRAME; ++i) { + RefBuffer *const ref_frame = &cm->frame_refs[i]; + has_valid_ref_frame |= (ref_frame->idx != INVALID_IDX && + valid_ref_frame_size(ref_frame->buf->y_crop_width, + ref_frame->buf->y_crop_height, + width, height)); + } + if (!has_valid_ref_frame) + vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, + "Referenced frame has invalid size"); + for (i = 0; i < REFS_PER_FRAME; ++i) { + RefBuffer *const ref_frame = &cm->frame_refs[i]; + if (ref_frame->idx == INVALID_IDX || + !valid_ref_frame_img_fmt(ref_frame->buf->bit_depth, + ref_frame->buf->subsampling_x, + ref_frame->buf->subsampling_y, + cm->bit_depth, + cm->subsampling_x, + cm->subsampling_y)) + vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, + "Referenced frame has incompatible color format"); + } + + resize_context_buffers(cm, width, height); + setup_render_size(cm, rb); + + lock_buffer_pool(pool); + if (vpx_realloc_frame_buffer( + get_frame_new_buffer(cm), cm->width, cm->height, + cm->subsampling_x, cm->subsampling_y, +#if CONFIG_VP9_HIGHBITDEPTH + cm->use_highbitdepth, +#endif + VP9_DEC_BORDER_IN_PIXELS, + cm->byte_alignment, + &pool->frame_bufs[cm->new_fb_idx].raw_frame_buffer, pool->get_fb_cb, + pool->cb_priv)) { + unlock_buffer_pool(pool); + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, + "Failed to allocate frame buffer"); + } + unlock_buffer_pool(pool); + + pool->frame_bufs[cm->new_fb_idx].buf.subsampling_x = cm->subsampling_x; + pool->frame_bufs[cm->new_fb_idx].buf.subsampling_y = cm->subsampling_y; + pool->frame_bufs[cm->new_fb_idx].buf.bit_depth = (unsigned int)cm->bit_depth; + pool->frame_bufs[cm->new_fb_idx].buf.color_space = cm->color_space; + pool->frame_bufs[cm->new_fb_idx].buf.color_range = cm->color_range; + pool->frame_bufs[cm->new_fb_idx].buf.render_width = cm->render_width; + pool->frame_bufs[cm->new_fb_idx].buf.render_height = cm->render_height; +} + +static void setup_tile_info(VP9_COMMON *cm, struct vpx_read_bit_buffer *rb) { + int min_log2_tile_cols, max_log2_tile_cols, max_ones; + vp9_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols); + + // columns + max_ones = max_log2_tile_cols - min_log2_tile_cols; + cm->log2_tile_cols = min_log2_tile_cols; + while (max_ones-- && vpx_rb_read_bit(rb)) + cm->log2_tile_cols++; + + if (cm->log2_tile_cols > 6) + vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, + "Invalid number of tile columns"); + + // rows + cm->log2_tile_rows = vpx_rb_read_bit(rb); + if (cm->log2_tile_rows) + cm->log2_tile_rows += vpx_rb_read_bit(rb); +} + +// Reads the next tile returning its size and adjusting '*data' accordingly +// based on 'is_last'. 
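+// Each tile except the last is preceded by a 4-byte big-endian size field
+// (decrypted first when a decrypt callback is set); the last tile spans the
+// remainder of the buffer.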
+static void get_tile_buffer(const uint8_t *const data_end, + int is_last, + struct vpx_internal_error_info *error_info, + const uint8_t **data, + vpx_decrypt_cb decrypt_cb, void *decrypt_state, + TileBuffer *buf) { + size_t size; + + if (!is_last) { + if (!read_is_valid(*data, 4, data_end)) + vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME, + "Truncated packet or corrupt tile length"); + + if (decrypt_cb) { + uint8_t be_data[4]; + decrypt_cb(decrypt_state, *data, be_data, 4); + size = mem_get_be32(be_data); + } else { + size = mem_get_be32(*data); + } + *data += 4; + + if (size > (size_t)(data_end - *data)) + vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME, + "Truncated packet or corrupt tile size"); + } else { + size = data_end - *data; + } + + buf->data = *data; + buf->size = size; + + *data += size; +} + +static void get_tile_buffers(VP9Decoder *pbi, + const uint8_t *data, const uint8_t *data_end, + int tile_cols, int tile_rows, + TileBuffer (*tile_buffers)[1 << 6]) { + int r, c; + + for (r = 0; r < tile_rows; ++r) { + for (c = 0; c < tile_cols; ++c) { + const int is_last = (r == tile_rows - 1) && (c == tile_cols - 1); + TileBuffer *const buf = &tile_buffers[r][c]; + buf->col = c; + get_tile_buffer(data_end, is_last, &pbi->common.error, &data, + pbi->decrypt_cb, pbi->decrypt_state, buf); + } + } +} + +static const uint8_t *decode_tiles(VP9Decoder *pbi, + const uint8_t *data, + const uint8_t *data_end) { + VP9_COMMON *const cm = &pbi->common; + const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); + const int aligned_cols = mi_cols_aligned_to_sb(cm->mi_cols); + const int tile_cols = 1 << cm->log2_tile_cols; + const int tile_rows = 1 << cm->log2_tile_rows; + TileBuffer tile_buffers[4][1 << 6]; + int tile_row, tile_col; + int mi_row, mi_col; + TileWorkerData *tile_data = NULL; + + if (cm->lf.filter_level && !cm->skip_loop_filter && + pbi->lf_worker.data1 == NULL) { + CHECK_MEM_ERROR(cm, pbi->lf_worker.data1, + vpx_memalign(32, sizeof(LFWorkerData))); + pbi->lf_worker.hook = (VPxWorkerHook)vp9_loop_filter_worker; + if (pbi->max_threads > 1 && !winterface->reset(&pbi->lf_worker)) { + vpx_internal_error(&cm->error, VPX_CODEC_ERROR, + "Loop filter thread creation failed"); + } + } + + if (cm->lf.filter_level && !cm->skip_loop_filter) { + LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; + // Be sure to sync as we might be resuming after a failed frame decode. + winterface->sync(&pbi->lf_worker); + vp9_loop_filter_data_reset(lf_data, get_frame_new_buffer(cm), cm, + pbi->mb.plane); + } + + assert(tile_rows <= 4); + assert(tile_cols <= (1 << 6)); + + // Note: this memset assumes above_context[0], [1] and [2] + // are allocated as part of the same buffer. + memset(cm->above_context, 0, + sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_cols); + + memset(cm->above_seg_context, 0, + sizeof(*cm->above_seg_context) * aligned_cols); + + vp9_reset_lfm(cm); + + get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, tile_buffers); + + // Load all tile information into tile_data. + for (tile_row = 0; tile_row < tile_rows; ++tile_row) { + for (tile_col = 0; tile_col < tile_cols; ++tile_col) { + const TileBuffer *const buf = &tile_buffers[tile_row][tile_col]; + tile_data = pbi->tile_worker_data + tile_cols * tile_row + tile_col; + tile_data->xd = pbi->mb; + tile_data->xd.corrupted = 0; + tile_data->xd.counts = + cm->frame_parallel_decoding_mode ? 
NULL : &cm->counts; + vp9_zero(tile_data->dqcoeff); + vp9_tile_init(&tile_data->xd.tile, cm, tile_row, tile_col); + setup_token_decoder(buf->data, data_end, buf->size, &cm->error, + &tile_data->bit_reader, pbi->decrypt_cb, + pbi->decrypt_state); + vp9_init_macroblockd(cm, &tile_data->xd, tile_data->dqcoeff); + } + } + + for (tile_row = 0; tile_row < tile_rows; ++tile_row) { + TileInfo tile; + vp9_tile_set_row(&tile, cm, tile_row); + for (mi_row = tile.mi_row_start; mi_row < tile.mi_row_end; + mi_row += MI_BLOCK_SIZE) { + for (tile_col = 0; tile_col < tile_cols; ++tile_col) { + const int col = pbi->inv_tile_order ? + tile_cols - tile_col - 1 : tile_col; + tile_data = pbi->tile_worker_data + tile_cols * tile_row + col; + vp9_tile_set_col(&tile, cm, col); + vp9_zero(tile_data->xd.left_context); + vp9_zero(tile_data->xd.left_seg_context); + for (mi_col = tile.mi_col_start; mi_col < tile.mi_col_end; + mi_col += MI_BLOCK_SIZE) { + decode_partition(pbi, &tile_data->xd, mi_row, + mi_col, &tile_data->bit_reader, BLOCK_64X64, 4); + } + pbi->mb.corrupted |= tile_data->xd.corrupted; + if (pbi->mb.corrupted) + vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, + "Failed to decode tile data"); + } + // Loopfilter one row. + if (cm->lf.filter_level && !cm->skip_loop_filter) { + const int lf_start = mi_row - MI_BLOCK_SIZE; + LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; + + // delay the loopfilter by 1 macroblock row. + if (lf_start < 0) continue; + + // decoding has completed: finish up the loop filter in this thread. + if (mi_row + MI_BLOCK_SIZE >= cm->mi_rows) continue; + + winterface->sync(&pbi->lf_worker); + lf_data->start = lf_start; + lf_data->stop = mi_row; + if (pbi->max_threads > 1) { + winterface->launch(&pbi->lf_worker); + } else { + winterface->execute(&pbi->lf_worker); + } + } + // After loopfiltering, the last 7 row pixels in each superblock row may + // still be changed by the longest loopfilter of the next superblock + // row. + if (pbi->frame_parallel_decode) + vp9_frameworker_broadcast(pbi->cur_buf, + mi_row << MI_BLOCK_SIZE_LOG2); + } + } + + // Loopfilter remaining rows in the frame. + if (cm->lf.filter_level && !cm->skip_loop_filter) { + LFWorkerData *const lf_data = (LFWorkerData*)pbi->lf_worker.data1; + winterface->sync(&pbi->lf_worker); + lf_data->start = lf_data->stop; + lf_data->stop = cm->mi_rows; + winterface->execute(&pbi->lf_worker); + } + + // Get last tile data. + tile_data = pbi->tile_worker_data + tile_cols * tile_rows - 1; + + if (pbi->frame_parallel_decode) + vp9_frameworker_broadcast(pbi->cur_buf, INT_MAX); + return vpx_reader_find_end(&tile_data->bit_reader); +} + +// On entry 'tile_data->data_end' points to the end of the input frame, on exit +// it is updated to reflect the bitreader position of the final tile column if +// present in the tile buffer group or NULL otherwise. 
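+// Returns 1 on success, 0 if any tile assigned to this worker is corrupt.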
+static int tile_worker_hook(TileWorkerData *const tile_data, + VP9Decoder *const pbi) { + TileInfo *volatile tile = &tile_data->xd.tile; + const int final_col = (1 << pbi->common.log2_tile_cols) - 1; + const uint8_t *volatile bit_reader_end = NULL; + volatile int n = tile_data->buf_start; + tile_data->error_info.setjmp = 1; + + if (setjmp(tile_data->error_info.jmp)) { + tile_data->error_info.setjmp = 0; + tile_data->xd.corrupted = 1; + tile_data->data_end = NULL; + return 0; + } + + tile_data->xd.error_info = &tile_data->error_info; + tile_data->xd.corrupted = 0; + + do { + int mi_row, mi_col; + const TileBuffer *const buf = pbi->tile_buffers + n; + vp9_zero(tile_data->dqcoeff); + vp9_tile_init(tile, &pbi->common, 0, buf->col); + setup_token_decoder(buf->data, tile_data->data_end, buf->size, + &tile_data->error_info, &tile_data->bit_reader, + pbi->decrypt_cb, pbi->decrypt_state); + vp9_init_macroblockd(&pbi->common, &tile_data->xd, tile_data->dqcoeff); + + for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end; + mi_row += MI_BLOCK_SIZE) { + vp9_zero(tile_data->xd.left_context); + vp9_zero(tile_data->xd.left_seg_context); + for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; + mi_col += MI_BLOCK_SIZE) { + decode_partition(pbi, &tile_data->xd, mi_row, mi_col, + &tile_data->bit_reader, BLOCK_64X64, 4); + } + } + + if (buf->col == final_col) { + bit_reader_end = vpx_reader_find_end(&tile_data->bit_reader); + } + } while (!tile_data->xd.corrupted && ++n <= tile_data->buf_end); + + tile_data->data_end = bit_reader_end; + return !tile_data->xd.corrupted; +} + +// sorts in descending order +static int compare_tile_buffers(const void *a, const void *b) { + const TileBuffer *const buf1 = (const TileBuffer*)a; + const TileBuffer *const buf2 = (const TileBuffer*)b; + return (int)(buf2->size - buf1->size); +} + +static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, + const uint8_t *data, + const uint8_t *data_end) { + VP9_COMMON *const cm = &pbi->common; + const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); + const uint8_t *bit_reader_end = NULL; + const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); + const int tile_cols = 1 << cm->log2_tile_cols; + const int tile_rows = 1 << cm->log2_tile_rows; + const int num_workers = VPXMIN(pbi->max_threads, tile_cols); + int n; + + assert(tile_cols <= (1 << 6)); + assert(tile_rows == 1); + (void)tile_rows; + + if (pbi->num_tile_workers == 0) { + const int num_threads = pbi->max_threads; + CHECK_MEM_ERROR(cm, pbi->tile_workers, + vpx_malloc(num_threads * sizeof(*pbi->tile_workers))); + for (n = 0; n < num_threads; ++n) { + VPxWorker *const worker = &pbi->tile_workers[n]; + ++pbi->num_tile_workers; + + winterface->init(worker); + if (n < num_threads - 1 && !winterface->reset(worker)) { + vpx_internal_error(&cm->error, VPX_CODEC_ERROR, + "Tile decoder thread creation failed"); + } + } + } + + // Reset tile decoding hook + for (n = 0; n < num_workers; ++n) { + VPxWorker *const worker = &pbi->tile_workers[n]; + TileWorkerData *const tile_data = + &pbi->tile_worker_data[n + pbi->total_tiles]; + winterface->sync(worker); + tile_data->xd = pbi->mb; + tile_data->xd.counts = + cm->frame_parallel_decoding_mode ? NULL : &tile_data->counts; + worker->hook = (VPxWorkerHook)tile_worker_hook; + worker->data1 = tile_data; + worker->data2 = pbi; + } + + // Note: this memset assumes above_context[0], [1] and [2] + // are allocated as part of the same buffer. 
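+  // (Two entries per mi column for each of the MAX_MB_PLANE planes, over
+  // the superblock-aligned width.)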
+ memset(cm->above_context, 0, + sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_mi_cols); + memset(cm->above_seg_context, 0, + sizeof(*cm->above_seg_context) * aligned_mi_cols); + + vp9_reset_lfm(cm); + + // Load tile data into tile_buffers + get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, + &pbi->tile_buffers); + + // Sort the buffers based on size in descending order. + qsort(pbi->tile_buffers, tile_cols, sizeof(pbi->tile_buffers[0]), + compare_tile_buffers); + + if (num_workers == tile_cols) { + // Rearrange the tile buffers such that the largest, and + // presumably the most difficult, tile will be decoded in the main thread. + // This should help minimize the number of instances where the main thread + // is waiting for a worker to complete. + const TileBuffer largest = pbi->tile_buffers[0]; + memmove(pbi->tile_buffers, pbi->tile_buffers + 1, + (tile_cols - 1) * sizeof(pbi->tile_buffers[0])); + pbi->tile_buffers[tile_cols - 1] = largest; + } else { + int start = 0, end = tile_cols - 2; + TileBuffer tmp; + + // Interleave the tiles to distribute the load between threads, assuming a + // larger tile implies it is more difficult to decode. + while (start < end) { + tmp = pbi->tile_buffers[start]; + pbi->tile_buffers[start] = pbi->tile_buffers[end]; + pbi->tile_buffers[end] = tmp; + start += 2; + end -= 2; + } + } + + // Initialize thread frame counts. + if (!cm->frame_parallel_decoding_mode) { + for (n = 0; n < num_workers; ++n) { + TileWorkerData *const tile_data = + (TileWorkerData*)pbi->tile_workers[n].data1; + vp9_zero(tile_data->counts); + } + } + + { + const int base = tile_cols / num_workers; + const int remain = tile_cols % num_workers; + int buf_start = 0; + + for (n = 0; n < num_workers; ++n) { + const int count = base + (remain + n) / num_workers; + VPxWorker *const worker = &pbi->tile_workers[n]; + TileWorkerData *const tile_data = (TileWorkerData*)worker->data1; + + tile_data->buf_start = buf_start; + tile_data->buf_end = buf_start + count - 1; + tile_data->data_end = data_end; + buf_start += count; + + worker->had_error = 0; + if (n == num_workers - 1) { + assert(tile_data->buf_end == tile_cols - 1); + winterface->execute(worker); + } else { + winterface->launch(worker); + } + } + + for (; n > 0; --n) { + VPxWorker *const worker = &pbi->tile_workers[n - 1]; + TileWorkerData *const tile_data = (TileWorkerData*)worker->data1; + // TODO(jzern): The tile may have specific error data associated with + // its vpx_internal_error_info which could be propagated to the main info + // in cm. Additionally once the threads have been synced and an error is + // detected, there's no point in continuing to decode tiles. + pbi->mb.corrupted |= !winterface->sync(worker); + if (!bit_reader_end) bit_reader_end = tile_data->data_end; + } + } + + // Accumulate thread frame counts. + if (!cm->frame_parallel_decoding_mode) { + for (n = 0; n < num_workers; ++n) { + TileWorkerData *const tile_data = + (TileWorkerData*)pbi->tile_workers[n].data1; + vp9_accumulate_frame_counts(&cm->counts, &tile_data->counts, 1); + } + } + + assert(bit_reader_end || pbi->mb.corrupted); + return bit_reader_end; +} + +static void error_handler(void *data) { + VP9_COMMON *const cm = (VP9_COMMON *)data; + vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Truncated packet"); +} + +static void read_bitdepth_colorspace_sampling( + VP9_COMMON *cm, struct vpx_read_bit_buffer *rb) { + if (cm->profile >= PROFILE_2) { + cm->bit_depth = vpx_rb_read_bit(rb) ? 
VPX_BITS_12 : VPX_BITS_10; +#if CONFIG_VP9_HIGHBITDEPTH + cm->use_highbitdepth = 1; +#endif + } else { + cm->bit_depth = VPX_BITS_8; +#if CONFIG_VP9_HIGHBITDEPTH + cm->use_highbitdepth = 0; +#endif + } + cm->color_space = vpx_rb_read_literal(rb, 3); + if (cm->color_space != VPX_CS_SRGB) { + cm->color_range = (vpx_color_range_t)vpx_rb_read_bit(rb); + if (cm->profile == PROFILE_1 || cm->profile == PROFILE_3) { + cm->subsampling_x = vpx_rb_read_bit(rb); + cm->subsampling_y = vpx_rb_read_bit(rb); + if (cm->subsampling_x == 1 && cm->subsampling_y == 1) + vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, + "4:2:0 color not supported in profile 1 or 3"); + if (vpx_rb_read_bit(rb)) + vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, + "Reserved bit set"); + } else { + cm->subsampling_y = cm->subsampling_x = 1; + } + } else { + cm->color_range = VPX_CR_FULL_RANGE; + if (cm->profile == PROFILE_1 || cm->profile == PROFILE_3) { + // Note if colorspace is SRGB then 4:4:4 chroma sampling is assumed. + // 4:2:2 or 4:4:0 chroma sampling is not allowed. + cm->subsampling_y = cm->subsampling_x = 0; + if (vpx_rb_read_bit(rb)) + vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, + "Reserved bit set"); + } else { + vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, + "4:4:4 color not supported in profile 0 or 2"); + } + } +} + +static size_t read_uncompressed_header(VP9Decoder *pbi, + struct vpx_read_bit_buffer *rb) { + VP9_COMMON *const cm = &pbi->common; + BufferPool *const pool = cm->buffer_pool; + RefCntBuffer *const frame_bufs = pool->frame_bufs; + int i, mask, ref_index = 0; + size_t sz; + + cm->last_frame_type = cm->frame_type; + cm->last_intra_only = cm->intra_only; + + if (vpx_rb_read_literal(rb, 2) != VP9_FRAME_MARKER) + vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, + "Invalid frame marker"); + + cm->profile = vp9_read_profile(rb); +#if CONFIG_VP9_HIGHBITDEPTH + if (cm->profile >= MAX_PROFILES) + vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, + "Unsupported bitstream profile"); +#else + if (cm->profile >= PROFILE_2) + vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, + "Unsupported bitstream profile"); +#endif + + cm->show_existing_frame = vpx_rb_read_bit(rb); + if (cm->show_existing_frame) { + // Show an existing frame directly. 
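+    // A 3-bit index selects one of the existing reference buffers; no frame
+    // data follows, so the selected buffer is re-shown as-is.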
+ const int frame_to_show = cm->ref_frame_map[vpx_rb_read_literal(rb, 3)]; + lock_buffer_pool(pool); + if (frame_to_show < 0 || frame_bufs[frame_to_show].ref_count < 1) { + unlock_buffer_pool(pool); + vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, + "Buffer %d does not contain a decoded frame", + frame_to_show); + } + + ref_cnt_fb(frame_bufs, &cm->new_fb_idx, frame_to_show); + unlock_buffer_pool(pool); + pbi->refresh_frame_flags = 0; + cm->lf.filter_level = 0; + cm->show_frame = 1; + + if (pbi->frame_parallel_decode) { + for (i = 0; i < REF_FRAMES; ++i) + cm->next_ref_frame_map[i] = cm->ref_frame_map[i]; + } + return 0; + } + + cm->frame_type = (FRAME_TYPE) vpx_rb_read_bit(rb); + cm->show_frame = vpx_rb_read_bit(rb); + cm->error_resilient_mode = vpx_rb_read_bit(rb); + + if (cm->frame_type == KEY_FRAME) { + if (!vp9_read_sync_code(rb)) + vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, + "Invalid frame sync code"); + + read_bitdepth_colorspace_sampling(cm, rb); + pbi->refresh_frame_flags = (1 << REF_FRAMES) - 1; + + for (i = 0; i < REFS_PER_FRAME; ++i) { + cm->frame_refs[i].idx = INVALID_IDX; + cm->frame_refs[i].buf = NULL; + } + + setup_frame_size(cm, rb); + if (pbi->need_resync) { + memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map)); + pbi->need_resync = 0; + } + } else { + cm->intra_only = cm->show_frame ? 0 : vpx_rb_read_bit(rb); + + cm->reset_frame_context = cm->error_resilient_mode ? + 0 : vpx_rb_read_literal(rb, 2); + + if (cm->intra_only) { + if (!vp9_read_sync_code(rb)) + vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, + "Invalid frame sync code"); + if (cm->profile > PROFILE_0) { + read_bitdepth_colorspace_sampling(cm, rb); + } else { + // NOTE: The intra-only frame header does not include the specification + // of either the color format or color sub-sampling in profile 0. VP9 + // specifies that the default color format should be YUV 4:2:0 in this + // case (normative). 
+ cm->color_space = VPX_CS_BT_601; + cm->color_range = VPX_CR_STUDIO_RANGE; + cm->subsampling_y = cm->subsampling_x = 1; + cm->bit_depth = VPX_BITS_8; +#if CONFIG_VP9_HIGHBITDEPTH + cm->use_highbitdepth = 0; +#endif + } + + pbi->refresh_frame_flags = vpx_rb_read_literal(rb, REF_FRAMES); + setup_frame_size(cm, rb); + if (pbi->need_resync) { + memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map)); + pbi->need_resync = 0; + } + } else if (pbi->need_resync != 1) { /* Skip if need resync */ + pbi->refresh_frame_flags = vpx_rb_read_literal(rb, REF_FRAMES); + for (i = 0; i < REFS_PER_FRAME; ++i) { + const int ref = vpx_rb_read_literal(rb, REF_FRAMES_LOG2); + const int idx = cm->ref_frame_map[ref]; + RefBuffer *const ref_frame = &cm->frame_refs[i]; + ref_frame->idx = idx; + ref_frame->buf = &frame_bufs[idx].buf; + cm->ref_frame_sign_bias[LAST_FRAME + i] = vpx_rb_read_bit(rb); + } + + setup_frame_size_with_refs(cm, rb); + + cm->allow_high_precision_mv = vpx_rb_read_bit(rb); + cm->interp_filter = read_interp_filter(rb); + + for (i = 0; i < REFS_PER_FRAME; ++i) { + RefBuffer *const ref_buf = &cm->frame_refs[i]; +#if CONFIG_VP9_HIGHBITDEPTH + vp9_setup_scale_factors_for_frame(&ref_buf->sf, + ref_buf->buf->y_crop_width, + ref_buf->buf->y_crop_height, + cm->width, cm->height, + cm->use_highbitdepth); +#else + vp9_setup_scale_factors_for_frame(&ref_buf->sf, + ref_buf->buf->y_crop_width, + ref_buf->buf->y_crop_height, + cm->width, cm->height); +#endif + } + } + } +#if CONFIG_VP9_HIGHBITDEPTH + get_frame_new_buffer(cm)->bit_depth = cm->bit_depth; +#endif + get_frame_new_buffer(cm)->color_space = cm->color_space; + get_frame_new_buffer(cm)->color_range = cm->color_range; + get_frame_new_buffer(cm)->render_width = cm->render_width; + get_frame_new_buffer(cm)->render_height = cm->render_height; + + if (pbi->need_resync) { + vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, + "Keyframe / intra-only frame required to reset decoder" + " state"); + } + + if (!cm->error_resilient_mode) { + cm->refresh_frame_context = vpx_rb_read_bit(rb); + cm->frame_parallel_decoding_mode = vpx_rb_read_bit(rb); + if (!cm->frame_parallel_decoding_mode) + vp9_zero(cm->counts); + } else { + cm->refresh_frame_context = 0; + cm->frame_parallel_decoding_mode = 1; + } + + // This flag will be overridden by the call to vp9_setup_past_independence + // below, forcing the use of context 0 for those frame types. + cm->frame_context_idx = vpx_rb_read_literal(rb, FRAME_CONTEXTS_LOG2); + + // Generate next_ref_frame_map. + lock_buffer_pool(pool); + for (mask = pbi->refresh_frame_flags; mask; mask >>= 1) { + if (mask & 1) { + cm->next_ref_frame_map[ref_index] = cm->new_fb_idx; + ++frame_bufs[cm->new_fb_idx].ref_count; + } else { + cm->next_ref_frame_map[ref_index] = cm->ref_frame_map[ref_index]; + } + // Current thread holds the reference frame. + if (cm->ref_frame_map[ref_index] >= 0) + ++frame_bufs[cm->ref_frame_map[ref_index]].ref_count; + ++ref_index; + } + + for (; ref_index < REF_FRAMES; ++ref_index) { + cm->next_ref_frame_map[ref_index] = cm->ref_frame_map[ref_index]; + // Current thread holds the reference frame. 
+ if (cm->ref_frame_map[ref_index] >= 0) + ++frame_bufs[cm->ref_frame_map[ref_index]].ref_count; + } + unlock_buffer_pool(pool); + pbi->hold_ref_buf = 1; + + if (frame_is_intra_only(cm) || cm->error_resilient_mode) + vp9_setup_past_independence(cm); + + setup_loopfilter(&cm->lf, rb); + setup_quantization(cm, &pbi->mb, rb); + setup_segmentation(&cm->seg, rb); + setup_segmentation_dequant(cm); + + setup_tile_info(cm, rb); + sz = vpx_rb_read_literal(rb, 16); + + if (sz == 0) + vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, + "Invalid header size"); + + return sz; +} + +static int read_compressed_header(VP9Decoder *pbi, const uint8_t *data, + size_t partition_size) { + VP9_COMMON *const cm = &pbi->common; + MACROBLOCKD *const xd = &pbi->mb; + FRAME_CONTEXT *const fc = cm->fc; + vpx_reader r; + int k; + + if (vpx_reader_init(&r, data, partition_size, pbi->decrypt_cb, + pbi->decrypt_state)) + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, + "Failed to allocate bool decoder 0"); + + cm->tx_mode = xd->lossless ? ONLY_4X4 : read_tx_mode(&r); + if (cm->tx_mode == TX_MODE_SELECT) + read_tx_mode_probs(&fc->tx_probs, &r); + read_coef_probs(fc, cm->tx_mode, &r); + + for (k = 0; k < SKIP_CONTEXTS; ++k) + vp9_diff_update_prob(&r, &fc->skip_probs[k]); + + if (!frame_is_intra_only(cm)) { + nmv_context *const nmvc = &fc->nmvc; + int i, j; + + read_inter_mode_probs(fc, &r); + + if (cm->interp_filter == SWITCHABLE) + read_switchable_interp_probs(fc, &r); + + for (i = 0; i < INTRA_INTER_CONTEXTS; i++) + vp9_diff_update_prob(&r, &fc->intra_inter_prob[i]); + + cm->reference_mode = read_frame_reference_mode(cm, &r); + if (cm->reference_mode != SINGLE_REFERENCE) + setup_compound_reference_mode(cm); + read_frame_reference_mode_probs(cm, &r); + + for (j = 0; j < BLOCK_SIZE_GROUPS; j++) + for (i = 0; i < INTRA_MODES - 1; ++i) + vp9_diff_update_prob(&r, &fc->y_mode_prob[j][i]); + + for (j = 0; j < PARTITION_CONTEXTS; ++j) + for (i = 0; i < PARTITION_TYPES - 1; ++i) + vp9_diff_update_prob(&r, &fc->partition_prob[j][i]); + + read_mv_probs(nmvc, cm->allow_high_precision_mv, &r); + } + + return vpx_reader_has_error(&r); +} + +static struct vpx_read_bit_buffer *init_read_bit_buffer( + VP9Decoder *pbi, + struct vpx_read_bit_buffer *rb, + const uint8_t *data, + const uint8_t *data_end, + uint8_t clear_data[MAX_VP9_HEADER_SIZE]) { + rb->bit_offset = 0; + rb->error_handler = error_handler; + rb->error_handler_data = &pbi->common; + if (pbi->decrypt_cb) { + const int n = (int)VPXMIN(MAX_VP9_HEADER_SIZE, data_end - data); + pbi->decrypt_cb(pbi->decrypt_state, data, clear_data, n); + rb->bit_buffer = clear_data; + rb->bit_buffer_end = clear_data + n; + } else { + rb->bit_buffer = data; + rb->bit_buffer_end = data_end; + } + return rb; +} + +//------------------------------------------------------------------------------ + +int vp9_read_sync_code(struct vpx_read_bit_buffer *const rb) { + return vpx_rb_read_literal(rb, 8) == VP9_SYNC_CODE_0 && + vpx_rb_read_literal(rb, 8) == VP9_SYNC_CODE_1 && + vpx_rb_read_literal(rb, 8) == VP9_SYNC_CODE_2; +} + +void vp9_read_frame_size(struct vpx_read_bit_buffer *rb, + int *width, int *height) { + *width = vpx_rb_read_literal(rb, 16) + 1; + *height = vpx_rb_read_literal(rb, 16) + 1; +} + +BITSTREAM_PROFILE vp9_read_profile(struct vpx_read_bit_buffer *rb) { + int profile = vpx_rb_read_bit(rb); + profile |= vpx_rb_read_bit(rb) << 1; + if (profile > 2) + profile += vpx_rb_read_bit(rb); + return (BITSTREAM_PROFILE) profile; +} + +void vp9_decode_frame(VP9Decoder *pbi, + const uint8_t 
*data, const uint8_t *data_end, + const uint8_t **p_data_end) { + VP9_COMMON *const cm = &pbi->common; + MACROBLOCKD *const xd = &pbi->mb; + struct vpx_read_bit_buffer rb; + int context_updated = 0; + uint8_t clear_data[MAX_VP9_HEADER_SIZE]; + const size_t first_partition_size = read_uncompressed_header(pbi, + init_read_bit_buffer(pbi, &rb, data, data_end, clear_data)); + const int tile_rows = 1 << cm->log2_tile_rows; + const int tile_cols = 1 << cm->log2_tile_cols; + YV12_BUFFER_CONFIG *const new_fb = get_frame_new_buffer(cm); + xd->cur_buf = new_fb; + + if (!first_partition_size) { + // showing a frame directly + *p_data_end = data + (cm->profile <= PROFILE_2 ? 1 : 2); + return; + } + + data += vpx_rb_bytes_read(&rb); + if (!read_is_valid(data, first_partition_size, data_end)) + vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, + "Truncated packet or corrupt header length"); + + cm->use_prev_frame_mvs = !cm->error_resilient_mode && + cm->width == cm->last_width && + cm->height == cm->last_height && + !cm->last_intra_only && + cm->last_show_frame && + (cm->last_frame_type != KEY_FRAME); + + vp9_setup_block_planes(xd, cm->subsampling_x, cm->subsampling_y); + + *cm->fc = cm->frame_contexts[cm->frame_context_idx]; + if (!cm->fc->initialized) + vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, + "Uninitialized entropy context."); + + xd->corrupted = 0; + new_fb->corrupted = read_compressed_header(pbi, data, first_partition_size); + if (new_fb->corrupted) + vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, + "Decode failed. Frame data header is corrupted."); + + if (cm->lf.filter_level && !cm->skip_loop_filter) { + vp9_loop_filter_frame_init(cm, cm->lf.filter_level); + } + + // If encoded in frame parallel mode, frame context is ready after decoding + // the frame header. + if (pbi->frame_parallel_decode && cm->frame_parallel_decoding_mode) { + VPxWorker *const worker = pbi->frame_worker_owner; + FrameWorkerData *const frame_worker_data = worker->data1; + if (cm->refresh_frame_context) { + context_updated = 1; + cm->frame_contexts[cm->frame_context_idx] = *cm->fc; + } + vp9_frameworker_lock_stats(worker); + pbi->cur_buf->row = -1; + pbi->cur_buf->col = -1; + frame_worker_data->frame_context_ready = 1; + // Signal the main thread that context is ready. + vp9_frameworker_signal_stats(worker); + vp9_frameworker_unlock_stats(worker); + } + + if (pbi->tile_worker_data == NULL || + (tile_cols * tile_rows) != pbi->total_tiles) { + const int num_tile_workers = tile_cols * tile_rows + + ((pbi->max_threads > 1) ? pbi->max_threads : 0); + const size_t twd_size = num_tile_workers * sizeof(*pbi->tile_worker_data); + // Ensure tile data offsets will be properly aligned. This may fail on + // platforms without DECLARE_ALIGNED(). + assert((sizeof(*pbi->tile_worker_data) % 16) == 0); + vpx_free(pbi->tile_worker_data); + CHECK_MEM_ERROR(cm, pbi->tile_worker_data, vpx_memalign(32, twd_size)); + pbi->total_tiles = tile_rows * tile_cols; + } + + if (pbi->max_threads > 1 && tile_rows == 1 && tile_cols > 1) { + // Multi-threaded tile decoder + *p_data_end = decode_tiles_mt(pbi, data + first_partition_size, data_end); + if (!xd->corrupted) { + if (!cm->skip_loop_filter) { + // If multiple threads are used to decode tiles, then we use those + // threads to do parallel loopfiltering. 
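+        // The loop filter reuses the tile worker pool: rows are handed out
+        // to the same workers through pbi->lf_row_sync.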
+ vp9_loop_filter_frame_mt(new_fb, cm, pbi->mb.plane, + cm->lf.filter_level, 0, 0, pbi->tile_workers, + pbi->num_tile_workers, &pbi->lf_row_sync); + } + } else { + vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, + "Decode failed. Frame data is corrupted."); + } + } else { + *p_data_end = decode_tiles(pbi, data + first_partition_size, data_end); + } + + if (!xd->corrupted) { + if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode) { + vp9_adapt_coef_probs(cm); + + if (!frame_is_intra_only(cm)) { + vp9_adapt_mode_probs(cm); + vp9_adapt_mv_probs(cm, cm->allow_high_precision_mv); + } + } + } else { + vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, + "Decode failed. Frame data is corrupted."); + } + + // Non frame parallel update frame context here. + if (cm->refresh_frame_context && !context_updated) + cm->frame_contexts[cm->frame_context_idx] = *cm->fc; +} diff --git a/thirdparty/libvpx/vp9/decoder/vp9_decodeframe.h b/thirdparty/libvpx/vp9/decoder/vp9_decodeframe.h new file mode 100644 index 0000000000..ce33cbdbd9 --- /dev/null +++ b/thirdparty/libvpx/vp9/decoder/vp9_decodeframe.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + + +#ifndef VP9_DECODER_VP9_DECODEFRAME_H_ +#define VP9_DECODER_VP9_DECODEFRAME_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "vp9/common/vp9_enums.h" + +struct VP9Decoder; +struct vpx_read_bit_buffer; + +int vp9_read_sync_code(struct vpx_read_bit_buffer *const rb); +void vp9_read_frame_size(struct vpx_read_bit_buffer *rb, + int *width, int *height); +BITSTREAM_PROFILE vp9_read_profile(struct vpx_read_bit_buffer *rb); + +void vp9_decode_frame(struct VP9Decoder *pbi, + const uint8_t *data, const uint8_t *data_end, + const uint8_t **p_data_end); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_DECODER_VP9_DECODEFRAME_H_ diff --git a/thirdparty/libvpx/vp9/decoder/vp9_decodemv.c b/thirdparty/libvpx/vp9/decoder/vp9_decodemv.c new file mode 100644 index 0000000000..ffc6839ad1 --- /dev/null +++ b/thirdparty/libvpx/vp9/decoder/vp9_decodemv.c @@ -0,0 +1,911 @@ +/* + Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include <assert.h> + +#include "vp9/common/vp9_common.h" +#include "vp9/common/vp9_entropy.h" +#include "vp9/common/vp9_entropymode.h" +#include "vp9/common/vp9_entropymv.h" +#include "vp9/common/vp9_mvref_common.h" +#include "vp9/common/vp9_pred_common.h" +#include "vp9/common/vp9_reconinter.h" +#include "vp9/common/vp9_seg_common.h" + +#include "vp9/decoder/vp9_decodemv.h" +#include "vp9/decoder/vp9_decodeframe.h" + +#include "vpx_dsp/vpx_dsp_common.h" + +static PREDICTION_MODE read_intra_mode(vpx_reader *r, const vpx_prob *p) { + return (PREDICTION_MODE)vpx_read_tree(r, vp9_intra_mode_tree, p); +} + +static PREDICTION_MODE read_intra_mode_y(VP9_COMMON *cm, MACROBLOCKD *xd, + vpx_reader *r, int size_group) { + const PREDICTION_MODE y_mode = + read_intra_mode(r, cm->fc->y_mode_prob[size_group]); + FRAME_COUNTS *counts = xd->counts; + if (counts) + ++counts->y_mode[size_group][y_mode]; + return y_mode; +} + +static PREDICTION_MODE read_intra_mode_uv(VP9_COMMON *cm, MACROBLOCKD *xd, + vpx_reader *r, + PREDICTION_MODE y_mode) { + const PREDICTION_MODE uv_mode = read_intra_mode(r, + cm->fc->uv_mode_prob[y_mode]); + FRAME_COUNTS *counts = xd->counts; + if (counts) + ++counts->uv_mode[y_mode][uv_mode]; + return uv_mode; +} + +static PREDICTION_MODE read_inter_mode(VP9_COMMON *cm, MACROBLOCKD *xd, + vpx_reader *r, int ctx) { + const int mode = vpx_read_tree(r, vp9_inter_mode_tree, + cm->fc->inter_mode_probs[ctx]); + FRAME_COUNTS *counts = xd->counts; + if (counts) + ++counts->inter_mode[ctx][mode]; + + return NEARESTMV + mode; +} + +static int read_segment_id(vpx_reader *r, const struct segmentation *seg) { + return vpx_read_tree(r, vp9_segment_tree, seg->tree_probs); +} + +static TX_SIZE read_selected_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd, + TX_SIZE max_tx_size, vpx_reader *r) { + FRAME_COUNTS *counts = xd->counts; + const int ctx = get_tx_size_context(xd); + const vpx_prob *tx_probs = get_tx_probs(max_tx_size, ctx, &cm->fc->tx_probs); + int tx_size = vpx_read(r, tx_probs[0]); + if (tx_size != TX_4X4 && max_tx_size >= TX_16X16) { + tx_size += vpx_read(r, tx_probs[1]); + if (tx_size != TX_8X8 && max_tx_size >= TX_32X32) + tx_size += vpx_read(r, tx_probs[2]); + } + + if (counts) + ++get_tx_counts(max_tx_size, ctx, &counts->tx)[tx_size]; + return (TX_SIZE)tx_size; +} + +static INLINE TX_SIZE read_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd, + int allow_select, vpx_reader *r) { + TX_MODE tx_mode = cm->tx_mode; + BLOCK_SIZE bsize = xd->mi[0]->sb_type; + const TX_SIZE max_tx_size = max_txsize_lookup[bsize]; + if (allow_select && tx_mode == TX_MODE_SELECT && bsize >= BLOCK_8X8) + return read_selected_tx_size(cm, xd, max_tx_size, r); + else + return VPXMIN(max_tx_size, tx_mode_to_biggest_tx_size[tx_mode]); +} + +static int dec_get_segment_id(const VP9_COMMON *cm, const uint8_t *segment_ids, + int mi_offset, int x_mis, int y_mis) { + int x, y, segment_id = INT_MAX; + + for (y = 0; y < y_mis; y++) + for (x = 0; x < x_mis; x++) + segment_id = + VPXMIN(segment_id, segment_ids[mi_offset + y * cm->mi_cols + x]); + + assert(segment_id >= 0 && segment_id < MAX_SEGMENTS); + return segment_id; +} + +static void set_segment_id(VP9_COMMON *cm, int mi_offset, + int x_mis, int y_mis, int segment_id) { + int x, y; + + assert(segment_id >= 0 && segment_id < MAX_SEGMENTS); + + for (y = 0; y < y_mis; y++) + for (x = 0; x < x_mis; x++) + cm->current_frame_seg_map[mi_offset + y * cm->mi_cols + x] = segment_id; +} + +static void copy_segment_id(const VP9_COMMON *cm, + const uint8_t *last_segment_ids, + uint8_t 
*current_segment_ids, + int mi_offset, int x_mis, int y_mis) { + int x, y; + + for (y = 0; y < y_mis; y++) + for (x = 0; x < x_mis; x++) + current_segment_ids[mi_offset + y * cm->mi_cols + x] = last_segment_ids ? + last_segment_ids[mi_offset + y * cm->mi_cols + x] : 0; +} + +static int read_intra_segment_id(VP9_COMMON *const cm, int mi_offset, + int x_mis, int y_mis, + vpx_reader *r) { + struct segmentation *const seg = &cm->seg; + int segment_id; + + if (!seg->enabled) + return 0; // Default for disabled segmentation + + if (!seg->update_map) { + copy_segment_id(cm, cm->last_frame_seg_map, cm->current_frame_seg_map, + mi_offset, x_mis, y_mis); + return 0; + } + + segment_id = read_segment_id(r, seg); + set_segment_id(cm, mi_offset, x_mis, y_mis, segment_id); + return segment_id; +} + +static int read_inter_segment_id(VP9_COMMON *const cm, MACROBLOCKD *const xd, + int mi_row, int mi_col, vpx_reader *r, + int x_mis, int y_mis) { + struct segmentation *const seg = &cm->seg; + MODE_INFO *const mi = xd->mi[0]; + int predicted_segment_id, segment_id; + const int mi_offset = mi_row * cm->mi_cols + mi_col; + + if (!seg->enabled) + return 0; // Default for disabled segmentation + + predicted_segment_id = cm->last_frame_seg_map ? + dec_get_segment_id(cm, cm->last_frame_seg_map, mi_offset, x_mis, y_mis) : + 0; + + if (!seg->update_map) { + copy_segment_id(cm, cm->last_frame_seg_map, cm->current_frame_seg_map, + mi_offset, x_mis, y_mis); + return predicted_segment_id; + } + + if (seg->temporal_update) { + const vpx_prob pred_prob = vp9_get_pred_prob_seg_id(seg, xd); + mi->seg_id_predicted = vpx_read(r, pred_prob); + segment_id = mi->seg_id_predicted ? predicted_segment_id + : read_segment_id(r, seg); + } else { + segment_id = read_segment_id(r, seg); + } + set_segment_id(cm, mi_offset, x_mis, y_mis, segment_id); + return segment_id; +} + +static int read_skip(VP9_COMMON *cm, const MACROBLOCKD *xd, + int segment_id, vpx_reader *r) { + if (segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) { + return 1; + } else { + const int ctx = vp9_get_skip_context(xd); + const int skip = vpx_read(r, cm->fc->skip_probs[ctx]); + FRAME_COUNTS *counts = xd->counts; + if (counts) + ++counts->skip[ctx][skip]; + return skip; + } +} + +static void read_intra_frame_mode_info(VP9_COMMON *const cm, + MACROBLOCKD *const xd, + int mi_row, int mi_col, vpx_reader *r, + int x_mis, int y_mis) { + MODE_INFO *const mi = xd->mi[0]; + const MODE_INFO *above_mi = xd->above_mi; + const MODE_INFO *left_mi = xd->left_mi; + const BLOCK_SIZE bsize = mi->sb_type; + int i; + const int mi_offset = mi_row * cm->mi_cols + mi_col; + + mi->segment_id = read_intra_segment_id(cm, mi_offset, x_mis, y_mis, r); + mi->skip = read_skip(cm, xd, mi->segment_id, r); + mi->tx_size = read_tx_size(cm, xd, 1, r); + mi->ref_frame[0] = INTRA_FRAME; + mi->ref_frame[1] = NONE; + + switch (bsize) { + case BLOCK_4X4: + for (i = 0; i < 4; ++i) + mi->bmi[i].as_mode = + read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, i)); + mi->mode = mi->bmi[3].as_mode; + break; + case BLOCK_4X8: + mi->bmi[0].as_mode = mi->bmi[2].as_mode = + read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 0)); + mi->bmi[1].as_mode = mi->bmi[3].as_mode = mi->mode = + read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 1)); + break; + case BLOCK_8X4: + mi->bmi[0].as_mode = mi->bmi[1].as_mode = + read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 0)); + mi->bmi[2].as_mode = mi->bmi[3].as_mode = mi->mode = + read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 
+                                        2));
+      break;
+    default:
+      mi->mode = read_intra_mode(r,
+                                 get_y_mode_probs(mi, above_mi, left_mi, 0));
+  }
+
+  mi->uv_mode = read_intra_mode(r, vp9_kf_uv_mode_prob[mi->mode]);
+}
+
+static int read_mv_component(vpx_reader *r,
+                             const nmv_component *mvcomp, int usehp) {
+  int mag, d, fr, hp;
+  const int sign = vpx_read(r, mvcomp->sign);
+  const int mv_class = vpx_read_tree(r, vp9_mv_class_tree, mvcomp->classes);
+  const int class0 = mv_class == MV_CLASS_0;
+
+  // Integer part
+  if (class0) {
+    d = vpx_read_tree(r, vp9_mv_class0_tree, mvcomp->class0);
+    mag = 0;
+  } else {
+    int i;
+    const int n = mv_class + CLASS0_BITS - 1;  // number of bits
+
+    d = 0;
+    for (i = 0; i < n; ++i)
+      d |= vpx_read(r, mvcomp->bits[i]) << i;
+    mag = CLASS0_SIZE << (mv_class + 2);
+  }
+
+  // Fractional part
+  fr = vpx_read_tree(r, vp9_mv_fp_tree, class0 ? mvcomp->class0_fp[d]
+                                               : mvcomp->fp);
+
+  // High precision part (if hp is not used, the default value is 1)
+  hp = usehp ? vpx_read(r, class0 ? mvcomp->class0_hp : mvcomp->hp)
+             : 1;
+
+  // Result
+  mag += ((d << 3) | (fr << 1) | hp) + 1;
+  return sign ? -mag : mag;
+}
+
+static INLINE void read_mv(vpx_reader *r, MV *mv, const MV *ref,
+                           const nmv_context *ctx,
+                           nmv_context_counts *counts, int allow_hp) {
+  const MV_JOINT_TYPE joint_type =
+      (MV_JOINT_TYPE)vpx_read_tree(r, vp9_mv_joint_tree, ctx->joints);
+  const int use_hp = allow_hp && use_mv_hp(ref);
+  MV diff = {0, 0};
+
+  if (mv_joint_vertical(joint_type))
+    diff.row = read_mv_component(r, &ctx->comps[0], use_hp);
+
+  if (mv_joint_horizontal(joint_type))
+    diff.col = read_mv_component(r, &ctx->comps[1], use_hp);
+
+  vp9_inc_mv(&diff, counts);
+
+  mv->row = ref->row + diff.row;
+  mv->col = ref->col + diff.col;
+}
+
+static REFERENCE_MODE read_block_reference_mode(VP9_COMMON *cm,
+                                                const MACROBLOCKD *xd,
+                                                vpx_reader *r) {
+  if (cm->reference_mode == REFERENCE_MODE_SELECT) {
+    const int ctx = vp9_get_reference_mode_context(cm, xd);
+    const REFERENCE_MODE mode =
+        (REFERENCE_MODE)vpx_read(r, cm->fc->comp_inter_prob[ctx]);
+    FRAME_COUNTS *counts = xd->counts;
+    if (counts)
+      ++counts->comp_inter[ctx][mode];
+    return mode;  // SINGLE_REFERENCE or COMPOUND_REFERENCE
+  } else {
+    return cm->reference_mode;
+  }
+}
+
+// Read the reference frame
+static void read_ref_frames(VP9_COMMON *const cm, MACROBLOCKD *const xd,
+                            vpx_reader *r,
+                            int segment_id, MV_REFERENCE_FRAME ref_frame[2]) {
+  FRAME_CONTEXT *const fc = cm->fc;
+  FRAME_COUNTS *counts = xd->counts;
+
+  if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) {
+    ref_frame[0] = (MV_REFERENCE_FRAME)get_segdata(&cm->seg, segment_id,
+                                                   SEG_LVL_REF_FRAME);
+    ref_frame[1] = NONE;
+  } else {
+    const REFERENCE_MODE mode = read_block_reference_mode(cm, xd, r);
+    // FIXME(rbultje) I'm pretty sure this breaks segmentation ref frame coding
+    if (mode == COMPOUND_REFERENCE) {
+      const int idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref];
+      const int ctx = vp9_get_pred_context_comp_ref_p(cm, xd);
+      const int bit = vpx_read(r, fc->comp_ref_prob[ctx]);
+      if (counts)
+        ++counts->comp_ref[ctx][bit];
+      ref_frame[idx] = cm->comp_fixed_ref;
+      ref_frame[!idx] = cm->comp_var_ref[bit];
+    } else if (mode == SINGLE_REFERENCE) {
+      const int ctx0 = vp9_get_pred_context_single_ref_p1(xd);
+      const int bit0 = vpx_read(r, fc->single_ref_prob[ctx0][0]);
+      if (counts)
+        ++counts->single_ref[ctx0][0][bit0];
+      if (bit0) {
+        const int ctx1 = vp9_get_pred_context_single_ref_p2(xd);
+        const int bit1 = vpx_read(r, fc->single_ref_prob[ctx1][1]);
+        if (counts) +
++counts->single_ref[ctx1][1][bit1]; + ref_frame[0] = bit1 ? ALTREF_FRAME : GOLDEN_FRAME; + } else { + ref_frame[0] = LAST_FRAME; + } + + ref_frame[1] = NONE; + } else { + assert(0 && "Invalid prediction mode."); + } + } +} + +// TODO(slavarnway): Move this decoder version of +// vp9_get_pred_context_switchable_interp() to vp9_pred_common.h and update the +// encoder. +// +// Returns a context number for the given MB prediction signal +static int dec_get_pred_context_switchable_interp(const MACROBLOCKD *xd) { + // Note: + // The mode info data structure has a one element border above and to the + // left of the entries corresponding to real macroblocks. + // The prediction flags in these dummy entries are initialized to 0. + const MODE_INFO *const left_mi = xd->left_mi; + const int left_type = left_mi ? left_mi->interp_filter : SWITCHABLE_FILTERS; + const MODE_INFO *const above_mi = xd->above_mi; + const int above_type = above_mi ? above_mi->interp_filter + : SWITCHABLE_FILTERS; + + if (left_type == above_type) + return left_type; + else if (left_type == SWITCHABLE_FILTERS) + return above_type; + else if (above_type == SWITCHABLE_FILTERS) + return left_type; + else + return SWITCHABLE_FILTERS; +} + +static INLINE INTERP_FILTER read_switchable_interp_filter( + VP9_COMMON *const cm, MACROBLOCKD *const xd, + vpx_reader *r) { + const int ctx = dec_get_pred_context_switchable_interp(xd); + const INTERP_FILTER type = + (INTERP_FILTER)vpx_read_tree(r, vp9_switchable_interp_tree, + cm->fc->switchable_interp_prob[ctx]); + FRAME_COUNTS *counts = xd->counts; + if (counts) + ++counts->switchable_interp[ctx][type]; + return type; +} + +static void read_intra_block_mode_info(VP9_COMMON *const cm, + MACROBLOCKD *const xd, MODE_INFO *mi, + vpx_reader *r) { + const BLOCK_SIZE bsize = mi->sb_type; + int i; + + switch (bsize) { + case BLOCK_4X4: + for (i = 0; i < 4; ++i) + mi->bmi[i].as_mode = read_intra_mode_y(cm, xd, r, 0); + mi->mode = mi->bmi[3].as_mode; + break; + case BLOCK_4X8: + mi->bmi[0].as_mode = mi->bmi[2].as_mode = read_intra_mode_y(cm, xd, + r, 0); + mi->bmi[1].as_mode = mi->bmi[3].as_mode = mi->mode = + read_intra_mode_y(cm, xd, r, 0); + break; + case BLOCK_8X4: + mi->bmi[0].as_mode = mi->bmi[1].as_mode = read_intra_mode_y(cm, xd, + r, 0); + mi->bmi[2].as_mode = mi->bmi[3].as_mode = mi->mode = + read_intra_mode_y(cm, xd, r, 0); + break; + default: + mi->mode = read_intra_mode_y(cm, xd, r, size_group_lookup[bsize]); + } + + mi->uv_mode = read_intra_mode_uv(cm, xd, r, mi->mode); + + // Initialize interp_filter here so we do not have to check for inter block + // modes in dec_get_pred_context_switchable_interp() + mi->interp_filter = SWITCHABLE_FILTERS; + + mi->ref_frame[0] = INTRA_FRAME; + mi->ref_frame[1] = NONE; +} + +static INLINE int is_mv_valid(const MV *mv) { + return mv->row > MV_LOW && mv->row < MV_UPP && + mv->col > MV_LOW && mv->col < MV_UPP; +} + +static INLINE void copy_mv_pair(int_mv *dst, const int_mv *src) { + memcpy(dst, src, sizeof(*dst) * 2); +} + +static INLINE void zero_mv_pair(int_mv *dst) { + memset(dst, 0, sizeof(*dst) * 2); +} + +static INLINE int assign_mv(VP9_COMMON *cm, MACROBLOCKD *xd, + PREDICTION_MODE mode, + int_mv mv[2], int_mv ref_mv[2], + int_mv near_nearest_mv[2], + int is_compound, int allow_hp, vpx_reader *r) { + int i; + int ret = 1; + + switch (mode) { + case NEWMV: { + FRAME_COUNTS *counts = xd->counts; + nmv_context_counts *const mv_counts = counts ? 
+                                                &counts->mv : NULL;
+      for (i = 0; i < 1 + is_compound; ++i) {
+        read_mv(r, &mv[i].as_mv, &ref_mv[i].as_mv, &cm->fc->nmvc, mv_counts,
+                allow_hp);
+        ret = ret && is_mv_valid(&mv[i].as_mv);
+      }
+      break;
+    }
+    case NEARMV:
+    case NEARESTMV: {
+      copy_mv_pair(mv, near_nearest_mv);
+      break;
+    }
+    case ZEROMV: {
+      zero_mv_pair(mv);
+      break;
+    }
+    default: {
+      return 0;
+    }
+  }
+  return ret;
+}
+
+static int read_is_inter_block(VP9_COMMON *const cm, MACROBLOCKD *const xd,
+                               int segment_id, vpx_reader *r) {
+  if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) {
+    return get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME) != INTRA_FRAME;
+  } else {
+    const int ctx = get_intra_inter_context(xd);
+    const int is_inter = vpx_read(r, cm->fc->intra_inter_prob[ctx]);
+    FRAME_COUNTS *counts = xd->counts;
+    if (counts)
+      ++counts->intra_inter[ctx][is_inter];
+    return is_inter;
+  }
+}
+
+static void dec_find_best_ref_mvs(int allow_hp, int_mv *mvlist, int_mv *best_mv,
+                                  int refmv_count) {
+  int i;
+
+  // Make sure all the candidates are properly clamped, etc.
+  for (i = 0; i < refmv_count; ++i) {
+    lower_mv_precision(&mvlist[i].as_mv, allow_hp);
+    *best_mv = mvlist[i];
+  }
+}
+
+static void fpm_sync(void *const data, int mi_row) {
+  VP9Decoder *const pbi = (VP9Decoder *)data;
+  vp9_frameworker_wait(pbi->frame_worker_owner, pbi->common.prev_frame,
+                       mi_row << MI_BLOCK_SIZE_LOG2);
+}
+
+// This macro is used to add a motion vector to the mv_ref list if it isn't
+// already in the list. If it's the second motion vector or early_break
+// it will also skip all additional processing and jump to Done.
+#define ADD_MV_REF_LIST_EB(mv, refmv_count, mv_ref_list, Done) \
+  do { \
+    if (refmv_count) { \
+      if ((mv).as_int != (mv_ref_list)[0].as_int) { \
+        (mv_ref_list)[(refmv_count)] = (mv); \
+        refmv_count++; \
+        goto Done; \
+      } \
+    } else { \
+      (mv_ref_list)[(refmv_count)++] = (mv); \
+      if (early_break) \
+        goto Done; \
+    } \
+  } while (0)
+
+// If either reference frame is different, not INTRA, and they
+// are different from each other, scale and add the mv to our list.
+#define IF_DIFF_REF_FRAME_ADD_MV_EB(mbmi, ref_frame, ref_sign_bias, \
+                                    refmv_count, mv_ref_list, Done) \
+  do { \
+    if (is_inter_block(mbmi)) { \
+      if ((mbmi)->ref_frame[0] != ref_frame) \
+        ADD_MV_REF_LIST_EB(scale_mv((mbmi), 0, ref_frame, ref_sign_bias), \
+                           refmv_count, mv_ref_list, Done); \
+      if (has_second_ref(mbmi) && \
+          (mbmi)->ref_frame[1] != ref_frame && \
+          (mbmi)->mv[1].as_int != (mbmi)->mv[0].as_int) \
+        ADD_MV_REF_LIST_EB(scale_mv((mbmi), 1, ref_frame, ref_sign_bias), \
+                           refmv_count, mv_ref_list, Done); \
+    } \
+  } while (0)
+
+// This function searches the neighborhood of a given MB/SB
+// to try and find candidate reference vectors.
+static int dec_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd,
+                            PREDICTION_MODE mode, MV_REFERENCE_FRAME ref_frame,
+                            const POSITION *const mv_ref_search,
+                            int_mv *mv_ref_list,
+                            int mi_row, int mi_col, int block, int is_sub8x8,
+                            find_mv_refs_sync sync, void *const data) {
+  const int *ref_sign_bias = cm->ref_frame_sign_bias;
+  int i, refmv_count = 0;
+  int different_ref_found = 0;
+  const MV_REF *const prev_frame_mvs = cm->use_prev_frame_mvs ?
+      cm->prev_frame->mvs + mi_row * cm->mi_cols + mi_col : NULL;
+  const TileInfo *const tile = &xd->tile;
+  // If mode is nearestmv or newmv (which uses nearestmv as a reference), stop
+  // searching after the first mv is found.
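+  // For example, NEARESTMV and NEWMV consume only mv_ref_list[0], so the
+  // first hit can take the early goto in ADD_MV_REF_LIST_EB above, while
+  // NEARMV needs a second, distinct candidate and keeps scanning.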
+ const int early_break = (mode != NEARMV); + + // Blank the reference vector list + memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES); + + i = 0; + if (is_sub8x8) { + // If the size < 8x8 we get the mv from the bmi substructure for the + // nearest two blocks. + for (i = 0; i < 2; ++i) { + const POSITION *const mv_ref = &mv_ref_search[i]; + if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { + const MODE_INFO *const candidate_mi = + xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; + different_ref_found = 1; + + if (candidate_mi->ref_frame[0] == ref_frame) + ADD_MV_REF_LIST_EB( + get_sub_block_mv(candidate_mi, 0, mv_ref->col, block), + refmv_count, mv_ref_list, Done); + else if (candidate_mi->ref_frame[1] == ref_frame) + ADD_MV_REF_LIST_EB( + get_sub_block_mv(candidate_mi, 1, mv_ref->col, block), + refmv_count, mv_ref_list, Done); + } + } + } + + // Check the rest of the neighbors in much the same way + // as before except we don't need to keep track of sub blocks or + // mode counts. + for (; i < MVREF_NEIGHBOURS; ++i) { + const POSITION *const mv_ref = &mv_ref_search[i]; + if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { + const MODE_INFO *const candidate = + xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; + different_ref_found = 1; + + if (candidate->ref_frame[0] == ref_frame) + ADD_MV_REF_LIST_EB(candidate->mv[0], refmv_count, mv_ref_list, Done); + else if (candidate->ref_frame[1] == ref_frame) + ADD_MV_REF_LIST_EB(candidate->mv[1], refmv_count, mv_ref_list, Done); + } + } + + // TODO(hkuang): Remove this sync after fixing pthread_cond_broadcast + // on windows platform. The sync here is unnecessary if use_prev_frame_mvs + // is 0. But after removing it, there will be hang in the unit test on windows + // due to several threads waiting for a thread's signal. +#if defined(_WIN32) && !HAVE_PTHREAD_H + if (cm->frame_parallel_decode && sync != NULL) { + sync(data, mi_row); + } +#endif + + // Check the last frame's mode and mv info. + if (prev_frame_mvs) { + // Synchronize here for frame parallel decode if sync function is provided. + if (cm->frame_parallel_decode && sync != NULL) { + sync(data, mi_row); + } + + if (prev_frame_mvs->ref_frame[0] == ref_frame) { + ADD_MV_REF_LIST_EB(prev_frame_mvs->mv[0], refmv_count, mv_ref_list, Done); + } else if (prev_frame_mvs->ref_frame[1] == ref_frame) { + ADD_MV_REF_LIST_EB(prev_frame_mvs->mv[1], refmv_count, mv_ref_list, Done); + } + } + + // Since we couldn't find 2 mvs from the same reference frame + // go back through the neighbors and find motion vectors from + // different reference frames. + if (different_ref_found) { + for (i = 0; i < MVREF_NEIGHBOURS; ++i) { + const POSITION *mv_ref = &mv_ref_search[i]; + if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { + const MODE_INFO *const candidate = + xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; + + // If the candidate is INTRA we don't want to consider its mv. + IF_DIFF_REF_FRAME_ADD_MV_EB(candidate, ref_frame, ref_sign_bias, + refmv_count, mv_ref_list, Done); + } + } + } + + // Since we still don't have a candidate we'll try the last frame. 
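+  // A candidate taken from the previous frame may point in the opposite
+  // temporal direction; when the sign bias of its reference differs from
+  // ours, the mv is negated below before it is added.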
+ if (prev_frame_mvs) { + if (prev_frame_mvs->ref_frame[0] != ref_frame && + prev_frame_mvs->ref_frame[0] > INTRA_FRAME) { + int_mv mv = prev_frame_mvs->mv[0]; + if (ref_sign_bias[prev_frame_mvs->ref_frame[0]] != + ref_sign_bias[ref_frame]) { + mv.as_mv.row *= -1; + mv.as_mv.col *= -1; + } + ADD_MV_REF_LIST_EB(mv, refmv_count, mv_ref_list, Done); + } + + if (prev_frame_mvs->ref_frame[1] > INTRA_FRAME && + prev_frame_mvs->ref_frame[1] != ref_frame && + prev_frame_mvs->mv[1].as_int != prev_frame_mvs->mv[0].as_int) { + int_mv mv = prev_frame_mvs->mv[1]; + if (ref_sign_bias[prev_frame_mvs->ref_frame[1]] != + ref_sign_bias[ref_frame]) { + mv.as_mv.row *= -1; + mv.as_mv.col *= -1; + } + ADD_MV_REF_LIST_EB(mv, refmv_count, mv_ref_list, Done); + } + } + + if (mode == NEARMV) + refmv_count = MAX_MV_REF_CANDIDATES; + else + // we only care about the nearestmv for the remaining modes + refmv_count = 1; + + Done: + // Clamp vectors + for (i = 0; i < refmv_count; ++i) + clamp_mv_ref(&mv_ref_list[i].as_mv, xd); + + return refmv_count; +} + +static void append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd, + const POSITION *const mv_ref_search, + PREDICTION_MODE b_mode, int block, + int ref, int mi_row, int mi_col, + int_mv *best_sub8x8) { + int_mv mv_list[MAX_MV_REF_CANDIDATES]; + MODE_INFO *const mi = xd->mi[0]; + b_mode_info *bmi = mi->bmi; + int n; + int refmv_count; + + assert(MAX_MV_REF_CANDIDATES == 2); + + refmv_count = dec_find_mv_refs(cm, xd, b_mode, mi->ref_frame[ref], + mv_ref_search, mv_list, mi_row, mi_col, block, + 1, NULL, NULL); + + switch (block) { + case 0: + best_sub8x8->as_int = mv_list[refmv_count - 1].as_int; + break; + case 1: + case 2: + if (b_mode == NEARESTMV) { + best_sub8x8->as_int = bmi[0].as_mv[ref].as_int; + } else { + best_sub8x8->as_int = 0; + for (n = 0; n < refmv_count; ++n) + if (bmi[0].as_mv[ref].as_int != mv_list[n].as_int) { + best_sub8x8->as_int = mv_list[n].as_int; + break; + } + } + break; + case 3: + if (b_mode == NEARESTMV) { + best_sub8x8->as_int = bmi[2].as_mv[ref].as_int; + } else { + int_mv candidates[2 + MAX_MV_REF_CANDIDATES]; + candidates[0] = bmi[1].as_mv[ref]; + candidates[1] = bmi[0].as_mv[ref]; + candidates[2] = mv_list[0]; + candidates[3] = mv_list[1]; + best_sub8x8->as_int = 0; + for (n = 0; n < 2 + MAX_MV_REF_CANDIDATES; ++n) + if (bmi[2].as_mv[ref].as_int != candidates[n].as_int) { + best_sub8x8->as_int = candidates[n].as_int; + break; + } + } + break; + default: + assert(0 && "Invalid block index."); + } +} + +static uint8_t get_mode_context(const VP9_COMMON *cm, const MACROBLOCKD *xd, + const POSITION *const mv_ref_search, + int mi_row, int mi_col) { + int i; + int context_counter = 0; + const TileInfo *const tile = &xd->tile; + + // Get mode count from nearest 2 blocks + for (i = 0; i < 2; ++i) { + const POSITION *const mv_ref = &mv_ref_search[i]; + if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { + const MODE_INFO *const candidate = + xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; + // Keep counts for entropy encoding. 
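+      // mode_2_counter[] maps each prediction mode to a small weight; the
+      // summed weight of the two neighbours is then folded into an inter
+      // mode context by counter_to_context[].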
+      context_counter += mode_2_counter[candidate->mode];
+    }
+  }
+
+  return counter_to_context[context_counter];
+}
+
+static void read_inter_block_mode_info(VP9Decoder *const pbi,
+                                       MACROBLOCKD *const xd,
+                                       MODE_INFO *const mi,
+                                       int mi_row, int mi_col, vpx_reader *r) {
+  VP9_COMMON *const cm = &pbi->common;
+  const BLOCK_SIZE bsize = mi->sb_type;
+  const int allow_hp = cm->allow_high_precision_mv;
+  int_mv best_ref_mvs[2];
+  int ref, is_compound;
+  uint8_t inter_mode_ctx;
+  const POSITION *const mv_ref_search = mv_ref_blocks[bsize];
+
+  read_ref_frames(cm, xd, r, mi->segment_id, mi->ref_frame);
+  is_compound = has_second_ref(mi);
+  inter_mode_ctx = get_mode_context(cm, xd, mv_ref_search, mi_row, mi_col);
+
+  if (segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP)) {
+    mi->mode = ZEROMV;
+    if (bsize < BLOCK_8X8) {
+      vpx_internal_error(xd->error_info, VPX_CODEC_UNSUP_BITSTREAM,
+                         "Invalid usage of segment feature on small blocks");
+      return;
+    }
+  } else {
+    if (bsize >= BLOCK_8X8)
+      mi->mode = read_inter_mode(cm, xd, r, inter_mode_ctx);
+    else
+      // Sub 8x8 blocks use the nearestmv as a ref_mv if the b_mode is NEWMV.
+      // Setting mode to NEARESTMV forces the search to stop after the
+      // nearestmv has been found. After b_modes have been read, mode will be
+      // overwritten by the last b_mode.
+      mi->mode = NEARESTMV;
+
+    if (mi->mode != ZEROMV) {
+      for (ref = 0; ref < 1 + is_compound; ++ref) {
+        int_mv tmp_mvs[MAX_MV_REF_CANDIDATES];
+        const MV_REFERENCE_FRAME frame = mi->ref_frame[ref];
+        int refmv_count;
+
+        refmv_count = dec_find_mv_refs(cm, xd, mi->mode, frame, mv_ref_search,
+                                       tmp_mvs, mi_row, mi_col, -1, 0,
+                                       fpm_sync, (void *)pbi);
+
+        dec_find_best_ref_mvs(allow_hp, tmp_mvs, &best_ref_mvs[ref],
+                              refmv_count);
+      }
+    }
+  }
+
+  mi->interp_filter = (cm->interp_filter == SWITCHABLE)
+                      ?
read_switchable_interp_filter(cm, xd, r) + : cm->interp_filter; + + if (bsize < BLOCK_8X8) { + const int num_4x4_w = 1 << xd->bmode_blocks_wl; + const int num_4x4_h = 1 << xd->bmode_blocks_hl; + int idx, idy; + PREDICTION_MODE b_mode; + int_mv best_sub8x8[2]; + for (idy = 0; idy < 2; idy += num_4x4_h) { + for (idx = 0; idx < 2; idx += num_4x4_w) { + const int j = idy * 2 + idx; + b_mode = read_inter_mode(cm, xd, r, inter_mode_ctx); + + if (b_mode == NEARESTMV || b_mode == NEARMV) { + for (ref = 0; ref < 1 + is_compound; ++ref) + append_sub8x8_mvs_for_idx(cm, xd, mv_ref_search, b_mode, j, ref, + mi_row, mi_col, &best_sub8x8[ref]); + } + + if (!assign_mv(cm, xd, b_mode, mi->bmi[j].as_mv, best_ref_mvs, + best_sub8x8, is_compound, allow_hp, r)) { + xd->corrupted |= 1; + break; + } + + if (num_4x4_h == 2) + mi->bmi[j + 2] = mi->bmi[j]; + if (num_4x4_w == 2) + mi->bmi[j + 1] = mi->bmi[j]; + } + } + + mi->mode = b_mode; + + copy_mv_pair(mi->mv, mi->bmi[3].as_mv); + } else { + xd->corrupted |= !assign_mv(cm, xd, mi->mode, mi->mv, best_ref_mvs, + best_ref_mvs, is_compound, allow_hp, r); + } +} + +static void read_inter_frame_mode_info(VP9Decoder *const pbi, + MACROBLOCKD *const xd, + int mi_row, int mi_col, vpx_reader *r, + int x_mis, int y_mis) { + VP9_COMMON *const cm = &pbi->common; + MODE_INFO *const mi = xd->mi[0]; + int inter_block; + + mi->segment_id = read_inter_segment_id(cm, xd, mi_row, mi_col, r, x_mis, + y_mis); + mi->skip = read_skip(cm, xd, mi->segment_id, r); + inter_block = read_is_inter_block(cm, xd, mi->segment_id, r); + mi->tx_size = read_tx_size(cm, xd, !mi->skip || !inter_block, r); + + if (inter_block) + read_inter_block_mode_info(pbi, xd, mi, mi_row, mi_col, r); + else + read_intra_block_mode_info(cm, xd, mi, r); +} + +static INLINE void copy_ref_frame_pair(MV_REFERENCE_FRAME *dst, + const MV_REFERENCE_FRAME *src) { + memcpy(dst, src, sizeof(*dst) * 2); +} + +void vp9_read_mode_info(VP9Decoder *const pbi, MACROBLOCKD *xd, + int mi_row, int mi_col, vpx_reader *r, + int x_mis, int y_mis) { + VP9_COMMON *const cm = &pbi->common; + MODE_INFO *const mi = xd->mi[0]; + MV_REF* frame_mvs = cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col; + int w, h; + + if (frame_is_intra_only(cm)) { + read_intra_frame_mode_info(cm, xd, mi_row, mi_col, r, x_mis, y_mis); + } else { + read_inter_frame_mode_info(pbi, xd, mi_row, mi_col, r, x_mis, y_mis); + + for (h = 0; h < y_mis; ++h) { + for (w = 0; w < x_mis; ++w) { + MV_REF *const mv = frame_mvs + w; + copy_ref_frame_pair(mv->ref_frame, mi->ref_frame); + copy_mv_pair(mv->mv, mi->mv); + } + frame_mvs += cm->mi_cols; + } + } +#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH + if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) && + (xd->above_mi == NULL || xd->left_mi == NULL) && + !is_inter_block(mi) && need_top_left[mi->uv_mode]) + assert(0); +#endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH +} diff --git a/thirdparty/libvpx/vp9/decoder/vp9_decodemv.h b/thirdparty/libvpx/vp9/decoder/vp9_decodemv.h new file mode 100644 index 0000000000..45569ec81f --- /dev/null +++ b/thirdparty/libvpx/vp9/decoder/vp9_decodemv.h @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_DECODER_VP9_DECODEMV_H_ +#define VP9_DECODER_VP9_DECODEMV_H_ + +#include "vpx_dsp/bitreader.h" + +#include "vp9/decoder/vp9_decoder.h" + +#ifdef __cplusplus +extern "C" { +#endif + +void vp9_read_mode_info(VP9Decoder *const pbi, MACROBLOCKD *xd, + int mi_row, int mi_col, vpx_reader *r, + int x_mis, int y_mis); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_DECODER_VP9_DECODEMV_H_ diff --git a/thirdparty/libvpx/vp9/decoder/vp9_decoder.c b/thirdparty/libvpx/vp9/decoder/vp9_decoder.c new file mode 100644 index 0000000000..935c04f3aa --- /dev/null +++ b/thirdparty/libvpx/vp9/decoder/vp9_decoder.c @@ -0,0 +1,518 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <assert.h> +#include <limits.h> +#include <stdio.h> + +#include "./vp9_rtcd.h" +#include "./vpx_dsp_rtcd.h" +#include "./vpx_scale_rtcd.h" + +#include "vpx_mem/vpx_mem.h" +#include "vpx_ports/system_state.h" +#include "vpx_ports/vpx_once.h" +#include "vpx_ports/vpx_timer.h" +#include "vpx_scale/vpx_scale.h" +#include "vpx_util/vpx_thread.h" + +#include "vp9/common/vp9_alloccommon.h" +#include "vp9/common/vp9_loopfilter.h" +#include "vp9/common/vp9_onyxc_int.h" +#if CONFIG_VP9_POSTPROC +#include "vp9/common/vp9_postproc.h" +#endif +#include "vp9/common/vp9_quant_common.h" +#include "vp9/common/vp9_reconintra.h" + +#include "vp9/decoder/vp9_decodeframe.h" +#include "vp9/decoder/vp9_decoder.h" +#include "vp9/decoder/vp9_detokenize.h" + +static void initialize_dec(void) { + static volatile int init_done = 0; + + if (!init_done) { + vp9_rtcd(); + vpx_dsp_rtcd(); + vpx_scale_rtcd(); + vp9_init_intra_predictors(); + init_done = 1; + } +} + +static void vp9_dec_setup_mi(VP9_COMMON *cm) { + cm->mi = cm->mip + cm->mi_stride + 1; + cm->mi_grid_visible = cm->mi_grid_base + cm->mi_stride + 1; + memset(cm->mi_grid_base, 0, + cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mi_grid_base)); +} + +static int vp9_dec_alloc_mi(VP9_COMMON *cm, int mi_size) { + cm->mip = vpx_calloc(mi_size, sizeof(*cm->mip)); + if (!cm->mip) + return 1; + cm->mi_alloc_size = mi_size; + cm->mi_grid_base = (MODE_INFO **)vpx_calloc(mi_size, sizeof(MODE_INFO*)); + if (!cm->mi_grid_base) + return 1; + return 0; +} + +static void vp9_dec_free_mi(VP9_COMMON *cm) { + vpx_free(cm->mip); + cm->mip = NULL; + vpx_free(cm->mi_grid_base); + cm->mi_grid_base = NULL; +} + +VP9Decoder *vp9_decoder_create(BufferPool *const pool) { + VP9Decoder *volatile const pbi = vpx_memalign(32, sizeof(*pbi)); + VP9_COMMON *volatile const cm = pbi ? &pbi->common : NULL; + + if (!cm) + return NULL; + + vp9_zero(*pbi); + + if (setjmp(cm->error.jmp)) { + cm->error.setjmp = 0; + vp9_decoder_remove(pbi); + return NULL; + } + + cm->error.setjmp = 1; + + CHECK_MEM_ERROR(cm, cm->fc, + (FRAME_CONTEXT *)vpx_calloc(1, sizeof(*cm->fc))); + CHECK_MEM_ERROR(cm, cm->frame_contexts, + (FRAME_CONTEXT *)vpx_calloc(FRAME_CONTEXTS, + sizeof(*cm->frame_contexts))); + + pbi->need_resync = 1; + once(initialize_dec); + + // Initialize the references to not point to any frame buffers. 
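+  // The maps hold ints, so memset()ing every byte to 0xff leaves each entry
+  // reading back as -1 (INVALID_IDX), i.e. no buffer attached.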
+ memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map)); + memset(&cm->next_ref_frame_map, -1, sizeof(cm->next_ref_frame_map)); + + cm->current_video_frame = 0; + pbi->ready_for_new_data = 1; + pbi->common.buffer_pool = pool; + + cm->bit_depth = VPX_BITS_8; + cm->dequant_bit_depth = VPX_BITS_8; + + cm->alloc_mi = vp9_dec_alloc_mi; + cm->free_mi = vp9_dec_free_mi; + cm->setup_mi = vp9_dec_setup_mi; + + vp9_loop_filter_init(cm); + + cm->error.setjmp = 0; + + vpx_get_worker_interface()->init(&pbi->lf_worker); + + return pbi; +} + +void vp9_decoder_remove(VP9Decoder *pbi) { + int i; + + if (!pbi) + return; + + vpx_get_worker_interface()->end(&pbi->lf_worker); + vpx_free(pbi->lf_worker.data1); + + for (i = 0; i < pbi->num_tile_workers; ++i) { + VPxWorker *const worker = &pbi->tile_workers[i]; + vpx_get_worker_interface()->end(worker); + } + + vpx_free(pbi->tile_worker_data); + vpx_free(pbi->tile_workers); + + if (pbi->num_tile_workers > 0) { + vp9_loop_filter_dealloc(&pbi->lf_row_sync); + } + + vpx_free(pbi); +} + +static int equal_dimensions(const YV12_BUFFER_CONFIG *a, + const YV12_BUFFER_CONFIG *b) { + return a->y_height == b->y_height && a->y_width == b->y_width && + a->uv_height == b->uv_height && a->uv_width == b->uv_width; +} + +vpx_codec_err_t vp9_copy_reference_dec(VP9Decoder *pbi, + VP9_REFFRAME ref_frame_flag, + YV12_BUFFER_CONFIG *sd) { + VP9_COMMON *cm = &pbi->common; + + /* TODO(jkoleszar): The decoder doesn't have any real knowledge of what the + * encoder is using the frame buffers for. This is just a stub to keep the + * vpxenc --test-decode functionality working, and will be replaced in a + * later commit that adds VP9-specific controls for this functionality. + */ + if (ref_frame_flag == VP9_LAST_FLAG) { + const YV12_BUFFER_CONFIG *const cfg = get_ref_frame(cm, 0); + if (cfg == NULL) { + vpx_internal_error(&cm->error, VPX_CODEC_ERROR, + "No 'last' reference frame"); + return VPX_CODEC_ERROR; + } + if (!equal_dimensions(cfg, sd)) + vpx_internal_error(&cm->error, VPX_CODEC_ERROR, + "Incorrect buffer dimensions"); + else + vp8_yv12_copy_frame(cfg, sd); + } else { + vpx_internal_error(&cm->error, VPX_CODEC_ERROR, + "Invalid reference frame"); + } + + return cm->error.error_code; +} + + +vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm, + VP9_REFFRAME ref_frame_flag, + YV12_BUFFER_CONFIG *sd) { + RefBuffer *ref_buf = NULL; + RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; + + // TODO(jkoleszar): The decoder doesn't have any real knowledge of what the + // encoder is using the frame buffers for. This is just a stub to keep the + // vpxenc --test-decode functionality working, and will be replaced in a + // later commit that adds VP9-specific controls for this functionality. + if (ref_frame_flag == VP9_LAST_FLAG) { + ref_buf = &cm->frame_refs[0]; + } else if (ref_frame_flag == VP9_GOLD_FLAG) { + ref_buf = &cm->frame_refs[1]; + } else if (ref_frame_flag == VP9_ALT_FLAG) { + ref_buf = &cm->frame_refs[2]; + } else { + vpx_internal_error(&cm->error, VPX_CODEC_ERROR, + "Invalid reference frame"); + return cm->error.error_code; + } + + if (!equal_dimensions(ref_buf->buf, sd)) { + vpx_internal_error(&cm->error, VPX_CODEC_ERROR, + "Incorrect buffer dimensions"); + } else { + int *ref_fb_ptr = &ref_buf->idx; + + // Find an empty frame buffer. 
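+    // get_free_fb() scans the pool for a slot whose ref_count is zero and
+    // claims it, returning INVALID_IDX when the pool is exhausted.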
+    const int free_fb = get_free_fb(cm);
+    if (cm->new_fb_idx == INVALID_IDX) {
+      vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
+                         "Unable to find free frame buffer");
+      return cm->error.error_code;
+    }
+
+    // Decrease ref_count since it will be increased again in
+    // ref_cnt_fb() below.
+    --frame_bufs[free_fb].ref_count;
+
+    // Manage the reference counters and copy image.
+    ref_cnt_fb(frame_bufs, ref_fb_ptr, free_fb);
+    ref_buf->buf = &frame_bufs[*ref_fb_ptr].buf;
+    vp8_yv12_copy_frame(sd, ref_buf->buf);
+  }
+
+  return cm->error.error_code;
+}
+
+/* If any buffer updating is signaled it should be done here. */
+static void swap_frame_buffers(VP9Decoder *pbi) {
+  int ref_index = 0, mask;
+  VP9_COMMON *const cm = &pbi->common;
+  BufferPool *const pool = cm->buffer_pool;
+  RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs;
+
+  lock_buffer_pool(pool);
+  for (mask = pbi->refresh_frame_flags; mask; mask >>= 1) {
+    const int old_idx = cm->ref_frame_map[ref_index];
+    // The current thread releases its hold on the reference frame.
+    decrease_ref_count(old_idx, frame_bufs, pool);
+
+    // Release the reference frame in the reference map.
+    if (mask & 1) {
+      decrease_ref_count(old_idx, frame_bufs, pool);
+    }
+    cm->ref_frame_map[ref_index] = cm->next_ref_frame_map[ref_index];
+    ++ref_index;
+  }
+
+  // The current thread releases its hold on the remaining reference frames.
+  for (; ref_index < REF_FRAMES && !cm->show_existing_frame; ++ref_index) {
+    const int old_idx = cm->ref_frame_map[ref_index];
+    decrease_ref_count(old_idx, frame_bufs, pool);
+    cm->ref_frame_map[ref_index] = cm->next_ref_frame_map[ref_index];
+  }
+  unlock_buffer_pool(pool);
+  pbi->hold_ref_buf = 0;
+  cm->frame_to_show = get_frame_new_buffer(cm);
+
+  if (!pbi->frame_parallel_decode || !cm->show_frame) {
+    lock_buffer_pool(pool);
+    --frame_bufs[cm->new_fb_idx].ref_count;
+    unlock_buffer_pool(pool);
+  }
+
+  // Invalidate these references until the next frame starts.
+  for (ref_index = 0; ref_index < 3; ref_index++)
+    cm->frame_refs[ref_index].idx = -1;
+}
+
+int vp9_receive_compressed_data(VP9Decoder *pbi,
+                                size_t size, const uint8_t **psource) {
+  VP9_COMMON *volatile const cm = &pbi->common;
+  BufferPool *volatile const pool = cm->buffer_pool;
+  RefCntBuffer *volatile const frame_bufs = cm->buffer_pool->frame_bufs;
+  const uint8_t *source = *psource;
+  int retcode = 0;
+  cm->error.error_code = VPX_CODEC_OK;
+
+  if (size == 0) {
+    // This is used to signal that we are missing frames.
+    // We do not know if the missing frame(s) were supposed to update
+    // any of the reference buffers, but we act conservatively and
+    // mark only the last buffer as corrupted.
+    //
+    // TODO(jkoleszar): Error concealment is undefined and non-normative
+    // at this point, but if it becomes so, [0] may not always be the correct
+    // thing to do here.
+    if (cm->frame_refs[0].idx > 0) {
+      assert(cm->frame_refs[0].buf != NULL);
+      cm->frame_refs[0].buf->corrupted = 1;
+    }
+  }
+
+  pbi->ready_for_new_data = 0;
+
+  // Check if the previous frame was a frame without any references to it.
+  // Release its frame buffer if not decoding in frame parallel mode.
+  if (!pbi->frame_parallel_decode && cm->new_fb_idx >= 0
+      && frame_bufs[cm->new_fb_idx].ref_count == 0)
+    pool->release_fb_cb(pool->cb_priv,
+                        &frame_bufs[cm->new_fb_idx].raw_frame_buffer);
+  // Find a free frame buffer. Return an error if none can be found.
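+  // The buffer claimed here also owns the MV array (cur_frame->mvs) that this
+  // frame's mode info is written into and that the next frame may read back
+  // through the use_prev_frame_mvs path.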
+ cm->new_fb_idx = get_free_fb(cm); + if (cm->new_fb_idx == INVALID_IDX) { + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, + "Unable to find free frame buffer"); + return cm->error.error_code; + } + + // Assign a MV array to the frame buffer. + cm->cur_frame = &pool->frame_bufs[cm->new_fb_idx]; + + pbi->hold_ref_buf = 0; + if (pbi->frame_parallel_decode) { + VPxWorker *const worker = pbi->frame_worker_owner; + vp9_frameworker_lock_stats(worker); + frame_bufs[cm->new_fb_idx].frame_worker_owner = worker; + // Reset decoding progress. + pbi->cur_buf = &frame_bufs[cm->new_fb_idx]; + pbi->cur_buf->row = -1; + pbi->cur_buf->col = -1; + vp9_frameworker_unlock_stats(worker); + } else { + pbi->cur_buf = &frame_bufs[cm->new_fb_idx]; + } + + + if (setjmp(cm->error.jmp)) { + const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); + int i; + + cm->error.setjmp = 0; + pbi->ready_for_new_data = 1; + + // Synchronize all threads immediately as a subsequent decode call may + // cause a resize invalidating some allocations. + winterface->sync(&pbi->lf_worker); + for (i = 0; i < pbi->num_tile_workers; ++i) { + winterface->sync(&pbi->tile_workers[i]); + } + + lock_buffer_pool(pool); + // Release all the reference buffers if worker thread is holding them. + if (pbi->hold_ref_buf == 1) { + int ref_index = 0, mask; + for (mask = pbi->refresh_frame_flags; mask; mask >>= 1) { + const int old_idx = cm->ref_frame_map[ref_index]; + // Current thread releases the holding of reference frame. + decrease_ref_count(old_idx, frame_bufs, pool); + + // Release the reference frame in reference map. + if (mask & 1) { + decrease_ref_count(old_idx, frame_bufs, pool); + } + ++ref_index; + } + + // Current thread releases the holding of reference frame. + for (; ref_index < REF_FRAMES && !cm->show_existing_frame; ++ref_index) { + const int old_idx = cm->ref_frame_map[ref_index]; + decrease_ref_count(old_idx, frame_bufs, pool); + } + pbi->hold_ref_buf = 0; + } + // Release current frame. + decrease_ref_count(cm->new_fb_idx, frame_bufs, pool); + unlock_buffer_pool(pool); + + vpx_clear_system_state(); + return -1; + } + + cm->error.setjmp = 1; + vp9_decode_frame(pbi, source, source + size, psource); + + swap_frame_buffers(pbi); + + vpx_clear_system_state(); + + if (!cm->show_existing_frame) { + cm->last_show_frame = cm->show_frame; + cm->prev_frame = cm->cur_frame; + if (cm->seg.enabled && !pbi->frame_parallel_decode) + vp9_swap_current_and_last_seg_map(cm); + } + + // Update progress in frame parallel decode. + if (pbi->frame_parallel_decode) { + // Need to lock the mutex here as another thread may + // be accessing this buffer. + VPxWorker *const worker = pbi->frame_worker_owner; + FrameWorkerData *const frame_worker_data = worker->data1; + vp9_frameworker_lock_stats(worker); + + if (cm->show_frame) { + cm->current_video_frame++; + } + frame_worker_data->frame_decoded = 1; + frame_worker_data->frame_context_ready = 1; + vp9_frameworker_signal_stats(worker); + vp9_frameworker_unlock_stats(worker); + } else { + cm->last_width = cm->width; + cm->last_height = cm->height; + if (cm->show_frame) { + cm->current_video_frame++; + } + } + + cm->error.setjmp = 0; + return retcode; +} + +int vp9_get_raw_frame(VP9Decoder *pbi, YV12_BUFFER_CONFIG *sd, + vp9_ppflags_t *flags) { + VP9_COMMON *const cm = &pbi->common; + int ret = -1; +#if !CONFIG_VP9_POSTPROC + (void)*flags; +#endif + + if (pbi->ready_for_new_data == 1) + return ret; + + pbi->ready_for_new_data = 1; + + /* no raw frame to show!!! 
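+   * (show_frame is 0 for invisible frames such as alt-refs; those are only
+   * output later through show_existing_frame.)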
*/ + if (!cm->show_frame) + return ret; + + pbi->ready_for_new_data = 1; + +#if CONFIG_VP9_POSTPROC + if (!cm->show_existing_frame) { + ret = vp9_post_proc_frame(cm, sd, flags); + } else { + *sd = *cm->frame_to_show; + ret = 0; + } +#else + *sd = *cm->frame_to_show; + ret = 0; +#endif /*!CONFIG_POSTPROC*/ + vpx_clear_system_state(); + return ret; +} + +vpx_codec_err_t vp9_parse_superframe_index(const uint8_t *data, + size_t data_sz, + uint32_t sizes[8], int *count, + vpx_decrypt_cb decrypt_cb, + void *decrypt_state) { + // A chunk ending with a byte matching 0xc0 is an invalid chunk unless + // it is a super frame index. If the last byte of real video compression + // data is 0xc0 the encoder must add a 0 byte. If we have the marker but + // not the associated matching marker byte at the front of the index we have + // an invalid bitstream and need to return an error. + + uint8_t marker; + + assert(data_sz); + marker = read_marker(decrypt_cb, decrypt_state, data + data_sz - 1); + *count = 0; + + if ((marker & 0xe0) == 0xc0) { + const uint32_t frames = (marker & 0x7) + 1; + const uint32_t mag = ((marker >> 3) & 0x3) + 1; + const size_t index_sz = 2 + mag * frames; + + // This chunk is marked as having a superframe index but doesn't have + // enough data for it, thus it's an invalid superframe index. + if (data_sz < index_sz) + return VPX_CODEC_CORRUPT_FRAME; + + { + const uint8_t marker2 = read_marker(decrypt_cb, decrypt_state, + data + data_sz - index_sz); + + // This chunk is marked as having a superframe index but doesn't have + // the matching marker byte at the front of the index therefore it's an + // invalid chunk. + if (marker != marker2) + return VPX_CODEC_CORRUPT_FRAME; + } + + { + // Found a valid superframe index. + uint32_t i, j; + const uint8_t *x = &data[data_sz - index_sz + 1]; + + // Frames has a maximum of 8 and mag has a maximum of 4. + uint8_t clear_buffer[32]; + assert(sizeof(clear_buffer) >= frames * mag); + if (decrypt_cb) { + decrypt_cb(decrypt_state, x, clear_buffer, frames * mag); + x = clear_buffer; + } + + for (i = 0; i < frames; ++i) { + uint32_t this_sz = 0; + + for (j = 0; j < mag; ++j) + this_sz |= ((uint32_t)(*x++)) << (j * 8); + sizes[i] = this_sz; + } + *count = frames; + } + } + return VPX_CODEC_OK; +} diff --git a/thirdparty/libvpx/vp9/decoder/vp9_decoder.h b/thirdparty/libvpx/vp9/decoder/vp9_decoder.h new file mode 100644 index 0000000000..7111a36d37 --- /dev/null +++ b/thirdparty/libvpx/vp9/decoder/vp9_decoder.h @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#ifndef VP9_DECODER_VP9_DECODER_H_
+#define VP9_DECODER_VP9_DECODER_H_
+
+#include "./vpx_config.h"
+
+#include "vpx/vpx_codec.h"
+#include "vpx_dsp/bitreader.h"
+#include "vpx_scale/yv12config.h"
+#include "vpx_util/vpx_thread.h"
+
+#include "vp9/common/vp9_thread_common.h"
+#include "vp9/common/vp9_onyxc_int.h"
+#include "vp9/common/vp9_ppflags.h"
+#include "vp9/decoder/vp9_dthread.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct TileBuffer {
+  const uint8_t *data;
+  size_t size;
+  int col;  // only used with multi-threaded decoding
+} TileBuffer;
+
+typedef struct TileWorkerData {
+  const uint8_t *data_end;
+  int buf_start, buf_end;  // pbi->tile_buffers to decode, inclusive
+  vpx_reader bit_reader;
+  FRAME_COUNTS counts;
+  DECLARE_ALIGNED(16, MACROBLOCKD, xd);
+  /* dqcoeff is shared by all the planes, so planes must be decoded serially */
+  DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]);
+  struct vpx_internal_error_info error_info;
+} TileWorkerData;
+
+typedef struct VP9Decoder {
+  DECLARE_ALIGNED(16, MACROBLOCKD, mb);
+
+  DECLARE_ALIGNED(16, VP9_COMMON, common);
+
+  int ready_for_new_data;
+
+  int refresh_frame_flags;
+
+  int frame_parallel_decode;  // frame-based threading.
+
+  // TODO(hkuang): Combine this with cur_buf in macroblockd as they are
+  // the same.
+  RefCntBuffer *cur_buf;   // Current decoding frame buffer.
+
+  VPxWorker *frame_worker_owner;   // frame_worker that owns this pbi.
+  VPxWorker lf_worker;
+  VPxWorker *tile_workers;
+  TileWorkerData *tile_worker_data;
+  TileBuffer tile_buffers[64];
+  int num_tile_workers;
+  int total_tiles;
+
+  VP9LfSync lf_row_sync;
+
+  vpx_decrypt_cb decrypt_cb;
+  void *decrypt_state;
+
+  int max_threads;
+  int inv_tile_order;
+  int need_resync;  // wait for key/intra-only frame.
+  int hold_ref_buf;  // hold the reference buffer.
+} VP9Decoder;
+
+int vp9_receive_compressed_data(struct VP9Decoder *pbi,
+                                size_t size, const uint8_t **dest);
+
+int vp9_get_raw_frame(struct VP9Decoder *pbi, YV12_BUFFER_CONFIG *sd,
+                      vp9_ppflags_t *flags);
+
+vpx_codec_err_t vp9_copy_reference_dec(struct VP9Decoder *pbi,
+                                       VP9_REFFRAME ref_frame_flag,
+                                       YV12_BUFFER_CONFIG *sd);
+
+vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm,
+                                      VP9_REFFRAME ref_frame_flag,
+                                      YV12_BUFFER_CONFIG *sd);
+
+static INLINE uint8_t read_marker(vpx_decrypt_cb decrypt_cb,
+                                  void *decrypt_state,
+                                  const uint8_t *data) {
+  if (decrypt_cb) {
+    uint8_t marker;
+    decrypt_cb(decrypt_state, data, &marker, 1);
+    return marker;
+  }
+  return *data;
+}
+
+// This function is exposed for use in tests, as well as the inlined function
+// "read_marker".
+vpx_codec_err_t vp9_parse_superframe_index(const uint8_t *data,
+                                           size_t data_sz,
+                                           uint32_t sizes[8], int *count,
+                                           vpx_decrypt_cb decrypt_cb,
+                                           void *decrypt_state);
+
+struct VP9Decoder *vp9_decoder_create(BufferPool *const pool);
+
+void vp9_decoder_remove(struct VP9Decoder *pbi);
+
+static INLINE void decrease_ref_count(int idx, RefCntBuffer *const frame_bufs,
+                                      BufferPool *const pool) {
+  if (idx >= 0 && frame_bufs[idx].ref_count > 0) {
+    --frame_bufs[idx].ref_count;
+    // A worker may only get a free framebuffer index when calling get_free_fb.
+    // But the private buffer is not set up until the header has been decoded.
+    // So if an error happens while decoding the header, the frame buffer will
+    // not have a valid priv buffer.
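+    // Hence the raw_frame_buffer.priv check below: release_fb_cb() must only
+    // be handed buffers whose storage was actually acquired via get_fb_cb().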
+ if (frame_bufs[idx].ref_count == 0 && + frame_bufs[idx].raw_frame_buffer.priv) { + pool->release_fb_cb(pool->cb_priv, &frame_bufs[idx].raw_frame_buffer); + } + } +} + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_DECODER_VP9_DECODER_H_ diff --git a/thirdparty/libvpx/vp9/decoder/vp9_detokenize.c b/thirdparty/libvpx/vp9/decoder/vp9_detokenize.c new file mode 100644 index 0000000000..47dc107fe2 --- /dev/null +++ b/thirdparty/libvpx/vp9/decoder/vp9_detokenize.c @@ -0,0 +1,224 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "vpx_mem/vpx_mem.h" +#include "vpx_ports/mem.h" + +#include "vp9/common/vp9_blockd.h" +#include "vp9/common/vp9_common.h" +#include "vp9/common/vp9_entropy.h" +#if CONFIG_COEFFICIENT_RANGE_CHECKING +#include "vp9/common/vp9_idct.h" +#endif + +#include "vp9/decoder/vp9_detokenize.h" + +#define EOB_CONTEXT_NODE 0 +#define ZERO_CONTEXT_NODE 1 +#define ONE_CONTEXT_NODE 2 + +#define INCREMENT_COUNT(token) \ + do { \ + if (counts) \ + ++coef_counts[band][ctx][token]; \ + } while (0) + +static INLINE int read_coeff(const vpx_prob *probs, int n, vpx_reader *r) { + int i, val = 0; + for (i = 0; i < n; ++i) + val = (val << 1) | vpx_read(r, probs[i]); + return val; +} + +static int decode_coefs(const MACROBLOCKD *xd, + PLANE_TYPE type, + tran_low_t *dqcoeff, TX_SIZE tx_size, const int16_t *dq, + int ctx, const int16_t *scan, const int16_t *nb, + vpx_reader *r) { + FRAME_COUNTS *counts = xd->counts; + const int max_eob = 16 << (tx_size << 1); + const FRAME_CONTEXT *const fc = xd->fc; + const int ref = is_inter_block(xd->mi[0]); + int band, c = 0; + const vpx_prob (*coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] = + fc->coef_probs[tx_size][type][ref]; + const vpx_prob *prob; + unsigned int (*coef_counts)[COEFF_CONTEXTS][UNCONSTRAINED_NODES + 1]; + unsigned int (*eob_branch_count)[COEFF_CONTEXTS]; + uint8_t token_cache[32 * 32]; + const uint8_t *band_translate = get_band_translate(tx_size); + const int dq_shift = (tx_size == TX_32X32); + int v, token; + int16_t dqv = dq[0]; + const uint8_t *const cat6_prob = +#if CONFIG_VP9_HIGHBITDEPTH + (xd->bd == VPX_BITS_12) ? vp9_cat6_prob_high12 : + (xd->bd == VPX_BITS_10) ? vp9_cat6_prob_high12 + 2 : +#endif // CONFIG_VP9_HIGHBITDEPTH + vp9_cat6_prob; + const int cat6_bits = +#if CONFIG_VP9_HIGHBITDEPTH + (xd->bd == VPX_BITS_12) ? 18 : + (xd->bd == VPX_BITS_10) ? 
16 : +#endif // CONFIG_VP9_HIGHBITDEPTH + 14; + + if (counts) { + coef_counts = counts->coef[tx_size][type][ref]; + eob_branch_count = counts->eob_branch[tx_size][type][ref]; + } + + while (c < max_eob) { + int val = -1; + band = *band_translate++; + prob = coef_probs[band][ctx]; + if (counts) + ++eob_branch_count[band][ctx]; + if (!vpx_read(r, prob[EOB_CONTEXT_NODE])) { + INCREMENT_COUNT(EOB_MODEL_TOKEN); + break; + } + + while (!vpx_read(r, prob[ZERO_CONTEXT_NODE])) { + INCREMENT_COUNT(ZERO_TOKEN); + dqv = dq[1]; + token_cache[scan[c]] = 0; + ++c; + if (c >= max_eob) + return c; // zero tokens at the end (no eob token) + ctx = get_coef_context(nb, token_cache, c); + band = *band_translate++; + prob = coef_probs[band][ctx]; + } + + if (!vpx_read(r, prob[ONE_CONTEXT_NODE])) { + INCREMENT_COUNT(ONE_TOKEN); + token = ONE_TOKEN; + val = 1; + } else { + INCREMENT_COUNT(TWO_TOKEN); + token = vpx_read_tree(r, vp9_coef_con_tree, + vp9_pareto8_full[prob[PIVOT_NODE] - 1]); + switch (token) { + case TWO_TOKEN: + case THREE_TOKEN: + case FOUR_TOKEN: + val = token; + break; + case CATEGORY1_TOKEN: + val = CAT1_MIN_VAL + read_coeff(vp9_cat1_prob, 1, r); + break; + case CATEGORY2_TOKEN: + val = CAT2_MIN_VAL + read_coeff(vp9_cat2_prob, 2, r); + break; + case CATEGORY3_TOKEN: + val = CAT3_MIN_VAL + read_coeff(vp9_cat3_prob, 3, r); + break; + case CATEGORY4_TOKEN: + val = CAT4_MIN_VAL + read_coeff(vp9_cat4_prob, 4, r); + break; + case CATEGORY5_TOKEN: + val = CAT5_MIN_VAL + read_coeff(vp9_cat5_prob, 5, r); + break; + case CATEGORY6_TOKEN: + val = CAT6_MIN_VAL + read_coeff(cat6_prob, cat6_bits, r); + break; + } + } + v = (val * dqv) >> dq_shift; +#if CONFIG_COEFFICIENT_RANGE_CHECKING +#if CONFIG_VP9_HIGHBITDEPTH + dqcoeff[scan[c]] = highbd_check_range((vpx_read_bit(r) ? -v : v), + xd->bd); +#else + dqcoeff[scan[c]] = check_range(vpx_read_bit(r) ? -v : v); +#endif // CONFIG_VP9_HIGHBITDEPTH +#else + dqcoeff[scan[c]] = vpx_read_bit(r) ? 
-v : v; +#endif // CONFIG_COEFFICIENT_RANGE_CHECKING + token_cache[scan[c]] = vp9_pt_energy_class[token]; + ++c; + ctx = get_coef_context(nb, token_cache, c); + dqv = dq[1]; + } + + return c; +} + +static void get_ctx_shift(MACROBLOCKD *xd, int *ctx_shift_a, int *ctx_shift_l, + int x, int y, unsigned int tx_size_in_blocks) { + if (xd->max_blocks_wide) { + if (tx_size_in_blocks + x > xd->max_blocks_wide) + *ctx_shift_a = (tx_size_in_blocks - (xd->max_blocks_wide - x)) * 8; + } + if (xd->max_blocks_high) { + if (tx_size_in_blocks + y > xd->max_blocks_high) + *ctx_shift_l = (tx_size_in_blocks - (xd->max_blocks_high - y)) * 8; + } +} + +int vp9_decode_block_tokens(MACROBLOCKD *xd, int plane, const scan_order *sc, + int x, int y, TX_SIZE tx_size, vpx_reader *r, + int seg_id) { + struct macroblockd_plane *const pd = &xd->plane[plane]; + const int16_t *const dequant = pd->seg_dequant[seg_id]; + int eob; + ENTROPY_CONTEXT *a = pd->above_context + x; + ENTROPY_CONTEXT *l = pd->left_context + y; + int ctx; + int ctx_shift_a = 0; + int ctx_shift_l = 0; + + switch (tx_size) { + case TX_4X4: + ctx = a[0] != 0; + ctx += l[0] != 0; + eob = decode_coefs(xd, get_plane_type(plane), pd->dqcoeff, tx_size, + dequant, ctx, sc->scan, sc->neighbors, r); + a[0] = l[0] = (eob > 0); + break; + case TX_8X8: + get_ctx_shift(xd, &ctx_shift_a, &ctx_shift_l, x, y, 1 << TX_8X8); + ctx = !!*(const uint16_t *)a; + ctx += !!*(const uint16_t *)l; + eob = decode_coefs(xd, get_plane_type(plane), pd->dqcoeff, tx_size, + dequant, ctx, sc->scan, sc->neighbors, r); + *(uint16_t *)a = ((eob > 0) * 0x0101) >> ctx_shift_a; + *(uint16_t *)l = ((eob > 0) * 0x0101) >> ctx_shift_l; + break; + case TX_16X16: + get_ctx_shift(xd, &ctx_shift_a, &ctx_shift_l, x, y, 1 << TX_16X16); + ctx = !!*(const uint32_t *)a; + ctx += !!*(const uint32_t *)l; + eob = decode_coefs(xd, get_plane_type(plane), pd->dqcoeff, tx_size, + dequant, ctx, sc->scan, sc->neighbors, r); + *(uint32_t *)a = ((eob > 0) * 0x01010101) >> ctx_shift_a; + *(uint32_t *)l = ((eob > 0) * 0x01010101) >> ctx_shift_l; + break; + case TX_32X32: + get_ctx_shift(xd, &ctx_shift_a, &ctx_shift_l, x, y, 1 << TX_32X32); + // NOTE: casting to uint64_t here is safe because the default memory + // alignment is at least 8 bytes and the TX_32X32 is aligned on 8 byte + // boundaries. + ctx = !!*(const uint64_t *)a; + ctx += !!*(const uint64_t *)l; + eob = decode_coefs(xd, get_plane_type(plane), pd->dqcoeff, tx_size, + dequant, ctx, sc->scan, sc->neighbors, r); + *(uint64_t *)a = ((eob > 0) * 0x0101010101010101ULL) >> ctx_shift_a; + *(uint64_t *)l = ((eob > 0) * 0x0101010101010101ULL) >> ctx_shift_l; + break; + default: + assert(0 && "Invalid transform size."); + eob = 0; + break; + } + + return eob; +} diff --git a/thirdparty/libvpx/vp9/decoder/vp9_detokenize.h b/thirdparty/libvpx/vp9/decoder/vp9_detokenize.h new file mode 100644 index 0000000000..d242d4466e --- /dev/null +++ b/thirdparty/libvpx/vp9/decoder/vp9_detokenize.h @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+
+#ifndef VP9_DECODER_VP9_DETOKENIZE_H_
+#define VP9_DECODER_VP9_DETOKENIZE_H_
+
+#include "vpx_dsp/bitreader.h"
+#include "vp9/decoder/vp9_decoder.h"
+#include "vp9/common/vp9_scan.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+int vp9_decode_block_tokens(MACROBLOCKD *xd,
+                            int plane, const scan_order *sc,
+                            int x, int y,
+                            TX_SIZE tx_size, vpx_reader *r,
+                            int seg_id);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP9_DECODER_VP9_DETOKENIZE_H_
diff --git a/thirdparty/libvpx/vp9/decoder/vp9_dsubexp.c b/thirdparty/libvpx/vp9/decoder/vp9_dsubexp.c
new file mode 100644
index 0000000000..05b38538ae
--- /dev/null
+++ b/thirdparty/libvpx/vp9/decoder/vp9_dsubexp.c
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include <assert.h>
+
+#include "vp9/common/vp9_entropy.h"
+
+#include "vp9/decoder/vp9_dsubexp.h"
+
+static int inv_recenter_nonneg(int v, int m) {
+  if (v > 2 * m)
+    return v;
+
+  return (v & 1) ? m - ((v + 1) >> 1) : m + (v >> 1);
+}
+
+static int decode_uniform(vpx_reader *r) {
+  const int l = 8;
+  const int m = (1 << l) - 191;
+  const int v = vpx_read_literal(r, l - 1);
+  return v < m ? v : (v << 1) - m + vpx_read_bit(r);
+}
+
+static int inv_remap_prob(int v, int m) {
+  static uint8_t inv_map_table[MAX_PROB] = {
+      7,  20,  33,  46,  59,  72,  85,  98, 111, 124, 137, 150, 163, 176, 189,
+    202, 215, 228, 241, 254,   1,   2,   3,   4,   5,   6,   8,   9,  10,  11,
+     12,  13,  14,  15,  16,  17,  18,  19,  21,  22,  23,  24,  25,  26,  27,
+     28,  29,  30,  31,  32,  34,  35,  36,  37,  38,  39,  40,  41,  42,  43,
+     44,  45,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  60,
+     61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  73,  74,  75,  76,
+     77,  78,  79,  80,  81,  82,  83,  84,  86,  87,  88,  89,  90,  91,  92,
+     93,  94,  95,  96,  97,  99, 100, 101, 102, 103, 104, 105, 106, 107, 108,
+    109, 110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 125,
+    126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 138, 139, 140, 141,
+    142, 143, 144, 145, 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157,
+    158, 159, 160, 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173,
+    174, 175, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190,
+    191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206,
+    207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221, 222,
+    223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238,
+    239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 253
+  };
+  assert(v < (int)(sizeof(inv_map_table) / sizeof(inv_map_table[0])));
+  v = inv_map_table[v];
+  m--;
+  if ((m << 1) <= MAX_PROB) {
+    return 1 + inv_recenter_nonneg(v, m);
+  } else {
+    return MAX_PROB - inv_recenter_nonneg(v, MAX_PROB - 1 - m);
+  }
+}
+
+static int decode_term_subexp(vpx_reader *r) {
+  if (!vpx_read_bit(r))
+    return vpx_read_literal(r, 4);
+  if (!vpx_read_bit(r))
+    return vpx_read_literal(r, 4) + 16;
+  if (!vpx_read_bit(r))
+    return vpx_read_literal(r, 5) + 32;
+  return decode_uniform(r) + 64;
+}
+
+void vp9_diff_update_prob(vpx_reader *r, vpx_prob* p) {
+  if (vpx_read(r, DIFF_UPDATE_PROB)) {
+    const int delp = decode_term_subexp(r);
+    *p = (vpx_prob)inv_remap_prob(delp, *p);
+  }
+}
diff --git a/thirdparty/libvpx/vp9/decoder/vp9_dsubexp.h b/thirdparty/libvpx/vp9/decoder/vp9_dsubexp.h
new file mode 100644
index 0000000000..a8bcc70be9
--- /dev/null
+++ b/thirdparty/libvpx/vp9/decoder/vp9_dsubexp.h
@@ -0,0 +1,27 @@
+/*
+ * Copyright (c) 2013 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+#ifndef VP9_DECODER_VP9_DSUBEXP_H_
+#define VP9_DECODER_VP9_DSUBEXP_H_
+
+#include "vpx_dsp/bitreader.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void vp9_diff_update_prob(vpx_reader *r, vpx_prob* p);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // VP9_DECODER_VP9_DSUBEXP_H_
diff --git a/thirdparty/libvpx/vp9/decoder/vp9_dthread.c b/thirdparty/libvpx/vp9/decoder/vp9_dthread.c
new file mode 100644
index 0000000000..14a71448fe
--- /dev/null
+++ b/thirdparty/libvpx/vp9/decoder/vp9_dthread.c
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "./vpx_config.h"
+#include "vpx_mem/vpx_mem.h"
+#include "vp9/common/vp9_reconinter.h"
+#include "vp9/decoder/vp9_dthread.h"
+#include "vp9/decoder/vp9_decoder.h"
+
+// #define DEBUG_THREAD
+
+// TODO(hkuang): Clean up all the #ifdefs in this file.
+void vp9_frameworker_lock_stats(VPxWorker *const worker) {
+#if CONFIG_MULTITHREAD
+  FrameWorkerData *const worker_data = worker->data1;
+  pthread_mutex_lock(&worker_data->stats_mutex);
+#else
+  (void)worker;
+#endif
+}
+
+void vp9_frameworker_unlock_stats(VPxWorker *const worker) {
+#if CONFIG_MULTITHREAD
+  FrameWorkerData *const worker_data = worker->data1;
+  pthread_mutex_unlock(&worker_data->stats_mutex);
+#else
+  (void)worker;
+#endif
+}
+
+void vp9_frameworker_signal_stats(VPxWorker *const worker) {
+#if CONFIG_MULTITHREAD
+  FrameWorkerData *const worker_data = worker->data1;
+
+// TODO(hkuang): Fix the pthread_cond_broadcast in windows wrapper.
+#if defined(_WIN32) && !HAVE_PTHREAD_H
+  pthread_cond_signal(&worker_data->stats_cond);
+#else
+  pthread_cond_broadcast(&worker_data->stats_cond);
+#endif
+
+#else
+  (void)worker;
+#endif
+}
+
+// This macro prevents thread_sanitizer from reporting known concurrent writes.
+#if defined(__has_feature)
+#if __has_feature(thread_sanitizer)
+#define BUILDING_WITH_TSAN
+#endif
+#endif
+
+// TODO(hkuang): Remove the worker parameter as it is only used in debug code.
+void vp9_frameworker_wait(VPxWorker *const worker, RefCntBuffer *const ref_buf,
+                          int row) {
+#if CONFIG_MULTITHREAD
+  if (!ref_buf)
+    return;
+
+#ifndef BUILDING_WITH_TSAN
+  // The following line of code will trigger a harmless tsan error, but it is
+  // the key to getting the best performance.
+  if (ref_buf->row >= row && ref_buf->buf.corrupted != 1) return;
+#endif
+
+  {
+    // Find the worker thread that owns the reference frame. If the reference
+    // frame has been fully decoded, it may no longer have an owner.
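+    // (The pbi->cur_buf == ref_buf test in the wait loop below is what
+    // actually detects whether that worker is still producing this buffer.)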
+    VPxWorker *const ref_worker = ref_buf->frame_worker_owner;
+    FrameWorkerData *const ref_worker_data =
+        (FrameWorkerData *)ref_worker->data1;
+    const VP9Decoder *const pbi = ref_worker_data->pbi;
+
+#ifdef DEBUG_THREAD
+    {
+      FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1;
+      printf("%d %p worker is waiting for %d %p worker (%d) ref %d \r\n",
+             worker_data->worker_id, worker, ref_worker_data->worker_id,
+             ref_buf->frame_worker_owner, row, ref_buf->row);
+    }
+#endif
+
+    vp9_frameworker_lock_stats(ref_worker);
+    while (ref_buf->row < row && pbi->cur_buf == ref_buf &&
+           ref_buf->buf.corrupted != 1) {
+      pthread_cond_wait(&ref_worker_data->stats_cond,
+                        &ref_worker_data->stats_mutex);
+    }
+
+    if (ref_buf->buf.corrupted == 1) {
+      FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1;
+      vp9_frameworker_unlock_stats(ref_worker);
+      vpx_internal_error(&worker_data->pbi->common.error,
+                         VPX_CODEC_CORRUPT_FRAME,
+                         "Worker %p failed to decode frame", worker);
+    }
+    vp9_frameworker_unlock_stats(ref_worker);
+  }
+#else
+  (void)worker;
+  (void)ref_buf;
+  (void)row;
+#endif  // CONFIG_MULTITHREAD
+}
+
+void vp9_frameworker_broadcast(RefCntBuffer *const buf, int row) {
+#if CONFIG_MULTITHREAD
+  VPxWorker *worker = buf->frame_worker_owner;
+
+#ifdef DEBUG_THREAD
+  {
+    FrameWorkerData *const worker_data = (FrameWorkerData *)worker->data1;
+    printf("%d %p worker decode to (%d) \r\n", worker_data->worker_id,
+           buf->frame_worker_owner, row);
+  }
+#endif
+
+  vp9_frameworker_lock_stats(worker);
+  buf->row = row;
+  vp9_frameworker_signal_stats(worker);
+  vp9_frameworker_unlock_stats(worker);
+#else
+  (void)buf;
+  (void)row;
+#endif  // CONFIG_MULTITHREAD
+}
+
+void vp9_frameworker_copy_context(VPxWorker *const dst_worker,
+                                  VPxWorker *const src_worker) {
+#if CONFIG_MULTITHREAD
+  FrameWorkerData *const src_worker_data = (FrameWorkerData *)src_worker->data1;
+  FrameWorkerData *const dst_worker_data = (FrameWorkerData *)dst_worker->data1;
+  VP9_COMMON *const src_cm = &src_worker_data->pbi->common;
+  VP9_COMMON *const dst_cm = &dst_worker_data->pbi->common;
+  int i;
+
+  // Wait until the source frame's context is ready.
+  vp9_frameworker_lock_stats(src_worker);
+  while (!src_worker_data->frame_context_ready) {
+    pthread_cond_wait(&src_worker_data->stats_cond,
+                      &src_worker_data->stats_mutex);
+  }
+
+  dst_cm->last_frame_seg_map = src_cm->seg.enabled ?
+      src_cm->current_frame_seg_map : src_cm->last_frame_seg_map;
+  dst_worker_data->pbi->need_resync = src_worker_data->pbi->need_resync;
+  vp9_frameworker_unlock_stats(src_worker);
+
+  dst_cm->bit_depth = src_cm->bit_depth;
+#if CONFIG_VP9_HIGHBITDEPTH
+  dst_cm->use_highbitdepth = src_cm->use_highbitdepth;
+#endif
+  dst_cm->prev_frame = src_cm->show_existing_frame ?
+      src_cm->prev_frame : src_cm->cur_frame;
+  dst_cm->last_width = !src_cm->show_existing_frame ?
+      src_cm->width : src_cm->last_width;
+  dst_cm->last_height = !src_cm->show_existing_frame ?
+      src_cm->height : src_cm->last_height;
+  dst_cm->subsampling_x = src_cm->subsampling_x;
+  dst_cm->subsampling_y = src_cm->subsampling_y;
+  dst_cm->frame_type = src_cm->frame_type;
+  dst_cm->last_show_frame = !src_cm->show_existing_frame ?
+ src_cm->show_frame : src_cm->last_show_frame; + for (i = 0; i < REF_FRAMES; ++i) + dst_cm->ref_frame_map[i] = src_cm->next_ref_frame_map[i]; + + memcpy(dst_cm->lf_info.lfthr, src_cm->lf_info.lfthr, + (MAX_LOOP_FILTER + 1) * sizeof(loop_filter_thresh)); + dst_cm->lf.last_sharpness_level = src_cm->lf.sharpness_level; + dst_cm->lf.filter_level = src_cm->lf.filter_level; + memcpy(dst_cm->lf.ref_deltas, src_cm->lf.ref_deltas, MAX_REF_LF_DELTAS); + memcpy(dst_cm->lf.mode_deltas, src_cm->lf.mode_deltas, MAX_MODE_LF_DELTAS); + dst_cm->seg = src_cm->seg; + memcpy(dst_cm->frame_contexts, src_cm->frame_contexts, + FRAME_CONTEXTS * sizeof(dst_cm->frame_contexts[0])); +#else + (void) dst_worker; + (void) src_worker; +#endif // CONFIG_MULTITHREAD +} diff --git a/thirdparty/libvpx/vp9/decoder/vp9_dthread.h b/thirdparty/libvpx/vp9/decoder/vp9_dthread.h new file mode 100644 index 0000000000..ba7c38a511 --- /dev/null +++ b/thirdparty/libvpx/vp9/decoder/vp9_dthread.h @@ -0,0 +1,74 @@ +/* + * Copyright (c) 2014 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_DECODER_VP9_DTHREAD_H_ +#define VP9_DECODER_VP9_DTHREAD_H_ + +#include "./vpx_config.h" +#include "vpx_util/vpx_thread.h" +#include "vpx/internal/vpx_codec_internal.h" + +#ifdef __cplusplus +extern "C" { +#endif + +struct VP9Common; +struct VP9Decoder; + +// WorkerData for the FrameWorker thread. It contains all the information of +// the worker and decode structures for decoding a frame. +typedef struct FrameWorkerData { + struct VP9Decoder *pbi; + const uint8_t *data; + const uint8_t *data_end; + size_t data_size; + void *user_priv; + int result; + int worker_id; + int received_frame; + + // scratch_buffer is used in frame parallel mode only. + // It is used to make a copy of the compressed data. + uint8_t *scratch_buffer; + size_t scratch_buffer_size; + +#if CONFIG_MULTITHREAD + pthread_mutex_t stats_mutex; + pthread_cond_t stats_cond; +#endif + + int frame_context_ready; // Current frame's context is ready to read. + int frame_decoded; // Finished decoding current frame. +} FrameWorkerData; + +void vp9_frameworker_lock_stats(VPxWorker *const worker); +void vp9_frameworker_unlock_stats(VPxWorker *const worker); +void vp9_frameworker_signal_stats(VPxWorker *const worker); + +// Wait until ref_buf has been decoded to row in real pixel unit. +// Note: worker may already finish decoding ref_buf and release it in order to +// start decoding next frame. So need to check whether worker is still decoding +// ref_buf. +void vp9_frameworker_wait(VPxWorker *const worker, RefCntBuffer *const ref_buf, + int row); + +// FrameWorker broadcasts its decoding progress so other workers that are +// waiting on it can resume decoding. +void vp9_frameworker_broadcast(RefCntBuffer *const buf, int row); + +// Copy necessary decoding context from src worker to dst worker. 
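+// ("Necessary" mirrors the implementation in vp9_dthread.c above: loop
+// filter state, segmentation, bit depth, reference frame map and frame
+// contexts, copied once the source worker signals frame_context_ready.)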
+void vp9_frameworker_copy_context(VPxWorker *const dst_worker, + VPxWorker *const src_worker); + +#ifdef __cplusplus +} // extern "C" +#endif + +#endif // VP9_DECODER_VP9_DTHREAD_H_ diff --git a/thirdparty/libvpx/vp9/vp9_dx_iface.c b/thirdparty/libvpx/vp9/vp9_dx_iface.c new file mode 100644 index 0000000000..6531e2c618 --- /dev/null +++ b/thirdparty/libvpx/vp9/vp9_dx_iface.c @@ -0,0 +1,1093 @@ +/* + * Copyright (c) 2010 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include <stdlib.h> +#include <string.h> + +#include "./vpx_config.h" +#include "./vpx_version.h" + +#include "vpx/internal/vpx_codec_internal.h" +#include "vpx/vp8dx.h" +#include "vpx/vpx_decoder.h" +#include "vpx_dsp/bitreader_buffer.h" +#include "vpx_dsp/vpx_dsp_common.h" +#include "vpx_util/vpx_thread.h" + +#include "vp9/common/vp9_alloccommon.h" +#include "vp9/common/vp9_frame_buffers.h" + +#include "vp9/decoder/vp9_decodeframe.h" + +#include "vp9/vp9_dx_iface.h" +#include "vp9/vp9_iface_common.h" + +#define VP9_CAP_POSTPROC (CONFIG_VP9_POSTPROC ? VPX_CODEC_CAP_POSTPROC : 0) + +static vpx_codec_err_t decoder_init(vpx_codec_ctx_t *ctx, + vpx_codec_priv_enc_mr_cfg_t *data) { + // This function only allocates space for the vpx_codec_alg_priv_t + // structure. More memory may be required at the time the stream + // information becomes known. + (void)data; + + if (!ctx->priv) { + vpx_codec_alg_priv_t *const priv = + (vpx_codec_alg_priv_t *)vpx_calloc(1, sizeof(*priv)); + if (priv == NULL) + return VPX_CODEC_MEM_ERROR; + + ctx->priv = (vpx_codec_priv_t *)priv; + ctx->priv->init_flags = ctx->init_flags; + priv->si.sz = sizeof(priv->si); + priv->flushed = 0; + // Only do frame parallel decode when threads > 1. + priv->frame_parallel_decode = + (ctx->config.dec && (ctx->config.dec->threads > 1) && + (ctx->init_flags & VPX_CODEC_USE_FRAME_THREADING)) ? 
1 : 0; + if (ctx->config.dec) { + priv->cfg = *ctx->config.dec; + ctx->config.dec = &priv->cfg; + } + } + + return VPX_CODEC_OK; +} + +static vpx_codec_err_t decoder_destroy(vpx_codec_alg_priv_t *ctx) { + if (ctx->frame_workers != NULL) { + int i; + for (i = 0; i < ctx->num_frame_workers; ++i) { + VPxWorker *const worker = &ctx->frame_workers[i]; + FrameWorkerData *const frame_worker_data = + (FrameWorkerData *)worker->data1; + vpx_get_worker_interface()->end(worker); + vp9_remove_common(&frame_worker_data->pbi->common); +#if CONFIG_VP9_POSTPROC + vp9_free_postproc_buffers(&frame_worker_data->pbi->common); +#endif + vp9_decoder_remove(frame_worker_data->pbi); + vpx_free(frame_worker_data->scratch_buffer); +#if CONFIG_MULTITHREAD + pthread_mutex_destroy(&frame_worker_data->stats_mutex); + pthread_cond_destroy(&frame_worker_data->stats_cond); +#endif + vpx_free(frame_worker_data); + } +#if CONFIG_MULTITHREAD + pthread_mutex_destroy(&ctx->buffer_pool->pool_mutex); +#endif + } + + if (ctx->buffer_pool) { + vp9_free_ref_frame_buffers(ctx->buffer_pool); + vp9_free_internal_frame_buffers(&ctx->buffer_pool->int_frame_buffers); + } + + vpx_free(ctx->frame_workers); + vpx_free(ctx->buffer_pool); + vpx_free(ctx); + return VPX_CODEC_OK; +} + +static int parse_bitdepth_colorspace_sampling( + BITSTREAM_PROFILE profile, struct vpx_read_bit_buffer *rb) { + vpx_color_space_t color_space; + if (profile >= PROFILE_2) + rb->bit_offset += 1; // Bit-depth 10 or 12. + color_space = (vpx_color_space_t)vpx_rb_read_literal(rb, 3); + if (color_space != VPX_CS_SRGB) { + rb->bit_offset += 1; // [16,235] (including xvycc) vs [0,255] range. + if (profile == PROFILE_1 || profile == PROFILE_3) { + rb->bit_offset += 2; // subsampling x/y. + rb->bit_offset += 1; // unused. + } + } else { + if (profile == PROFILE_1 || profile == PROFILE_3) { + rb->bit_offset += 1; // unused + } else { + // RGB is only available in version 1. + return 0; + } + } + return 1; +} + +static vpx_codec_err_t decoder_peek_si_internal(const uint8_t *data, + unsigned int data_sz, + vpx_codec_stream_info_t *si, + int *is_intra_only, + vpx_decrypt_cb decrypt_cb, + void *decrypt_state) { + int intra_only_flag = 0; + uint8_t clear_buffer[10]; + + if (data + data_sz <= data) + return VPX_CODEC_INVALID_PARAM; + + si->is_kf = 0; + si->w = si->h = 0; + + if (decrypt_cb) { + data_sz = VPXMIN(sizeof(clear_buffer), data_sz); + decrypt_cb(decrypt_state, data, clear_buffer, data_sz); + data = clear_buffer; + } + + // A maximum of 6 bits are needed to read the frame marker, profile and + // show_existing_frame. + if (data_sz < 1) + return VPX_CODEC_UNSUP_BITSTREAM; + + { + int show_frame; + int error_resilient; + struct vpx_read_bit_buffer rb = { data, data + data_sz, 0, NULL, NULL }; + const int frame_marker = vpx_rb_read_literal(&rb, 2); + const BITSTREAM_PROFILE profile = vp9_read_profile(&rb); + + if (frame_marker != VP9_FRAME_MARKER) + return VPX_CODEC_UNSUP_BITSTREAM; + + if (profile >= MAX_PROFILES) + return VPX_CODEC_UNSUP_BITSTREAM; + + if (vpx_rb_read_bit(&rb)) { // show an existing frame + // If profile is > 2 and show_existing_frame is true, then at least 1 more + // byte (6+3=9 bits) is needed. + if (profile > 2 && data_sz < 2) + return VPX_CODEC_UNSUP_BITSTREAM; + vpx_rb_read_literal(&rb, 3); // Frame buffer to show. + return VPX_CODEC_OK; + } + + // For the rest of the function, a maximum of 9 more bytes are needed + // (computed by taking the maximum possible bits needed in each case). 
Note + // that this has to be updated if we read any more bits in this function. + if (data_sz < 10) + return VPX_CODEC_UNSUP_BITSTREAM; + + si->is_kf = !vpx_rb_read_bit(&rb); + show_frame = vpx_rb_read_bit(&rb); + error_resilient = vpx_rb_read_bit(&rb); + + if (si->is_kf) { + if (!vp9_read_sync_code(&rb)) + return VPX_CODEC_UNSUP_BITSTREAM; + + if (!parse_bitdepth_colorspace_sampling(profile, &rb)) + return VPX_CODEC_UNSUP_BITSTREAM; + vp9_read_frame_size(&rb, (int *)&si->w, (int *)&si->h); + } else { + intra_only_flag = show_frame ? 0 : vpx_rb_read_bit(&rb); + + rb.bit_offset += error_resilient ? 0 : 2; // reset_frame_context + + if (intra_only_flag) { + if (!vp9_read_sync_code(&rb)) + return VPX_CODEC_UNSUP_BITSTREAM; + if (profile > PROFILE_0) { + if (!parse_bitdepth_colorspace_sampling(profile, &rb)) + return VPX_CODEC_UNSUP_BITSTREAM; + } + rb.bit_offset += REF_FRAMES; // refresh_frame_flags + vp9_read_frame_size(&rb, (int *)&si->w, (int *)&si->h); + } + } + } + if (is_intra_only != NULL) + *is_intra_only = intra_only_flag; + return VPX_CODEC_OK; +} + +static vpx_codec_err_t decoder_peek_si(const uint8_t *data, + unsigned int data_sz, + vpx_codec_stream_info_t *si) { + return decoder_peek_si_internal(data, data_sz, si, NULL, NULL, NULL); +} + +static vpx_codec_err_t decoder_get_si(vpx_codec_alg_priv_t *ctx, + vpx_codec_stream_info_t *si) { + const size_t sz = (si->sz >= sizeof(vp9_stream_info_t)) + ? sizeof(vp9_stream_info_t) + : sizeof(vpx_codec_stream_info_t); + memcpy(si, &ctx->si, sz); + si->sz = (unsigned int)sz; + + return VPX_CODEC_OK; +} + +static void set_error_detail(vpx_codec_alg_priv_t *ctx, + const char *const error) { + ctx->base.err_detail = error; +} + +static vpx_codec_err_t update_error_state(vpx_codec_alg_priv_t *ctx, + const struct vpx_internal_error_info *error) { + if (error->error_code) + set_error_detail(ctx, error->has_detail ? 
error->detail : NULL); + + return error->error_code; +} + +static void init_buffer_callbacks(vpx_codec_alg_priv_t *ctx) { + int i; + + for (i = 0; i < ctx->num_frame_workers; ++i) { + VPxWorker *const worker = &ctx->frame_workers[i]; + FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1; + VP9_COMMON *const cm = &frame_worker_data->pbi->common; + BufferPool *const pool = cm->buffer_pool; + + cm->new_fb_idx = INVALID_IDX; + cm->byte_alignment = ctx->byte_alignment; + cm->skip_loop_filter = ctx->skip_loop_filter; + + if (ctx->get_ext_fb_cb != NULL && ctx->release_ext_fb_cb != NULL) { + pool->get_fb_cb = ctx->get_ext_fb_cb; + pool->release_fb_cb = ctx->release_ext_fb_cb; + pool->cb_priv = ctx->ext_priv; + } else { + pool->get_fb_cb = vp9_get_frame_buffer; + pool->release_fb_cb = vp9_release_frame_buffer; + + if (vp9_alloc_internal_frame_buffers(&pool->int_frame_buffers)) + vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, + "Failed to initialize internal frame buffers"); + + pool->cb_priv = &pool->int_frame_buffers; + } + } +} + +static void set_default_ppflags(vp8_postproc_cfg_t *cfg) { + cfg->post_proc_flag = VP8_DEBLOCK | VP8_DEMACROBLOCK; + cfg->deblocking_level = 4; + cfg->noise_level = 0; +} + +static void set_ppflags(const vpx_codec_alg_priv_t *ctx, + vp9_ppflags_t *flags) { + flags->post_proc_flag = + ctx->postproc_cfg.post_proc_flag; + + flags->deblocking_level = ctx->postproc_cfg.deblocking_level; + flags->noise_level = ctx->postproc_cfg.noise_level; +} + +static int frame_worker_hook(void *arg1, void *arg2) { + FrameWorkerData *const frame_worker_data = (FrameWorkerData *)arg1; + const uint8_t *data = frame_worker_data->data; + (void)arg2; + + frame_worker_data->result = + vp9_receive_compressed_data(frame_worker_data->pbi, + frame_worker_data->data_size, + &data); + frame_worker_data->data_end = data; + + if (frame_worker_data->pbi->frame_parallel_decode) { + // In frame parallel decoding, a worker thread must successfully decode all + // the compressed data. + if (frame_worker_data->result != 0 || + frame_worker_data->data + frame_worker_data->data_size - 1 > data) { + VPxWorker *const worker = frame_worker_data->pbi->frame_worker_owner; + BufferPool *const pool = frame_worker_data->pbi->common.buffer_pool; + // Signal all the other threads that are waiting for this frame. + vp9_frameworker_lock_stats(worker); + frame_worker_data->frame_context_ready = 1; + lock_buffer_pool(pool); + frame_worker_data->pbi->cur_buf->buf.corrupted = 1; + unlock_buffer_pool(pool); + frame_worker_data->pbi->need_resync = 1; + vp9_frameworker_signal_stats(worker); + vp9_frameworker_unlock_stats(worker); + return 0; + } + } else if (frame_worker_data->result != 0) { + // Check decode result in serial decode. + frame_worker_data->pbi->cur_buf->buf.corrupted = 1; + frame_worker_data->pbi->need_resync = 1; + } + return !frame_worker_data->result; +} + +static vpx_codec_err_t init_decoder(vpx_codec_alg_priv_t *ctx) { + int i; + const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); + + ctx->last_show_frame = -1; + ctx->next_submit_worker_id = 0; + ctx->last_submit_worker_id = 0; + ctx->next_output_worker_id = 0; + ctx->frame_cache_read = 0; + ctx->frame_cache_write = 0; + ctx->num_cache_frames = 0; + ctx->need_resync = 1; + ctx->num_frame_workers = + (ctx->frame_parallel_decode == 1) ? 
ctx->cfg.threads: 1; + if (ctx->num_frame_workers > MAX_DECODE_THREADS) + ctx->num_frame_workers = MAX_DECODE_THREADS; + ctx->available_threads = ctx->num_frame_workers; + ctx->flushed = 0; + + ctx->buffer_pool = (BufferPool *)vpx_calloc(1, sizeof(BufferPool)); + if (ctx->buffer_pool == NULL) + return VPX_CODEC_MEM_ERROR; + +#if CONFIG_MULTITHREAD + if (pthread_mutex_init(&ctx->buffer_pool->pool_mutex, NULL)) { + set_error_detail(ctx, "Failed to allocate buffer pool mutex"); + return VPX_CODEC_MEM_ERROR; + } +#endif + + ctx->frame_workers = (VPxWorker *) + vpx_malloc(ctx->num_frame_workers * sizeof(*ctx->frame_workers)); + if (ctx->frame_workers == NULL) { + set_error_detail(ctx, "Failed to allocate frame_workers"); + return VPX_CODEC_MEM_ERROR; + } + + for (i = 0; i < ctx->num_frame_workers; ++i) { + VPxWorker *const worker = &ctx->frame_workers[i]; + FrameWorkerData *frame_worker_data = NULL; + winterface->init(worker); + worker->data1 = vpx_memalign(32, sizeof(FrameWorkerData)); + if (worker->data1 == NULL) { + set_error_detail(ctx, "Failed to allocate frame_worker_data"); + return VPX_CODEC_MEM_ERROR; + } + frame_worker_data = (FrameWorkerData *)worker->data1; + frame_worker_data->pbi = vp9_decoder_create(ctx->buffer_pool); + if (frame_worker_data->pbi == NULL) { + set_error_detail(ctx, "Failed to allocate frame_worker_data"); + return VPX_CODEC_MEM_ERROR; + } + frame_worker_data->pbi->frame_worker_owner = worker; + frame_worker_data->worker_id = i; + frame_worker_data->scratch_buffer = NULL; + frame_worker_data->scratch_buffer_size = 0; + frame_worker_data->frame_context_ready = 0; + frame_worker_data->received_frame = 0; +#if CONFIG_MULTITHREAD + if (pthread_mutex_init(&frame_worker_data->stats_mutex, NULL)) { + set_error_detail(ctx, "Failed to allocate frame_worker_data mutex"); + return VPX_CODEC_MEM_ERROR; + } + + if (pthread_cond_init(&frame_worker_data->stats_cond, NULL)) { + set_error_detail(ctx, "Failed to allocate frame_worker_data cond"); + return VPX_CODEC_MEM_ERROR; + } +#endif + // If decoding in serial mode, FrameWorker thread could create tile worker + // thread or loopfilter thread. + frame_worker_data->pbi->max_threads = + (ctx->frame_parallel_decode == 0) ? ctx->cfg.threads : 0; + + frame_worker_data->pbi->inv_tile_order = ctx->invert_tile_order; + frame_worker_data->pbi->frame_parallel_decode = ctx->frame_parallel_decode; + frame_worker_data->pbi->common.frame_parallel_decode = + ctx->frame_parallel_decode; + worker->hook = (VPxWorkerHook)frame_worker_hook; + if (!winterface->reset(worker)) { + set_error_detail(ctx, "Frame Worker thread creation failed"); + return VPX_CODEC_MEM_ERROR; + } + } + + // If postprocessing was enabled by the application and a + // configuration has not been provided, default it. + if (!ctx->postproc_cfg_set && + (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC)) + set_default_ppflags(&ctx->postproc_cfg); + + init_buffer_callbacks(ctx); + + return VPX_CODEC_OK; +} + +static INLINE void check_resync(vpx_codec_alg_priv_t *const ctx, + const VP9Decoder *const pbi) { + // Clear resync flag if worker got a key frame or intra only frame. 
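+  // (need_resync is set at init time and whenever a frame fails to decode;
+  // only a key frame or an intra-only frame can clear it, since neither
+  // depends on inter references.)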
+  if (ctx->need_resync == 1 && pbi->need_resync == 0 &&
+      (pbi->common.intra_only || pbi->common.frame_type == KEY_FRAME))
+    ctx->need_resync = 0;
+}
+
+static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx,
+                                  const uint8_t **data, unsigned int data_sz,
+                                  void *user_priv, int64_t deadline) {
+  const VPxWorkerInterface *const winterface = vpx_get_worker_interface();
+  (void)deadline;
+
+  // Determine the stream parameters. Note that we rely on peek_si to
+  // validate that we have a buffer that does not wrap around the top
+  // of the heap.
+  if (!ctx->si.h) {
+    int is_intra_only = 0;
+    const vpx_codec_err_t res =
+        decoder_peek_si_internal(*data, data_sz, &ctx->si, &is_intra_only,
+                                 ctx->decrypt_cb, ctx->decrypt_state);
+    if (res != VPX_CODEC_OK)
+      return res;
+
+    if (!ctx->si.is_kf && !is_intra_only)
+      return VPX_CODEC_ERROR;
+  }
+
+  if (!ctx->frame_parallel_decode) {
+    VPxWorker *const worker = ctx->frame_workers;
+    FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1;
+    frame_worker_data->data = *data;
+    frame_worker_data->data_size = data_sz;
+    frame_worker_data->user_priv = user_priv;
+    frame_worker_data->received_frame = 1;
+
+    // Set these even if already initialized. The caller may have changed the
+    // decrypt config between frames.
+    frame_worker_data->pbi->decrypt_cb = ctx->decrypt_cb;
+    frame_worker_data->pbi->decrypt_state = ctx->decrypt_state;
+
+    worker->had_error = 0;
+    winterface->execute(worker);
+
+    // Update the data pointer after decode.
+    *data = frame_worker_data->data_end;
+
+    if (worker->had_error)
+      return update_error_state(ctx, &frame_worker_data->pbi->common.error);
+
+    check_resync(ctx, frame_worker_data->pbi);
+  } else {
+    VPxWorker *const worker = &ctx->frame_workers[ctx->next_submit_worker_id];
+    FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1;
+    // Copy the context from the last worker thread to the next worker thread.
+    if (ctx->next_submit_worker_id != ctx->last_submit_worker_id)
+      vp9_frameworker_copy_context(
+          &ctx->frame_workers[ctx->next_submit_worker_id],
+          &ctx->frame_workers[ctx->last_submit_worker_id]);
+
+    frame_worker_data->pbi->ready_for_new_data = 0;
+    // Copy the compressed data into the worker's internal buffer.
+    // TODO(hkuang): Would it be better to have all the workers allocate the
+    // same size as the first intra frame? That would avoid too many
+    // deallocations and allocations.
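+    // (Grow-only: the scratch buffer is reallocated only when a larger frame
+    // arrives, and is freed in decoder_destroy().)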
+ if (frame_worker_data->scratch_buffer_size < data_sz) { + frame_worker_data->scratch_buffer = + (uint8_t *)vpx_realloc(frame_worker_data->scratch_buffer, data_sz); + if (frame_worker_data->scratch_buffer == NULL) { + set_error_detail(ctx, "Failed to reallocate scratch buffer"); + return VPX_CODEC_MEM_ERROR; + } + frame_worker_data->scratch_buffer_size = data_sz; + } + frame_worker_data->data_size = data_sz; + memcpy(frame_worker_data->scratch_buffer, *data, data_sz); + + frame_worker_data->frame_decoded = 0; + frame_worker_data->frame_context_ready = 0; + frame_worker_data->received_frame = 1; + frame_worker_data->data = frame_worker_data->scratch_buffer; + frame_worker_data->user_priv = user_priv; + + if (ctx->next_submit_worker_id != ctx->last_submit_worker_id) + ctx->last_submit_worker_id = + (ctx->last_submit_worker_id + 1) % ctx->num_frame_workers; + + ctx->next_submit_worker_id = + (ctx->next_submit_worker_id + 1) % ctx->num_frame_workers; + --ctx->available_threads; + worker->had_error = 0; + winterface->launch(worker); + } + + return VPX_CODEC_OK; +} + +static void wait_worker_and_cache_frame(vpx_codec_alg_priv_t *ctx) { + YV12_BUFFER_CONFIG sd; + vp9_ppflags_t flags = {0, 0, 0}; + const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); + VPxWorker *const worker = &ctx->frame_workers[ctx->next_output_worker_id]; + FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1; + ctx->next_output_worker_id = + (ctx->next_output_worker_id + 1) % ctx->num_frame_workers; + // TODO(hkuang): Add worker error handling here. + winterface->sync(worker); + frame_worker_data->received_frame = 0; + ++ctx->available_threads; + + check_resync(ctx, frame_worker_data->pbi); + + if (vp9_get_raw_frame(frame_worker_data->pbi, &sd, &flags) == 0) { + VP9_COMMON *const cm = &frame_worker_data->pbi->common; + RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; + ctx->frame_cache[ctx->frame_cache_write].fb_idx = cm->new_fb_idx; + yuvconfig2image(&ctx->frame_cache[ctx->frame_cache_write].img, &sd, + frame_worker_data->user_priv); + ctx->frame_cache[ctx->frame_cache_write].img.fb_priv = + frame_bufs[cm->new_fb_idx].raw_frame_buffer.priv; + ctx->frame_cache_write = + (ctx->frame_cache_write + 1) % FRAME_CACHE_SIZE; + ++ctx->num_cache_frames; + } +} + +static vpx_codec_err_t decoder_decode(vpx_codec_alg_priv_t *ctx, + const uint8_t *data, unsigned int data_sz, + void *user_priv, long deadline) { + const uint8_t *data_start = data; + const uint8_t * const data_end = data + data_sz; + vpx_codec_err_t res; + uint32_t frame_sizes[8]; + int frame_count; + + if (data == NULL && data_sz == 0) { + ctx->flushed = 1; + return VPX_CODEC_OK; + } + + // Reset flushed when receiving a valid frame. + ctx->flushed = 0; + + // Initialize the decoder workers on the first frame. + if (ctx->frame_workers == NULL) { + const vpx_codec_err_t res = init_decoder(ctx); + if (res != VPX_CODEC_OK) + return res; + } + + res = vp9_parse_superframe_index(data, data_sz, frame_sizes, &frame_count, + ctx->decrypt_cb, ctx->decrypt_state); + if (res != VPX_CODEC_OK) + return res; + + if (ctx->frame_parallel_decode) { + // Decode in frame parallel mode. When decoding in this mode, the frame + // passed to the decoder must be either a normal frame or a superframe with + // superframe index so the decoder could get each frame's start position + // in the superframe. 
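+    // (vp9_parse_superframe_index() above has already read that index:
+    // frame_sizes[]/frame_count describe the frames packed back to back,
+    // and frame_count == 0 means the buffer holds a single bare frame.)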
+    if (frame_count > 0) {
+      int i;
+
+      for (i = 0; i < frame_count; ++i) {
+        const uint8_t *data_start_copy = data_start;
+        const uint32_t frame_size = frame_sizes[i];
+        if (data_start < data
+            || frame_size > (uint32_t) (data_end - data_start)) {
+          set_error_detail(ctx, "Invalid frame size in index");
+          return VPX_CODEC_CORRUPT_FRAME;
+        }
+
+        if (ctx->available_threads == 0) {
+          // No more threads for decoding. Wait until the next output worker
+          // finishes decoding. Then copy the decoded frame into the cache.
+          if (ctx->num_cache_frames < FRAME_CACHE_SIZE) {
+            wait_worker_and_cache_frame(ctx);
+          } else {
+            // TODO(hkuang): Add a unit test to test this path.
+            set_error_detail(ctx, "Frame output cache is full.");
+            return VPX_CODEC_ERROR;
+          }
+        }
+
+        res = decode_one(ctx, &data_start_copy, frame_size, user_priv,
+                         deadline);
+        if (res != VPX_CODEC_OK)
+          return res;
+        data_start += frame_size;
+      }
+    } else {
+      if (ctx->available_threads == 0) {
+        // No more threads for decoding. Wait until the next output worker
+        // finishes decoding. Then copy the decoded frame into the cache.
+        if (ctx->num_cache_frames < FRAME_CACHE_SIZE) {
+          wait_worker_and_cache_frame(ctx);
+        } else {
+          // TODO(hkuang): Add a unit test to test this path.
+          set_error_detail(ctx, "Frame output cache is full.");
+          return VPX_CODEC_ERROR;
+        }
+      }
+
+      res = decode_one(ctx, &data, data_sz, user_priv, deadline);
+      if (res != VPX_CODEC_OK)
+        return res;
+    }
+  } else {
+    // Decode in serial mode.
+    if (frame_count > 0) {
+      int i;
+
+      for (i = 0; i < frame_count; ++i) {
+        const uint8_t *data_start_copy = data_start;
+        const uint32_t frame_size = frame_sizes[i];
+        vpx_codec_err_t res;
+        if (data_start < data
+            || frame_size > (uint32_t) (data_end - data_start)) {
+          set_error_detail(ctx, "Invalid frame size in index");
+          return VPX_CODEC_CORRUPT_FRAME;
+        }
+
+        res = decode_one(ctx, &data_start_copy, frame_size, user_priv,
+                         deadline);
+        if (res != VPX_CODEC_OK)
+          return res;
+
+        data_start += frame_size;
+      }
+    } else {
+      while (data_start < data_end) {
+        const uint32_t frame_size = (uint32_t) (data_end - data_start);
+        const vpx_codec_err_t res = decode_one(ctx, &data_start, frame_size,
+                                               user_priv, deadline);
+        if (res != VPX_CODEC_OK)
+          return res;
+
+        // Account for suboptimal termination by the encoder.
+        while (data_start < data_end) {
+          const uint8_t marker = read_marker(ctx->decrypt_cb,
+                                             ctx->decrypt_state, data_start);
+          if (marker)
+            break;
+          ++data_start;
+        }
+      }
+    }
+  }
+
+  return res;
+}
+
+static void release_last_output_frame(vpx_codec_alg_priv_t *ctx) {
+  RefCntBuffer *const frame_bufs = ctx->buffer_pool->frame_bufs;
+  // Decrease reference count of last output frame in frame parallel mode.
+  if (ctx->frame_parallel_decode && ctx->last_show_frame >= 0) {
+    BufferPool *const pool = ctx->buffer_pool;
+    lock_buffer_pool(pool);
+    decrease_ref_count(ctx->last_show_frame, frame_bufs, pool);
+    unlock_buffer_pool(pool);
+  }
+}
+
+static vpx_image_t *decoder_get_frame(vpx_codec_alg_priv_t *ctx,
+                                      vpx_codec_iter_t *iter) {
+  vpx_image_t *img = NULL;
+
+  // Only return a frame when all the CPUs are busy or the application has
+  // flushed the decoder in frame parallel decode.
+  if (ctx->frame_parallel_decode && ctx->available_threads > 0 &&
+      !ctx->flushed) {
+    return NULL;
+  }
+
+  // Output the frames in the cache first.
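+  // (frame_cache is a FRAME_CACHE_SIZE-entry ring buffer, filled by
+  // wait_worker_and_cache_frame() whenever every worker was busy.)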
+ if (ctx->num_cache_frames > 0) { + release_last_output_frame(ctx); + ctx->last_show_frame = ctx->frame_cache[ctx->frame_cache_read].fb_idx; + if (ctx->need_resync) + return NULL; + img = &ctx->frame_cache[ctx->frame_cache_read].img; + ctx->frame_cache_read = (ctx->frame_cache_read + 1) % FRAME_CACHE_SIZE; + --ctx->num_cache_frames; + return img; + } + + // iter acts as a flip flop, so an image is only returned on the first + // call to get_frame. + if (*iter == NULL && ctx->frame_workers != NULL) { + do { + YV12_BUFFER_CONFIG sd; + vp9_ppflags_t flags = {0, 0, 0}; + const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); + VPxWorker *const worker = + &ctx->frame_workers[ctx->next_output_worker_id]; + FrameWorkerData *const frame_worker_data = + (FrameWorkerData *)worker->data1; + ctx->next_output_worker_id = + (ctx->next_output_worker_id + 1) % ctx->num_frame_workers; + if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC) + set_ppflags(ctx, &flags); + // Wait for the frame from worker thread. + if (winterface->sync(worker)) { + // Check if worker has received any frames. + if (frame_worker_data->received_frame == 1) { + ++ctx->available_threads; + frame_worker_data->received_frame = 0; + check_resync(ctx, frame_worker_data->pbi); + } + if (vp9_get_raw_frame(frame_worker_data->pbi, &sd, &flags) == 0) { + VP9_COMMON *const cm = &frame_worker_data->pbi->common; + RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; + release_last_output_frame(ctx); + ctx->last_show_frame = frame_worker_data->pbi->common.new_fb_idx; + if (ctx->need_resync) + return NULL; + yuvconfig2image(&ctx->img, &sd, frame_worker_data->user_priv); + ctx->img.fb_priv = frame_bufs[cm->new_fb_idx].raw_frame_buffer.priv; + img = &ctx->img; + return img; + } + } else { + // Decoding failed. Release the worker thread. + frame_worker_data->received_frame = 0; + ++ctx->available_threads; + ctx->need_resync = 1; + if (ctx->flushed != 1) + return NULL; + } + } while (ctx->next_output_worker_id != ctx->next_submit_worker_id); + } + return NULL; +} + +static vpx_codec_err_t decoder_set_fb_fn( + vpx_codec_alg_priv_t *ctx, + vpx_get_frame_buffer_cb_fn_t cb_get, + vpx_release_frame_buffer_cb_fn_t cb_release, void *cb_priv) { + if (cb_get == NULL || cb_release == NULL) { + return VPX_CODEC_INVALID_PARAM; + } else if (ctx->frame_workers == NULL) { + // If the decoder has already been initialized, do not accept changes to + // the frame buffer functions. + ctx->get_ext_fb_cb = cb_get; + ctx->release_ext_fb_cb = cb_release; + ctx->ext_priv = cb_priv; + return VPX_CODEC_OK; + } + + return VPX_CODEC_ERROR; +} + +static vpx_codec_err_t ctrl_set_reference(vpx_codec_alg_priv_t *ctx, + va_list args) { + vpx_ref_frame_t *const data = va_arg(args, vpx_ref_frame_t *); + + // Only support this function in serial decode. 
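+  // (In serial decode, frame_workers[0] is the only decoder instance; the
+  // same guard appears on the other serial-only controls below.)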
+ if (ctx->frame_parallel_decode) { + set_error_detail(ctx, "Not supported in frame parallel decode"); + return VPX_CODEC_INCAPABLE; + } + + if (data) { + vpx_ref_frame_t *const frame = (vpx_ref_frame_t *)data; + YV12_BUFFER_CONFIG sd; + VPxWorker *const worker = ctx->frame_workers; + FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1; + image2yuvconfig(&frame->img, &sd); + return vp9_set_reference_dec(&frame_worker_data->pbi->common, + (VP9_REFFRAME)frame->frame_type, &sd); + } else { + return VPX_CODEC_INVALID_PARAM; + } +} + +static vpx_codec_err_t ctrl_copy_reference(vpx_codec_alg_priv_t *ctx, + va_list args) { + vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *); + + // Only support this function in serial decode. + if (ctx->frame_parallel_decode) { + set_error_detail(ctx, "Not supported in frame parallel decode"); + return VPX_CODEC_INCAPABLE; + } + + if (data) { + vpx_ref_frame_t *frame = (vpx_ref_frame_t *) data; + YV12_BUFFER_CONFIG sd; + VPxWorker *const worker = ctx->frame_workers; + FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1; + image2yuvconfig(&frame->img, &sd); + return vp9_copy_reference_dec(frame_worker_data->pbi, + (VP9_REFFRAME)frame->frame_type, &sd); + } else { + return VPX_CODEC_INVALID_PARAM; + } +} + +static vpx_codec_err_t ctrl_get_reference(vpx_codec_alg_priv_t *ctx, + va_list args) { + vp9_ref_frame_t *data = va_arg(args, vp9_ref_frame_t *); + + // Only support this function in serial decode. + if (ctx->frame_parallel_decode) { + set_error_detail(ctx, "Not supported in frame parallel decode"); + return VPX_CODEC_INCAPABLE; + } + + if (data) { + YV12_BUFFER_CONFIG* fb; + VPxWorker *const worker = ctx->frame_workers; + FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1; + fb = get_ref_frame(&frame_worker_data->pbi->common, data->idx); + if (fb == NULL) return VPX_CODEC_ERROR; + yuvconfig2image(&data->img, fb, NULL); + return VPX_CODEC_OK; + } else { + return VPX_CODEC_INVALID_PARAM; + } +} + +static vpx_codec_err_t ctrl_set_postproc(vpx_codec_alg_priv_t *ctx, + va_list args) { +#if CONFIG_VP9_POSTPROC + vp8_postproc_cfg_t *data = va_arg(args, vp8_postproc_cfg_t *); + + if (data) { + ctx->postproc_cfg_set = 1; + ctx->postproc_cfg = *((vp8_postproc_cfg_t *)data); + return VPX_CODEC_OK; + } else { + return VPX_CODEC_INVALID_PARAM; + } +#else + (void)ctx; + (void)args; + return VPX_CODEC_INCAPABLE; +#endif +} + +static vpx_codec_err_t ctrl_set_dbg_options(vpx_codec_alg_priv_t *ctx, + va_list args) { + (void)ctx; + (void)args; + return VPX_CODEC_INCAPABLE; +} + +static vpx_codec_err_t ctrl_get_last_ref_updates(vpx_codec_alg_priv_t *ctx, + va_list args) { + int *const update_info = va_arg(args, int *); + + // Only support this function in serial decode. 
+ if (ctx->frame_parallel_decode) { + set_error_detail(ctx, "Not supported in frame parallel decode"); + return VPX_CODEC_INCAPABLE; + } + + if (update_info) { + if (ctx->frame_workers) { + VPxWorker *const worker = ctx->frame_workers; + FrameWorkerData *const frame_worker_data = + (FrameWorkerData *)worker->data1; + *update_info = frame_worker_data->pbi->refresh_frame_flags; + return VPX_CODEC_OK; + } else { + return VPX_CODEC_ERROR; + } + } + + return VPX_CODEC_INVALID_PARAM; +} + +static vpx_codec_err_t ctrl_get_frame_corrupted(vpx_codec_alg_priv_t *ctx, + va_list args) { + int *corrupted = va_arg(args, int *); + + if (corrupted) { + if (ctx->frame_workers) { + VPxWorker *const worker = ctx->frame_workers; + FrameWorkerData *const frame_worker_data = + (FrameWorkerData *)worker->data1; + RefCntBuffer *const frame_bufs = + frame_worker_data->pbi->common.buffer_pool->frame_bufs; + if (frame_worker_data->pbi->common.frame_to_show == NULL) + return VPX_CODEC_ERROR; + if (ctx->last_show_frame >= 0) + *corrupted = frame_bufs[ctx->last_show_frame].buf.corrupted; + return VPX_CODEC_OK; + } else { + return VPX_CODEC_ERROR; + } + } + + return VPX_CODEC_INVALID_PARAM; +} + +static vpx_codec_err_t ctrl_get_frame_size(vpx_codec_alg_priv_t *ctx, + va_list args) { + int *const frame_size = va_arg(args, int *); + + // Only support this function in serial decode. + if (ctx->frame_parallel_decode) { + set_error_detail(ctx, "Not supported in frame parallel decode"); + return VPX_CODEC_INCAPABLE; + } + + if (frame_size) { + if (ctx->frame_workers) { + VPxWorker *const worker = ctx->frame_workers; + FrameWorkerData *const frame_worker_data = + (FrameWorkerData *)worker->data1; + const VP9_COMMON *const cm = &frame_worker_data->pbi->common; + frame_size[0] = cm->width; + frame_size[1] = cm->height; + return VPX_CODEC_OK; + } else { + return VPX_CODEC_ERROR; + } + } + + return VPX_CODEC_INVALID_PARAM; +} + +static vpx_codec_err_t ctrl_get_render_size(vpx_codec_alg_priv_t *ctx, + va_list args) { + int *const render_size = va_arg(args, int *); + + // Only support this function in serial decode. 
+ if (ctx->frame_parallel_decode) { + set_error_detail(ctx, "Not supported in frame parallel decode"); + return VPX_CODEC_INCAPABLE; + } + + if (render_size) { + if (ctx->frame_workers) { + VPxWorker *const worker = ctx->frame_workers; + FrameWorkerData *const frame_worker_data = + (FrameWorkerData *)worker->data1; + const VP9_COMMON *const cm = &frame_worker_data->pbi->common; + render_size[0] = cm->render_width; + render_size[1] = cm->render_height; + return VPX_CODEC_OK; + } else { + return VPX_CODEC_ERROR; + } + } + + return VPX_CODEC_INVALID_PARAM; +} + +static vpx_codec_err_t ctrl_get_bit_depth(vpx_codec_alg_priv_t *ctx, + va_list args) { + unsigned int *const bit_depth = va_arg(args, unsigned int *); + VPxWorker *const worker = &ctx->frame_workers[ctx->next_output_worker_id]; + + if (bit_depth) { + if (worker) { + FrameWorkerData *const frame_worker_data = + (FrameWorkerData *)worker->data1; + const VP9_COMMON *const cm = &frame_worker_data->pbi->common; + *bit_depth = cm->bit_depth; + return VPX_CODEC_OK; + } else { + return VPX_CODEC_ERROR; + } + } + + return VPX_CODEC_INVALID_PARAM; +} + +static vpx_codec_err_t ctrl_set_invert_tile_order(vpx_codec_alg_priv_t *ctx, + va_list args) { + ctx->invert_tile_order = va_arg(args, int); + return VPX_CODEC_OK; +} + +static vpx_codec_err_t ctrl_set_decryptor(vpx_codec_alg_priv_t *ctx, + va_list args) { + vpx_decrypt_init *init = va_arg(args, vpx_decrypt_init *); + ctx->decrypt_cb = init ? init->decrypt_cb : NULL; + ctx->decrypt_state = init ? init->decrypt_state : NULL; + return VPX_CODEC_OK; +} + +static vpx_codec_err_t ctrl_set_byte_alignment(vpx_codec_alg_priv_t *ctx, + va_list args) { + const int legacy_byte_alignment = 0; + const int min_byte_alignment = 32; + const int max_byte_alignment = 1024; + const int byte_alignment = va_arg(args, int); + + if (byte_alignment != legacy_byte_alignment && + (byte_alignment < min_byte_alignment || + byte_alignment > max_byte_alignment || + (byte_alignment & (byte_alignment - 1)) != 0)) + return VPX_CODEC_INVALID_PARAM; + + ctx->byte_alignment = byte_alignment; + if (ctx->frame_workers) { + VPxWorker *const worker = ctx->frame_workers; + FrameWorkerData *const frame_worker_data = + (FrameWorkerData *)worker->data1; + frame_worker_data->pbi->common.byte_alignment = byte_alignment; + } + return VPX_CODEC_OK; +} + +static vpx_codec_err_t ctrl_set_skip_loop_filter(vpx_codec_alg_priv_t *ctx, + va_list args) { + ctx->skip_loop_filter = va_arg(args, int); + + if (ctx->frame_workers) { + VPxWorker *const worker = ctx->frame_workers; + FrameWorkerData *const frame_worker_data = (FrameWorkerData *)worker->data1; + frame_worker_data->pbi->common.skip_loop_filter = ctx->skip_loop_filter; + } + + return VPX_CODEC_OK; +} + +static vpx_codec_ctrl_fn_map_t decoder_ctrl_maps[] = { + {VP8_COPY_REFERENCE, ctrl_copy_reference}, + + // Setters + {VP8_SET_REFERENCE, ctrl_set_reference}, + {VP8_SET_POSTPROC, ctrl_set_postproc}, + {VP8_SET_DBG_COLOR_REF_FRAME, ctrl_set_dbg_options}, + {VP8_SET_DBG_COLOR_MB_MODES, ctrl_set_dbg_options}, + {VP8_SET_DBG_COLOR_B_MODES, ctrl_set_dbg_options}, + {VP8_SET_DBG_DISPLAY_MV, ctrl_set_dbg_options}, + {VP9_INVERT_TILE_DECODE_ORDER, ctrl_set_invert_tile_order}, + {VPXD_SET_DECRYPTOR, ctrl_set_decryptor}, + {VP9_SET_BYTE_ALIGNMENT, ctrl_set_byte_alignment}, + {VP9_SET_SKIP_LOOP_FILTER, ctrl_set_skip_loop_filter}, + + // Getters + {VP8D_GET_LAST_REF_UPDATES, ctrl_get_last_ref_updates}, + {VP8D_GET_FRAME_CORRUPTED, ctrl_get_frame_corrupted}, + {VP9_GET_REFERENCE, ctrl_get_reference}, + 
{VP9D_GET_DISPLAY_SIZE, ctrl_get_render_size}, + {VP9D_GET_BIT_DEPTH, ctrl_get_bit_depth}, + {VP9D_GET_FRAME_SIZE, ctrl_get_frame_size}, + + { -1, NULL}, +}; + +#ifndef VERSION_STRING +#define VERSION_STRING +#endif +CODEC_INTERFACE(vpx_codec_vp9_dx) = { + "WebM Project VP9 Decoder" VERSION_STRING, + VPX_CODEC_INTERNAL_ABI_VERSION, + VPX_CODEC_CAP_DECODER | VP9_CAP_POSTPROC | + VPX_CODEC_CAP_EXTERNAL_FRAME_BUFFER, // vpx_codec_caps_t + decoder_init, // vpx_codec_init_fn_t + decoder_destroy, // vpx_codec_destroy_fn_t + decoder_ctrl_maps, // vpx_codec_ctrl_fn_map_t + { // NOLINT + decoder_peek_si, // vpx_codec_peek_si_fn_t + decoder_get_si, // vpx_codec_get_si_fn_t + decoder_decode, // vpx_codec_decode_fn_t + decoder_get_frame, // vpx_codec_frame_get_fn_t + decoder_set_fb_fn, // vpx_codec_set_fb_fn_t + }, + { // NOLINT + 0, + NULL, // vpx_codec_enc_cfg_map_t + NULL, // vpx_codec_encode_fn_t + NULL, // vpx_codec_get_cx_data_fn_t + NULL, // vpx_codec_enc_config_set_fn_t + NULL, // vpx_codec_get_global_headers_fn_t + NULL, // vpx_codec_get_preview_frame_fn_t + NULL // vpx_codec_enc_mr_get_mem_loc_fn_t + } +}; diff --git a/thirdparty/libvpx/vp9/vp9_dx_iface.h b/thirdparty/libvpx/vp9/vp9_dx_iface.h new file mode 100644 index 0000000000..e0e948e16c --- /dev/null +++ b/thirdparty/libvpx/vp9/vp9_dx_iface.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2015 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef VP9_VP9_DX_IFACE_H_ +#define VP9_VP9_DX_IFACE_H_ + +#include "vp9/decoder/vp9_decoder.h" + +typedef vpx_codec_stream_info_t vp9_stream_info_t; + +// This limit is due to framebuffer numbers. +// TODO(hkuang): Remove this limit after implementing ondemand framebuffers. +#define FRAME_CACHE_SIZE 6 // Cache maximum 6 decoded frames. + +typedef struct cache_frame { + int fb_idx; + vpx_image_t img; +} cache_frame; + +struct vpx_codec_alg_priv { + vpx_codec_priv_t base; + vpx_codec_dec_cfg_t cfg; + vp9_stream_info_t si; + int postproc_cfg_set; + vp8_postproc_cfg_t postproc_cfg; + vpx_decrypt_cb decrypt_cb; + void *decrypt_state; + vpx_image_t img; + int img_avail; + int flushed; + int invert_tile_order; + int last_show_frame; // Index of last output frame. + int byte_alignment; + int skip_loop_filter; + + // Frame parallel related. + int frame_parallel_decode; // frame-based threading. + VPxWorker *frame_workers; + int num_frame_workers; + int next_submit_worker_id; + int last_submit_worker_id; + int next_output_worker_id; + int available_threads; + cache_frame frame_cache[FRAME_CACHE_SIZE]; + int frame_cache_write; + int frame_cache_read; + int num_cache_frames; + int need_resync; // wait for key/intra-only frame + // BufferPool that holds all reference frames. Shared by all the FrameWorkers. + BufferPool *buffer_pool; + + // External frame buffer info to save for VP9 common. + void *ext_priv; // Private data associated with the external frame buffers. 
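+  // (Installed through decoder_set_fb_fn(), which only accepts the callbacks
+  // before the first frame has created the frame workers.)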
+ vpx_get_frame_buffer_cb_fn_t get_ext_fb_cb; + vpx_release_frame_buffer_cb_fn_t release_ext_fb_cb; +}; + +#endif // VP9_VP9_DX_IFACE_H_ diff --git a/thirdparty/libvpx/vp9/vp9_iface_common.h b/thirdparty/libvpx/vp9/vp9_iface_common.h new file mode 100644 index 0000000000..938d4224ba --- /dev/null +++ b/thirdparty/libvpx/vp9/vp9_iface_common.h @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2013 The WebM project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +#ifndef VP9_VP9_IFACE_COMMON_H_ +#define VP9_VP9_IFACE_COMMON_H_ + +#include "vpx_ports/mem.h" + +static void yuvconfig2image(vpx_image_t *img, const YV12_BUFFER_CONFIG *yv12, + void *user_priv) { + /** vpx_img_wrap() doesn't allow specifying independent strides for + * the Y, U, and V planes, nor other alignment adjustments that + * might be representable by a YV12_BUFFER_CONFIG, so we just + * initialize all the fields.*/ + int bps; + if (!yv12->subsampling_y) { + if (!yv12->subsampling_x) { + img->fmt = VPX_IMG_FMT_I444; + bps = 24; + } else { + img->fmt = VPX_IMG_FMT_I422; + bps = 16; + } + } else { + if (!yv12->subsampling_x) { + img->fmt = VPX_IMG_FMT_I440; + bps = 16; + } else { + img->fmt = VPX_IMG_FMT_I420; + bps = 12; + } + } + img->cs = yv12->color_space; + img->range = yv12->color_range; + img->bit_depth = 8; + img->w = yv12->y_stride; + img->h = ALIGN_POWER_OF_TWO(yv12->y_height + 2 * VP9_ENC_BORDER_IN_PIXELS, 3); + img->d_w = yv12->y_crop_width; + img->d_h = yv12->y_crop_height; + img->r_w = yv12->render_width; + img->r_h = yv12->render_height; + img->x_chroma_shift = yv12->subsampling_x; + img->y_chroma_shift = yv12->subsampling_y; + img->planes[VPX_PLANE_Y] = yv12->y_buffer; + img->planes[VPX_PLANE_U] = yv12->u_buffer; + img->planes[VPX_PLANE_V] = yv12->v_buffer; + img->planes[VPX_PLANE_ALPHA] = NULL; + img->stride[VPX_PLANE_Y] = yv12->y_stride; + img->stride[VPX_PLANE_U] = yv12->uv_stride; + img->stride[VPX_PLANE_V] = yv12->uv_stride; + img->stride[VPX_PLANE_ALPHA] = yv12->y_stride; +#if CONFIG_VP9_HIGHBITDEPTH + if (yv12->flags & YV12_FLAG_HIGHBITDEPTH) { + // vpx_image_t uses byte strides and a pointer to the first byte + // of the image. 
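+    // (CONVERT_TO_SHORTPTR() below recovers the real uint16_t base address;
+    // doubling the strides makes them count bytes again.)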
+    img->fmt = (vpx_img_fmt_t)(img->fmt | VPX_IMG_FMT_HIGHBITDEPTH);
+    img->bit_depth = yv12->bit_depth;
+    img->planes[VPX_PLANE_Y] = (uint8_t*)CONVERT_TO_SHORTPTR(yv12->y_buffer);
+    img->planes[VPX_PLANE_U] = (uint8_t*)CONVERT_TO_SHORTPTR(yv12->u_buffer);
+    img->planes[VPX_PLANE_V] = (uint8_t*)CONVERT_TO_SHORTPTR(yv12->v_buffer);
+    img->planes[VPX_PLANE_ALPHA] = NULL;
+    img->stride[VPX_PLANE_Y] = 2 * yv12->y_stride;
+    img->stride[VPX_PLANE_U] = 2 * yv12->uv_stride;
+    img->stride[VPX_PLANE_V] = 2 * yv12->uv_stride;
+    img->stride[VPX_PLANE_ALPHA] = 2 * yv12->y_stride;
+  }
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+  img->bps = bps;
+  img->user_priv = user_priv;
+  img->img_data = yv12->buffer_alloc;
+  img->img_data_owner = 0;
+  img->self_allocd = 0;
+}
+
+static vpx_codec_err_t image2yuvconfig(const vpx_image_t *img,
+                                       YV12_BUFFER_CONFIG *yv12) {
+  yv12->y_buffer = img->planes[VPX_PLANE_Y];
+  yv12->u_buffer = img->planes[VPX_PLANE_U];
+  yv12->v_buffer = img->planes[VPX_PLANE_V];
+
+  yv12->y_crop_width = img->d_w;
+  yv12->y_crop_height = img->d_h;
+  yv12->render_width = img->r_w;
+  yv12->render_height = img->r_h;
+  yv12->y_width = img->d_w;
+  yv12->y_height = img->d_h;
+
+  yv12->uv_width = img->x_chroma_shift == 1 ? (1 + yv12->y_width) / 2
+                                            : yv12->y_width;
+  yv12->uv_height = img->y_chroma_shift == 1 ? (1 + yv12->y_height) / 2
+                                             : yv12->y_height;
+  yv12->uv_crop_width = yv12->uv_width;
+  yv12->uv_crop_height = yv12->uv_height;
+
+  yv12->y_stride = img->stride[VPX_PLANE_Y];
+  yv12->uv_stride = img->stride[VPX_PLANE_U];
+  yv12->color_space = img->cs;
+  yv12->color_range = img->range;
+
+#if CONFIG_VP9_HIGHBITDEPTH
+  if (img->fmt & VPX_IMG_FMT_HIGHBITDEPTH) {
+    // In vpx_image_t
+    //   planes point to the uint8 address of the start of the data
+    //   stride counts uint8s to reach the next row
+    // In YV12_BUFFER_CONFIG
+    //   y_buffer, u_buffer, v_buffer point to the uint16 address of the data
+    //   stride and border count in uint16s
+    // This means that all the address calculations in the main body of code
+    // should work correctly.
+    // However, before we do any pixel operations we need to cast the address
+    // to a uint16 pointer and double its value.
+    yv12->y_buffer = CONVERT_TO_BYTEPTR(yv12->y_buffer);
+    yv12->u_buffer = CONVERT_TO_BYTEPTR(yv12->u_buffer);
+    yv12->v_buffer = CONVERT_TO_BYTEPTR(yv12->v_buffer);
+    yv12->y_stride >>= 1;
+    yv12->uv_stride >>= 1;
+    yv12->flags = YV12_FLAG_HIGHBITDEPTH;
+  } else {
+    yv12->flags = 0;
+  }
+  yv12->border = (yv12->y_stride - img->w) / 2;
+#else
+  yv12->border = (img->stride[VPX_PLANE_Y] - img->w) / 2;
+#endif  // CONFIG_VP9_HIGHBITDEPTH
+  yv12->subsampling_x = img->x_chroma_shift;
+  yv12->subsampling_y = img->y_chroma_shift;
+  return VPX_CODEC_OK;
+}
+
+#endif  // VP9_VP9_IFACE_COMMON_H_
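Taken together, decoder_init(), decoder_decode() and decoder_get_frame() above implement the vpx_codec_iface_t contract that applications reach through the generic vpx_codec_* entry points. The sketch below shows one plausible caller; it is illustrative only: decode_frames is a hypothetical helper, the origin of buf/buf_sz is omitted, and real code would check every return value.

#include <stddef.h>

#include "vpx/vpx_decoder.h"
#include "vpx/vp8dx.h"  // vpx_codec_vp9_dx()

static void decode_frames(const uint8_t *buf, size_t buf_sz) {
  vpx_codec_ctx_t codec;
  // threads > 1 together with VPX_CODEC_USE_FRAME_THREADING is what makes
  // decoder_init() above enable frame parallel decode.
  vpx_codec_dec_cfg_t cfg = { 4, 0, 0 };  // threads, w, h
  vpx_codec_iter_t iter = NULL;
  vpx_image_t *img;

  if (vpx_codec_dec_init(&codec, vpx_codec_vp9_dx(), &cfg,
                         VPX_CODEC_USE_FRAME_THREADING) != VPX_CODEC_OK)
    return;

  // One call per frame or superframe; decoder_decode() splits superframes
  // using vp9_parse_superframe_index().
  vpx_codec_decode(&codec, buf, (unsigned int)buf_sz, NULL, 0);

  // Flush (NULL data) so decoder_get_frame() releases output that frame
  // parallel mode would otherwise hold back while workers are idle.
  vpx_codec_decode(&codec, NULL, 0, NULL, 0);

  while ((img = vpx_codec_get_frame(&codec, &iter)) != NULL) {
    // Consume img->planes[] / img->stride[]; see yuvconfig2image() above
    // for how these map onto the decoder's YV12 buffers.
  }

  vpx_codec_destroy(&codec);
}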