Source code
Revision control
Copy as Markdown
Other Tools
/*
* VP9 compatible video decoder
*
* Copyright (C) 2013 Ronald S. Bultje <rsbultje gmail com>
* Copyright (C) 2013 Clément Bœsch <u pkh me>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/common.h"
#include "bit_depth_template.c"
#include "vp9dsp.h"
#if BIT_DEPTH != 12
// FIXME see whether we can merge parts of this (perhaps at least 4x4 and 8x8)
// back with h264pred.[ch]
/*
 * "vert" fill, 4x4: the four pixels read from _top are copied unchanged
 * into each of the four destination rows.  The left argument is not read.
 *
 * stride is divided by sizeof(pixel) before it is used as a pixel offset
 * (presumably the caller passes it in bytes -- confirm against the caller).
 */
static void vert_4x4_c(uint8_t *restrict _dst, ptrdiff_t stride,
                       const uint8_t *left, const uint8_t *_top)
{
    const pixel *above = (const pixel *) _top;
    pixel *out = (pixel *) _dst;
    const pixel4 above4 = AV_RN4PA(above);
    int y;

    stride /= sizeof(pixel);

    for (y = 0; y < 4; y++)
        AV_WN4PA(out + stride * y, above4);
}
/*
 * "vert" fill, 8x8: the eight pixels read from _top are copied unchanged
 * into each of the eight destination rows.  The left argument is not read.
 *
 * With BIT_DEPTH == 8 a row is moved as one aligned 64-bit word; otherwise
 * it is moved as two pixel4 groups.  stride is divided by sizeof(pixel)
 * before it is used as a pixel offset.
 */
static void vert_8x8_c(uint8_t *restrict _dst, ptrdiff_t stride,
                       const uint8_t *left, const uint8_t *_top)
{
    const pixel *above = (const pixel *) _top;
    pixel *row = (pixel *) _dst;
#if BIT_DEPTH == 8
    const uint64_t above8 = AV_RN64A(above);
#else
    const pixel4 lo4 = AV_RN4PA(above + 0);
    const pixel4 hi4 = AV_RN4PA(above + 4);
#endif
    int rows_left = 8;

    stride /= sizeof(pixel);

    while (rows_left-- > 0) {
#if BIT_DEPTH == 8
        AV_WN64A(row, above8);
#else
        AV_WN4PA(row + 0, lo4);
        AV_WN4PA(row + 4, hi4);
#endif
        row += stride;
    }
}
/*
 * "vert" fill, 16x16: the sixteen pixels read from _top are copied
 * unchanged into each of the sixteen destination rows.  The left argument
 * is not read.
 *
 * The top row is loaded once into a small local cache before any store:
 * two aligned 64-bit words when BIT_DEPTH == 8, four pixel4 groups
 * otherwise.  stride is divided by sizeof(pixel) before it is used as a
 * pixel offset.
 */
static void vert_16x16_c(uint8_t *restrict _dst, ptrdiff_t stride,
                         const uint8_t *left, const uint8_t *_top)
{
    const pixel *above = (const pixel *) _top;
    pixel *row = (pixel *) _dst;
#if BIT_DEPTH == 8
    uint64_t seg[2];
#else
    pixel4 seg[4];
#endif
    int i, y;

    stride /= sizeof(pixel);

#if BIT_DEPTH == 8
    for (i = 0; i < 2; i++)
        seg[i] = AV_RN64A(above + 8 * i);
#else
    for (i = 0; i < 4; i++)
        seg[i] = AV_RN4PA(above + 4 * i);
#endif

    for (y = 0; y < 16; y++) {
#if BIT_DEPTH == 8
        for (i = 0; i < 2; i++)
            AV_WN64A(row + 8 * i, seg[i]);
#else
        for (i = 0; i < 4; i++)
            AV_WN4PA(row + 4 * i, seg[i]);
#endif
        row += stride;
    }
}
/*
 * "vert" fill, 32x32: the thirty-two pixels read from _top are copied
 * unchanged into each of the thirty-two destination rows.  The left
 * argument is not read.
 *
 * The top row is loaded once into a small local cache before any store:
 * four aligned 64-bit words when BIT_DEPTH == 8, eight pixel4 groups
 * otherwise.  stride is divided by sizeof(pixel) before it is used as a
 * pixel offset.
 */
static void vert_32x32_c(uint8_t *restrict _dst, ptrdiff_t stride,
                         const uint8_t *left, const uint8_t *_top)
{
    const pixel *above = (const pixel *) _top;
    pixel *row = (pixel *) _dst;
#if BIT_DEPTH == 8
    uint64_t seg[4];
#else
    pixel4 seg[8];
#endif
    int i, y;

    stride /= sizeof(pixel);

#if BIT_DEPTH == 8
    for (i = 0; i < 4; i++)
        seg[i] = AV_RN64A(above + 8 * i);
#else
    for (i = 0; i < 8; i++)
        seg[i] = AV_RN4PA(above + 4 * i);
#endif

    for (y = 0; y < 32; y++) {
#if BIT_DEPTH == 8
        for (i = 0; i < 4; i++)
            AV_WN64A(row + 8 * i, seg[i]);
#else
        for (i = 0; i < 8; i++)
            AV_WN4PA(row + 4 * i, seg[i]);
#endif
        row += stride;
    }
}
/*
 * "hor" fill, 4x4: every pixel of destination row y is set to
 * left[3 - y], i.e. the left array is consumed in reverse index order as
 * the rows advance.  The top argument is not read.
 *
 * stride is divided by sizeof(pixel) before it is used as a pixel offset.
 */
static void hor_4x4_c(uint8_t *_dst, ptrdiff_t stride,
                      const uint8_t *_left, const uint8_t *top)
{
    const pixel *edge = (const pixel *) _left;
    pixel *out = (pixel *) _dst;
    int y;

    stride /= sizeof(pixel);

    for (y = 0; y < 4; y++)
        AV_WN4PA(out + stride * y, PIXEL_SPLAT_X4(edge[3 - y]));
}
/*
 * "hor" fill, 8x8: every pixel of destination row y is set to
 * left[7 - y], i.e. the left array is consumed in reverse index order as
 * the rows advance.  The top argument is not read.
 *
 * stride is divided by sizeof(pixel) before it is used as a pixel offset.
 */
static void hor_8x8_c(uint8_t *_dst, ptrdiff_t stride,
                      const uint8_t *_left, const uint8_t *top)
{
    const pixel *edge = (const pixel *) _left;
    pixel *row = (pixel *) _dst;
    int x, y;

    stride /= sizeof(pixel);

    for (y = 0; y < 8; y++) {
        /* one left pixel replicated into a group of four */
        const pixel4 splat = PIXEL_SPLAT_X4(edge[7 - y]);

        for (x = 0; x < 8; x += 4)
            AV_WN4PA(row + x, splat);
        row += stride;
    }
}
/*
 * "hor" fill, 16x16: every pixel of destination row y is set to
 * left[15 - y], i.e. the left array is consumed in reverse index order as
 * the rows advance.  The top argument is not read.
 *
 * stride is divided by sizeof(pixel) before it is used as a pixel offset.
 */
static void hor_16x16_c(uint8_t *_dst, ptrdiff_t stride,
                        const uint8_t *_left, const uint8_t *top)
{
    const pixel *edge = (const pixel *) _left;
    pixel *row = (pixel *) _dst;
    int x, y;

    stride /= sizeof(pixel);

    for (y = 0; y < 16; y++) {
        /* one left pixel replicated into a group of four */
        const pixel4 splat = PIXEL_SPLAT_X4(edge[15 - y]);

        for (x = 0; x < 16; x += 4)
            AV_WN4PA(row + x, splat);
        row += stride;
    }
}
/*
 * "hor" fill, 32x32: every pixel of destination row y is set to
 * left[31 - y], i.e. the left array is consumed in reverse index order as
 * the rows advance.  The top argument is not read.
 *
 * stride is divided by sizeof(pixel) before it is used as a pixel offset.
 */
static void hor_32x32_c(uint8_t *_dst, ptrdiff_t stride,
                        const uint8_t *_left, const uint8_t *top)
{
    const pixel *edge = (const pixel *) _left;
    pixel *row = (pixel *) _dst;
    int x, y;

    stride /= sizeof(pixel);

    for (y = 0; y < 32; y++) {
        /* one left pixel replicated into a group of four */
        const pixel4 splat = PIXEL_SPLAT_X4(edge[31 - y]);

        for (x = 0; x < 32; x += 4)
            AV_WN4PA(row + x, splat);
        row += stride;
    }
}
#endif /* BIT_DEPTH != 12 */
/*
 * "tm" fill, 4x4.  For row y and column x the stored value is
 *
 *     av_clip_pixel(top[x] + left[3 - y] - top[-1])
 *
 * so the element just before the start of the top row (top[-1]) must be
 * readable.  left is indexed in reverse order as the rows advance.
 *
 * stride is divided by sizeof(pixel) before it is used as a pixel offset.
 */
static void tm_4x4_c(uint8_t *_dst, ptrdiff_t stride,
                     const uint8_t *_left, const uint8_t *_top)
{
    const pixel *above = (const pixel *) _top;
    const pixel *edge = (const pixel *) _left;
    pixel *row = (pixel *) _dst;
    const int corner = above[-1];
    int x, y;

    stride /= sizeof(pixel);

    for (y = 0; y < 4; y++) {
        /* per-row offset added to every top pixel */
        const int delta = edge[3 - y] - corner;

        for (x = 0; x < 4; x++)
            row[x] = av_clip_pixel(above[x] + delta);
        row += stride;
    }
}
/*
 * "tm" fill, 8x8.  For row y and column x the stored value is
 *
 *     av_clip_pixel(top[x] + left[7 - y] - top[-1])
 *
 * so the element just before the start of the top row (top[-1]) must be
 * readable.  left is indexed in reverse order as the rows advance.
 *
 * stride is divided by sizeof(pixel) before it is used as a pixel offset.
 */
static void tm_8x8_c(uint8_t *_dst, ptrdiff_t stride,
                     const uint8_t *_left, const uint8_t *_top)
{
    const pixel *above = (const pixel *) _top;
    const pixel *edge = (const pixel *) _left;
    pixel *row = (pixel *) _dst;
    const int corner = above[-1];
    int x, y;

    stride /= sizeof(pixel);

    for (y = 0; y < 8; y++) {
        /* per-row offset added to every top pixel */
        const int delta = edge[7 - y] - corner;

        for (x = 0; x < 8; x++)
            row[x] = av_clip_pixel(above[x] + delta);
        row += stride;
    }
}
/*
 * "tm" fill, 16x16.  For row y and column x the stored value is
 *
 *     av_clip_pixel(top[x] + left[15 - y] - top[-1])
 *
 * so the element just before the start of the top row (top[-1]) must be
 * readable.  left is indexed in reverse order as the rows advance.
 *
 * stride is divided by sizeof(pixel) before it is used as a pixel offset.
 */
static void tm_16x16_c(uint8_t *_dst, ptrdiff_t stride,
                       const uint8_t *_left, const uint8_t *_top)
{
    const pixel *above = (const pixel *) _top;
    const pixel *edge = (const pixel *) _left;
    pixel *row = (pixel *) _dst;
    const int corner = above[-1];
    int x, y;

    stride /= sizeof(pixel);

    for (y = 0; y < 16; y++) {
        /* per-row offset added to every top pixel */
        const int delta = edge[15 - y] - corner;

        for (x = 0; x < 16; x++)
            row[x] = av_clip_pixel(above[x] + delta);
        row += stride;
    }
}
/*
 * "tm" fill, 32x32.  For row y and column x the stored value is
 *
 *     av_clip_pixel(top[x] + left[31 - y] - top[-1])
 *
 * so the element just before the start of the top row (top[-1]) must be
 * readable.  left is indexed in reverse order as the rows advance.
 *
 * stride is divided by sizeof(pixel) before it is used as a pixel offset.
 */
static void tm_32x32_c(uint8_t *_dst, ptrdiff_t stride,
                       const uint8_t *_left, const uint8_t *_top)
{
    const pixel *above = (const pixel *) _top;
    const pixel *edge = (const pixel *) _left;
    pixel *row = (pixel *) _dst;
    const int corner = above[-1];
    int x, y;

    stride /= sizeof(pixel);

    for (y = 0; y < 32; y++) {
        /* per-row offset added to every top pixel */
        const int delta = edge[31 - y] - corner;

        for (x = 0; x < 32; x++)
            row[x] = av_clip_pixel(above[x] + delta);
        row += stride;
    }
}
#if BIT_DEPTH != 12
/*
 * "dc" fill, 4x4: the whole block is set to the rounded mean of the four
 * left pixels and the four top pixels, (sum + 4) >> 3.
 *
 * Both edges are summed before anything is written to the destination.
 * stride is divided by sizeof(pixel) before it is used as a pixel offset.
 */
static void dc_4x4_c(uint8_t *_dst, ptrdiff_t stride,
                     const uint8_t *_left, const uint8_t *_top)
{
    const pixel *above = (const pixel *) _top;
    const pixel *edge = (const pixel *) _left;
    pixel *out = (pixel *) _dst;
    pixel4 fill;
    int sum = 4; /* rounding term for the >> 3 below */
    int i, y;

    for (i = 0; i < 4; i++)
        sum += edge[i] + above[i];
    fill = PIXEL_SPLAT_X4(sum >> 3);

    stride /= sizeof(pixel);

    for (y = 0; y < 4; y++)
        AV_WN4PA(out + stride * y, fill);
}
/*
 * "dc" fill, 8x8: the whole block is set to the rounded mean of the eight
 * left pixels and the eight top pixels, (sum + 8) >> 4.
 *
 * Both edges are summed before anything is written to the destination.
 * stride is divided by sizeof(pixel) before it is used as a pixel offset.
 */
static void dc_8x8_c(uint8_t *_dst, ptrdiff_t stride,
                     const uint8_t *_left, const uint8_t *_top)
{
    const pixel *above = (const pixel *) _top;
    const pixel *edge = (const pixel *) _left;
    pixel *row = (pixel *) _dst;
    pixel4 fill;
    int sum = 8; /* rounding term for the >> 4 below */
    int i, x, y;

    for (i = 0; i < 8; i++)
        sum += edge[i] + above[i];
    fill = PIXEL_SPLAT_X4(sum >> 4);

    stride /= sizeof(pixel);

    for (y = 0; y < 8; y++) {
        for (x = 0; x < 8; x += 4)
            AV_WN4PA(row + x, fill);
        row += stride;
    }
}
/*
 * "dc" fill, 16x16: the whole block is set to the rounded mean of the
 * sixteen left pixels and the sixteen top pixels, (sum + 16) >> 5.
 *
 * Both edges are summed before anything is written to the destination.
 * stride is divided by sizeof(pixel) before it is used as a pixel offset.
 */
static void dc_16x16_c(uint8_t *_dst, ptrdiff_t stride,
                       const uint8_t *_left, const uint8_t *_top)
{
    const pixel *above = (const pixel *) _top;
    const pixel *edge = (const pixel *) _left;
    pixel *row = (pixel *) _dst;
    pixel4 fill;
    int sum = 16; /* rounding term for the >> 5 below */
    int i, x, y;

    for (i = 0; i < 16; i++)
        sum += edge[i] + above[i];
    fill = PIXEL_SPLAT_X4(sum >> 5);

    stride /= sizeof(pixel);

    for (y = 0; y < 16; y++) {
        for (x = 0; x < 16; x += 4)
            AV_WN4PA(row + x, fill);
        row += stride;
    }
}
/*
 * "dc" fill, 32x32: the whole block is set to the rounded mean of the
 * thirty-two left pixels and the thirty-two top pixels, (sum + 32) >> 6.
 *
 * Both edges are summed before anything is written to the destination.
 * stride is divided by sizeof(pixel) before it is used as a pixel offset.
 */
static void dc_32x32_c(uint8_t *_dst, ptrdiff_t stride,
                       const uint8_t *_left, const uint8_t *_top)
{
    const pixel *above = (const pixel *) _top;
    const pixel *edge = (const pixel *) _left;
    pixel *row = (pixel *) _dst;
    pixel4 fill;
    int sum = 32; /* rounding term for the >> 6 below */
    int i, x, y;

    for (i = 0; i < 32; i++)
        sum += edge[i] + above[i];
    fill = PIXEL_SPLAT_X4(sum >> 6);

    stride /= sizeof(pixel);

    for (y = 0; y < 32; y++) {
        for (x = 0; x < 32; x += 4)
            AV_WN4PA(row + x, fill);
        row += stride;
    }
}
/*
 * "dc_left" fill, 4x4: the whole block is set to the rounded mean of the
 * four left pixels only, (sum + 2) >> 2.  The top argument is not read.
 *
 * stride is divided by sizeof(pixel) before it is used as a pixel offset.
 */
static void dc_left_4x4_c(uint8_t *_dst, ptrdiff_t stride,
                          const uint8_t *_left, const uint8_t *top)
{
    const pixel *edge = (const pixel *) _left;
    pixel *out = (pixel *) _dst;
    pixel4 fill;
    int sum = 2; /* rounding term for the >> 2 below */
    int i, y;

    for (i = 0; i < 4; i++)
        sum += edge[i];
    fill = PIXEL_SPLAT_X4(sum >> 2);

    stride /= sizeof(pixel);

    for (y = 0; y < 4; y++)
        AV_WN4PA(out + stride * y, fill);
}
/*
 * "dc_left" fill, 8x8: the whole block is set to the rounded mean of the
 * eight left pixels only, (sum + 4) >> 3.  The top argument is not read.
 *
 * stride is divided by sizeof(pixel) before it is used as a pixel offset.
 */
static void dc_left_8x8_c(uint8_t *_dst, ptrdiff_t stride,
                          const uint8_t *_left, const uint8_t *top)
{
    const pixel *edge = (const pixel *) _left;
    pixel *row = (pixel *) _dst;
    pixel4 fill;
    int sum = 4; /* rounding term for the >> 3 below */
    int i, x, y;

    for (i = 0; i < 8; i++)
        sum += edge[i];
    fill = PIXEL_SPLAT_X4(sum >> 3);

    stride /= sizeof(pixel);

    for (y = 0; y < 8; y++) {
        for (x = 0; x < 8; x += 4)
            AV_WN4PA(row + x, fill);
        row += stride;
    }
}
/*
 * "dc_left" fill, 16x16: the whole block is set to the rounded mean of the
 * sixteen left pixels only, (sum + 8) >> 4.  The top argument is not read.
 *
 * stride is divided by sizeof(pixel) before it is used as a pixel offset.
 */
static void dc_left_16x16_c(uint8_t *_dst, ptrdiff_t stride,
                            const uint8_t *_left, const uint8_t *top)
{
    const pixel *edge = (const pixel *) _left;
    pixel *row = (pixel *) _dst;
    pixel4 fill;
    int sum = 8; /* rounding term for the >> 4 below */
    int i, x, y;

    for (i = 0; i < 16; i++)
        sum += edge[i];
    fill = PIXEL_SPLAT_X4(sum >> 4);

    stride /= sizeof(pixel);

    for (y = 0; y < 16; y++) {
        for (x = 0; x < 16; x += 4)
            AV_WN4PA(row + x, fill);
        row += stride;
    }
}
/*
 * "dc_left" fill, 32x32: the whole block is set to the rounded mean of the
 * thirty-two left pixels only, (sum + 16) >> 5.  The top argument is not
 * read.
 *
 * stride is divided by sizeof(pixel) before it is used as a pixel offset.
 */
static void dc_left_32x32_c(uint8_t *_dst, ptrdiff_t stride,
                            const uint8_t *_left, const uint8_t *top)
{
    const pixel *edge = (const pixel *) _left;
    pixel *row = (pixel *) _dst;
    pixel4 fill;
    int sum = 16; /* rounding term for the >> 5 below */
    int i, x, y;

    for (i = 0; i < 32; i++)
        sum += edge[i];
    fill = PIXEL_SPLAT_X4(sum >> 5);

    stride /= sizeof(pixel);

    for (y = 0; y < 32; y++) {
        for (x = 0; x < 32; x += 4)
            AV_WN4PA(row + x, fill);
        row += stride;
    }
}
/*
 * "dc_top" fill, 4x4: the whole block is set to the rounded mean of the
 * four top pixels only, (sum + 2) >> 2.  The left argument is not read.
 *
 * stride is divided by sizeof(pixel) before it is used as a pixel offset.
 */
static void dc_top_4x4_c(uint8_t *_dst, ptrdiff_t stride,
                         const uint8_t *left, const uint8_t *_top)
{
    const pixel *above = (const pixel *) _top;
    pixel *out = (pixel *) _dst;
    pixel4 fill;
    int sum = 2; /* rounding term for the >> 2 below */
    int i, y;

    for (i = 0; i < 4; i++)
        sum += above[i];
    fill = PIXEL_SPLAT_X4(sum >> 2);

    stride /= sizeof(pixel);

    for (y = 0; y < 4; y++)
        AV_WN4PA(out + stride * y, fill);
}
/*
 * "dc_top" fill, 8x8: the whole block is set to the rounded mean of the
 * eight top pixels only, (sum + 4) >> 3.  The left argument is not read.
 *
 * stride is divided by sizeof(pixel) before it is used as a pixel offset.
 */
static void dc_top_8x8_c(uint8_t *_dst, ptrdiff_t stride,
                         const uint8_t *left, const uint8_t *_top)
{
    const pixel *above = (const pixel *) _top;
    pixel *row = (pixel *) _dst;
    pixel4 fill;
    int sum = 4; /* rounding term for the >> 3 below */
    int i, x, y;

    for (i = 0; i < 8; i++)
        sum += above[i];
    fill = PIXEL_SPLAT_X4(sum >> 3);

    stride /= sizeof(pixel);

    for (y = 0; y < 8; y++) {
        for (x = 0; x < 8; x += 4)
            AV_WN4PA(row + x, fill);
        row += stride;
    }
}
/*
 * "dc_top" fill, 16x16: the whole block is set to the rounded mean of the
 * sixteen top pixels only, (sum + 8) >> 4.  The left argument is not read.
 *
 * stride is divided by sizeof(pixel) before it is used as a pixel offset.
 */
static void dc_top_16x16_c(uint8_t *_dst, ptrdiff_t stride,
                           const uint8_t *left, const uint8_t *_top)
{
    const pixel *above = (const pixel *) _top;
    pixel *row = (pixel *) _dst;
    pixel4 fill;
    int sum = 8; /* rounding term for the >> 4 below */
    int i, x, y;

    for (i = 0; i < 16; i++)
        sum += above[i];
    fill = PIXEL_SPLAT_X4(sum >> 4);

    stride /= sizeof(pixel);

    for (y = 0; y < 16; y++) {
        for (x = 0; x < 16; x += 4)
            AV_WN4PA(row + x, fill);
        row += stride;
    }
}
/*
 * "dc_top" fill, 32x32: the whole block is set to the rounded mean of the
 * thirty-two top pixels only, (sum + 16) >> 5.  The left argument is not
 * read.
 *
 * stride is divided by sizeof(pixel) before it is used as a pixel offset.
 */
static void dc_top_32x32_c(uint8_t *_dst, ptrdiff_t stride,
                           const uint8_t *left, const uint8_t *_top)
{
    const pixel *above = (const pixel *) _top;
    pixel *row = (pixel *) _dst;
    pixel4 fill;
    int sum = 16; /* rounding term for the >> 5 below */
    int i, x, y;

    for (i = 0; i < 32; i++)
        sum += above[i];
    fill = PIXEL_SPLAT_X4(sum >> 5);

    stride /= sizeof(pixel);

    for (y = 0; y < 32; y++) {
        for (x = 0; x < 32; x += 4)
            AV_WN4PA(row + x, fill);
        row += stride;
    }
}
#endif /* BIT_DEPTH != 12 */
/*
 * "dc_128" fill, 4x4: the whole block is set to the constant
 * 128 << (BIT_DEPTH - 8).  Neither left nor top is read.
 *
 * stride is divided by sizeof(pixel) before it is used as a pixel offset.
 */
static void dc_128_4x4_c(uint8_t *_dst, ptrdiff_t stride,
                         const uint8_t *left, const uint8_t *top)
{
    pixel *out = (pixel *) _dst;
    const pixel4 fill = PIXEL_SPLAT_X4(128 << (BIT_DEPTH - 8));
    int y;

    stride /= sizeof(pixel);

    for (y = 0; y < 4; y++)
        AV_WN4PA(out + stride * y, fill);
}
/*
 * "dc_128" fill, 8x8: the whole block is set to the constant
 * 128 << (BIT_DEPTH - 8).  Neither left nor top is read.
 *
 * stride is divided by sizeof(pixel) before it is used as a pixel offset.
 */
static void dc_128_8x8_c(uint8_t *_dst, ptrdiff_t stride,
                         const uint8_t *left, const uint8_t *top)
{
    pixel *row = (pixel *) _dst;
    const pixel4 fill = PIXEL_SPLAT_X4(128 << (BIT_DEPTH - 8));
    int x, y;

    stride /= sizeof(pixel);

    for (y = 0; y < 8; y++) {
        for (x = 0; x < 8; x += 4)
            AV_WN4PA(row + x, fill);
        row += stride;
    }
}
/*
 * "dc_128" fill, 16x16: the whole block is set to the constant
 * 128 << (BIT_DEPTH - 8).  Neither left nor top is read.
 *
 * stride is divided by sizeof(pixel) before it is used as a pixel offset.
 */
static void dc_128_16x16_c(uint8_t *_dst, ptrdiff_t stride,
                           const uint8_t *left, const uint8_t *top)
{
    pixel *row = (pixel *) _dst;
    const pixel4 fill = PIXEL_SPLAT_X4(128 << (BIT_DEPTH - 8));
    int x, y;

    stride /= sizeof(pixel);

    for (y = 0; y < 16; y++) {
        for (x = 0; x < 16; x += 4)
            AV_WN4PA(row + x, fill);
        row += stride;
    }
}
/*
 * "dc_128" fill, 32x32: the whole block is set to the constant
 * 128 << (BIT_DEPTH - 8).  Neither left nor top is read.
 *
 * stride is divided by sizeof(pixel) before it is used as a pixel offset.
 */
static void dc_128_32x32_c(uint8_t *_dst, ptrdiff_t stride,
                           const uint8_t *left, const uint8_t *top)
{
    pixel *row = (pixel *) _dst;
    const pixel4 fill = PIXEL_SPLAT_X4(128 << (BIT_DEPTH - 8));
    int x, y;

    stride /= sizeof(pixel);

    for (y = 0; y < 32; y++) {
        for (x = 0; x < 32; x += 4)
            AV_WN4PA(row + x, fill);
        row += stride;
    }
}
/*
 * "dc_127" fill, 4x4: the whole block is set to the constant
 * (128 << (BIT_DEPTH - 8)) - 1.  Neither left nor top is read.
 *
 * stride is divided by sizeof(pixel) before it is used as a pixel offset.
 */
static void dc_127_4x4_c(uint8_t *_dst, ptrdiff_t stride,
                         const uint8_t *left, const uint8_t *top)
{
    pixel *out = (pixel *) _dst;
    const pixel4 fill = PIXEL_SPLAT_X4((128 << (BIT_DEPTH - 8)) - 1);
    int y;

    stride /= sizeof(pixel);

    for (y = 0; y < 4; y++)
        AV_WN4PA(out + stride * y, fill);
}
/*
 * "dc_127" fill, 8x8: the whole block is set to the constant
 * (128 << (BIT_DEPTH - 8)) - 1.  Neither left nor top is read.
 *
 * stride is divided by sizeof(pixel) before it is used as a pixel offset.
 */
static void dc_127_8x8_c(uint8_t *_dst, ptrdiff_t stride,
                         const uint8_t *left, const uint8_t *top)
{
    pixel *row = (pixel *) _dst;
    const pixel4 fill = PIXEL_SPLAT_X4((128 << (BIT_DEPTH - 8)) - 1);
    int x, y;

    stride /= sizeof(pixel);

    for (y = 0; y < 8; y++) {
        for (x = 0; x < 8; x += 4)
            AV_WN4PA(row + x, fill);
        row += stride;
    }
}
/*
 * "dc_127" fill, 16x16: the whole block is set to the constant
 * (128 << (BIT_DEPTH - 8)) - 1.  Neither left nor top is read.
 *
 * stride is divided by sizeof(pixel) before it is used as a pixel offset.
 */
static void dc_127_16x16_c(uint8_t *_dst, ptrdiff_t stride,
                           const uint8_t *left, const uint8_t *top)
{
    pixel *row = (pixel *) _dst;
    const pixel4 fill = PIXEL_SPLAT_X4((128 << (BIT_DEPTH - 8)) - 1);
    int x, y;

    stride /= sizeof(pixel);

    for (y = 0; y < 16; y++) {
        for (x = 0; x < 16; x += 4)
            AV_WN4PA(row + x, fill);
        row += stride;
    }
}
static void dc_127_32x32_c(uint8_t *_dst, ptrdiff_t stride,
const uint8_t *left, const uint8_t *top)
{
pixel *dst = (pixel *) _dst;
pixel4 val = PIXEL_SPLAT_X4((128 << (BIT_DEPTH - 8)) - 1);
int y;
stride /= sizeof(pixel);
for (y = 0; y < 32; y++) {
AV_WN4PA(dst + 0, val);
AV_WN4PA(dst + 4, val);
AV_WN4PA(dst + 8, val);
AV_WN4PA(dst + 12, val);
AV_WN4PA(dst + 16, val);
AV_WN4PA(dst + 20, val);
AV_WN4PA(dst + 24, val);