Source code

Revision control

Copy as Markdown

Other Tools

/*
* Copyright © 2008 Mozilla Corporation
* Copyright © 2010 Nokia Corporation
*
* Permission to use, copy, modify, distribute, and sell this software and its
* documentation for any purpose is hereby granted without fee, provided that
* the above copyright notice appear in all copies and that both that
* copyright notice and this permission notice appear in supporting
* documentation, and that the name of Mozilla Corporation not be used in
* advertising or publicity pertaining to distribution of the software without
* specific, written prior permission. Mozilla Corporation makes no
* representations about the suitability of this software for any purpose. It
* is provided "as is" without express or implied warranty.
*
* THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS
* SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS, IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY
* SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING
* OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
* SOFTWARE.
*
* Author: Jeff Muizelaar (jeff@infidigm.net)
*
*/
/* Prevent the stack from becoming executable */
#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif
.text
.arch armv6
.object_arch armv4
.arm
.altmacro
.p2align 2
#include "pixman-arm-asm.h"
pixman_syntax_unified
/*
* Note: This code is only using armv5te instructions (not even armv6),
* but is scheduled for ARM Cortex-A8 pipeline. So it might need to
* be split into a few variants, tuned for each microarchitecture.
*
* TODO: In order to get good performance on ARM9/ARM11 cores (which don't
* have efficient write combining), it needs to be changed to use 16-byte
* aligned writes using STM instruction.
*
* Nearest scanline scaler macro template uses the following arguments:
* fname - name of the function to generate
* bpp_shift - (1 << bpp_shift) is the size of pixel in bytes
* t - type suffix for LDR/STR instructions
* prefetch_distance - prefetch in the source image by that many
* pixels ahead
* prefetch_braking_distance - stop prefetching when that many pixels are
* remaining before the end of scanline
*/
.macro generate_nearest_scanline_func fname, bpp_shift, t, \
prefetch_distance, \
prefetch_braking_distance
pixman_asm_function \fname
W .req r0
DST .req r1
SRC .req r2
VX .req r3
UNIT_X .req ip
TMP1 .req r4
TMP2 .req r5
VXMASK .req r6
PF_OFFS .req r7
SRC_WIDTH_FIXED .req r8
ldr UNIT_X, [sp]
push {r4, r5, r6, r7, r8, r10}
mvn VXMASK, #((1 << \bpp_shift) - 1)
ldr SRC_WIDTH_FIXED, [sp, #28]
/* define helper macro */
.macro scale_2_pixels
ldr\()\t TMP1, [SRC, TMP1]
and TMP2, VXMASK, VX, asr #(16 - \bpp_shift)
adds VX, VX, UNIT_X
str\()\t TMP1, [DST], #(1 << \bpp_shift)
9: subspl VX, VX, SRC_WIDTH_FIXED
bpl 9b
ldr\()\t TMP2, [SRC, TMP2]
and TMP1, VXMASK, VX, asr #(16 - \bpp_shift)
adds VX, VX, UNIT_X
str\()\t TMP2, [DST], #(1 << \bpp_shift)
9: subspl VX, VX, SRC_WIDTH_FIXED
bpl 9b
.endm
/* now do the scaling */
and TMP1, VXMASK, VX, asr #(16 - \bpp_shift)
adds VX, VX, UNIT_X
9: subspl VX, VX, SRC_WIDTH_FIXED
bpl 9b
subs W, W, #(8 + \prefetch_braking_distance)
blt 2f
/* calculate prefetch offset */
mov PF_OFFS, #\prefetch_distance
mla PF_OFFS, UNIT_X, PF_OFFS, VX
1: /* main loop, process 8 pixels per iteration with prefetch */
pld [SRC, PF_OFFS, asr #(16 - \bpp_shift)]
add PF_OFFS, PF_OFFS, UNIT_X, lsl #3
scale_2_pixels
scale_2_pixels
scale_2_pixels
scale_2_pixels
subs W, W, #8
bge 1b
2:
subs W, W, #(4 - 8 - \prefetch_braking_distance)
blt 2f
1: /* process the remaining pixels */
scale_2_pixels
scale_2_pixels
subs W, W, #4
bge 1b
2:
tst W, #2
beq 2f
scale_2_pixels
2:
tst W, #1
ldr\()\t\()ne TMP1, [SRC, TMP1]
str\()\t\()ne TMP1, [DST]
/* cleanup helper macro */
.purgem scale_2_pixels
.unreq DST
.unreq SRC
.unreq W
.unreq VX
.unreq UNIT_X
.unreq TMP1
.unreq TMP2
.unreq VXMASK
.unreq PF_OFFS
.unreq SRC_WIDTH_FIXED
/* return */
pop {r4, r5, r6, r7, r8, r10}
bx lr
pixman_end_asm_function
.endm
generate_nearest_scanline_func \
pixman_scaled_nearest_scanline_0565_0565_SRC_asm_armv6, 1, h, 80, 32
generate_nearest_scanline_func \
pixman_scaled_nearest_scanline_8888_8888_SRC_asm_armv6, 2, , 48, 32