Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- From 4ad26efdc8b9dc6a393138bb62bf42ff48bf335e Mon Sep 17 00:00:00 2001
- From: Themaister <maister@archlinux.us>
- Date: Tue, 8 Nov 2011 16:16:59 +0100
- Subject: [PATCH] Use more accurate conversion for rgb15/16 to rgb24/32
- (C/MMX).
- ---
- libswscale/rgb2rgb.c | 36 ++++++------
- libswscale/rgb2rgb_template.c | 57 ++++++------------
- libswscale/x86/rgb2rgb.c | 2 +
- libswscale/x86/rgb2rgb_template.c | 116 +++++++++++++++++++-----------------
- 4 files changed, 99 insertions(+), 112 deletions(-)
- diff --git a/libswscale/rgb2rgb.c b/libswscale/rgb2rgb.c
- index 9a7f698..adb3005 100644
- --- a/libswscale/rgb2rgb.c
- +++ b/libswscale/rgb2rgb.c
- @@ -171,13 +171,13 @@ void rgb16tobgr32(const uint8_t *src, uint8_t *dst, int src_size)
- bgr = *s++;
- #if HAVE_BIGENDIAN
- *d++ = 255;
- - *d++ = (bgr&0x1F)<<3;
- - *d++ = (bgr&0x7E0)>>3;
- - *d++ = (bgr&0xF800)>>8;
- + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
- + *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9);
- + *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
- #else
- - *d++ = (bgr&0xF800)>>8;
- - *d++ = (bgr&0x7E0)>>3;
- - *d++ = (bgr&0x1F)<<3;
- + *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
- + *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9);
- + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
- *d++ = 255;
- #endif
- }
- @@ -192,9 +192,9 @@ void rgb16to24(const uint8_t *src, uint8_t *dst, int src_size)
- while (s < end) {
- register uint16_t bgr;
- bgr = *s++;
- - *d++ = (bgr&0xF800)>>8;
- - *d++ = (bgr&0x7E0)>>3;
- - *d++ = (bgr&0x1F)<<3;
- + *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
- + *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9);
- + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
- }
- }
- @@ -231,13 +231,13 @@ void rgb15tobgr32(const uint8_t *src, uint8_t *dst, int src_size)
- bgr = *s++;
- #if HAVE_BIGENDIAN
- *d++ = 255;
- - *d++ = (bgr&0x1F)<<3;
- - *d++ = (bgr&0x3E0)>>2;
- - *d++ = (bgr&0x7C00)>>7;
- + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
- + *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7);
- + *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
- #else
- - *d++ = (bgr&0x7C00)>>7;
- - *d++ = (bgr&0x3E0)>>2;
- - *d++ = (bgr&0x1F)<<3;
- + *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
- + *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7);
- + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
- *d++ = 255;
- #endif
- }
- @@ -252,9 +252,9 @@ void rgb15to24(const uint8_t *src, uint8_t *dst, int src_size)
- while (s < end) {
- register uint16_t bgr;
- bgr = *s++;
- - *d++ = (bgr&0x7C00)>>7;
- - *d++ = (bgr&0x3E0)>>2;
- - *d++ = (bgr&0x1F)<<3;
- + *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
- + *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7);
- + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
- }
- }
- diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c
- index 0734e88..6363bc1 100644
- --- a/libswscale/rgb2rgb_template.c
- +++ b/libswscale/rgb2rgb_template.c
- @@ -227,27 +227,6 @@ static inline void rgb24to15_c(const uint8_t *src, uint8_t *dst, int src_size)
- }
- }
- -/*
- - I use less accurate approximation here by simply left-shifting the input
- - value and filling the low order bits with zeroes. This method improves PNG
- - compression but this scheme cannot reproduce white exactly, since it does
- - not generate an all-ones maximum value; the net effect is to darken the
- - image slightly.
- -
- - The better method should be "left bit replication":
- -
- - 4 3 2 1 0
- - ---------
- - 1 1 0 1 1
- -
- - 7 6 5 4 3 2 1 0
- - ----------------
- - 1 1 0 1 1 1 1 0
- - |=======| |===|
- - | leftmost bits repeated to fill open bits
- - |
- - original bits
- -*/
- static inline void rgb15tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size)
- {
- const uint16_t *end;
- @@ -257,9 +236,9 @@ static inline void rgb15tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size
- while (s < end) {
- register uint16_t bgr;
- bgr = *s++;
- - *d++ = (bgr&0x1F)<<3;
- - *d++ = (bgr&0x3E0)>>2;
- - *d++ = (bgr&0x7C00)>>7;
- + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
- + *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7);
- + *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
- }
- }
- @@ -272,9 +251,9 @@ static inline void rgb16tobgr24_c(const uint8_t *src, uint8_t *dst, int src_size
- while (s < end) {
- register uint16_t bgr;
- bgr = *s++;
- - *d++ = (bgr&0x1F)<<3;
- - *d++ = (bgr&0x7E0)>>3;
- - *d++ = (bgr&0xF800)>>8;
- + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
- + *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9);
- + *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
- }
- }
- @@ -289,13 +268,13 @@ static inline void rgb15to32_c(const uint8_t *src, uint8_t *dst, int src_size)
- bgr = *s++;
- #if HAVE_BIGENDIAN
- *d++ = 255;
- - *d++ = (bgr&0x7C00)>>7;
- - *d++ = (bgr&0x3E0)>>2;
- - *d++ = (bgr&0x1F)<<3;
- + *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
- + *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7);
- + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
- #else
- - *d++ = (bgr&0x1F)<<3;
- - *d++ = (bgr&0x3E0)>>2;
- - *d++ = (bgr&0x7C00)>>7;
- + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
- + *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7);
- + *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
- *d++ = 255;
- #endif
- }
- @@ -312,13 +291,13 @@ static inline void rgb16to32_c(const uint8_t *src, uint8_t *dst, int src_size)
- bgr = *s++;
- #if HAVE_BIGENDIAN
- *d++ = 255;
- - *d++ = (bgr&0xF800)>>8;
- - *d++ = (bgr&0x7E0)>>3;
- - *d++ = (bgr&0x1F)<<3;
- + *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
- + *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9);
- + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
- #else
- - *d++ = (bgr&0x1F)<<3;
- - *d++ = (bgr&0x7E0)>>3;
- - *d++ = (bgr&0xF800)>>8;
- + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
- + *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9);
- + *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
- *d++ = 255;
- #endif
- }
- diff --git a/libswscale/x86/rgb2rgb.c b/libswscale/x86/rgb2rgb.c
- index ed7f5ad..3fffc39 100644
- --- a/libswscale/x86/rgb2rgb.c
- +++ b/libswscale/x86/rgb2rgb.c
- @@ -68,6 +68,8 @@ DECLARE_ASM_CONST(8, uint64_t, blue_16mask) = 0x0000001f0000001fULL;
- DECLARE_ASM_CONST(8, uint64_t, red_15mask) = 0x00007c0000007c00ULL;
- DECLARE_ASM_CONST(8, uint64_t, green_15mask) = 0x000003e0000003e0ULL;
- DECLARE_ASM_CONST(8, uint64_t, blue_15mask) = 0x0000001f0000001fULL;
- +DECLARE_ASM_CONST(8, uint64_t, mul1524) = 0x0021002100210021ULL; /* a + (a << 5) used in rgb{15,16} -> rgb{24,32} */
- +DECLARE_ASM_CONST(8, uint64_t, mul1624) = 0x0041004100410041ULL; /* a + (a << 6) used in green channel for rgb16 -> rgb{24,32} */
- #define RGB2YUV_SHIFT 8
- #define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5))
- diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c
- index baef3f8..3acd5e9 100644
- --- a/libswscale/x86/rgb2rgb_template.c
- +++ b/libswscale/x86/rgb2rgb_template.c
- @@ -801,27 +801,6 @@ static inline void RENAME(rgb24to15)(const uint8_t *src, uint8_t *dst, int src_s
- }
- }
- -/*
- - I use less accurate approximation here by simply left-shifting the input
- - value and filling the low order bits with zeroes. This method improves PNG
- - compression but this scheme cannot reproduce white exactly, since it does
- - not generate an all-ones maximum value; the net effect is to darken the
- - image slightly.
- -
- - The better method should be "left bit replication":
- -
- - 4 3 2 1 0
- - ---------
- - 1 1 0 1 1
- -
- - 7 6 5 4 3 2 1 0
- - ----------------
- - 1 1 0 1 1 1 1 0
- - |=======| |===|
- - | leftmost bits repeated to fill open bits
- - |
- - original bits
- -*/
- static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int src_size)
- {
- const uint16_t *end;
- @@ -840,9 +819,13 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
- "pand %2, %%mm0 \n\t"
- "pand %3, %%mm1 \n\t"
- "pand %4, %%mm2 \n\t"
- - "psllq $3, %%mm0 \n\t"
- - "psrlq $2, %%mm1 \n\t"
- - "psrlq $7, %%mm2 \n\t"
- + "psrlq $10, %%mm2 \n\t"
- + "pmullw %6, %%mm0 \n\t"
- + "pmullw %6, %%mm1 \n\t"
- + "pmullw %6, %%mm2 \n\t"
- + "psrlw $2, %%mm0 \n\t"
- + "psrlw $7, %%mm1 \n\t"
- + "psrlw $2, %%mm2 \n\t"
- "movq %%mm0, %%mm3 \n\t"
- "movq %%mm1, %%mm4 \n\t"
- "movq %%mm2, %%mm5 \n\t"
- @@ -870,9 +853,13 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
- "pand %2, %%mm0 \n\t"
- "pand %3, %%mm1 \n\t"
- "pand %4, %%mm2 \n\t"
- - "psllq $3, %%mm0 \n\t"
- - "psrlq $2, %%mm1 \n\t"
- - "psrlq $7, %%mm2 \n\t"
- + "psrlq $10, %%mm2 \n\t"
- + "pmullw %6, %%mm0 \n\t"
- + "pmullw %6, %%mm1 \n\t"
- + "pmullw %6, %%mm2 \n\t"
- + "psrlw $2, %%mm0 \n\t"
- + "psrlw $7, %%mm1 \n\t"
- + "psrlw $2, %%mm2 \n\t"
- "movq %%mm0, %%mm3 \n\t"
- "movq %%mm1, %%mm4 \n\t"
- "movq %%mm2, %%mm5 \n\t"
- @@ -892,7 +879,7 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
- "por %%mm5, %%mm3 \n\t"
- :"=m"(*d)
- - :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null)
- + :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r),"m"(mmx_null),"m"(mul1524)
- :"memory");
- /* borrowed 32 to 24 */
- __asm__ volatile(
- @@ -919,9 +906,9 @@ static inline void RENAME(rgb15tobgr24)(const uint8_t *src, uint8_t *dst, int sr
- while (s < end) {
- register uint16_t bgr;
- bgr = *s++;
- - *d++ = (bgr&0x1F)<<3;
- - *d++ = (bgr&0x3E0)>>2;
- - *d++ = (bgr&0x7C00)>>7;
- + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
- + *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7);
- + *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
- }
- }
- @@ -943,9 +930,14 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
- "pand %2, %%mm0 \n\t"
- "pand %3, %%mm1 \n\t"
- "pand %4, %%mm2 \n\t"
- - "psllq $3, %%mm0 \n\t"
- - "psrlq $3, %%mm1 \n\t"
- - "psrlq $8, %%mm2 \n\t"
- + "psrlq $5, %%mm1 \n\t"
- + "psrlq $11, %%mm2 \n\t"
- + "pmullw %6, %%mm0 \n\t"
- + "pmullw %7, %%mm1 \n\t"
- + "pmullw %6, %%mm2 \n\t"
- + "psrlw $2, %%mm0 \n\t"
- + "psrlw $4, %%mm1 \n\t"
- + "psrlw $2, %%mm2 \n\t"
- "movq %%mm0, %%mm3 \n\t"
- "movq %%mm1, %%mm4 \n\t"
- "movq %%mm2, %%mm5 \n\t"
- @@ -973,9 +965,14 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
- "pand %2, %%mm0 \n\t"
- "pand %3, %%mm1 \n\t"
- "pand %4, %%mm2 \n\t"
- - "psllq $3, %%mm0 \n\t"
- - "psrlq $3, %%mm1 \n\t"
- - "psrlq $8, %%mm2 \n\t"
- + "psrlq $5, %%mm1 \n\t"
- + "psrlq $11, %%mm2 \n\t"
- + "pmullw %6, %%mm0 \n\t"
- + "pmullw %7, %%mm1 \n\t"
- + "pmullw %6, %%mm2 \n\t"
- + "psrlw $2, %%mm0 \n\t"
- + "psrlw $4, %%mm1 \n\t"
- + "psrlw $2, %%mm2 \n\t"
- "movq %%mm0, %%mm3 \n\t"
- "movq %%mm1, %%mm4 \n\t"
- "movq %%mm2, %%mm5 \n\t"
- @@ -994,7 +991,7 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
- "por %%mm4, %%mm3 \n\t"
- "por %%mm5, %%mm3 \n\t"
- :"=m"(*d)
- - :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null)
- + :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null),"m"(mul1524),"m"(mul1624)
- :"memory");
- /* borrowed 32 to 24 */
- __asm__ volatile(
- @@ -1021,9 +1018,9 @@ static inline void RENAME(rgb16tobgr24)(const uint8_t *src, uint8_t *dst, int sr
- while (s < end) {
- register uint16_t bgr;
- bgr = *s++;
- - *d++ = (bgr&0x1F)<<3;
- - *d++ = (bgr&0x7E0)>>3;
- - *d++ = (bgr&0xF800)>>8;
- + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
- + *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9);
- + *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
- }
- }
- @@ -1066,12 +1063,16 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s
- "pand %2, %%mm0 \n\t"
- "pand %3, %%mm1 \n\t"
- "pand %4, %%mm2 \n\t"
- - "psllq $3, %%mm0 \n\t"
- - "psrlq $2, %%mm1 \n\t"
- - "psrlq $7, %%mm2 \n\t"
- + "psrlq $10, %%mm2 \n\t"
- + "pmullw %5, %%mm0 \n\t"
- + "pmullw %5, %%mm1 \n\t"
- + "pmullw %5, %%mm2 \n\t"
- + "psrlw $2, %%mm0 \n\t"
- + "psrlw $7, %%mm1 \n\t"
- + "psrlw $2, %%mm2 \n\t"
- PACK_RGB32
- :"=m"(*d)
- - :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r)
- + :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r),"m"(mul1524)
- :"memory");
- d += 16;
- s += 4;
- @@ -1081,9 +1082,9 @@ static inline void RENAME(rgb15to32)(const uint8_t *src, uint8_t *dst, int src_s
- while (s < end) {
- register uint16_t bgr;
- bgr = *s++;
- - *d++ = (bgr&0x1F)<<3;
- - *d++ = (bgr&0x3E0)>>2;
- - *d++ = (bgr&0x7C00)>>7;
- + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
- + *d++ = ((bgr&0x3E0)>>2) | ((bgr&0x3E0)>>7);
- + *d++ = ((bgr&0x7C00)>>7) | ((bgr&0x7C00)>>12);
- *d++ = 255;
- }
- }
- @@ -1108,12 +1109,17 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s
- "pand %2, %%mm0 \n\t"
- "pand %3, %%mm1 \n\t"
- "pand %4, %%mm2 \n\t"
- - "psllq $3, %%mm0 \n\t"
- - "psrlq $3, %%mm1 \n\t"
- - "psrlq $8, %%mm2 \n\t"
- + "psrlq $5, %%mm1 \n\t"
- + "psrlq $11, %%mm2 \n\t"
- + "pmullw %5, %%mm0 \n\t"
- + "pmullw %6, %%mm1 \n\t"
- + "pmullw %5, %%mm2 \n\t"
- + "psrlw $2, %%mm0 \n\t"
- + "psrlw $4, %%mm1 \n\t"
- + "psrlw $2, %%mm2 \n\t"
- PACK_RGB32
- :"=m"(*d)
- - :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r)
- + :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mul1524),"m"(mul1624)
- :"memory");
- d += 16;
- s += 4;
- @@ -1123,9 +1129,9 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, int src_s
- while (s < end) {
- register uint16_t bgr;
- bgr = *s++;
- - *d++ = (bgr&0x1F)<<3;
- - *d++ = (bgr&0x7E0)>>3;
- - *d++ = (bgr&0xF800)>>8;
- + *d++ = ((bgr&0x1F)<<3) | ((bgr&0x1F)>>2);
- + *d++ = ((bgr&0x7E0)>>3) | ((bgr&0x7E0)>>9);
- + *d++ = ((bgr&0xF800)>>8) | ((bgr&0xF800)>>13);
- *d++ = 255;
- }
- }
- --
- 1.7.7.2
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement