diff '--color=auto' -Naur a/xine/adjustable_scr.c b/xine/adjustable_scr.c
--- a/xine/adjustable_scr.c	2023-11-05 16:04:06.000000000 +0100
+++ b/xine/adjustable_scr.c	2024-07-14 12:45:40.101222775 +0200
@@ -273,13 +273,13 @@
       set_pivot( this );
       this->buffering = 1;
       this->buffering_start_time = time_ms();
-      LOGMSG("start buffering at %"PRId64, this->cur_pts);
+      LOGMSG("start buffering at %" PRId64, this->cur_pts);
     }
   } else {
     if (this->buffering) {
       set_pivot( this );
       this->buffering = 0;
-      LOGMSG("stop buffering at %"PRId64" (buffering took %"PRIu64" ms)",
+      LOGMSG("stop buffering at %" PRId64" (buffering took %" PRIu64" ms)",
              this->cur_pts, elapsed(this->buffering_start_time));
     }
   }
diff '--color=auto' -Naur a/xine/demux_xvdr.c b/xine/demux_xvdr.c
--- a/xine/demux_xvdr.c	2023-11-05 16:04:06.000000000 +0100
+++ b/xine/demux_xvdr.c	2024-07-14 12:53:38.491572095 +0200
@@ -221,7 +221,7 @@
     int still_mode = (int)this->stream->metronom->get_option(this->stream->metronom, XVDR_METRONOM_STILL_MODE);
     int trick_speed = (int)this->stream->metronom->get_option(this->stream->metronom, XVDR_METRONOM_TRICK_SPEED);
     if (still_mode > 0 || trick_speed > 0) {
-      LOGMSG("Skipping new pts %"PRId64" (still=%d trickspeed=%d)", buf->pts, still_mode, trick_speed);
+      LOGMSG("Skipping new pts %" PRId64" (still=%d trickspeed=%d)", buf->pts, still_mode, trick_speed);
       return;
     }
   }
@@ -233,7 +233,7 @@
 
   if (this->send_newpts || (this->last_pts[video] && abs(diff)>WRAP_THRESHOLD)) {
 
-    LOGVERBOSE("New PTS: %"PRId64" (%s)", buf->pts, video ? "VIDEO" : "AUDIO");
+    LOGVERBOSE("New PTS: %" PRId64" (%s)", buf->pts, video ? "VIDEO" : "AUDIO");
 
     if (this->buf_flag_seek) {
       _x_demux_control_newpts(this->stream, buf->pts, BUF_FLAG_SEEK);
       this->buf_flag_seek = 0;
diff '--color=auto' -Naur a/xine/vo_lastpts.c b/xine/vo_lastpts.c
--- a/xine/vo_lastpts.c	2023-11-05 16:04:06.000000000 +0100
+++ b/xine/vo_lastpts.c	2024-07-14 12:45:40.101222775 +0200
@@ -83,7 +83,7 @@
 
   if (this->xvdr_metronom) {
     ASSERT_RET(this->xvdr_metronom->set_option, return);
-    LOGVERBOSE("last pts %"PRId64, vo_img->pts);
+    LOGVERBOSE("last pts %" PRId64, vo_img->pts);
     this->xvdr_metronom->set_option(this->xvdr_metronom, XVDR_METRONOM_LAST_VO_PTS, vo_img->pts);
   }
 
diff '--color=auto' -Naur a/xine/xvdr_metronom.c b/xine/xvdr_metronom.c
--- a/xine/xvdr_metronom.c	2023-11-05 16:04:06.000000000 +0100
+++ b/xine/xvdr_metronom.c	2024-07-14 12:45:40.101222775 +0200
@@ -63,11 +63,11 @@
       int64_t dv = this->vid_pts - this->disc_pts;
       int64_t d_min = min64(da, dv);
       LOGMSG(" stream A-V diff %d ms", (int)(this->vid_pts - this->aud_pts)/90);
-      LOGMSG(" reported stream start at pts %"PRId64, this->disc_pts);
-      LOGMSG(" output fifo end at: audio %"PRId64" video %"PRId64, this->aud_pts, this->vid_pts);
-      LOGMSG(" dA %"PRId64" dV %"PRId64, da, dv);
+      LOGMSG(" reported stream start at pts %" PRId64, this->disc_pts);
+      LOGMSG(" output fifo end at: audio %" PRId64" video %" PRId64, this->aud_pts, this->vid_pts);
+      LOGMSG(" dA %" PRId64" dV %" PRId64, da, dv);
       if (d_min < 0 && d_min > -10*90000) {
-        LOGMSG(" *** output is late %"PRId64" ticks (%"PRId64" ms) ***", d_min, -d_min/90);
+        LOGMSG(" *** output is late %" PRId64" ticks (%" PRId64" ms) ***", d_min, -d_min/90);
         this->scr->jump(this->scr, d_min);
       }
       this->buffering = 0;
diff '--color=auto' -Naur a/xine_input_vdr.c b/xine_input_vdr.c
--- a/xine_input_vdr.c	2023-11-05 16:04:06.000000000 +0100
+++ b/xine_input_vdr.c	2024-07-14 12:48:56.149379516 +0200
@@ -2125,7 +2125,7 @@
       LOGMSG("vdr_flush_engine: guard > curpos, flush skipped");
       return;
     }
-    LOGMSG("vdr_flush_engine: %"PRIu64" < current position %"PRIu64", flush skipped",
+    LOGMSG("vdr_flush_engine: %" PRIu64" < current position %" PRIu64", flush skipped",
            discard_index, this->curpos);
     return;
   }
@@ -4040,7 +4040,7 @@
   mutex_lock_cancellable(&this->lock);
 
   if (this->discard_index < this->discard_index_ds)
-    LOGVERBOSE("wait_stream_sync: waiting for engine_flushed condition %"PRIu64"<%"PRIu64,
+    LOGVERBOSE("wait_stream_sync: waiting for engine_flushed condition %" PRIu64"<%" PRIu64,
                this->discard_index, this->discard_index_ds);
 
   counter = 100;
@@ -4055,7 +4055,7 @@
 
     if (this->discard_index < this->curpos) {
       /* may be less if server-side fifo was cleared */
-      LOGMSG("wait_stream_sync: discard_index %"PRIu64" != curpos %"PRIu64" ! (diff %"PRId64")",
+      LOGMSG("wait_stream_sync: discard_index %" PRIu64" != curpos %" PRIu64" ! (diff %" PRId64")",
              this->discard_index, this->curpos, (int64_t)(this->discard_index - this->curpos));
     }
 
@@ -4065,7 +4065,7 @@
     mutex_unlock_cancellable(&this->lock);
 
     if (synced) {
-      LOGVERBOSE("wait_stream_sync: streams synced at %"PRIu64"/%"PRIu64,
+      LOGVERBOSE("wait_stream_sync: streams synced at %" PRIu64"/%" PRIu64,
                  this->discard_index_ds, sync_index);
       return 0;
     }
@@ -4078,7 +4078,7 @@
       errno = EINTR;
 
     } else if (counter <= 0) {
-      LOGMSG("wait_stream_sync: Timed out ! diff %"PRId64,
+      LOGMSG("wait_stream_sync: Timed out ! diff %" PRId64,
              (int64_t)(sync_index - this->discard_index_ds));
       errno = EAGAIN;
     }
diff '--color=auto' -Naur a/xine_post_swscale.c b/xine_post_swscale.c
--- a/xine_post_swscale.c	2023-11-05 16:04:06.000000000 +0100
+++ b/xine_post_swscale.c	2024-07-14 12:45:40.109222750 +0200
@@ -561,26 +561,26 @@
        /* store ebx (PIC) */
        "mov %%"REGB", "_oldbx" \n\t"
 #endif
-       "movl "_src_row_size", %%"REGC" \n\t"
-       "shrl $3, %%"REGC" \n\t" /* 8 bytes a time */
-       "mov "_srcp1", %%"REGSI" \n\t" /* top of 2 src lines to get */
-       "mov "_srcp2", %%"REGD" \n\t" /* next " */
-       "mov "_vWorkYW", %%"REGDI" \n\t" /* luma work destination line */
-       "mov "_vWorkUVW", %%"REGB" \n\t" /* luma work destination line */
-       "xor %%"REGA", %%"REGA" \n\t"
+       "movl "_src_row_size", %%" REGC" \n\t"
+       "shrl $3, %%" REGC" \n\t" /* 8 bytes a time */
+       "mov "_srcp1", %%" REGSI" \n\t" /* top of 2 src lines to get */
+       "mov "_srcp2", %%" REGD" \n\t" /* next " */
+       "mov "_vWorkYW", %%" REGDI" \n\t" /* luma work destination line */
+       "mov "_vWorkUVW", %%" REGB" \n\t" /* luma work destination line */
+       "xor %%" REGA", %%" REGA" \n\t"
 #if !defined(__x86_64__)
 /* Let's check here to see if we are on a P4 or higher and can use SSE2 instructions.
  * This first loop is not the performance bottleneck anyway but it is trivial to tune
  * using SSE2 if we have proper alignment.
  */
        "testl $1, "_SSE2enabledW" \n\t" /* is SSE2 supported?*/
-       "jz "vMaybeSSEMMX"f \n\t" /* n, can't do anyway*/
+       "jz " vMaybeSSEMMX"f \n\t" /* n, can't do anyway*/
 #endif
-       "cmpl $2, %%"REGC" \n\t" /* we have at least 16 bytes, 2 qwords? */
-       "jl "vMaybeSSEMMX"f \n\t" /* n, don't bother*/
+       "cmpl $2, %%" REGC" \n\t" /* we have at least 16 bytes, 2 qwords? */
+       "jl " vMaybeSSEMMX"f \n\t" /* n, don't bother*/
 
-       "shrl $1, %%"REGC" \n\t" /* do 16 bytes at a time instead*/
-       "decl %%"REGC" \n" /* jigger loop ct */
+       "shrl $1, %%" REGC" \n\t" /* do 16 bytes at a time instead*/
+       "decl %%" REGC" \n" /* jigger loop ct */
 
        ".align 16 \n\t"
@@ -589,14 +589,14 @@
        "movdqu "_vWeight2", %%xmm6 \n\t"
        "movdqu "_YMask", %%xmm7 \n"
 
-       ""vLoopSSE2_Fetch": \n\t"
+       "" vLoopSSE2_Fetch": \n\t"
 #ifdef PREFETCH
-       " prefetcht0 16(%%"REGSI", %%"REGA", 2) \n\t"
-       " prefetcht0 16(%%"REGD", %%"REGA", 2) \n"
+       " prefetcht0 16(%%" REGSI", %%" REGA", 2) \n\t"
+       " prefetcht0 16(%%" REGD", %%" REGA", 2) \n"
 #endif
-       ""vLoopSSE2": \n\t"
-       " movdqu (%%"REGSI", %%"REGA", 2), %%xmm1 \n\t" /* top of 2 lines to interpolate */
-       " movdqu (%%"REGD", %%"REGA", 2), %%xmm2 \n\t" /* 2nd of 2 lines */
+       "" vLoopSSE2": \n\t"
+       " movdqu (%%" REGSI", %%" REGA", 2), %%xmm1 \n\t" /* top of 2 lines to interpolate */
+       " movdqu (%%" REGD", %%" REGA", 2), %%xmm2 \n\t" /* 2nd of 2 lines */
        " movdqa %%xmm1, %%xmm3 \n\t" /* get chroma bytes */
        " pand %%xmm7, %%xmm1 \n\t" /* keep only luma */
@@ -614,9 +614,9 @@
        " paddusw %%xmm0, %%xmm1 \n\t" /* round */
        " psrlw $8, %%xmm1 \n\t" /* right adjust luma */
 #ifdef STREAMING_STORE_TMP
-       " movntdq %%xmm1, (%%"REGDI", %%"REGA", 2) \n\t" /* save lumas in our work area */
+       " movntdq %%xmm1, (%%" REGDI", %%" REGA", 2) \n\t" /* save lumas in our work area */
 #else
-       " movdqu %%xmm1, (%%"REGDI", %%"REGA", 2) \n\t" /* save lumas in our work area */
+       " movdqu %%xmm1, (%%" REGDI", %%" REGA", 2) \n\t" /* save lumas in our work area */
 #endif
        " paddw %%xmm4, %%xmm3 \n\t" /* combine chromas */
        " paddusw %%xmm0, %%xmm3 \n\t" /* round */
@@ -624,36 +624,36 @@
        " packuswb %%xmm3, %%xmm3 \n\t" /* pack UV's into low dword */
        " movdq2q %%xmm3, %%mm1 \n\t" /* save in our work area */
 #ifdef STREAMING_STORE_TMP
-       " movntq %%mm1, (%%"REGB", %%"REGA") \n\t" /* save in our work area */
+       " movntq %%mm1, (%%" REGB", %%" REGA") \n\t" /* save in our work area */
 #else
-       " movq %%mm1, (%%"REGB", %%"REGA") \n\t" /* save in our work area */
+       " movq %%mm1, (%%" REGB", %%" REGA") \n\t" /* save in our work area */
 #endif
-       " lea 8(%%"REGA"), %%"REGA" \n\t"
-       " decl %%"REGC" \n\t"
+       " lea 8(%%" REGA"), %%" REGA" \n\t"
+       " decl %%" REGC" \n\t"
 
-       " jg "vLoopSSE2_Fetch"b \n\t" /* if not on last one loop, prefetch */
-       " jz "vLoopSSE2"b \n\t" /* or just loop, or not */
+       " jg " vLoopSSE2_Fetch"b \n\t" /* if not on last one loop, prefetch */
+       " jz " vLoopSSE2"b \n\t" /* or just loop, or not */
 
 /* done with our SSE2 fortified loop but we may need to pick up the spare change */
 #ifdef STREAMING_STORE_TMP
        " sfence \n\t"
 #endif
-       " movl "_src_row_size", %%"REGC" \n\t" /* get count again */
-       " andl $15, %%"REGC" \n\t" /* just need mod 16 */
+       " movl "_src_row_size", %%" REGC" \n\t" /* get count again */
+       " andl $15, %%" REGC" \n\t" /* just need mod 16 */
        " movq "_YMask", %%mm7 \n\t" /* useful luma mask constant - lazy dupl init */
        " movq "_vWeight1", %%mm5 \n\t"
        " movq "_vWeight2", %%mm6 \n\t"
        " movq "_FPround1", %%mm0 \n\t" /* useful rounding constant */
-       " shrl $3, %%"REGC" \n\t" /* 8 bytes at a time, any? */
-       " jz "MoreSpareChange"f \n" /* n, did them all */
+       " shrl $3, %%" REGC" \n\t" /* 8 bytes at a time, any? */
+       " jz " MoreSpareChange"f \n" /* n, did them all */
 
 /* Let's check here to see if we are on a P2 or Athlon and can use SSEMMX instructions.
  * This first loop is not the performance bottleneck anyway but it is trivial to tune
  * using SSE if we have proper alignment.
  */
-       ""vMaybeSSEMMX": \n\t"
+       "" vMaybeSSEMMX": \n\t"
 
        " movq "_YMask", %%mm7 \n\t" /* useful luma mask constant - lazy dupl init */
        " movq "_vWeight1", %%mm5 \n\t"
@@ -661,19 +661,19 @@
        " movq "_FPround1", %%mm0 \n\t" /* useful rounding constant */
 #if !defined(__x86_64__)
        " testl $1, "_SSEMMXenabledW" \n\t" /* MMXEXTsupported? */
-       " jz "vLoopMMX"f \n\t" /* n, can't do anyway */
+       " jz " vLoopMMX"f \n\t" /* n, can't do anyway */
 #endif
-       " decl %%"REGC" \n" /* jigger loop ctr */
+       " decl %%" REGC" \n" /* jigger loop ctr */
 
        ".align 16 \n"
-       ""vLoopSSEMMX_Fetch": \n\t"
+       "" vLoopSSEMMX_Fetch": \n\t"
 #ifdef PREFETCH
-       " prefetcht0 8(%%"REGSI", %%"REGA", 2) \n\t"
-       " prefetcht0 8(%%"REGD", %%"REGA", 2) \n"
+       " prefetcht0 8(%%" REGSI", %%" REGA", 2) \n\t"
+       " prefetcht0 8(%%" REGD", %%" REGA", 2) \n"
 #endif
-       ""vLoopSSEMMX": \n\t"
-       " movq (%%"REGSI", %%"REGA", 2), %%mm1 \n\t" /* top of 2 lines to interpolate */
-       " movq (%%"REGD", %%"REGA", 2), %%mm2 \n\t" /* 2nd of 2 lines */
+       "" vLoopSSEMMX": \n\t"
+       " movq (%%" REGSI", %%" REGA", 2), %%mm1 \n\t" /* top of 2 lines to interpolate */
+       " movq (%%" REGD", %%" REGA", 2), %%mm2 \n\t" /* 2nd of 2 lines */
        " movq %%mm1, %%mm3 \n\t" /* copy top bytes */
        " pand %%mm7, %%mm1 \n\t" /* keep only luma */
@@ -693,30 +693,30 @@
        " paddusw %%mm0, %%mm1 \n\t" /* round */
        " psrlw $8, %%mm1 \n\t" /* right adjust luma */
 #ifdef STREAMING_STORE_TMP
-       " movntq %%mm1, (%%"REGDI", %%"REGA", 2) \n\t" /* save lumas in our work area */
+       " movntq %%mm1, (%%" REGDI", %%" REGA", 2) \n\t" /* save lumas in our work area */
 #else
-       " movq %%mm1, (%%"REGDI", %%"REGA", 2) \n\t" /* save lumas in our work area */
+       " movq %%mm1, (%%" REGDI", %%" REGA", 2) \n\t" /* save lumas in our work area */
 #endif
        " paddw %%mm4, %%mm3 \n\t" /* combine chromas */
        " paddusw %%mm0, %%mm3 \n\t" /* round */
        " psrlw $8, %%mm3 \n\t" /* right adjust chroma */
        " packuswb %%mm3, %%mm3 \n\t" /* pack UV's into low dword */
-       " movd %%mm3, (%%"REGB", %%"REGA") \n\t" /* save in our work area */
+       " movd %%mm3, (%%" REGB", %%" REGA") \n\t" /* save in our work area */
 
-       " lea 4(%%"REGA"), %%"REGA" \n\t"
-       " decl %%"REGC" \n\t"
-       " jg "vLoopSSEMMX_Fetch"b \n\t" /* if not on last one loop, prefetch */
-       " jz "vLoopSSEMMX"b \n\t" /* or just loop, or not */
+       " lea 4(%%" REGA"), %%" REGA" \n\t"
+       " decl %%" REGC" \n\t"
+       " jg " vLoopSSEMMX_Fetch"b \n\t" /* if not on last one loop, prefetch */
+       " jz " vLoopSSEMMX"b \n\t" /* or just loop, or not */
 #ifdef STREAMING_STORE_TMP
        " sfence \n\t"
 #endif
-       " jmp "MoreSpareChange"f \n" /* all done with vertical */
+       " jmp " MoreSpareChange"f \n" /* all done with vertical */
 
        ".align 16 \n"
-       ""vLoopMMX": \n\t"
+       "" vLoopMMX": \n\t"
 
-       " movq (%%"REGSI", %%"REGA", 2), %%mm1 \n\t" /* top of 2 lines to interpolate */
-       " movq (%%"REGD", %%"REGA", 2), %%mm2 \n\t" /* 2nd of 2 lines */
+       " movq (%%" REGSI", %%" REGA", 2), %%mm1 \n\t" /* top of 2 lines to interpolate */
+       " movq (%%" REGD", %%" REGA", 2), %%mm2 \n\t" /* 2nd of 2 lines */
        " movq %%mm1, %%mm3 \n\t" /* copy top bytes */
        " pand %%mm7, %%mm1 \n\t" /* keep only luma */
@@ -735,79 +735,79 @@
        " paddw %%mm2, %%mm1 \n\t" /* combine lumas */
        " paddusw %%mm0, %%mm1 \n\t" /* round */
        " psrlw $8, %%mm1 \n\t" /* right adjust luma */
-       " movq %%mm1, (%%"REGDI", %%"REGA", 2) \n\t" /* save lumas in our work area */
+       " movq %%mm1, (%%" REGDI", %%" REGA", 2) \n\t" /* save lumas in our work area */
        " paddw %%mm4, %%mm3 \n\t" /* combine chromas */
        " paddusw %%mm0, %%mm3 \n\t" /* round */
        " psrlw $8, %%mm3 \n\t" /* right adjust chroma */
        " packuswb %%mm3, %%mm3 \n\t" /* pack UV's into low dword */
-       " movd %%mm3, (%%"REGB", %%"REGA") \n\t" /* save in our work area */
+       " movd %%mm3, (%%" REGB", %%" REGA") \n\t" /* save in our work area */
 
-       " lea 4(%%"REGA"), %%"REGA" \n\t"
-       " loop "vLoopMMX"b \n"
+       " lea 4(%%" REGA"), %%" REGA" \n\t"
+       " loop " vLoopMMX"b \n"
 
 /* Add a little code here to check if we have 2 more pixels to do and, if so, make one
  * more pass thru vLoopMMX. We were processing in multiples of 4 pixels and alway have
 * an even number so there will never be more than 2 left. trbarry 7/29/2002
 */
-       ""MoreSpareChange": \n\t"
+       "" MoreSpareChange": \n\t"
 
-       " cmpl "_EndOffset", %%"REGEA" \n\t" /* did we get them all */
-       " jnl "DoHorizontal"f \n\t" /* yes, else have 2 left */
-       " movl $1, %%"REGC" \n\t" /* jigger loop ct */
-       " sub $2, %%"REGA" \n\t" /* back up 2 pixels (4 bytes, but eax carried as 1/2) */
-       " jmp "vLoopMMX"b \n"
+       " cmpl "_EndOffset", %%" REGEA" \n\t" /* did we get them all */
+       " jnl " DoHorizontal"f \n\t" /* yes, else have 2 left */
+       " movl $1, %%" REGC" \n\t" /* jigger loop ct */
+       " sub $2, %%" REGA" \n\t" /* back up 2 pixels (4 bytes, but eax carried as 1/2) */
+       " jmp " vLoopMMX"b \n"
 
 /* We've taken care of the vertical scaling, now do horizontal */
-       ""DoHorizontal": \n\t"
+       "" DoHorizontal": \n\t"
        " movq "_YMask", %%mm7 \n\t" /* useful 0U0U.. mask constant */
        " movq "_FPround2", %%mm6 \n\t" /* useful rounding constant, dwords */
-       " mov "_pControl", %%"REGSI" \n\t" /* @ horiz control bytes */
-       " movl "_row_size", %%"REGC" \n\t"
-       " shrl $2, %%"REGC" \n\t" /* bytes a time, 2 pixels */
-       " mov "_vWorkYW", %%"REGD" \n\t" /* our luma data, as 0Y0Y 0Y0Y.. */
-       " mov "_dstp", %%"REGDI" \n\t" /* the destination line */
-       " mov "_vWorkUVW", %%"REGB" \n" /* chroma data, as UVUV UVUV... */
+       " mov "_pControl", %%" REGSI" \n\t" /* @ horiz control bytes */
+       " movl "_row_size", %%" REGC" \n\t"
+       " shrl $2, %%" REGC" \n\t" /* bytes a time, 2 pixels */
+       " mov "_vWorkYW", %%" REGD" \n\t" /* our luma data, as 0Y0Y 0Y0Y.. */
+       " mov "_dstp", %%" REGDI" \n\t" /* the destination line */
+       " mov "_vWorkUVW", %%" REGB" \n" /* chroma data, as UVUV UVUV... */
 
        ".align 16 \n"
-       ""hLoopMMX": \n\t"
+       "" hLoopMMX": \n\t"
 
 /* x86_64: must use movl (accessing table of uint32's) */
-       " movl 16(%%"REGSI"), %%"REGEA" \n\t" /* get data offset in pixels, 1st pixel pair */
-       " movd (%%"REGD", %%"REGA", 2), %%mm0 \n\t" /* copy luma pair */
-       " shr $1, %%"REGA" \n\t" /* div offset by 2 */
-       " movd (%%"REGB", %%"REGA", 2), %%mm1 \n\t" /* copy UV pair VUVU */
+       " movl 16(%%" REGSI"), %%" REGEA" \n\t" /* get data offset in pixels, 1st pixel pair */
+       " movd (%%" REGD", %%" REGA", 2), %%mm0 \n\t" /* copy luma pair */
+       " shr $1, %%" REGA" \n\t" /* div offset by 2 */
+       " movd (%%" REGB", %%" REGA", 2), %%mm1 \n\t" /* copy UV pair VUVU */
        " psllw $8, %%mm1 \n\t" /* shift out V, keep 0000U0U0 */
 
 /* we need to use both even and odd croma from same location - trb 9/2002 */
-       " punpckldq (%%"REGB", %%"REGA", 2), %%mm1 \r\n" /* copy UV pair VUVU */
+       " punpckldq (%%" REGB", %%" REGA", 2), %%mm1 \r\n" /* copy UV pair VUVU */
        " psrlw $8, %%mm1 \r\n" /* shift out U0, keep 0V0V 0U0U */
-       " movl 20(%%"REGSI"), %%"REGEA" \r\n" /* get data offset in pixels, 2nd pixel pair */
-       " punpckldq (%%"REGD", %%"REGA", 2), %%mm0 \r\n" /* copy luma pair */
+       " movl 20(%%" REGSI"), %%" REGEA" \r\n" /* get data offset in pixels, 2nd pixel pair */
+       " punpckldq (%%" REGD", %%" REGA", 2), %%mm0 \r\n" /* copy luma pair */
 
-       " pmaddwd (%%"REGSI"), %%mm0 \r\n" /* mult and sum lumas by ctl weights */
+       " pmaddwd (%%" REGSI"), %%mm0 \r\n" /* mult and sum lumas by ctl weights */
        " paddusw %%mm6, %%mm0 \r\n" /* round */
        " psrlw $8, %%mm0 \r\n" /* right just 2 luma pixel value 000Y,000Y */
-       " pmaddwd 8(%%"REGSI"), %%mm1 \r\n" /* mult and sum chromas by ctl weights */
+       " pmaddwd 8(%%" REGSI"), %%mm1 \r\n" /* mult and sum chromas by ctl weights */
        " paddusw %%mm6, %%mm1 \r\n" /* round */
        " pslld $8, %%mm1 \r\n" /* shift into low bytes of different words */
        " pand %%mm7, %%mm1 \r\n" /* keep only 2 chroma values 0V00,0U00 */
        " por %%mm1, %%mm0 \r\n" /* combine luma and chroma, 0V0Y,0U0Y */
        " packuswb %%mm0, %%mm0 \r\n" /* pack all into low dword, xxxxVYUY */
-       " movd %%mm0, (%%"REGDI") \n\t" /* done with 2 pixels */
+       " movd %%mm0, (%%" REGDI") \n\t" /* done with 2 pixels */
 
-       " lea 24(%%"REGSI"), %%"REGSI" \n\t" /* bump to next control bytest */
-       " lea 4(%%"REGDI"), %%"REGDI" \n\t" /* bump to next output pixel addr */
+       " lea 24(%%" REGSI"), %%" REGSI" \n\t" /* bump to next control bytest */
+       " lea 4(%%" REGDI"), %%" REGDI" \n\t" /* bump to next output pixel addr */
 
-       " loop "hLoopMMX"b \n\t" /* loop for more */
+       " loop " hLoopMMX"b \n\t" /* loop for more */
 
        "emms \n\t" /* done with one line */
 #if !defined(__x86_64__)
-       "mov "_oldbx", %%"REGB" \n\t"
+       "mov "_oldbx", %%" REGB" \n\t"
 #endif
 
        :: "m" /*0*/(FPround1),
@@ -900,30 +900,30 @@
     srcp2 = (y < dst_height-1) ? srcp1 + src_pitch : srcp1;
 
     __asm__ __volatile__(
-       "movl "_src_row_size", %%"REGC" \n\t"
-       "shr $3, %%"REGC" \n\t" /* 8 bytes a time */
-       "mov "_srcp1", %%"REGSI" \n\t" /* top of 2 src lines to get */
-       "mov "_srcp2", %%"REGD" \n\t" /* next " */
-       "mov "_vWorkYW", %%"REGDI" \n\t" /* luma work destination line */
-       "xor %%"REGA", %%"REGA" \n\t"
+       "movl "_src_row_size", %%" REGC" \n\t"
+       "shr $3, %%" REGC" \n\t" /* 8 bytes a time */
+       "mov "_srcp1", %%" REGSI" \n\t" /* top of 2 src lines to get */
+       "mov "_srcp2", %%" REGD" \n\t" /* next " */
+       "mov "_vWorkYW", %%" REGDI" \n\t" /* luma work destination line */
+       "xor %%" REGA", %%" REGA" \n\t"
 #if !defined(__x86_64__)
 /* Let's check here to see if we are on a P4 or higher and can use SSE2 instructions.
  * This first loop is not the performance bottleneck anyway but it is trivial to tune
  * using SSE2 if we have proper alignment.
  */
        "testl $1, "_SSE2enabledW" \n\t" /* is SSE2 supported? */
-       "jz "vMaybeSSEMMX"f \n\t" /* n, can't do anyway */
+       "jz " vMaybeSSEMMX"f \n\t" /* n, can't do anyway */
 #endif
-       "cmpl $2, %%"REGC" \n\t" /* we have at least 16 byts, 2 qwords? */
-       "jl "vMaybeSSEMMX"f \n\t" /* n, don't bother */
+       "cmpl $2, %%" REGC" \n\t" /* we have at least 16 byts, 2 qwords? */
+       "jl " vMaybeSSEMMX"f \n\t" /* n, don't bother */
 
-       "mov %%"REGSI", %%"REGB" \n\t"
-       "or %%"REGD", %%"REGB" \n\t"
-       "test $15, %%"REGB" \n\t" /* both src rows 16 byte aligned? */
-       "jnz "vMaybeSSEMMX"f \n\t" /* n, don't use sse2 */
+       "mov %%" REGSI", %%" REGB" \n\t"
+       "or %%" REGD", %%" REGB" \n\t"
+       "test $15, %%" REGB" \n\t" /* both src rows 16 byte aligned? */
+       "jnz " vMaybeSSEMMX"f \n\t" /* n, don't use sse2 */
 
-       "shr $1, %%"REGC" \n\t" /* do 16 bytes at a time instead */
-       "dec %%"REGC" \n\t" /* jigger loop ct */
+       "shr $1, %%" REGC" \n\t" /* do 16 bytes at a time instead */
+       "dec %%" REGC" \n\t" /* jigger loop ct */
 
        "movdqu "_FPround1", %%xmm0 \n\t"
        "movdqu "_vWeight1", %%xmm5 \n\t"
@@ -931,15 +931,15 @@
        "pxor %%xmm7, %%xmm7 \n"
 
        ".align 16 \n"
-       ""vLoopSSE2_Fetch": \n\t"
+       "" vLoopSSE2_Fetch": \n\t"
 #ifdef PREFETCH
-       " prefetcht0 16(%%"REGSI", %%"REGA", 2) \n\t"
-       " prefetcht0 16(%%"REGD", %%"REGA", 2) \n"
+       " prefetcht0 16(%%" REGSI", %%" REGA", 2) \n\t"
+       " prefetcht0 16(%%" REGD", %%" REGA", 2) \n"
 #endif
-       ""vLoopSSE2": \n\t"
+       "" vLoopSSE2": \n\t"
 
 /* we're already checked pointers to be on dqword aligned */
-       " movdqa (%%"REGSI", %%"REGA"), %%xmm1 \n\t" /* top of 2 lines to interpolate */
-       " movdqa (%%"REGD", %%"REGA"), %%xmm3 \n\t" /* 2nd of 2 lines */
+       " movdqa (%%" REGSI", %%" REGA"), %%xmm1 \n\t" /* top of 2 lines to interpolate */
+       " movdqa (%%" REGD", %%" REGA"), %%xmm3 \n\t" /* 2nd of 2 lines */
        " movdqa %%xmm1, %%xmm2 \n\t"
        " movdqa %%xmm3, %%xmm4 \n\t"
@@ -964,34 +964,34 @@
        " packuswb %%xmm2, %%xmm1 \n\t" /* pack words to our 16 byte answer */
 #ifdef STREAMING_STORE_TMP
-       " movntdq %%xmm1, (%%"REGDI", %%"REGA") \n\t" /* save lumas in our work area */
+       " movntdq %%xmm1, (%%" REGDI", %%" REGA") \n\t" /* save lumas in our work area */
 #else
-       " movdqu %%xmm1, (%%"REGDI", %%"REGA") \n\t" /* save lumas in our work area */
+       " movdqu %%xmm1, (%%" REGDI", %%" REGA") \n\t" /* save lumas in our work area */
 #endif
-       " lea 16(%%"REGA"), %%"REGA" \n\t"
-       " decl %%"REGC" \n\t"
+       " lea 16(%%" REGA"), %%" REGA" \n\t"
+       " decl %%" REGC" \n\t"
 
-       " jg "vLoopSSE2_Fetch"b \n\t" /* if not on last one loop, prefetch */
-       " jz "vLoopSSE2"b \n\t" /* or just loop, or not */
+       " jg " vLoopSSE2_Fetch"b \n\t" /* if not on last one loop, prefetch */
+       " jz " vLoopSSE2"b \n\t" /* or just loop, or not */
 
 /* done with our SSE2 fortified loop but we may need to pick up the spare change */
 #ifdef STREAMING_STORE_TMP
        " sfence \n\t"
 #endif
-       " movl "_src_row_size", %%"REGC" \n\t" /* get count again */
-       " andl $15, %%"REGC" \n\t" /* just need mod 16 */
+       " movl "_src_row_size", %%" REGC" \n\t" /* get count again */
+       " andl $15, %%" REGC" \n\t" /* just need mod 16 */
        " movq "_vWeight1", %%mm5 \n\t"
        " movq "_vWeight2", %%mm6 \n\t"
        " movq "_FPround1", %%mm0 \n\t" /* useful rounding constant */
-       " shrl $3, %%"REGC" \n\t" /* 8 bytes at a time, any? */
-       " jz "MoreSpareChange"f \n" /* n, did them all */
+       " shrl $3, %%" REGC" \n\t" /* 8 bytes at a time, any? */
+       " jz " MoreSpareChange"f \n" /* n, did them all */
 
 /* Let's check here to see if we are on a P2 or Athlon and can use SSEMMX instructions.
  * This first loop is not the performance bottleneck anyway but it is trivial to tune
  * using SSE if we have proper alignment.
 */
-       ""vMaybeSSEMMX": \n\t"
+       "" vMaybeSSEMMX": \n\t"
 
        " movq "_vWeight1", %%mm5 \n\t"
        " movq "_vWeight2", %%mm6 \n\t"
@@ -999,20 +999,20 @@
        " pxor %%mm7, %%mm7 \n\t"
 #if !defined(__x86_64__)
        " testl $1, "_SSEMMXenabledW" \n\t"/* MMXEXTsupported? */
-       " jz "vLoopMMX"f \n\t" /* n, can't do anyway */
+       " jz " vLoopMMX"f \n\t" /* n, can't do anyway */
 #endif
-       " decl %%"REGC" \n" /* jigger loop ctr */
+       " decl %%" REGC" \n" /* jigger loop ctr */
 
        ".align 16 \n"
-       ""vLoopSSEMMX_Fetch": \n\t"
+       "" vLoopSSEMMX_Fetch": \n\t"
 #ifdef PREFETCH
-       " prefetcht0 8(%%"REGSI", %%"REGA") \n\t"
-       " prefetcht0 8(%%"REGD", %%"REGA") \n"
+       " prefetcht0 8(%%" REGSI", %%" REGA") \n\t"
+       " prefetcht0 8(%%" REGD", %%" REGA") \n"
 #endif
-       ""vLoopSSEMMX": \n\t"
+       "" vLoopSSEMMX": \n\t"
 
-       " movq (%%"REGSI", %%"REGA"), %%mm1 \n\t" /* top of 2 lines to interpolate */
-       " movq (%%"REGD", %%"REGA"), %%mm3 \n\t" /* 2nd of 2 lines */
+       " movq (%%" REGSI", %%" REGA"), %%mm1 \n\t" /* top of 2 lines to interpolate */
+       " movq (%%" REGD", %%" REGA"), %%mm3 \n\t" /* 2nd of 2 lines */
        " movq %%mm1, %%mm2 \n\t"
        " movq %%mm3, %%mm4 \n\t"
@@ -1038,25 +1038,25 @@
        " packuswb %%mm2, %%mm1 \n\t" /* pack words to our 8 byte answer */
 #ifdef STREAMING_STORE_TMP
-       " movntq %%mm1, (%%"REGDI", %%"REGA") \n\t" /* save lumas in our work area */
+       " movntq %%mm1, (%%" REGDI", %%" REGA") \n\t" /* save lumas in our work area */
 #else
-       " movq %%mm1, (%%"REGDI", %%"REGA") \n\t" /* save lumas in our work area */
+       " movq %%mm1, (%%" REGDI", %%" REGA") \n\t" /* save lumas in our work area */
 #endif
-       " lea 8(%%"REGA"), %%"REGA" \n\t"
-       " decl %%"REGC" \n\t"
+       " lea 8(%%" REGA"), %%" REGA" \n\t"
+       " decl %%" REGC" \n\t"
 
-       " jg "vLoopSSEMMX_Fetch"b \n\t" /* if not on last one loop, prefetch */
-       " jz "vLoopSSEMMX"b \n\t" /* or just loop, or not */
+       " jg " vLoopSSEMMX_Fetch"b \n\t" /* if not on last one loop, prefetch */
+       " jz " vLoopSSEMMX"b \n\t" /* or just loop, or not */
 #ifdef STREAMING_STORE_TMP
        " sfence \n\t"
 #endif
-       " jmp "MoreSpareChange"f \n" /* all done with vertical */
+       " jmp " MoreSpareChange"f \n" /* all done with vertical */
 
        ".align 16 \n"
-       ""vLoopMMX": \n\t"
+       "" vLoopMMX": \n\t"
 
-       " movq (%%"REGSI", %%"REGA"), %%mm1 \n\t" /* top of 2 lines to interpolate */
-       " movq (%%"REGD", %%"REGA"), %%mm3 \n\t" /* 2nd of 2 lines */
+       " movq (%%" REGSI", %%" REGA"), %%mm1 \n\t" /* top of 2 lines to interpolate */
+       " movq (%%" REGD", %%" REGA"), %%mm3 \n\t" /* 2nd of 2 lines */
        " movq %%mm1, %%mm2 \n\t"
        " movq %%mm3, %%mm4 \n\t"
@@ -1081,84 +1081,84 @@
        " psrlw $8, %%mm2 \n\t" /* right adjust luma */
        " packuswb %%mm2, %%mm1 \n\t" /* pack words to our 8 byte answer */
-       " movq %%mm1, (%%"REGDI", %%"REGA") \n\t" /* save lumas in our work area */
+       " movq %%mm1, (%%" REGDI", %%" REGA") \n\t" /* save lumas in our work area */
 
-       " lea 8(%%"REGA"), %%"REGA" \n\t"
-       " loop "vLoopMMX"b \n"
+       " lea 8(%%" REGA"), %%" REGA" \n\t"
+       " loop " vLoopMMX"b \n"
 
 /* Add a little code here to check if we have more pixels to do and, if so, make one
 * more pass thru vLoopMMX. We were processing in multiples of 8 pixels and alway have
 * an even number so there will never be more than 7 left.
 */
-       ""MoreSpareChange": \n\t"
+       "" MoreSpareChange": \n\t"
 
-       " cmpl "_src_row_size", %%"REGEA" \n\t" /* did we get them all */
-       " jnl "DoHorizontal"f \n\t" /* yes, else have 2 left */
-       " movl $1, %%"REGC" \n\t" /* jigger loop ct */
-       " movl "_src_row_size", %%"REGEA" \n\t"
-       " sub $8, %%"REGA" \n\t" /* back up to last 8 pixels */
-       " jmp "vLoopMMX"b \n"
+       " cmpl "_src_row_size", %%" REGEA" \n\t" /* did we get them all */
+       " jnl " DoHorizontal"f \n\t" /* yes, else have 2 left */
+       " movl $1, %%" REGC" \n\t" /* jigger loop ct */
+       " movl "_src_row_size", %%" REGEA" \n\t"
+       " sub $8, %%" REGA" \n\t" /* back up to last 8 pixels */
+       " jmp " vLoopMMX"b \n"
 
 /* We've taken care of the vertical scaling, now do horizontal */
-       ""DoHorizontal": \n\t"
+       "" DoHorizontal": \n\t"
        " pxor %%mm7, %%mm7 \n\t"
        " movq "_FPround2", %%mm6 \n\t" /* useful rounding constant, dwords */
-       " mov "_pControl", %%"REGSI" \n\t" /* @ horiz control bytes */
-       " movl "_row_size", %%"REGC" \n\t"
-       " shrl $2, %%"REGC" \n\t" /* 4 bytes a time, 4 pixels */
-       " mov "_vWorkYW", %%"REGD" \n\t" /* our luma data, as 0Y0Y 0Y0Y.. */
-       " mov "_dstp", %%"REGDI" \n\t" /* the destination line */
+       " mov "_pControl", %%" REGSI" \n\t" /* @ horiz control bytes */
+       " movl "_row_size", %%" REGC" \n\t"
+       " shrl $2, %%" REGC" \n\t" /* 4 bytes a time, 4 pixels */
+       " mov "_vWorkYW", %%" REGD" \n\t" /* our luma data, as 0Y0Y 0Y0Y.. */
+       " mov "_dstp", %%" REGDI" \n\t" /* the destination line */
 #if !defined(__x86_64__)
        " testl $1, "_SSEMMXenabledW" \n\t" /* MMXEXTsupported? */
-       " jz "hLoopMMX"f \n\t" /* n, can't do anyway */
+       " jz " hLoopMMX"f \n\t" /* n, can't do anyway */
 #endif
 
 /* With SSE support we will make 8 pixels (from 8 pairs) at a time */
-       " shrl $1, %%"REGC" \n\t" /* 8 bytes a time instead of 4 */
-       " jz "LessThan8"f \n"
+       " shrl $1, %%" REGC" \n\t" /* 8 bytes a time instead of 4 */
+       " jz " LessThan8"f \n"
 
        ".align 16 \n"
-       ""hLoopMMXSSE": \n\t"
+       "" hLoopMMXSSE": \n\t"
 
 /* handle first 2 pixels */
 /* phi: must use movl here (x86_64, reading from table of uint_32's) */
-       " movl 16(%%"REGSI"), %%"REGEA" \n\t" /* get data offset in pixels, 1st pixel pair */
-       " movl 20(%%"REGSI"), %%"REGEB" \r\n" /* get data offset in pixels, 2nd pixel pair */
+       " movl 16(%%" REGSI"), %%" REGEA" \n\t" /* get data offset in pixels, 1st pixel pair */
+       " movl 20(%%" REGSI"), %%" REGEB" \r\n" /* get data offset in pixels, 2nd pixel pair */
 
-       " movd (%%"REGD", %%"REGA"), %%mm0 \n\t" /* copy luma pair 0000xxYY */
-       " punpcklwd (%%"REGD", %%"REGB"), %%mm0 \r\n" /* 2nd luma pair, now xxxxYYYY */
+       " movd (%%" REGD", %%" REGA"), %%mm0 \n\t" /* copy luma pair 0000xxYY */
+       " punpcklwd (%%" REGD", %%" REGB"), %%mm0 \r\n" /* 2nd luma pair, now xxxxYYYY */
        " punpcklbw %%mm7, %%mm0 \n\t" /* make words out of bytes, 0Y0Y0Y0Y */
 
-       " movl 16+24(%%"REGSI"), %%"REGEA" \n\t" /* get data offset in pixels, 3st pixel pair */
-       " movl 20+24(%%"REGSI"), %%"REGEB" \r\n" /* get data offset in pixels, 4nd pixel pair */
-       " pmaddwd (%%"REGSI"), %%mm0 \n\t" /* mult and sum lumas by ctl weights */
+       " movl 16+24(%%" REGSI"), %%" REGEA" \n\t" /* get data offset in pixels, 3st pixel pair */
+       " movl 20+24(%%" REGSI"), %%" REGEB" \r\n" /* get data offset in pixels, 4nd pixel pair */
+       " pmaddwd (%%" REGSI"), %%mm0 \n\t" /* mult and sum lumas by ctl weights */
        " paddusw %%mm6, %%mm0 \n\t" /* round */
        " psrlw $8, %%mm0 \n\t" /* right just 4 luma pixel value 0Y0Y0Y0Y */
 
 /* handle 3rd and 4th pixel pairs */
-       " movd (%%"REGD", %%"REGA"), %%mm1 \n\t" /* copy luma pair 0000xxYY */
-       " punpcklwd (%%"REGD", %%"REGB"), %%mm1 \r\n" /* 2nd luma pair, now xxxxYYYY */
+       " movd (%%" REGD", %%" REGA"), %%mm1 \n\t" /* copy luma pair 0000xxYY */
+       " punpcklwd (%%" REGD", %%" REGB"), %%mm1 \r\n" /* 2nd luma pair, now xxxxYYYY */
        " punpcklbw %%mm7, %%mm1 \n\t" /* make words out of bytes, 0Y0Y0Y0Y */
 
-       " movl 16+48(%%"REGSI"), %%"REGEA" \n\t" /* get data offset in pixels, 5st pixel pair */
-       " movl 20+48(%%"REGSI"), %%"REGEB" \r\n" /* get data offset in pixels, 6nd pixel pair */
-       " pmaddwd 24(%%"REGSI"), %%mm1 \n\t" /* mult and sum lumas by ctl weights */
+       " movl 16+48(%%" REGSI"), %%" REGEA" \n\t" /* get data offset in pixels, 5st pixel pair */
+       " movl 20+48(%%" REGSI"), %%" REGEB" \r\n" /* get data offset in pixels, 6nd pixel pair */
+       " pmaddwd 24(%%" REGSI"), %%mm1 \n\t" /* mult and sum lumas by ctl weights */
        " paddusw %%mm6, %%mm1 \n\t" /* round */
        " psrlw $8, %%mm1 \n\t" /* right just 4 luma pixel value 0Y0Y0Y0Y */
 
 /* handle 5th and 6th pixel pairs */
-       " movd (%%"REGD", %%"REGA"), %%mm2 \n\t" /* copy luma pair 0000xxYY */
-       " punpcklwd (%%"REGD", %%"REGB"), %%mm2 \r\n" /* 2nd luma pair, now xxxxYYYY */
+       " movd (%%" REGD", %%" REGA"), %%mm2 \n\t" /* copy luma pair 0000xxYY */
+       " punpcklwd (%%" REGD", %%" REGB"), %%mm2 \r\n" /* 2nd luma pair, now xxxxYYYY */
        " punpcklbw %%mm7, %%mm2 \n\t" /* make words out of bytes, 0Y0Y0Y0Y */
 
-       " movl 16+72(%%"REGSI"), %%"REGEA" \n\t" /* get data offset in pixels, 7st pixel pair */
-       " movl 20+72(%%"REGSI"), %%"REGEB" \r\n" /* get data offset in pixels, 8nd pixel pair */
-       " pmaddwd 48(%%"REGSI"), %%mm2 \n\t" /* mult and sum lumas by ctl weights */
+       " movl 16+72(%%" REGSI"), %%" REGEA" \n\t" /* get data offset in pixels, 7st pixel pair */
+       " movl 20+72(%%" REGSI"), %%" REGEB" \r\n" /* get data offset in pixels, 8nd pixel pair */
+       " pmaddwd 48(%%" REGSI"), %%mm2 \n\t" /* mult and sum lumas by ctl weights */
        " paddusw %%mm6, %%mm2 \n\t" /* round */
        " psrlw $8, %%mm2 \n\t" /* right just 4 luma pixel value 0Y0Y0Y0Y */
 
 /* handle 7th and 8th pixel pairs */
-       " movd (%%"REGD", %%"REGA"), %%mm3 \n\t" /* copy luma pair 0000xxYY */
-       " punpcklwd (%%"REGD", %%"REGB"), %%mm3 \r\n" /* 2nd luma pair, now xxxxYYYY */
+       " movd (%%" REGD", %%" REGA"), %%mm3 \n\t" /* copy luma pair 0000xxYY */
+       " punpcklwd (%%" REGD", %%" REGB"), %%mm3 \r\n" /* 2nd luma pair, now xxxxYYYY */
        " punpcklbw %%mm7, %%mm3 \n\t" /* make words out of bytes, 0Y0Y0Y0Y */
-       " pmaddwd 72(%%"REGSI"), %%mm3 \n\t" /* mult and sum lumas by ctl weights */
+       " pmaddwd 72(%%" REGSI"), %%mm3 \n\t" /* mult and sum lumas by ctl weights */
        " paddusw %%mm6, %%mm3 \n\t" /* round */
        " psrlw $8, %%mm3 \n\t" /* right just 4 luma pixel value 0Y0Y0Y0Y */
@@ -1167,99 +1167,99 @@
        " packuswb %%mm3, %%mm2 \n\t" /* pack into qword, 0Y0Y0Y0Y */
        " packuswb %%mm2, %%mm0 \n\t" /* and again into YYYYYYYY */
 #ifdef STREAMING_STORE
-       " movntq %%mm0, (%%"REGDI") \n\t" /* done with 4 pixels */
+       " movntq %%mm0, (%%" REGDI") \n\t" /* done with 4 pixels */
 #else
-       " movq %%mm0, (%%"REGDI") \n\t" /* done with 4 pixels */
+       " movq %%mm0, (%%" REGDI") \n\t" /* done with 4 pixels */
 #endif
-       " lea 96(%%"REGSI"), %%"REGSI" \n\t"
-       " lea 8(%%"REGDI"), %%"REGDI" \n\t"
-       " decl %%"REGC" \n\t"
-       " jg "hLoopMMXSSE"b \n\t" /* loop for more */
+       " lea 96(%%" REGSI"), %%" REGSI" \n\t"
+       " lea 8(%%" REGDI"), %%" REGDI" \n\t"
+       " decl %%" REGC" \n\t"
+       " jg " hLoopMMXSSE"b \n\t" /* loop for more */
 #ifdef STREAMING_STORE
        " sfence \n"
 #endif
-       ""LessThan8": \n\t"
-       " movl "_row_size", %%"REGC" \n\t"
-       " andl $7, %%"REGC" \n\t" /* we have done all but maybe this */
-       " shrl $2, %%"REGC" \n\t" /* now do only 4 bytes at a time */
-       " jz "LessThan4"f \n"
+       "" LessThan8": \n\t"
+       " movl "_row_size", %%" REGC" \n\t"
+       " andl $7, %%" REGC" \n\t" /* we have done all but maybe this */
+       " shrl $2, %%" REGC" \n\t" /* now do only 4 bytes at a time */
+       " jz " LessThan4"f \n"
 
        ".align 16 \n"
-       ""hLoopMMX": \n\t"
+       "" hLoopMMX": \n\t"
 
 /* handle first 2 pixels */
-       " movl 16(%%"REGSI"), %%"REGEA" \n\t" /* get data offset in pixels, 1st pixel pair */
-       " movl 20(%%"REGSI"), %%"REGEB" \r\n" /* get data offset in pixels, 2nd pixel pair */
-       " movd (%%"REGD", %%"REGA"), %%mm0 \n\t" /* copy luma pair 0000xxYY */
-       " punpcklwd (%%"REGD", %%"REGB"), %%mm0 \r\n" /* 2nd luma pair, now xxxxYYYY */
+       " movl 16(%%" REGSI"), %%" REGEA" \n\t" /* get data offset in pixels, 1st pixel pair */
+       " movl 20(%%" REGSI"), %%" REGEB" \r\n" /* get data offset in pixels, 2nd pixel pair */
+       " movd (%%" REGD", %%" REGA"), %%mm0 \n\t" /* copy luma pair 0000xxYY */
+       " punpcklwd (%%" REGD", %%" REGB"), %%mm0 \r\n" /* 2nd luma pair, now xxxxYYYY */
        " punpcklbw %%mm7, %%mm0 \n\t" /* make words out of bytes, 0Y0Y0Y0Y */
 
-       " movl 16+24(%%"REGSI"), %%"REGEA" \n\t" /* get data offset in pixels, 3st pixel pair */
-       " movl 20+24(%%"REGSI"), %%"REGEB" \r\n" /* get data offset in pixels, 4nd pixel pair */
-       " pmaddwd (%%"REGSI"), %%mm0 \n\t" /* mult and sum lumas by ctl weights */
+       " movl 16+24(%%" REGSI"), %%" REGEA" \n\t" /* get data offset in pixels, 3st pixel pair */
+       " movl 20+24(%%" REGSI"), %%" REGEB" \r\n" /* get data offset in pixels, 4nd pixel pair */
+       " pmaddwd (%%" REGSI"), %%mm0 \n\t" /* mult and sum lumas by ctl weights */
        " paddusw %%mm6, %%mm0 \n\t" /* round */
        " psrlw $8, %%mm0 \n\t" /* right just 4 luma pixel value 0Y0Y0Y0Y */
 
 /* handle 3rd and 4th pixel pairs */
-       " movd (%%"REGD", %%"REGA"), %%mm1 \n\t" /* copy luma pair 0000xxYY */
-       " punpckldq (%%"REGD", %%"REGB"), %%mm1 \r\n" /* 2nd luma pair, now xxxxYYYY */
+       " movd (%%" REGD", %%" REGA"), %%mm1 \n\t" /* copy luma pair 0000xxYY */
+       " punpckldq (%%" REGD", %%" REGB"), %%mm1 \r\n" /* 2nd luma pair, now xxxxYYYY */
        " punpcklbw %%mm7, %%mm1 \n\t" /* make words out of bytes, 0Y0Y0Y0Y */
-       " pmaddwd 24(%%"REGSI"), %%mm1 \n\t" /* mult and sum lumas by ctl weights */
+       " pmaddwd 24(%%" REGSI"), %%mm1 \n\t" /* mult and sum lumas by ctl weights */
        " paddusw %%mm6, %%mm1 \n\t" /* round */
        " psrlw $8, %%mm1 \n\t" /* right just 4 luma pixel value 0Y0Y0Y0Y */
 
 /* combine, store, and loop */
        " packuswb %%mm1, %%mm0 \n\t" /* pack into qword, 0Y0Y0Y0Y */
        " packuswb %%mm7, %%mm0 \n\t" /* and again into 0000YYYY */
-       " movd %%mm0, (%%"REGDI") \n\t" /* done with 4 pixels */
-       " lea 48(%%"REGSI"), %%"REGSI" \n\t"
-       " lea 4(%%"REGDI"), %%"REGDI" \n\t"
+       " movd %%mm0, (%%" REGDI") \n\t" /* done with 4 pixels */
+       " lea 48(%%" REGSI"), %%" REGSI" \n\t"
+       " lea 4(%%" REGDI"), %%" REGDI" \n\t"
 
-       " loop "hLoopMMX"b \n" /* loop for more */
+       " loop " hLoopMMX"b \n" /* loop for more */
 
 /* test to see if we have a mod 4 size row, if not then more spare change */
-       ""LessThan4": \n\t"
-       " movl "_row_size", %%"REGC" \n\t"
-       " andl $3, %%"REGC" \n\t" /* remainder side mod 4 */
-       " cmpl $2, %%"REGC" \n\t"
-       " jl "LastOne"f \n\t" /* none, none */
+       "" LessThan4": \n\t"
+       " movl "_row_size", %%" REGC" \n\t"
+       " andl $3, %%" REGC" \n\t" /* remainder side mod 4 */
+       " cmpl $2, %%" REGC" \n\t"
+       " jl " LastOne"f \n\t" /* none, none */
 
 /* handle 2 more pixels */
-       " movl 16(%%"REGSI"), %%"REGEA" \n\t" /* get data offset in pixels, 1st pixel pair */
-       " movl 20(%%"REGSI"), %%"REGEB" \r\n" /* get data offset in pixels, 2nd pixel pair */
-       " movd (%%"REGD", %%"REGA"), %%mm0 \n\t" /* copy luma pair 0000xxYY */
-       " punpcklwd (%%"REGD", %%"REGB"), %%mm0 \r\n" /* 2nd luma pair, now xxxxYYYY */
+       " movl 16(%%" REGSI"), %%" REGEA" \n\t" /* get data offset in pixels, 1st pixel pair */
+       " movl 20(%%" REGSI"), %%" REGEB" \r\n" /* get data offset in pixels, 2nd pixel pair */
+       " movd (%%" REGD", %%" REGA"), %%mm0 \n\t" /* copy luma pair 0000xxYY */
+       " punpcklwd (%%" REGD", %%" REGB"), %%mm0 \r\n" /* 2nd luma pair, now xxxxYYYY */
        " punpcklbw %%mm7, %%mm0 \n\t" /* make words out of bytes, 0Y0Y0Y0Y */
-       " pmaddwd (%%"REGSI"), %%mm0 \n\t" /* mult and sum lumas by ctl weights */
+       " pmaddwd (%%" REGSI"), %%mm0 \n\t" /* mult and sum lumas by ctl weights */
        " paddusw %%mm6, %%mm0 \n\t" /* round */
        " psrlw $8, %%mm0 \n\t" /* right just 4 luma pixel value 0Y0Y0Y0Y */
        " packuswb %%mm7, %%mm0 \n\t" /* pack into qword, 00000Y0Y */
        " packuswb %%mm7, %%mm0 \n\t" /* and again into 000000YY */
-       " movd %%mm0, (%%"REGDI") \n\t" /* store, we are guarrenteed room in buffer (8 byte mult) */
-       " subl $2, %%"REGC" \n\t"
+       " movd %%mm0, (%%" REGDI") \n\t" /* store, we are guarrenteed room in buffer (8 byte mult) */
+       " subl $2, %%" REGC" \n\t"
 
-       " lea 24(%%"REGSI"), %%"REGSI" \n\t" /* bump to next control bytes */
-       " lea 2(%%"REGDI"), %%"REGDI" \n" /* bump to next output pixel addr */
+       " lea 24(%%" REGSI"), %%" REGSI" \n\t" /* bump to next control bytes */
+       " lea 2(%%" REGDI"), %%" REGDI" \n" /* bump to next output pixel addr */
 
 /* maybe one last pixel */
-       ""LastOne": \n\t"
-       " cmpl $0, %%"REGC" \r\n" /* still more ? */
-       " jz "AllDone"f \r\n" /* n, done */
-       " movl 16(%%"REGSI"), %%"REGEA" \n\t" /* get data offset in pixels, 1st pixel pair */
-       " movd (%%"REGD", %%"REGA"), %%mm0 \n\t" /* copy luma pair 0000xxYY */
+       "" LastOne": \n\t"
+       " cmpl $0, %%" REGC" \r\n" /* still more ? */
+       " jz " AllDone"f \r\n" /* n, done */
+       " movl 16(%%" REGSI"), %%" REGEA" \n\t" /* get data offset in pixels, 1st pixel pair */
+       " movd (%%" REGD", %%" REGA"), %%mm0 \n\t" /* copy luma pair 0000xxYY */
        " punpcklbw %%mm7, %%mm0 \n\t" /* make words out of bytes, 0Y0Y0Y0Y */
-       " pmaddwd (%%"REGSI"), %%mm0 \n\t" /* mult and sum lumas by ctl weights */
+       " pmaddwd (%%" REGSI"), %%mm0 \n\t" /* mult and sum lumas by ctl weights */
        " paddusw %%mm6, %%mm0 \n\t" /* round */
        " psrlw $8, %%mm0 \n\t" /* right just 4 luma pixel value 0Y0Y0Y0Y */
-       " movd %%mm0, %%"REGEA" \n\t"
-       " movb %%al, (%%"REGDI") \n" /* store last one */
+       " movd %%mm0, %%" REGEA" \n\t"
+       " movb %%al, (%%" REGDI") \n" /* store last one */
 
-       ""AllDone": \n\t"
+       "" AllDone": \n\t"
        " emms \n\t"
 #if !defined(__x86_64__)
-       "mov "_oldbx", %%"REGB" \n\t"
+       "mov "_oldbx", %%" REGB" \n\t"
 #endif
 
        :: "m" /*0*/(FPround1),