注册 登录
编程论坛 汇编论坛

求助:在SSE指令中如何做到循环移动内存,或者交换字节次序?请帮忙优化下面的代码。

cuczhangyi 发布于 2010-01-21 12:31, 945 次点击

我这样的做法,效率太低,有没有高手能够帮助优化下?
int nSize = 16000000;
    LPDWORD pSrcData = new DWORD[nSize];
    LPDWORD pDesData = new DWORD[nSize];
    int i = 0;
    for(i = 0; i < nSize; i++)
    {
        pSrcData[i] = 0xff807060;
        pDesData[i] = 0;
    }

    DWORD dwTick3 = GetTickCount();        // MMX
    int nSSESize = nSize / 4;
    DWORD xx4[] = { 0x000000ff,0x000000ff,0x000000ff,0x000000ff };
    DWORD xx5[] = { 0x00ff0000,0x00ff0000,0x00ff0000,0x00ff0000 };
    DWORD xx6[] = { 0xff00ff00,0xff00ff00,0xff00ff00,0xff00ff00 };
    __asm
    {
        mov ecx,nSSESize
        mov esi,pSrcData
        mov edi,pDesData
        movdqu xmm4,[xx4]
        movdqu xmm5,[xx5]
        movdqu xmm6,[xx6]
__LOOP2:
        movdqa xmm0, [esi]
        movdqa xmm1, xmm0
        psrld xmm1,16        // 0x0000aarr0000aarr
        pand xmm1,xmm4
        movdqa xmm2,xmm1
        movdqa xmm1,xmm0
        pslld xmm1,16        // 0xggbb0000ggbb0000
        pand xmm1,xmm5
        por xmm2,xmm1        // 0x00rr00bb00rr00bb
        pand xmm0,xmm6        // 0XAA00GG00AA00GG00
        por xmm2,xmm0        // 0xaarrggbbaarrggbb

        movdqa [edi],xmm2
        add esi,16
        add edi,16
        dec ecx
        jnz __LOOP2
        emms
    }
    dwTick3 = GetTickCount() - dwTick3;
1 回复
#2
cuczhangyi2010-01-21 16:30
痛不欲生啊~~~~              
1