Mercurial > hg > jgplsrc
diff vasm.h @ 0:e0bbaa717f41 draft default tip
lol J
author | Jordi Gutiérrez Hermoso <jordigh@octave.org> |
---|---|
date | Mon, 25 Nov 2013 11:56:30 -0500 (2013-11-25) |
parents | |
children |
line wrap: on
line diff
/* Source: new file mode 100644, --- /dev/null, +++ b/vasm.h, @@ -0,0 +1,517 @@ */
/* Copyright 1990-2011, Jsoftware Inc.  All rights reserved.               */
/* License in license.txt.                                                 */
/*                                                                         */
/* Verbs: Assembly Routines for Integer + * - with Overflow                */

/* Naming of the macro families (shown for +; - and * follow the same      */
/* pattern with MINUS and TYMES prefixes):                                 */
/*                                                                         */
/* fvv  zv=.xv+yv       0<n   (xxxVV: vector op vector)                    */
/* fv1  zv=.xv+y        0<n   (xxxV1: vector op scalar)                    */
/* f1v  zv=.x +yv       0<n   (xxx1V: scalar op vector)                    */
/* frv  zv=.xv+zv       0<n   (xxxRV: combine x into z in place)           */
/* fr   z =.+/   xv     1<n   (xxxR : reduce to a single value)            */
/* fp   zv=.+/\  xv     1<n   (xxxP : prefix scan)                         */
/* fs   zv=.+/\. xv     1<n   (xxxS : suffix scan)                         */
/*                                                                         */
/* Every macro reports integer overflow through the names er and EWOV,     */
/* which are not defined in this header and must be in scope wherever a    */
/* macro is expanded.  The element counts must respect the lower bounds    */
/* listed above; the macros do not check them.                             */

#ifndef NOASM /* builder defines NOASM to do I overflow in C instead of asm */

#if SY_WIN32 && !SY_64 && !SY_WINCE
#define OVF

/* 32-bit inline-assembly versions.                                        */
/*                                                                         */
/* Common shape: the count goes in ecx and the x86 "loop" instruction      */
/* decrements ecx and repeats while it is non-zero.  Where a base pointer  */
/* has 4 subtracted from it, [base+ecx*4] addresses element ecx-1, so      */
/* those loops walk the 4-byte elements from last to first.  "jo" leaves   */
/* the loop on signed overflow and stores EWOV into er; results already    */
/* written stay written.                                                   */
/*                                                                         */
/* NOTE(review): a count of 0 would make "loop" wrap ecx around instead of */
/* terminating, and the labels are fixed names, so each macro can          */
/* presumably be expanded only once per function - confirm against the     */
/* callers.                                                                */

/* zv = xv + yv, element-wise over m elements. */
#define PLUSVV(m,z,x,y)                 \
{                                       \
__asm        mov  ecx,m                 \
__asm        mov  edi,z                 \
__asm        mov  esi,x                 \
__asm        mov  edx,y                 \
__asm        sub  edi,4                 \
__asm        sub  esi,4                 \
__asm        sub  edx,4                 \
__asm pvv20: mov  eax,[esi+ecx*4]       \
__asm        add  eax,[edx+ecx*4]       \
__asm        jo   pvv30                 \
__asm        mov  [edi+ecx*4],eax       \
__asm        loop pvv20                 \
__asm        jmp  pvv40                 \
__asm pvv30: mov  er,EWOV               \
__asm pvv40:                            \
}

/* zv = u + yv, scalar u added to each of n elements of y. */
#define PLUS1V(n,z,u,y)                 \
{                                       \
__asm        mov  ecx,n                 \
__asm        mov  edi,z                 \
__asm        mov  edx,u                 \
__asm        mov  esi,y                 \
__asm        sub  edi,4                 \
__asm        sub  esi,4                 \
__asm p1v20: mov  eax,[esi+ecx*4]       \
__asm        add  eax,edx               \
__asm        jo   p1v30                 \
__asm        mov  [edi+ecx*4],eax       \
__asm        loop p1v20                 \
__asm        jmp  p1v40                 \
__asm p1v30: mov  er,EWOV               \
__asm p1v40:                            \
}

/* zv = xv + v, scalar v added to each of n elements of x. */
#define PLUSV1(n,z,x,v)                 \
{                                       \
__asm        mov  ecx,n                 \
__asm        mov  edi,z                 \
__asm        mov  esi,x                 \
__asm        mov  edx,v                 \
__asm        sub  edi,4                 \
__asm        sub  esi,4                 \
__asm pv120: mov  eax,[esi+ecx*4]       \
__asm        add  eax,edx               \
__asm        jo   pv130                 \
__asm        mov  [edi+ecx*4],eax       \
__asm        loop pv120                 \
__asm        jmp  pv140                 \
__asm pv130: mov  er,EWOV               \
__asm pv140:                            \
}

/* zv = xv + zv, accumulating d elements of x into z in place. */
#define PLUSRV(d,z,x)                   \
{                                       \
__asm        mov  ecx,d                 \
__asm        mov  edi,z                 \
__asm        mov  esi,x                 \
__asm        sub  edi,4                 \
__asm        sub  esi,4                 \
__asm prv20: mov  eax,[esi+ecx*4]       \
__asm        add  eax,[edi+ecx*4]       \
__asm        jo   prv30                 \
__asm        mov  [edi+ecx*4],eax       \
__asm        loop prv20                 \
__asm        jmp  prv40                 \
__asm prv30: mov  er,EWOV               \
__asm prv40:                            \
}

/* *z = sum of the n elements of x; *z is written only if no overflow. */
#define PLUSR(n,z,x)                    \
{                                       \
__asm        mov  ecx,n                 \
__asm        mov  edi,z                 \
__asm        mov  esi,x                 \
__asm        sub  esi,4                 \
__asm        xor  eax,eax               \
__asm pr20:  add  eax,[esi+ecx*4]       \
__asm        jo   pr30                  \
__asm        loop pr20                  \
__asm        mov  [edi],eax             \
__asm        jmp  pr40                  \
__asm pr30:  mov  er,EWOV               \
__asm pr40:                             \
}

/* zv = running (prefix) sums of x.  The loop is unrolled by two: ecx      */
/* holds n/2 pair iterations, edx is the ascending element index, and the  */
/* low bit of n kept in ebx selects one trailing element.                  */
#define PLUSP(n,z,x)                    \
{                                       \
__asm        mov  ecx,n                 \
__asm        mov  edi,z                 \
__asm        mov  esi,x                 \
__asm        xor  edx,edx               \
__asm        xor  eax,eax               \
__asm        mov  ebx,ecx               \
__asm        sar  ecx,1                 \
__asm pp20:  add  eax,[esi+edx*4]       \
__asm        jo   pp30                  \
__asm        mov  [edi+edx*4],eax       \
__asm        inc  edx                   \
__asm        add  eax,[esi+edx*4]       \
__asm        jo   pp30                  \
__asm        mov  [edi+edx*4],eax       \
__asm        inc  edx                   \
__asm        loop pp20                  \
__asm        and  ebx,1                 \
__asm        jz   pp40                  \
__asm        add  eax,[esi+edx*4]       \
__asm        jo   pp30                  \
__asm        mov  [edi+edx*4],eax       \
__asm        jmp  pp40                  \
__asm pp30:  mov  er,EWOV               \
__asm pp40:                             \
}

/* zv = suffix sums of x, accumulated from the last element to the first. */
#define PLUSS(n,z,x)                    \
{                                       \
__asm        mov  ecx,n                 \
__asm        mov  edi,z                 \
__asm        mov  esi,x                 \
__asm        sub  esi,4                 \
__asm        sub  edi,4                 \
__asm        xor  eax,eax               \
__asm ps20:  add  eax,[esi+ecx*4]       \
__asm        jo   ps30                  \
__asm        mov  [edi+ecx*4],eax       \
__asm        loop ps20                  \
__asm        jmp  ps40                  \
__asm ps30:  mov  er,EWOV               \
__asm ps40:                             \
}


/* zv = xv - yv, element-wise over m elements. */
#define MINUSVV(m,z,x,y)                \
{                                       \
__asm        mov  ecx,m                 \
__asm        mov  edi,z                 \
__asm        mov  esi,x                 \
__asm        mov  edx,y                 \
__asm        sub  edi,4                 \
__asm        sub  esi,4                 \
__asm        sub  edx,4                 \
__asm mvv20: mov  eax,[esi+ecx*4]       \
__asm        sub  eax,[edx+ecx*4]       \
__asm        jo   mvv30                 \
__asm        mov  [edi+ecx*4],eax       \
__asm        loop mvv20                 \
__asm        jmp  mvv40                 \
__asm mvv30: mov  er,EWOV               \
__asm mvv40:                            \
}

/* zv = u - yv, each of n elements of y subtracted from scalar u. */
#define MINUS1V(n,z,u,y)                \
{                                       \
__asm        mov  ecx,n                 \
__asm        mov  edi,z                 \
__asm        sub  edi,4                 \
__asm        mov  edx,u                 \
__asm        mov  esi,y                 \
__asm        sub  esi,4                 \
__asm m1v20: mov  eax,edx               \
__asm        sub  eax,[esi+ecx*4]       \
__asm        jo   m1v30                 \
__asm        mov  [edi+ecx*4],eax       \
__asm        loop m1v20                 \
__asm        jmp  m1v40                 \
__asm m1v30: mov  er,EWOV               \
__asm m1v40:                            \
}

/* zv = xv - v, scalar v subtracted from each of n elements of x. */
#define MINUSV1(n,z,x,v)                \
{                                       \
__asm        mov  ecx,n                 \
__asm        mov  edi,z                 \
__asm        mov  esi,x                 \
__asm        mov  edx,v                 \
__asm        sub  edi,4                 \
__asm        sub  esi,4                 \
__asm mv120: mov  eax,[esi+ecx*4]       \
__asm        sub  eax,edx               \
__asm        jo   mv130                 \
__asm        mov  [edi+ecx*4],eax       \
__asm        loop mv120                 \
__asm        jmp  mv140                 \
__asm mv130: mov  er,EWOV               \
__asm mv140:                            \
}

/* zv = xv - zv, computed in place over d elements. */
#define MINUSRV(d,z,x)                  \
{                                       \
__asm        mov  ecx,d                 \
__asm        mov  edi,z                 \
__asm        mov  esi,x                 \
__asm        sub  edi,4                 \
__asm        sub  esi,4                 \
__asm mrv20: mov  eax,[esi+ecx*4]       \
__asm        sub  eax,[edi+ecx*4]       \
__asm        jo   mrv30                 \
__asm        mov  [edi+ecx*4],eax       \
__asm        loop mrv20                 \
__asm        jmp  mrv40                 \
__asm mrv30: mov  er,EWOV               \
__asm mrv40:                            \
}

/* *z = x[0]-(x[1]-(x[2]-...)), folded from the last element; each step    */
/* computes element minus the running value.                               */
#define MINUSR(n,z,x)                   \
{                                       \
__asm        mov  ecx,n                 \
__asm        mov  edi,z                 \
__asm        mov  esi,x                 \
__asm        sub  esi,4                 \
__asm        xor  eax,eax               \
__asm mr20:  mov  edx,[esi+ecx*4]       \
__asm        sub  edx,eax               \
__asm        jo   mr30                  \
__asm        mov  eax,edx               \
__asm        loop mr20                  \
__asm        mov  [edi],eax             \
__asm        jmp  mr40                  \
__asm mr30:  mov  er,EWOV               \
__asm mr40:                             \
}

/* zv = alternating prefix sums x0, x0-x1, x0-x1+x2, ...  Unrolled by two  */
/* like PLUSP: each pair iteration adds one element and subtracts the      */
/* next; an odd trailing element is added.                                 */
#define MINUSP(n,z,x)                   \
{                                       \
__asm        mov  ecx,n                 \
__asm        mov  edi,z                 \
__asm        mov  esi,x                 \
__asm        xor  edx,edx               \
__asm        xor  eax,eax               \
__asm        mov  ebx,ecx               \
__asm        sar  ecx,1                 \
__asm mp20:  add  eax,[esi+edx*4]       \
__asm        jo   mp30                  \
__asm        mov  [edi+edx*4],eax       \
__asm        inc  edx                   \
__asm        sub  eax,[esi+edx*4]       \
__asm        jo   mp30                  \
__asm        mov  [edi+edx*4],eax       \
__asm        inc  edx                   \
__asm        loop mp20                  \
__asm        and  ebx,1                 \
__asm        jz   mp40                  \
__asm        add  eax,[esi+edx*4]       \
__asm        jo   mp30                  \
__asm        mov  [edi+edx*4],eax       \
__asm        jmp  mp40                  \
__asm mp30:  mov  er,EWOV               \
__asm mp40:                             \
}

/* zv = suffix scan of subtraction: z[k] = x[k] - z[k+1], from the end. */
#define MINUSS(n,z,x)                   \
{                                       \
__asm        mov  ecx,n                 \
__asm        mov  edi,z                 \
__asm        mov  esi,x                 \
__asm        sub  esi,4                 \
__asm        sub  edi,4                 \
__asm        xor  eax,eax               \
__asm ms20:  mov  edx,[esi+ecx*4]       \
__asm        sub  edx,eax               \
__asm        jo   ms30                  \
__asm        mov  eax,edx               \
__asm        mov  [edi+ecx*4],eax       \
__asm        loop ms20                  \
__asm        jmp  ms40                  \
__asm ms30:  mov  er,EWOV               \
__asm ms40:                             \
}


/* zv = xv * yv, element-wise over m elements. */
#define TYMESVV(m,z,x,y)                \
{                                       \
__asm        mov  ecx,m                 \
__asm        mov  edi,z                 \
__asm        mov  esi,x                 \
__asm        mov  edx,y                 \
__asm        sub  edi,4                 \
__asm        sub  esi,4                 \
__asm        sub  edx,4                 \
__asm tvv20: mov  eax,[esi+ecx*4]       \
__asm        imul eax,[edx+ecx*4]       \
__asm        jo   tvv30                 \
__asm        mov  [edi+ecx*4],eax       \
__asm        loop tvv20                 \
__asm        jmp  tvv40                 \
__asm tvv30: mov  er,EWOV               \
__asm tvv40:                            \
}

/* zv = u * yv, scalar u times each of n elements of y. */
#define TYMES1V(n,z,u,y)                \
{                                       \
__asm        mov  ecx,n                 \
__asm        mov  edi,z                 \
__asm        mov  edx,u                 \
__asm        mov  esi,y                 \
__asm        sub  edi,4                 \
__asm        sub  esi,4                 \
__asm t1v20: mov  eax,[esi+ecx*4]       \
__asm        imul eax,edx               \
__asm        jo   t1v30                 \
__asm        mov  [edi+ecx*4],eax       \
__asm        loop t1v20                 \
__asm        jmp  t1v40                 \
__asm t1v30: mov  er,EWOV               \
__asm t1v40:                            \
}

/* zv = xv * v, each of n elements of x times scalar v. */
#define TYMESV1(n,z,x,v)                \
{                                       \
__asm        mov  ecx,n                 \
__asm        mov  edi,z                 \
__asm        mov  esi,x                 \
__asm        mov  edx,v                 \
__asm        sub  edi,4                 \
__asm        sub  esi,4                 \
__asm tv120: mov  eax,[esi+ecx*4]       \
__asm        imul eax,edx               \
__asm        jo   tv130                 \
__asm        mov  [edi+ecx*4],eax       \
__asm        loop tv120                 \
__asm        jmp  tv140                 \
__asm tv130: mov  er,EWOV               \
__asm tv140:                            \
}

/* zv = xv * zv, computed in place over d elements. */
#define TYMESRV(d,z,x)                  \
{                                       \
__asm        mov  ecx,d                 \
__asm        mov  edi,z                 \
__asm        mov  esi,x                 \
__asm        sub  edi,4                 \
__asm        sub  esi,4                 \
__asm trv20: mov  eax,[esi+ecx*4]       \
__asm        imul eax,[edi+ecx*4]       \
__asm        jo   trv30                 \
__asm        mov  [edi+ecx*4],eax       \
__asm        loop trv20                 \
__asm        jmp  trv40                 \
__asm trv30: mov  er,EWOV               \
__asm trv40:                            \
}

/* *z = product of the n elements of x; *z is written only if no overflow. */
#define TYMESR(n,z,x)                   \
{                                       \
__asm        mov  ecx,n                 \
__asm        mov  edi,z                 \
__asm        mov  esi,x                 \
__asm        sub  esi,4                 \
__asm        mov  eax,1                 \
__asm tr20:  imul eax,[esi+ecx*4]       \
__asm        jo   tr30                  \
__asm        loop tr20                  \
__asm        mov  [edi],eax             \
__asm        jmp  tr40                  \
__asm tr30:  mov  er,EWOV               \
__asm tr40:                             \
}

/* zv = running (prefix) products of x; edx is the ascending index. */
#define TYMESP(n,z,x)                   \
{                                       \
__asm        mov  ecx,n                 \
__asm        mov  edi,z                 \
__asm        mov  esi,x                 \
__asm        xor  edx,edx               \
__asm        mov  eax,1                 \
__asm tp20:  imul eax,[esi+edx*4]       \
__asm        jo   tp30                  \
__asm        mov  [edi+edx*4],eax       \
__asm        inc  edx                   \
__asm        loop tp20                  \
__asm        jmp  tp40                  \
__asm tp30:  mov  er,EWOV               \
__asm tp40:                             \
}

/* zv = suffix products of x, accumulated from the last element. */
#define TYMESS(n,z,x)                   \
{                                       \
__asm        mov  ecx,n                 \
__asm        mov  edi,z                 \
__asm        mov  esi,x                 \
__asm        sub  esi,4                 \
__asm        sub  edi,4                 \
__asm        mov  eax,1                 \
__asm ts20:  imul eax,[esi+ecx*4]       \
__asm        jo   ts30                  \
__asm        mov  [edi+ecx*4],eax       \
__asm        loop ts20                  \
__asm        jmp  ts40                  \
__asm ts30:  mov  er,EWOV               \
__asm ts40:                             \
}

#endif

#if SY_64 /* win64 and linux64 asm routines */
#define OVF

/* External routines, one per macro below.  Each macro assigns the         */
/* routine's return value to er.  The routines themselves are not part of  */
/* this header.                                                            */
C asmplusvv(I,I*,I*,I*);
C asmminusvv(I,I*,I*,I*);
C asmtymesvv(I,I*,I*,I*);

C asmplus1v(I,I*,I,I*);
C asmminus1v(I,I*,I,I*);
C asmtymes1v(I,I*,I,I*);

C asmminusv1(I,I*,I*,I);

C asmplusr(I,I*,I*);
C asmminusr(I,I*,I*);
C asmtymesr(I,I*,I*);

C asmplusrv(I,I*,I*);
C asmminusrv(I,I*,I*);
C asmtymesrv(I,I*,I*);

C asmpluss(I,I*,I*);
C asmminuss(I,I*,I*);
C asmtymess(I,I*,I*);

C asmplusp(I,I*,I*);
C asmminusp(I,I*,I*);
C asmtymesp(I,I*,I*);

C asminnerprod(I,I*,I*,I*,I);
C asminnerprodx(I,I*,I,I*);

#define PLUSVV(m,z,x,y)   er=asmplusvv(m,z,x,y)
#define MINUSVV(m,z,x,y)  er=asmminusvv(m,z,x,y)
#define TYMESVV(m,z,x,y)  er=asmtymesvv(m,z,x,y)

#define PLUSRV(d,z,x)     er=asmplusrv(d,z,x)
#define MINUSRV(d,z,x)    er=asmminusrv(d,z,x)
#define TYMESRV(d,z,x)    er=asmtymesrv(d,z,x)

#define PLUS1V(n,z,u,y)   er=asmplus1v(n,z,u,y)
#define MINUS1V(n,z,u,y)  er=asmminus1v(n,z,u,y)
#define TYMES1V(n,z,u,y)  er=asmtymes1v(n,z,u,y)

/* + and * commute, so the vector-scalar forms reuse the scalar-vector     */
/* routines with the operands swapped; - needs its own routine.            */
#define PLUSV1(n,z,x,v)   PLUS1V(n,z,v,x)
#define MINUSV1(n,z,x,v)  er=asmminusv1(n,z,x,v)
#define TYMESV1(n,z,x,v)  TYMES1V(n,z,v,x)

#define PLUSR(n,z,x)      er=asmplusr(n,z,x)
#define MINUSR(n,z,x)     er=asmminusr(n,z,x)
#define TYMESR(n,z,x)     er=asmtymesr(n,z,x)

#define PLUSS(n,z,x)      er=asmpluss(n,z,x)
#define MINUSS(n,z,x)     er=asmminuss(n,z,x)
#define TYMESS(n,z,x)     er=asmtymess(n,z,x)

#define PLUSP(n,z,x)      er=asmplusp(n,z,x)
#define MINUSP(n,z,x)     er=asmminusp(n,z,x)
#define TYMESP(n,z,x)     er=asmtymesp(n,z,x)

#endif

#endif

/* C routines for platforms without asm support */
#ifndef OVF

/* DI is the floating type used to form products and alternating sums     */
/* before they are range-checked against IMIN and IMAX.                    */
/* NOTE(review): this is only exact if DI represents every value of I      */
/* exactly; where LD is no wider than D on a 64-bit build that does not    */
/* hold - confirm for the target compiler.                                 */
#if SY_64
#define DI LD
#else
#define DI D
#endif

/* These macros rely on DO, BOV, I, B, D/LD, IMIN and IMAX from elsewhere. */
/* BOV(cond) is presumably the overflow bail-out (set er and leave the     */
/* loop) - verify against its definition.  Many of the macros advance the  */
/* pointer arguments x, y and z as they go, so callers must pass           */
/* modifiable pointer variables.                                           */
/*                                                                         */
/* Additions and subtractions detect overflow from operand and result      */
/* signs (p holds "is negative").  Products and alternating sums are       */
/* formed in DI and range-checked BEFORE being converted to I, because     */
/* converting an out-of-range floating value to an integer is undefined.   */

#define PLUSVV(m,z,x,y)   {B p;  DO(m, p=0>*x; *z=*x+*y; BOV(p==0>*y&&p!=0>*z); z++; x++; y++;);}
#define MINUSVV(m,z,x,y)  {B p;  DO(m, p=0>*x; *z=*x-*y; BOV(p!=0>*y&&p!=0>*z); z++; x++; y++;);}
#define TYMESVV(m,z,x,y)  {DI t; DO(m, t=*x*(DI)*y; BOV(t<IMIN||IMAX<t); *z=(I)t; z++; x++; y++;);}

#define PLUS1V(n,z,u,y)   {B p=0>u;  DO(n, z[i]=u+y[i]; BOV(p==0>y[i]&&p!=0>z[i]););}
#define MINUS1V(n,z,u,y)  {B p=0>u;  DO(n, z[i]=u-y[i]; BOV(p!=0>y[i]&&p!=0>z[i]););}
#define TYMES1V(n,z,u,y)  {DI d=u,t; DO(n, t=d*y[i]; BOV(t<IMIN||IMAX<t); z[i]=(I)t;);}

#define PLUSV1(n,z,x,v)   PLUS1V(n,z,v,x)
#define TYMESV1(n,z,x,v)  TYMES1V(n,z,v,x)
#define MINUSV1(n,z,x,v)  {B p=0>v;  DO(n, z[i]=x[i]-v; BOV(p!=0>x[i]&&p==0>z[i]););}

/* Prefix scans.  MINUSP alternates add and subtract (p toggles each step). */
#define PLUSP(n,z,x)      {B p;I s=0;     DO(n, p=0>s; *z=s+=*x; BOV(p==0>*x&&p!=0>s); z++; x++;);}
#define MINUSP(n,z,x)     {B p=0;DI t=0;  DO(n, t=p?t-*x:t+*x; BOV(t<IMIN||IMAX<t); *z=(I)t; z++; x++; p=!p;);}
#define TYMESP(n,z,x)     {DI t=1;        DO(n, t*=*x; BOV(t<IMIN||IMAX<t); *z=(I)t; z++; x++;);}

/* Reductions.  For MINUSR and TYMESR the final store is skipped when the  */
/* accumulated value is out of range, so (I)t is never evaluated on an     */
/* unrepresentable value.                                                  */
#define PLUSR(n,z,x)      {B p;I s=0;     DO(n, p=0>s; s+=*x; BOV(p==0>*x&&p!=0>s); x++; ); *z=s;}
#define MINUSR(n,z,x)     {B p=0;DI t=0;  DO(n, t=p?t-*x:t+*x; BOV(t<IMIN||IMAX<t); x++; p=!p;); if(!(t<IMIN||IMAX<t))*z=(I)t;}
#define TYMESR(n,z,x)     {DI t=1;        DO(n, t*=*x; BOV(t<IMIN||IMAX<t); x++; ); if(!(t<IMIN||IMAX<t))*z=(I)t;}

/* In-place combination of x into z. */
#define PLUSRV(d,z,x)     {B p;  DO(d, p=0>*z; *z+=*x; BOV(p==0>*x&&p!=0>*z); x++; z++;);}
#define MINUSRV(d,z,x)    {DI t; DO(d, t=*x-(DI)*z; BOV(t<IMIN||IMAX<t); *z=(I)t; x++; z++;);}
#define TYMESRV(d,z,x)    {DI t; DO(d, t=*x*(DI)*z; BOV(t<IMIN||IMAX<t); *z=(I)t; x++; z++;);}

/* Suffix scans: x and z are first moved past the end, then walked         */
/* backwards one element per iteration.                                    */
#define PLUSS(n,z,x)      {B p;I s=0; x+=n; z+=n; DO(n, --x; p=0>s; *--z=s+=*x; BOV(p==0>*x&&p!=0>s););}
#define MINUSS(n,z,x)     {B p;I s=0; x+=n; z+=n; DO(n, --x; p=0>s; *--z=s=*x-s; BOV(p!=0>*x&&p==0>s););}
#define TYMESS(n,z,x)     {DI t=1;    x+=n; z+=n; DO(n, --x; --z; t*=*x; BOV(t<IMIN||IMAX<t); *z=(I)t;);}

#endif