// major polynomial functions #define DEBUG 0 #define M_INT long long int #define LONG long long int #define ULONG unsigned long long int #define UINT32 unsigned int #define UINT64 unsigned long long typedef enum{false, true} bool; #include "int128g.c" #include #include #include #include LONG * array(int n) { return (LONG *) malloc(n*sizeof(LONG)); } LONG * array64s( LONG n ) { LONG *A; if(n<0) { printf("array64s: n=%lld must be >= 0\n"); exit(1); } n = n * sizeof( LONG ); A = (LONG *) malloc( n ); if( A==0 ) { printf("array64s: malloc failed\n"); exit(1); } return A; } int * arrayI(int n) { return (int *) malloc(n*sizeof(int)); } extern ULONG seed, mult; LONG rand64s(LONG p) { LONG x,y; seed = mult*seed; x = seed >> 32; seed = mult*seed; y = seed >> 32; x = (x<<31) | y; x = x % p; return(x); } /******************************************************************************************/ /* Roman's assembler utilities */ /******************************************************************************************/ #define WORDSIZE 64 typedef unsigned long M_UINT; /* [x0:x1] := a*b */ #define MUL211(x0,x1,a,b) __asm__(\ " mulq %3 \n\t" \ : "=a"(x0), "=d"(x1) : "0"(a), "r"(b) : "cc") /* x0 := [x0:x1] / a */ /* x1 := [x0:x1] % a */ #define DIV21H(x0,x1,a) __asm__(\ " divq %2 \n\t" \ : "=a"(x0), "=d"(x1) : "r"(a), "0"(x0), "1"(x1) : "cc") /* floor of log2 */ M_UINT ilog2new(M_UINT x) { M_UINT y; for (y=0; x >>= 1; y++); return y; } /* reciprocal of full word */ M_UINT recipnew(M_UINT p) { M_UINT u0, u1; u1 = -(p+1); u0 = -1; DIV21H(u0,u1,p); return u0; } /* compute reciprocal and shift */ void init(M_UINT d, M_UINT *r, M_UINT *s) { int l; l = ilog2new(d); if( d != (M_UINT)1 << l ) l++; d = d << (WORDSIZE-l); *r = recipnew(d); *s = l-1; } /* compute remainder via reciprocal */ inline M_UINT rem(M_UINT n, M_UINT d, M_UINT r, M_UINT s) { M_UINT u0,u1,q; MUL211(u0,u1,n,r); q = (n - u1) >> 1; q = (q + u1) >> s; return n - q*d; } /* z += a1:a0 */ #define zadd(z,a0,a1) __asm__(\ " addq %4, %0 \n\t" \ " adcq %5, %1 \n\t" \ : "=&r"(z[0]), "=&r"(z[1]) : "0"(z[0]), "1"(z[1]), "r"(a0), "r"(a1)) /* z -= a1:a0 */ #define zsub(z,a0,a1) __asm__(\ " subq %4, %0 \n\t" \ " sbbq %5, %1 \n\t" \ : "=&r"(z[0]), "=&r"(z[1]) : "0"(z[0]), "1"(z[1]), "r"(a0), "r"(a1)) /* z = a*b */ #define zmul(z,a,b) __asm__(\ " mulq %%rdx \n\t" \ : "=a"(z[0]), "=d"(z[1]) : "a"(a), "d"(b)) /* z += a*b */ #define zfma(z,a,b) do { \ unsigned long u,v; \ __asm__( \ " mulq %%rdx \n\t" \ " addq %%rax, %0 \n\t" \ " adcq %%rdx, %1 \n\t" \ : "=&r"(z[0]), "=&r"(z[1]), "=a"(u), "=d"(v) : "0"(z[0]), "1"(z[1]), "a"(a), "d"(b));\ } while (0) /* z -= a*b */ #define zfms(z,a,b) do { \ unsigned long u,v; \ __asm__( \ " mulq %%rdx \n\t" \ " subq %%rax, %0 \n\t" \ " sbbq %%rdx, %1 \n\t" \ : "=&r"(z[0]), "=&r"(z[1]), "=a"(u), "=d"(v) : "0"(z[0]), "1"(z[1]), "a"(a), "d"(b));\ } while (0) /* z[0] = z % p */ /* z[1] = z / p */ /* quotient can overflow */ #define zdiv(z,p) __asm__(\ " divq %4 \n\t" \ : "=a"(z[1]), "=d"(z[0]) : "a"(z[0]), "d"(z[1]), "r"(p)) /* z = z % p safe */ #define zmod(z,p) __asm__(\ " divq %4 \n\t" \ " xorq %0, %0 \n\t" \ : "=a"(z[1]), "=d"(z[0]) : "a"(z[0]), "d"(z[1] < p ? z[1] : z[1] % p), "r"(p)) /* z = z << s */ #define zshl(z,s) __asm__(\ " shldq %%cl, %0, %1 \n\t" \ " shlq %%cl, %0 \n\t" \ : "=&r"(z[0]), "=&r"(z[1]) : "0"(z[0]), "1"(z[1]), "c"(s)) typedef struct P_INT P_INT; struct P_INT { M_INT p; /* prime */ M_INT s; /* shift */ M_INT d; /* normalized */ M_INT v; /* reciprocal */ }; /* initialize mod p struct */ inline P_INT modinit64s(M_INT p) { P_INT m; /* most significant bit location 63..0 */ __asm__ __volatile__("bsrq %0, %0" : "=r"(m.s) : "0"(p)); m.p = p; m.s = 64-m.s-1; m.d = p << m.s; /* reciprocal: (2^128-1)/n - 2^64 */ /* [(2^64-n-1)*2^64 + (2^64-1)]/n */ __asm__ __volatile__(\ " movq %1, %%rdx \n\t" \ " addq $1, %%rdx \n\t" \ " negq %%rdx \n\t" \ " movq $-1, %%rax \n\t" \ " divq %1 \n\t" \ : "=a"(m.v) : "r"(m.d) : "%rdx"); return m; } inline M_INT mulmodinv64s(M_INT a, M_INT b, P_INT m) { M_INT q, r; /* 0: compute a*b = u1:u0, lshift by s, r9:r8 = (u1+1):u0 */ /* 1: compute u1*v = q1:q0, q1:q0 += (u1+1):u0, in rdx:rax */ /* 2: compute r = u0-q1*d in r8 */ /* 3: if r > q0 then q1 = q1-1, r = r+d (possible) */ /* 4: if r >= d then q1 = q1+1, r = r-d (unlikely) */ __asm__ __volatile__(\ "0: mulq %%rdx \n\t" \ " shldq %%cl, %%rax, %%rdx \n\t" \ " shlq %%cl, %%rax \n\t" \ " leaq 1(%%rdx), %%r9 \n\t" \ " movq %%rax, %%r8 \n\t" \ "1: movq %%rdx, %%rax \n\t" \ " mulq %5 \n\t" \ " addq %%r8, %%rax \n\t" \ " adcq %%r9, %%rdx \n\t" \ "2: imulq %4, %%rdx \n\t" \ " subq %%rdx, %%r8 \n\t" \ "3: cmpq %%rax, %%r8 \n\t" \ " jbe 4f \n\t" \ " addq %4, %%r8 \n\t" \ " decq %%rax \n\t" \ "4: cmpq %4, %%r8 \n\t" \ " jae 5f \n\t" \ " jmp 6f \n\t" \ "5: subq %4, %%r8 \n\t" \ " incq %%rax \n\t" \ "6: shrq %%cl, %%r8 \n\t" \ " movq %%r8, %%rdx \n\t" \ : "=a"(q), "=d"(r) : "0"(a), "1"(b), "r"(m.d), "r"(m.v), "c"(m.s) : "%r8", "%r9", "%r10"); /* q = quotient */ return r; } //#define UINT64 unsigned long long // //typedef struct { // UINT64 s; /* shift */ // UINT64 v; /* reciprocal of d */ // UINT64 d0; /* divisor shifted up */ // UINT64 d1; //} recint; // recint recip1(UINT64 p); ULONG mulrec64(ULONG a, ULONG b, recint v); /******************************************************************************************/ /* Zp utilities */ /******************************************************************************************/ #define add32s add32util #define sub32s sub32util #define max32s max32util #define min32s min32util #define add64s add64util #define sub64s sub64util #define mul64s mul64util #define neg64s neg64util inline int add32s(int a, int b, int p) { int t; t = (a-p)+b; t += (t>>31) & p; return t; } inline int sub32s(int a, int b, int p) { int t; t = (a-b); t += (t>>31) & p; return t; } inline int mul32s(int a, int b, int p) { int t; t = (LONG) a * b % p; return t; } inline int neg32s(int a, int p) { return (a==0) ? 0 : p-a; } inline int submul32s(int a, int b, int c, int p) { int t; t = ((LONG) p+a-b)*c % p; return t; } inline int muladd32s(int a, int b, int c, int p) { int t; t = ((LONG) a*b+c) % p; return t; } inline int max32s(int a, int b) { if(a>b) return a; else return b; } inline int min32s(int a, int b) { if(a>63) & p; return t; } inline LONG sub64s(LONG a, LONG b, LONG p) { LONG t; t = a-b; t += (t>>63) & p; return t; } inline LONG mul64s(LONG a, LONG b, LONG p) { LONG q, r; __asm__ __volatile__( \ " mulq %%rdx \n\t" \ " divq %4 \n\t" \ : "=a"(q), "=d"(r) : "0"(a), "1"(b), "rm"(p)); return r; } inline LONG neg64s(LONG a, LONG p) { LONG t; t = -a; t += (t>>63) & p; return t; } inline LONG submul64s(LONG a, LONG b, LONG c, LONG p) { return mul64s(sub64s(a,b,p),c,p); } // This is faster. inline LONG muladd64s(LONG a, LONG b, LONG c, LONG p, recint P) { return add64s(mulrec64(a,b,P),c,p); } inline LONG max64s(LONG a, LONG b) { if(a>b) return a; else return b; } inline LONG min64s(LONG a, LONG b) { if(a>63) & p; // protect from bad input if( n==0 ) return 1; if( n==1 ) return a; for( r=1, s=a; n>0; n /= 2 ) { if( n & 1 ) r = mul64s(r,s,p); s = mul64s(s,s,p); } return r; } /* a^n mod p assuming 0 <= a < p < 2^63 */ LONG powmodrec64s( LONG a, LONG n, LONG p, recint P ) { LONG r,s; a += (a>>63) & p; // protect from bad input if( n==0 ) return 1; if( n==1 ) return a; for( r=1, s=a; n>0; n /= 2 ) { if( n & 1 ) r = mulrec64(r,s,P); s = mulrec64(s,s,P); } return r; } /******************************************************************************************/ /* Z utilities */ /******************************************************************************************/ void evalbeta( LONG *A, int d, LONG beta, recint P ) { int i; if( d<0 ) return; A[0] = 1; if( d==0 ) return; A[1] = beta; for( i=2; i<=d; i++ ) A[i] = mulrec64(beta,A[i-1],P); return; } LONG gcd64s( LONG a, LONG b ) { LONG d,r,q,r1,c1,d1; if( a<0 ) a = -a; if( b<0 ) b = -b; while( b!=0 ) { r = a % b; a = b; b = r; } return a; } /******************************************************************************************/ /* Polynomial utilities */ /******************************************************************************************/ void vecfill64s( LONG x, LONG *A, int n ) { int i; for( i=0; i0; i-- ) if( A[i]!=0 ) printf("%lld*x^%d+",A[i],i); /* print an array [a0,a1,...,ad] in form ad*x^d+...+a1*x+a0 */ printf("%lld",A[0]); return; } void polrev64s( LONG *f, LONG d ) { LONG i; LONG t; for( i=0; i<=d/2; i++ ) { t = f[i]; f[i] = f[d-i]; f[d-i] = t; } return; } //print a 2D array in 2 variables void polprintbivar64s( LONG *A, int dx, int dy, char * x, char * y){ int i,j; if(dx == -1 || dy == -1) { printf("0;\n"); return; } for (i=dy;i>=0;i--){ for(j=dx;j>=0;j--){ if(A[i*(dx+1)+j] != 0 && i*(dx+1)+j != 0){ printf("%lld*%s^%d*%s^%d+",A[i*(dx+1)+j],x,j,y,i); } } } printf("%lld",A[0]); return; } /* for C = [c1,c2,...,cn] and X = [e1,e2,...,en] print c1*x^e1+...+cn*x^en; */ void polsparseprint64s( LONG *C, LONG *X, int n ) { int i; if( n==0 ) { printf("0;\n"); return; } for( i=n-1; i>0; i-- ) printf("%lld*x^%lld+",C[i],X[i]); printf("%lld*x^%lld;\n",C[i],X[i]); return; } void pol3sparseprint64s( LONG *C, LONG *X, int t ) { int i,dx,dy,dz,mask; LONG m; if( t==0 ) { printf("0;\n"); return; } mask = (1<<21)-1; for( i=0; i>21; dy = m & mask; dx = m>>21; printf("%lld*x^%d*y^%d*z^%d",C[i],dx,dy,dz); if( i!=t-1 ) printf("+"); else printf(";\n"); } return; } void polnsparseprint64s( LONG *C, LONG *X, int t, int n ) { int numbits,i,j; LONG m,d,mask,D[63]; if( t==0 ) { printf("0;\n"); return; } numbits = 63/n; mask = (1LL<> numbits; } for( j=1; j<=n; j++ ) { d = D[n-j]; if( d!=0 ) printf("*x[%d]^%lld",j,d); } if( i!=t-1 ) printf("+"); } return; } void pol2sort64s( LONG *C, LONG *X, int n ) { LONG x,c; if( n<2 ) return; n--; x = X[n]; c = C[n]; pol2sort64s( C, X, n ); while( n>0 && X[n-1]>x ) { X[n] = X[n-1]; C[n] = C[n-1]; n--; } X[n] = x; C[n] = c; return; } LONG pol2simpNoSort64s( LONG *C, LONG *X, int n, LONG p ) { LONG i,j,k,c,x; k = 0; i = 0; for( k=0,i=0; i=0;i--){ if(A[i]!=0){ return i; } } return -1; } LONG poleval64s(LONG *a, int d, LONG x, LONG p) { int i; LONG r; recint P; if( d==-1 ) return 0; P = recip1(p); for( r=a[d],i=d-1; i>=0; i-- ) r = muladd64s(r, x, a[i], p, P); return r; } LONG evalhorner( LONG *f, LONG d, LONG x, recint P, LONG p ) { // f(x) mod p int i; LONG c; if( d<0 ) return 0; c = f[d]; for( i=d-1; i>=0; i-- ) c = add64s(f[i],mulrec64(c,x,P),p); return c; } void newt2stand( LONG *V, int d, LONG *B, recint P, LONG p ) { int i,j; LONG alpha; B[0] = V[d]; for( i=0,alpha=d-1; alpha>=0; alpha--,i++ ) { // B = (x-alpha) B mod p B[i+1] = B[i]; for( j=i; j>0; j-- ) B[j] = sub64s(B[j-1],mulrec64(alpha,B[j],P),p); B[0] = sub64s(V[alpha],mulrec64(alpha,B[0],P),p); //printf(" B = "); vecprint64s(B,i+2); printf("\n"); } return; } int newtoncofs( LONG *U, int d, LONG *minv, LONG *V, recint P, LONG p ) { // Let V = V[0]+(x-0)V[1]+(x-1)(x-0)V[2]+(x-2)(x-1)(x-0)V[3]+...+(x-d-1)...(x-1)(x-0)V[d] // Let V(i) = U[i] so that // V[i]=(U[i] - (V[0]+iV[1]+i(i-1)V[2]+...+i!V[i-1])) / i! which in horner form is // V[i]=(U[i] - (V[0]+i(V[1]+(i-1)(V[2]+...+3(V[i-2]+2(V[i-1]))))) / i! int i,j; LONG t,m; if( d<0 ) return d; V[0] = U[0]; for( i=1; i<=d; i++ ) { t = V[i-1]; for( m=2,j=i-2; j>=0; j--,m++ ) t = add64s( mulrec64(m,t,P), V[j], p ); t = mulrec64( sub64s(U[i],t,p), minv[i], P ); V[i] = t; } while( d>=0 && V[d]==0 ) d--; return d; } int polnsparsedeg64s( LONG *X, int t, int n, int xi ) { int numbits,j,d; LONG x,mask,shift; // degree(f,x[i]) if( xi<1 || xi>n ) return -1; numbits = 63/n; mask = (1LL<> shift; if( xi>1 ) x = x & mask; if( x>d ) d = x; } return d; } void polnsparsevecdeg64s( LONG *X, int t, int n, int *D ) { int numbits,i,j,d,e; LONG x,y,mask,shift; // D[i] = degree(f,x[i]) for 0<=iD[i] ) D[i] = d; x = x >> numbits; } } return; } LONG polnsparseeval64s(LONG *C, LONG *X, int t, LONG *A, int n, LONG p ) { // Evaluate f(A[0],A[1],...,A[n-1]) mod p where f(x1,x2,...,xn) = sum( c[i]*X[i], i=0..t-1 ) int numbits,i,j; LONG mask,s,x,u; recint P; P = recip1(p); numbits = 63/n; mask = (1LL<> numbits; } s = add64s(s,u,p); } return s; } int vec2partition64s(LONG *D, LONG *C, int n ) { int i,j,m; LONG x,t,c; m = n/2; x = D[m]; D[m] = D[0]; c = C[m]; C[m] = C[0]; for( i=1,j=n-1; j>=i ; ) { if( D[i]40 ) { // QUICKSORT i = vec2partition64s(D, C, n); sort2vec64s(D,C,i); sort2vec64s(D+i+1,C+i+1,n-i-1); return; } sort2vec64s(D,C,n-1); x = D[n-1]; c = C[n-1]; for( i=n-1; i>0 && x> numbits; } Y[i] = x; B[i] = u; } sort2vec64s(Y,B,t); k=0; i=0; while( i> s; x = x & mask; //u = powmod64s(beta,x,p); for( u=1,j=1; x>0; x=x>>1,j++ ) if( x&1 ) u = mulrec64(u,pow[j],P); B[i] = mulrec64(C[i],u,P); } sort2vec64s(Y,B,t); k=0; i=0; while( i0; i-- ) { d = a[i]-a[i-1]; if( d<=0 ) return 0; // should not happen if( d==1 ) y = x; else y = powmodrec64s(x,d,p,P); //r = (y * r + c[i-1]) % p; r = muladd64s(y,r,c[i-1],p,P); } y = powmodrec64s(x,a[0],p,P); r = mulrec64(y,r,P); return r; } LONG evalnext2( LONG *A, LONG *X, LONG *G, int na, LONG *B, LONG *C, LONG *Y, LONG *Z, LONG p, int MAX ) { int i,j,k,m,n; LONG c,d,t,x; recint P; P = recip1(p); k = m = 0; for( i=0; iMAX ) printf("ERROR in evalnext2: k out of range k=%d max=%d\n",k,MAX); if( DEBUG ) if ( m>MAX ) printf("ERROR in evalnext2: m out of range m=%d max=%d\n",m,MAX); t = ((LONG) k << 32) + m; return t; } /******************************************************************************/ /** Polynomial routines in Zp[x] **/ /******************************************************************************/ int poladd64s(LONG *a, LONG *b, LONG *c, int da, int db, LONG p) { // c = a + b mod p int i,m; m = min32s(da,db); for( i=0; i<=m; i++ ) c[i] = add64s(a[i],b[i],p); if( da==db ) { while( da>=0 && c[da]==0 ) da--; return da; } if( da=0 && c[da]==0 ) da--; return da; } if( da>db ) { for ( i=db+1; i<=da; i++ ) c[i] = a[i]; return da; } for( i=da+1; i<=db; i++ ) c[i] = mulrec64(alpha,b[i],P); return db; } int polsub64s(LONG *a, LONG *b, LONG *c, int da, int db, LONG p) { // c = a-b mod p int i,m; m = min32s(da,db); //if( da<0 ) printf("SUB da=%d\n",da); //if( db<0 ) printf("SUB db=%d\n",db); for( i=0; i<=m; i++ ) c[i] = sub64s(a[i],b[i],p); if( da==db ) { while( da>=0 && c[da]==0 ) da--; return da; } if( da>db ) { for ( i=db+1; i<=da; i++ ) c[i] = a[i]; return da; } else { for( i=da+1; i<=db; i++ ) c[i] = neg64s(b[i],p); return db; } } int polsubsca64s(LONG *a, LONG *b, LONG *c, int da, int db, LONG alpha, LONG p, recint P) { // c = a - alpha b mod p -- also supports a = a - alpha b mod p int i,m; m = min32s(da,db); for( i=0; i<=m; i++ ) c[i] = sub64s(a[i],mulrec64(alpha,b[i],P),p); if( da==db ) { while( da>=0 && c[da]==0 ) da--; return da; } if( da>db ) { for ( i=db+1; i<=da; i++ ) c[i] = a[i]; return da; } for( i=da+1; i<=db; i++ ) c[i] = neg64s(mulrec64(alpha,b[i],P),p); return db; } /* compute C(x) = A(x)^2 mod p and return deg(C) */ /* we allow C to overwrite A i.e. polsqr64s(A,A,d,p) */ int polsqr64s( LONG * A, LONG * C, int d, LONG p ) { int i,k,m; ULONG z[2]; if( d<0 ) return d; for( k=2*d; k>=0; k-- ) { m = min32s(k,d); i = max32s(0,k-d); z[0] ^= z[0]; // = 0? z[1] ^= z[1]; // = 0? while( i=p ) z[1] -= p; zfma(z,A[i++],A[m--]); } if( i=p ) z[1] -= p; } zadd(z,z[0],z[1]); if( z[1]>=p ) z[1] -= p; if( i==m ) zfma(z,A[i],A[i]); zmod(z,p); C[k] = z[0]; } //for( dc = 2*d; dc>=0 && C[dc]==0; dc-- ); // Why is this loop here? Z_p has no zero-divisors. return( 2*d ); } int min(LONG a, LONG b){ if(ab){return a;} else {return b;} } int polmul32s( LONG * A, LONG * B, LONG * C, LONG da, LONG db, LONG p ) { LONG dc,i,j,k,m; LONG t,M; if( da<0 || db<0 ) return -1; //if( A==B ) return polsqr32s(A,C,da,p); M = ((LONG) p)*p*2; dc = da+db; for( k=dc; k>=0; k-- ) { m = min(k,da); i = max(0,k-db); t = M; while( i>63) & M; } if( i==m ) { t -= ((LONG) A[i])*B[k-i]; t += (t>>63) & M; } t = -t; t += (t>>63) & M; t = t % p; C[k] = t; } return( dc ); } /* compute C(x) = A(x) * B(x) mod p and return deg(C) */ /* we allow C to overwrite either A or B i.e. polmul64s(A,B,A,da,db,p) */ int polmul64s( LONG * A, LONG * B, LONG * C, int da, int db, LONG p) { int i,k,m; int overflow; //recint P; P = recip1(p); ULONG z[2]; if( da<0 || db<0 ) return -1; int dc = da+db; for( k=dc; k>=0; k-- ) { i = max32s(0,k-db); m = min32s(k,da); //{ LONG t; // //for( t=0; i<=m; i++ ) t = add64s(t,mul64s(A[i],B[k-i],p),p); // for( t=0; i<=m; i++ ) t = add64s(t,mulrec64(A[i],B[k-i],P),p); // C[k] = t; //} z[0] = 0; z[1] = 0; //if( p < 1LL << 31 ) { // can't overflow - 20% faster on a Core i7 2600 if( p < 1LL << 50 ) { // can't overflow - 20% faster on a Core i7 2600 #if p < 2^50 while( i=p ) z[1] -= p; zfma(z,A[i],B[k-i]); i++; zfma(z,A[i],B[k-i]); i++; if( z[1]>=p ) z[1] -= p; zfma(z,A[i],B[k-i]); i++; } while( i<=m ) { zfma(z,A[i],B[k-i]); i++; if( z[1]>=p ) z[1] -= p; } } zmod(z,p); C[k] = z[0]; } return( dc ); } /* divide A by B and put the remainder and quotient in A */ /* return the degree of the remainder */ int poldiv64s( LONG * A, LONG * B, int da, int db, LONG p ) { int dq,dr,k,j,m; LONG t,inv; ULONG z[2]; //recint P; P = recip1(p); if( db<0 ) { printf("division by zero\n"); exit(1); } if( da=0; k-- ) { m = min32s(dr,k); j = max32s(0,k-dq); //for( t=0; j<=m; j++ ) t = add64s(t,mul64s(B[j],A[k-j+db],p),p); //for( t=0; j<=m; j++ ) t = add64s(t,mulrec64(B[j],A[k-j+db],P),p); //t = sub64s(A[k],t,p); z[0] = 0; z[1] = 0; //if( p< 1LL << 31 ) { // can't overflow - 20% faster on a Core i7 2600 if( p< 1LL << 50 ) { // can't overflow - 20% faster on a Core i7 2600 while( j=p ) z[1] -= p; zfma(z,B[j],A[k-j+db]); j++; zfma(z,B[j],A[k-j+db]); j++; if( z[1]>=p ) z[1] -= p; zfma(z,B[j],A[k-j+db]); j++; } while( j<=m ) { zfma(z,B[j],A[k-j+db]); j++; if( z[1]>=p ) z[1] -= p; } } zmod(z,p); t = sub64s(A[k],z[0],p); // t = A[k] - z[0]; t += (t>>63) & p; if( k>=db && inv!=1 ) t = mul64s(t,inv,p); A[k] = t; } while( dr>=0 && A[dr]==0 ) dr--; return( dr ); } /* compute gcd(A,B) and put gcd in A and return it's degree */ int polsubmulP( LONG *A, LONG *B, LONG a, LONG b, int dA, int dB, LONG p, recint P ) { // compute A = A - (ax+b) B efficiently LONG s,t; int i, d; if( dB==-1 ) return dA; // B = 0 d = dA; // if deg(A) <= deg(B) then pad A with zeroes while( dA<=dB ) A[++dA] = 0; // constant term is special t = mulrec64(b,B[0],P); A[0] = sub64s(A[0],t,p); //for( i=1; i<=dB; i++ ) { t = mul64s(a,B[i-1],p); t = add64s(t,mul64s(b,B[i],p),p); A[i] = sub64s(A[i],t,p); } for( i=1; i<=dB; i++ ) { t = mulrec64(a,B[i-1],P); t = add64s(t,mulrec64(b,B[i],P),p); A[i] = sub64s(A[i],t,p); } // update leading term from B t = mulrec64(a,B[dB],P); A[dB+1] = sub64s(A[dB+1],t,p); // compute and return degree while( dA>=0 && (A[dA]==0 || A[dA]==p) ) dA--; if( dA==d ) printf("polsubmul failure: dAin=%d dAout=%d\n",d,dA); return dA; } int polsubmul( LONG *A, LONG *B, LONG a, LONG b, int dA, int dB, LONG p, M_UINT recip, M_UINT shift ) { // compute A = A - (ax+b) B efficiently LONG t; int i; ULONG z[2]; if( dB==-1 ) return dA; // B = 0 z[0] = z[1] = 0LL; // if deg(A) <= deg(B) then pad A with zeroes while( dA<=dB ) A[++dA] = 0; // constant term is special t = mul64s(b,B[0],p) ; A[0] = sub64s(A[0],t,p); for( i=1; i<=dB; i++ ) { zmul(z,a,B[i-1]); zfma(z,b,B[i]); zmod(z,p); t = A[i]-z[0]; A[i] = t + ((t>>63)&p); } // update leading term from B t = mul64s(a,B[dB],p); A[dB+1] = sub64s(A[dB+1],t,p); // compute and return degree while( dA>=0 && (A[dA]==0 || A[dA]==p) ) dA--; return dA; } /* compute gcd(A,B) and put gcd in A and return it's degree */ /* Both A and B are destroyed */ int polgcd64s( LONG * A, LONG * B, int da, int db, LONG p ) { int dr; LONG *C, *D, *R, u, a, b; recint P; if( db<0 ) { printf("division by zero\n"); exit(1); } P = recip1(p); C = A; D = B; if( da0 && da-db==1 ) { // normal case //if( 0 ) { u = modinv64s(D[db],p); a = mulrec64(C[da],u,P); b = mulrec64(a,D[db-1],P); b = mulrec64(u,sub64s(C[da-1],b,p),P); // quotient = a x + b dr = polsubmulP(C,D,a,b,da,db,p,P); // C = C - (a x + b) D //dr = polsubmul(C,D,a,b,da,db,p,1,1); // C = C - (a x + b) D } else dr = poldiv64s(C,D,da,db,p); if( dr<0 ) { /* D|C so gcd(A,B)=D */ if( D!=A ) polcopy64s(D,db,A); monic64s( A, db, p ); return db; } R = C; C = D; D = R; da = db; db = dr; //printf("da=%d db=%d\n",da,db); } } void polgcdext64s( LONG *A, LONG *B, int da, int db, LONG *G, LONG *S, LONG *T, int *dG, int *dS, int *dT, LONG *W, LONG p ) { // Solve S A + T B = G = monic gcd(A,B) for G,S,T in Zp[x] // The arrays A and B are used for the remainder sequence so are destroyed // G,S,T must be of size max(da+1,db+1) // W is working storage of size 2max(da+1,db+1) // if S==0 or T==0 then S (and/or T) are not computed int m,dr,ds,dt,dq,ds1,ds2,dt1,dt2; LONG a,b,u, k; LONG *q,*r,*r1,*r2,*s,*s1,*s2,*t,*t1,*t2; //P_INT P = modinit(p); recint P = recip1(p); //printf("p = %lld da=%d db=%d\n",p,da,db); //printf("GCDEX: A := "); polprint64s(A,da); printf(";\n"); //printf("GCDEX: B := "); polprint64s(B,db); printf(";\n"); //if( S!=NULL ) printf("S"); //if( T!=NULL ) printf("T"); //printf("\n"); if( da<0 || db<0 ) { printf("inputs must be non-zero\n"); exit(1); } m = max32s(da+1,db+1); r1 = A; r2 = B; if(S) { s1 = S; s2 = W; s1[0]=1; ds1=0; ds2=-1; } if(T) { t1 = T; t2 = W+m; t2[0]=1; dt2=0; dt1=-1; } //M_UINT recip, shift; //init( (M_UINT) p, &recip, &shift ); M_UINT recip = 1; M_UINT shift = 1; while( 1 ) { if( db>0 && da-db==1 ) { // normal case u = modinv64s(r2[db],p); a = mulrec64(r1[da],u,P); b = mulrec64(a,r2[db-1],P); b = mulrec64(u,sub64s(r1[da-1],b,p),P); // quotient = a x + b dr = polsubmul(r1,r2,a,b,da,db,p,recip,shift); // r1 = r1 - (a x + b) r2 if(S) ds = polsubmul(s1,s2,a,b,ds1,ds2,p,recip,shift); // s1 = s1 - (a x + b) s2 if(T) dt = polsubmul(t1,t2,a,b,dt1,dt2,p,recip,shift); // t1 = t1 - (a x + b) t2 } else { dr = poldiv64s(r1,r2,da,db,p); // r1 = [remainder,quotient] q = r1+db; dq = da-db; if(S) ds = polmul64s(q,s2,G,dq,ds2,p); if(S) ds = polsub64s(s1,G,s1,ds1,ds,p); // s1 = s1 - q s2 if(T) dt = polmul64s(q,t2,G,dq,dt2,p); if(T) dt = polsub64s(t1,G,t1,dt1,dt,p); // t1 = t1 - q t2 } if( dr<0 ) { /* D|C so gcd(A,B)=D */ polcopy64s(r2,db,G); if(S) if( s2!=S ) polcopy64s(s2,ds2,S); if(T) if( t2!=T ) polcopy64s(t2,dt2,T); if( G[db]!=1 ) { u = modinv64s(G[db],p); polscamulrec64s(u,G,db,p,P); if(S) polscamulrec64s(u,S,ds2,p,P); if(T) polscamulrec64s(u,T,dt2,p,P); } dG[0] = db; if(S) dS[0] = ds2; if(T) dT[0] = dt2; //if( S ) { printf("GCDEX: S = "); polprint64s(S,ds2); printf(";\n"); } //if( T ) { printf("GCDEX: T = "); polprint64s(T,dt2); printf(";\n"); } return; } r = r1; r1 = r2; r2 = r; da = db; db = dr; if(S) { s = s1; s1 = s2; s2 = s; ds1 = ds2; ds2 = ds; } if(T) { t = t1; t1 = t2; t2 = t; dt1 = dt2; dt2 = dt; } } } int polsparseadd64s( LONG *A, LONG *X, int m, LONG *B, LONG *Y, int n, LONG p, LONG *C, LONG *Z) // assumes X and Y are sorted so we do a merge into Z { int i,j,k,t; LONG c; for( i=j=k=0; iY[j] ) { Z[k] = Y[j]; C[k] = B[j]; j++; k++; } else { c = add64s(A[i],B[j],p); if( c!=0 ) { Z[k] = X[i]; C[k] = c; k++; } i++; j++; } } while( i> xshift; if( d==i ) k++; } if( k==0 ) return 0; //alocate memory for the matched terms B = array(k); C[0] = B; Y = array(k); Z[0] = Y; //put all matched terms inside the memory //no degree should be changed for( k=0,j=0; j> xshift; if( d==i ) { B[k] = A[j]; Y[k] = X[j] & mask; k++; } } return k; // the number of terms of the coefficient } int poldiff64s( LONG *A, LONG *X, int t, int n, int j, LONG p ) { // diff(a,x[j]) a is stored as pair (A,X) with t terms in n variables int i, k, numbits, shift; LONG mask, x, one; recint P; P = recip1( p ); numbits = 63/n; shift = numbits*(n-j); mask = (1LL<> shift) & mask; if( x>0 ) { A[k] = mulrec64(x,A[i],P); X[k] = X[i] - one; k++; } } return k; // number of terms in derivative } int polbivareval64s( LONG *A, LONG *X, int na, LONG *W, LONG *B, int dx, int dy, LONG p ) { // eval(a(x,y),y=j) a is stored as pair (A,X) with t terms in 2 variables // Output is a dense array of coefficients of degree dx int i,j,shift,x,y; LONG t,mask,z[2]; recint P; P = recip1(p); //printf("BIVAR dx=%d dy=%d p=%lld", dx,dy,p); vecprint64s(W,dy); printf(";\n"); for( i=0; i<=dx; i++ ) B[i] = 0; shift = 31; mask = (1LL<<31)-1; for( i=0; i> shift; for( j=i+1; j> shift) == x); j++ ); //for( t=B[x]; i=p ) z[1] -= p; i++; // 20% faster to have if( z[1]>=p ) here in the middle than at the beginning or end of the loop y = X[i] & mask; zfma(z,W[y],A[i]); i++; } if( i==j-1 ) { y = X[i] & mask; zfma(z,W[y],A[i]); if( z[1]>=p ) z[1] -= p; i++; } zmod(z,p); B[x] = z[0]; } while( dx>=0 && B[dx]==0 ) dx--; return dx; } //My Algorithms void cleanArray64s(LONG *A, int len){ int i; for(i=0;i=p ) z[1] -= p; zfma(z,A[i],B[d-i]); i++; zfma(z,A[i],B[d-i]); i++; if( z[1]>=p ) z[1] -= p; zfma(z,A[i],B[d-i]); i++; } while( i<=max ) { zfma(z,A[i],B[d-i]); i++; if( z[1]>=p ) z[1] -= p; } } zmod(z,p); return z[0]; } void poladdFast(LONG *A,LONG *B, LONG *C,int dx, LONG p){ int i; for(i=0;i<=dx;i++)C[i] = add64s(A[i],B[i],p); } void addPol(LONG *A, int dz, LONG p){ int j,k; for(k=0;k=k;j--) A[j] = add64s(A[j],A[j+1],p); } void createProblem(LONG *A, LONG *F0, int n, int dx, int dz, int dxA, int dzA, LONG alpha, LONG p, LONG *W){ //Note: dxA = dx*n, dzA = dz*n LONG *TEMP,*TEMP2,*TEMP3,t1,t2,alpha2,r; int i,j,k,l,m,p1,p2,p3,MIN,MAX,da,db,dc,w; ULONG z[2]; //return the degree of dxA,dzA TEMP = array((dxA+1)*(dzA+1)); TEMP2 = array((dxA+1)*(dzA+1)); TEMP3 = array((dxA+1)*(dzA+1)); cleanArray64s(TEMP,(dxA+1)*(dzA+1)); cleanArray64s(TEMP2,(dxA+1)*(dzA+1)); cleanArray64s(TEMP3,(dxA+1)*(dzA+1)); cleanArray64s(A,(dzA+1)*(dxA+1)); //generate the polynomials of F (A = f_1*f_2*...*f_n) for(i=0;i=0;l--){ MIN = max32s(0,l-db); MAX = min32s(l,da); z[0] = convol64s(&TEMP3[p3], &TEMP[p1+p2],MIN,MAX,l,p); TEMP2[(k+j)*((i+1)*dx+1)+l] = add64s(z[0],TEMP2[(k+j)*((i+1)*dx+1)+l],p); } } } //move current polynomial back to TEMP1 for(j=0;j<=(i+1)*dz;j++){ p2 = j*((i+1)*dx+1); for(k=0;k<=(i+1)*dx;k++){ t1 = TEMP2[p2+k]; TEMP3[p2+k] = t1; } } } //move to A for(i=0;i<=dzA;i++){ p1 = i*(dxA+1); for(j=0;j<=dxA;j++){ t1 = TEMP3[p1+j]; A[p1+j] = t1; } } for(i=0;i y for(i=0;i=p){z[1] -=p;} zfma(z,A[i],B[i]);i++;if(z[1]>=p){z[1] -=p;} } while(i<=d){ zfma(z,A[i],B[i]);i++;if(z[1]>=p){z[1] -=p;} } zmod(z,p); return z[0]; } LONG * extendSpace(LONG *A, LONG ** H, LONG * Space, LONG d1, LONG d2, LONG k){ LONG * B,a; int i; B = A; if (d1 + 1 > *Space){ B = *H; a=1; for(i=0;i1;i--){ mPos = mPos + i*d+1; } mCalc[0] = 1; dmCalc = 0; //Main loop u = U + (n-1)*(d+1); for (i=n;i>=2;i--){ //calculate mcalc*U_i dmCalc = polmul64s(mCalc,u,mCalc,dmCalc,d,p); //add new mcalc to M polcopy64s(mCalc,dmCalc,M+mPos); mPos = mPos - ((n-i+2)*d + 1); //update u pointer u -= d+1; } //make sure gcd(u_i,M_i) = 1 for all M u = g + (n-1)*d+1; Wptr = u + d+1; dmCalc = (n-1)*d; mPos = 0; for(i=0;i0) return -1; dmCalc -= d; mPos += (n-i-1)*d+1; } return 0; } void generateGcdexS(LONG *U, int n, int du, LONG *M, int dx, LONG *S, int *sDeg, LONG *W, LONG p){ //local variables int i,j,k,du2,dm,dg,ds,wTemp,dt,mPos; LONG *uTemp,*mTemp,*g,*wPtr; cleanArray64s(W,4*n*(du+1)); uTemp = W; mTemp = uTemp + du+1; g = mTemp + (n-1)*du+1; wPtr = g + n*du+1; mPos = 0; //Calculate the sigma term and subtract it from C for (i=1;i1){ i=0; while(i=k){ G[s+t+i][k] = G[s+2*i][k]; } } s += t; t = ceil2(t); } if(degrees[s] >= k){return G[s][k];} else{ return 0;} } void LagInterpSetup(LONG *A,LONG *xPoints,int dx, LONG *W, LONG p){ //local variables LONG t,a,b,*p1,*p2; int i,j,k,m,dm,point1,point2,size; ULONG z[2]; size = (dx+1)/2+1; cleanArray64s(W,2*dx+6); p1 = &W[dx+2]; p2 = &W[dx+4]; //calculate master polynomial W[1] = 1; p1[1] = 1; for(i=1;i<=dx;i++){ p1[0] = p-xPoints[i]; polmul64s(W,p1,W,i,1,p); } //calculate Lagrange polynomials for(i=0;i<=dx;i++){ //copy the master polynomial to spare space for(j=0;j<=dx+1;j++){p2[j] = W[j]; } p1[0] = p-xPoints[i]; //perform division dm = poldiv64s(p2,p1,dx+1,1,p); //move polynomial to A for(j=0;j<=dx;j++){ A[i*(dx+1)+j] = p2[j+1]; } } //calculate inverse and multiply every term by it p1 = A; for(i=0;i<=dx;i++){ //calculate denom a = 1; b = xPoints[i]; for (j=0;j<=dx;j++){ if (i!=j){ a = mul64s(a,sub64s(b,xPoints[j],p),p); } } a = modinv64s(a,p); //multiply every term by a for(j=0;j<=dx;j++){ if(p1[j]!=0){ p1[j] = mul64s(p1[j],a,p); } } p1 = p1 + dx+1; } //transpose matrix (efficiently) cleanArray64s(W,2*(size)*(size-1) + 1); if(dx%2 ==0){ //single entry, W[0] = A[0]; //even case p1 = &W[1]; //firstRow for(i=2,k=0;i<=dx+1;i+=2,k++){ p1[k*size] = A[i]; } //each column (even elements) for(i=2,k=1;i<=dx+1;i+=2,k++){ for(j=2,m=0;j<=dx+1;j+=2,m++){ p1[m*(size)+k] = A[(i-1)*(dx+1) + j]; } } //odd p1 = p1 + size*(size-1); //firstRow for(i=1,k=0;i<=dx+1;i+=2,k++){ p1[k*size] = A[i]; } //each column (even elements) for(i=2,k=1;i<=dx+1;i+=2,k++){ for(j=1,m=0;j<=dx+1;j+=2,m++){ p1[m*(size)+k] = A[(i-1)*(dx+1) + j]; } } //move elements back to A for(i=0;i<2*size*(size-1)+1;i++){ A[i] = W[i]; } } else{ //transpose matrix A for(i=0;i<=dx;i++){ for(j=i+1;j<=dx;j++){ t = A[i*(dx+1)+j]; A[i*(dx+1)+j] = A[j*(dx+1)+i]; A[j*(dx+1)+i] = t; } } } return; } //perform Lagrangian interpolation. The Lagrangian polynomials are stored in LagPolys //and the interpolation points are stored in yPoints. //The resulting polynomial is stored in Delta //The size of W is dx+1 void LagInterpEval(LONG *LagPolys,LONG *yPoints,LONG *Delta, int dx, LONG *W, LONG p, LONG *nummuls){ //Lagpolys - holds the Lagrangian polynomials used for lagrange interpolation - matrix has size (dx+1)^2 //yPoints - array of length dx+1 - contains the y points for Lagrange interpolation (the x points are implied to be 0,1,-1,2,-2,... //Delta - the interpolated polynomial of degree at most dx //dx - degree of the main polynomial A in x //W - additional space needed - size = dx+1 //p - prime //local variables int i,j,k,size,point; LONG *p1,*V; ULONG z[2]; if(dx%2==0){ size = (dx+1)/2 + 1; V = W; cleanArray64s(V,size); V[0] = yPoints[0]; //solve first solution (alpha = 0) Delta[0] = mul64s(LagPolys[0],yPoints[0],p); *nummuls += 1; //******************************************************************* //solve Lagrange interpolation (even case) //******************************************************************* //build vector for(i=1,j=1;i=k;j--){ poladdFast(pA,pA2,pA,dx,p); pA2 -= (dx+1); pA -= (dx+1); } } } //if there are multiple blocks of code else{ //For each main block of data, compress Data, add it, then return it for(i=1;i=k;j--){ poladdFast(pW,pW2,pW,blockSize-1,p); pW2 = pW; pW -= blockSize; } } T2 = clock(); s2 = s2+ T2-T1; T1 = clock(); pW = W; pA = &A[(i-1)*blockSize]; for(j=0;j<=dz;j++){ for(k=0;k0){ T1 = clock(); pW = W; pA = &A[(blocks-1)*blockSize]; for(j=0;j<=dz;j++){ for(k=0;k=k;j--){ poladdFast(pW,pW2,pW,b-1,p); pW2 = pW; pW -= b; } } T2 = clock(); s2 = s2 + T2-T1; T1 = clock(); pW = W; pA = &A[(blocks-1)*blockSize]; for(j=0;j<=dz;j++){ for(k=0;k=p ) z1[1] -= p; zfma(z2,p1[k+1],p2[m]); if( z2[1]>=p ) z2[1] -= p; } } zmod(z1,p); e = z1[0]; zmod(z2,p); o = mul64s(z2[0],i,p); G[(2*i-1)*(numPolys)+j][iter] = add64s(e,o,p); G[(2*i)*(numPolys)+j][iter] = sub64s(e,o,p); } p1 = p1 + dxdz; } p2 = p2 + du+1; i++; } //odd case (if dx%2==1) if(dx%2==1){ p1 = F + iter*(dx+1); for(j=0;j=p ) z1[1] -= p; zfma(z2,p1[k+1],p2[m]); if( z2[1]>=p ) z2[1] -= p; } } zmod(z1,p); e = z1[0]; zmod(z2,p); o = mul64s(z2[0],i,p); G[dx*(numPolys)+j][iter] = add64s(e,o,p); } p1 = p1 + dxdz; } } return; } //This preforms hensel lifting on n factors to factor polynomial A int HenselLiftCubic(LONG *A, int dx, int dz, LONG *F0, int n, int du, LONG *F, LONG alpha, LONG *TempSpace, LONG p ){ /* A[x][z]- polynomial you wish to factor dx - degree bound on variable x of A dz - degree bound on variable z of A F0 - set of linear factors n - number of initial factors du - degree bound of initial factors (could be extended to an array later) F - array that will store the final polynomials. must be of size n*(dx+1)*(dz+1) W - working array for calculations. Must be at least size (tbd) alpha - LONG integer p - prime */ //local variables int i,j,k,l,m,maxSizeA,ndz,flag, *maxDeg, *sDeg, *curXDeg, sumDeg, InterpFlag, numPolys; LONG s,t,t2,z,*M,*E,*p1,*p2,*p3,*p4,*p5,*TEMP1,*Coeffs, *LagInterpPolys; LONG *DioSVal, numDioMuls,numCoeffExtractMul,numEvalMuls,numInterpMuls; LONG *H, *Space, *degrees, *ck,*evalPoints,*interpPoints,*Delta,*EvalPointsMul, *W, wTemp; LONG temp,temp2,temp3; LONG ** P1; recint P; clock_t T1,T2,T3,T4,s1,s2,s3,s4,s5,s6,s7,s8,s9,s10,s11,s12,s13,s14,s15; //set time to 0 s1=0;s2=0;s3=0;s4=0;s5=0;s6=0;s7=0;s8=0;s9=0;s10=0;s11=0;s12=0;s13=0;s14=0;s15=0; //initial size calculations maxSizeA = (dx+1)*(dz+1); ndz = n*(dz+1); numEvalMuls = 0; numDioMuls = 0; numCoeffExtractMul = 0; numInterpMuls = 0; InterpFlag = 0; P = recip1(p); //find the number of polynomial multiples we create numPolys = 1; t = n; while (t>1){ numPolys += t; t = ceil2(t); } //declare arrays W = TempSpace; maxDeg = arrayI(n); //this is in y sDeg = arrayI(n); //Degree of each s variable (for Diophantine Equation) curXDeg = arrayI(n); //Degree of x for each factor (efficiency) M = W; W += (n-1)*n*du/2 + n - 1; //M polynomials (diophantine) E = W; W += maxSizeA; //The error at each iteration TEMP1 = W; W += n*(du+1); //Temporary storage evalPoints = W; W += dx+1; interpPoints = W; W += dx+1; LagInterpPolys = W; W += (dx+1)*(dx+1); Delta = W; W += dx+1; EvalPointsMul = W; W += (du+1)*((dx/2)+2); DioSVal = W; W += (n-1)*(dx+1); Space = W; W += (dx+1)*(numPolys); // < (dx+1)*3*n degrees = W; W += (dx+1)*(numPolys); // < (dx+1)*3*n H = W; W += 4*(dx+1)*(log2N(n)+1)*(dz+n); LONG *G[(dx+1)*numPolys]; cleanArray64s(H,4*(dx+1)*(log2N(n)+1)); //move contents of F0 to F //Calculate exact degree bound of factors p1 = F; p2 = F0; //Initialize matrix G, Space, and degrees for(i=0;i<(dx+1)*numPolys;i++){ G[i] = H; Space[i] = 1; degrees[i] = 0; H += 1; } for(i=0;i1){ j=0; while(j=1;j--){ poly1 = &F[(j)*(dz+1)*(dx+1)]; poly2 = &CoeffCalc[(j)*(dx+1)*(dz+1)]; coeffDegs[(j)*(dz+1)] = facDegs[(j)*(dz+1)] + coeffDegs[(j+1)*(dz+1)]; fastPolyMult(poly1,poly2,&CoeffCalc[(j-1)*(dx+1)*(dz+1)],facDegs[(j)*(dz+1)],coeffDegs[(j+1)*(dz+1)],p); } //main FOR loop T3 = clock(); for(k=1;k<=dz;k++){ //get the error for this loop point1 = k*(dx+1); ck = &(E[point1]); //calculate deltacoeffDegs[(j+1)*(dz+1)+(i-q)] T1 = clock(); for(i=k;i<=k;i++){ //set degree totals accDeg = maxDeg[n-1]; for (j=n-2; j>=1; j--){ curDeg = accDeg; accDeg = accDeg + maxDeg[j]; //make sure i need to do current calculations if(i <= accDeg){ //first 2 polynomials multiplied if (j==n-2) { MIN = max32s(0,i-maxDeg[j]); MAX = min32s(i,maxDeg[j]); point1 = (n-1)*(dx+1)*(dz+1); point2 = (n-2)*(dx+1)*(dz+1); //refers to the degree of (y-alpha) for each factor for (q=MIN;q<=MAX;q++){ //multiply the 2 polynomials together (classical) point3 = q*(dx+1); point4 = (i-q)*(dx+1); poly2 = &F[point1+point3]; poly1 = &F[point2+point4]; coeffDegs[(n-2)*(dz+1)+i] = max32s(coeffDegs[(n-2)*(dz+1)+i],facDegs[(n-1)*(dz+1)+(i-q)]+facDegs[(n-2)*(dz+1)+q]); //multiply quickly fastPolyMult(poly2,poly1,&CoeffCalc[(j-1)*(dx+1)*(dz+1) + i*(dx+1)],facDegs[(n-1)*(dz+1)+q],facDegs[(n-2)*(dz+1)+(i-q)],p); } } //continued polynomial multiplication up to degree d else{ MIN = max32s(0,i-curDeg); MAX = min32s(i,maxDeg[j]); point1 = (j)*(dx+1)*(dz+1); //this draws from F point2 = (j-1)*(dx+1)*(dz+1); //this draws from CoeffCalc //refers to the degree of (y-alpha) for each factor for (q=MIN;q<=MAX;q++){ //multiply the 2 polynomials together (classical) point3 = q*(dx+1); point4 = (i-q)*(dx+1); poly2 = &F[point1+point3]; poly1 = &CoeffCalc[point1 + point4]; coeffDegs[(j)*(dz+1)+i] = max32s(coeffDegs[(j)*(dz+1)+i],coeffDegs[(j+1)*(dz+1)+(i-q)]+facDegs[(j)*(dz+1)+q]); //multiply quickly fastPolyMult(poly2,poly1,&CoeffCalc[(j-1)*(dx+1)*(dz+1) + i*(dx+1)],facDegs[(j)*(dz+1)+q],coeffDegs[(j+1)*(dz+1)+(i-q)],p); } } } } } //zero Delta array cleanArray64s(Delta,dx+1); curDeg = accDeg; accDeg = accDeg + maxDeg[0]; MIN = max32s(0,k-accDeg); MAX = min32s(k,curDeg); //Calculate final coefficients (Delta) for(m=MIN;m<=MAX;m++){ poly1 = &F[m*(dx+1)]; poly2 = &CoeffCalc[(k-m)*(dx+1)]; fastPolyMult(poly1,poly2,Delta,facDegs[m],coeffDegs[(dz+1) + (k-m)],p); } T2 = clock(); s1 = s1 + T2 - T1; //subtract delta from e T1 = clock(); temp = polsub64s(ck, Delta, ck, dx, dx, p); T2 = clock(); s5 = s5 + T2 - T1; //printf("ck:=");polprint64s(ck,dx);printf("\n"); cleanArray64s(TEMP1,maxSizeA); //check to make sure ck isn't zero flag = 0; for (i=0;i<=dx;i++){ if (ck[i] != 0){ flag = 1; break; } } if(flag == 1) { //Solve Diophantine Equation T1 = clock(); Dionvar(F0, n, du, ck, dx, M, DioSVal,sDeg, W, TEMP1, p, &numDioMuls); T2 = clock(); s4 = s4 + T2 - T1; //printf("Del:=");polprint64s(TEMP1,maxSizeA);printf("\n"); //Update F point2 = k*(dx+1); for(i=0;i=1;i--){ poly1 = &F[(i)*(dx+1)*(dz+1)]; poly2 = &F[(i)*(dx+1)*(dz+1) + k*(dx+1)]; poly3 = &CoeffCalc[(i)*(dx+1)*(dz+1)]; poly4 = &CoeffCalc[(i-1)*(dx+1)*(dz+1) + k*(dx+1)]; cleanArray64s(TEMP2,dx+1); fastPolyMult(poly1,TEMP1,TEMP2,facDegs[(i)*(dz+1)],accDeg,p); fastPolyMult(poly2,poly3,TEMP2,facDegs[(i)*(dz+1)+k],coeffDegs[(i+1)*(dz+1)],p); accDeg = max32s(accDeg + facDegs[(i)*(dz+1)],facDegs[(i)*(dz+1)+k]+coeffDegs[(i+1)*(dz+1)]); coeffDegs[(i)*(dz+1)+k] = max32s(accDeg,coeffDegs[(i)*(dz+1)+k]); poly1 = &CoeffCalc[(i-1)*(dx+1)*(dz+1) + k*(dx+1)]; for(j=0;j<=accDeg;j++){ TEMP1[j] = TEMP2[j]; poly1[j] = add64s(poly1[j],TEMP2[j],p); } } } } T4 = clock(); //print statements /* printf("Polynomial Multiplication time=%8.2fms\n",(s1)/1000.0/1.0); printf("Diophantine time=%8.2fms\n",(s4)/1000.0/1.0); printf("Sub time=%8.2fms\n",(s5)/1000.0/1.0); printf("Total Loop time=%8.2fms\n",(T4-T3)/1000.0/1.0); */ return 0; }