/***********************************************************************/ /* */ /* */ /* Program: */ /* -------- */ /* */ /* ssehex3.c */ /* */ /* This program is included in a package, consisting of 6 fast */ /* programs, to calculate e=exp(1) hexadecimal. Programs to */ /* convert from hexadecimal to decimal are also included. */ /* */ /* */ /* Version: */ /* -------- */ /* */ /* First version: November 1998 */ /* Latest update: 2003-06-15 */ /* */ /* */ /* Author of the program: */ /* ---------------------- */ /* */ /* Stefan Spaennare, Lund, Sweden */ /* */ /* E-mail: stefan@spaennare.se */ /* */ /* Home page: http://www.spaennare.se/index.html */ /* Program page: http://www.spaennare.se/ssprog.html */ /* */ /* If you have problems with the program please let me know! */ /* */ /* */ /* References: */ /* ----------- */ /* */ /* 1. The program now use a very fast FFT (fftsg_h.c) by Takuya Ooura: */ /* */ /* http://momonga.t.u-tokyo.ac.jp/~ooura/ */ /* */ /* */ /* Copy-right: */ /* ----------- */ /* */ /* This program is free to copy if this information follows the */ /* program. */ /* */ /* */ /* IMPORTANT! */ /* ---------- */ /* */ /* Please don't use the result from this program for important */ /* purposes without doing an independent check of the result! */ /* See also the notice below! */ /* */ /* */ /* Function: */ /* --------- */ /* */ /* How to calculate e=exp(1) hexadecimal and decimal: */ /* */ /* First calculate the hexadecimal numbers a and b using one of the */ /* ssehex.c, ssehex2.c or ssehex3.c. Here n=100000 is the number */ /* of desired DECIMAL digits. Type the following: */ /* */ /*>ssehex 100000 */ /* */ /* Then calculate e=exp(1) hexadecimal by e=a/b: */ /* */ /*>hexediv ehex_a.dat ehex_b.dat e5.hex */ /* */ /* Then convert e=exp(1) hexadecimal to decimal by: */ /* */ /*>hex2dec e5.hex e5.dec */ /* */ /* or */ /* */ /*>fhex2dec e5.hex e5.dec */ /* */ /* */ /* Compilation: */ /* ------------ */ /* */ /* On Linux computers just run "compall" to compile all six programs: */ /* */ /*>compall */ /* */ /* The program works in UNIX, Linux and Windows (DOS using DJGPP) */ /* It is recommended to optimize the program (i.e. use cc -O3 option */ /* or similar option) when you compile. */ /* */ /*>gcc -O3 -o program program.c -lm */ /* */ /* or (for those programs using FFT) */ /* */ /*>gcc -O3 -o program program.c fftsg_h.c -lm */ /* */ /* The time wraps around after 4295 seconds on some computers. */ /* */ /* */ /* Hexadecimal numbers: */ /* -------------------- */ /* */ /* Hexadecimal numbers that can be converted to decimal with the */ /* programs hex2dec.c and fhex2dec.c must be on the following */ /* form (0 <= X <= 9): */ /* */ /* X.A1B678F3EC95D6C4... */ /* */ /* */ /* Algorithm and accuracy: */ /* ----------------------- */ /* */ /* The programs ssehex.c, ssehex2.c and ssehex3.c calculates */ /* SUM 1/k! using the algorithm (C-like notation): */ /* */ /* b=1; */ /* a=0; */ /* */ /* for (i=n; i>=1; i--) { */ /* b=b*i; */ /* a=a+b; */ /* } */ /* */ /* e=a/b; */ /* */ /* Multiplications and divisions with the base (2^32) are made using */ /* the shift operators (<< and >>) and the AND operator (&&) to be */ /* fast. This is also the reason why the calculations are performed */ /* with a hexadecimal (binary) base. */ /* */ /* The final computation of e=a/b is made with the separate program */ /* hexediv.c using Newtons Iteration and FFT-multiplication */ /* for division. */ /* */ /* The programs use long long integer (64 bit) calculations and gives */ /* probably correct result up to 2*10^9 digits of e=exp(1) (slightly */ /* less than maxint = 2^31 digits). But it will take a VERY long */ /* time! The result has however only been verified up to 10^7 digits. */ /* */ /* Note! If the final decimal result is expected to have n digits */ /* The actual calculation is performed using slightly more digits. */ /* All calculated digits are output from these programs, but only */ /* the n desired decimal digits can be expected to be correct. */ /* */ /* WARNING! */ /* -------- */ /* */ /* This is valid for hexediv.c and fhex2dec.c: */ /* */ /* If you want to make calculations with much more than 4 million */ /* (2^22) digits the convolutions in the FFT multiplications can be */ /* saturated which will give erroneous results or no convergence */ /* at all. If this occur then set mulversion=2 which */ /* is 2 times slower than mulversion=1 but use the base 16 */ /* instead of 256. This means that the convolution will be */ /* saturated for a much larger number of digits (> 100 millions). */ /* */ /* */ /* Timing, benchmark and memory: */ /* ----------------------------- */ /* */ /* The CPU-times are given for a 1400 MHz Intel Celeron computer */ /* (100 MHz memory buss) and 512 Mbyte memory. The cache memory is */ /* 256 kbyte at full CPU speed. The operating system was Red Hat */ /* Linux 9 with the gcc 3.22 C-compiler. The CPU load was on */ /* average 1.00 (no other programs running). */ /* */ /* The programs were compiled using the following lines: */ /* */ /* For ssehex.c, ssehex2.c, ssehex3.c and hex2dec.c: */ /* */ /*>gcc -O3 -o program program.c -lm */ /* */ /* For hexediv.c and fhex2dec.c: */ /* */ /*>gcc -O3 -o program program.c fftsg_h.c -lm */ /* */ /* How to compile see also the file "compall". TC below means */ /* Time Complexity. The CPU-times are given for n=10^5=100000 and */ /* and n=10^6=1000000 decimal digits. */ /* */ /* */ /* Program 10^5 10^6 TC Memory/(10^6) */ /*---------------------------------------------------------------------*/ /* ssehex 3.46 s 791.00 s O(n^2) 1.2 Mbyte */ /* ssehex2 2.86 s 607.09 s O(n^2) 1.2 Mbyte */ /* ssehex3 3.78 s 707.71 s O(n^2) 1.2 Mbyte */ /* hexediv 5.32 s 62.24 s O(n*log(n)^2) 21.0 Mbyte */ /* hex2dec 1.35 s 266.02 s O(n^2) 0.9 Mbyte */ /* fhex2dec 12.48 s 157.51 s O(n*log(n)^3) 36.0 Mbyte */ /*---------------------------------------------------------------------*/ /* */ /***********************************************************************/ /********************************************************************************/ /* */ /* Notice */ /* ====== */ /* */ /* I make no warranties that this program is (1) free of errors, (2) consistent */ /* with any standard merchantability, or (3) meeting the requirements of a */ /* particular application. This software shall not, partly or as a whole, */ /* participate in a process, whose outcome can result in injury to a person or */ /* loss of property. It is solely designed for analytical work. Permission to */ /* use, copy and distribute is hereby granted without fee, providing that the */ /* header above including this notice appears in all copies. */ /* */ /* Stefan Spaennare */ /* */ /********************************************************************************/ #include #include #include #include #include #include double timetic=(double)(CLOCKS_PER_SEC); char fname1[80]="ehex3_a.dat"; char fname2[80]="ehex3_b.dat"; char ppp1(s) unsigned long s; { char c; if ((s>=0) && (s<=9)) { c=s+48; } else { if (s==10) { c='A'; } else if (s==11) { c='B'; } else if (s==12) { c='C'; } else if (s==13) { c='D'; } else if (s==14) { c='E'; } else if (s==15) { c='F'; } /* if */ } /* if */ return(c); } /* ppp1 */ void ppp2(sss,s) char *sss; unsigned long s; { unsigned long temp; temp=s & 0xf0000000; temp=temp >> 28; sss[0]=ppp1(temp); temp=s & 0x0f000000; temp=temp >> 24; sss[1]=ppp1(temp); temp=s & 0x00f00000; temp=temp >> 20; sss[2]=ppp1(temp); temp=s & 0x000f0000; temp=temp >> 16; sss[3]=ppp1(temp); temp=s & 0x0000f000; temp=temp >> 12; sss[4]=ppp1(temp); temp=s & 0x00000f00; temp=temp >> 8; sss[5]=ppp1(temp); temp=s & 0x000000f0; temp=temp >> 4; sss[6]=ppp1(temp); temp=s & 0x0000000f; temp=temp >> 0; sss[7]=ppp1(temp); sss[8]='\0'; } /* ppp2 */ int main(argc,argv) int argc; char *argv[]; { FILE *outfile1,*outfile2; int i,j,k,n,l1,l2,fac,ll,nn; unsigned long biti; double lten,l2d,sum,jd,lld,invc,ltwo; double lteninv,ltwoinv; unsigned long long basii,jii,remii; unsigned long long rem2ii,vv1; clock_t t1,td1,t2,td2,td3; unsigned long *v1,*v2; char sss[10]; char sss2[10]; if (argc < 2) { printf("Usage: %s ni\n",argv[0]); exit(0); } /* if */ n=atoi(argv[1]); ltwo=log(2.0); lten=log(10.0); ltwoinv=1.0/ltwo; lteninv=1.0/lten; basii=4294967296LL; biti=32; l1=(int)((double)(n)*lten/(ltwo*(double)(biti))); l2=(int)(1.005*(double)(l1)+0.5)+5; nn=8*l2; v1=(unsigned long *)calloc(l2+1,sizeof(unsigned long)); v2=(unsigned long *)calloc(l2+1,sizeof(unsigned long)); l2d=(double)(l2)*ltwo*(double)(biti); i=1; sum=0.0; while (sum < l2d) { sum=sum+log((double)(i)); i++; } /* while */ fac=i-5; printf("\n"); printf("e = exp(1)\n"); printf("==========\n"); printf("\n"); printf("Hexadecimal (64 bit) method, numerically stable.\n"); printf("\n"); printf("Factorial=%8d; Length=%8d; Decimals=%8d; Bit=%4d\n",fac,l2,n,biti); printf("\n"); printf("Calculating ...\n"); for (i=1; i<=l2; i++) { v1[i]=0; v2[i]=0; } /* for i */ v1[1]=1; invc=1.0/(ltwo*(double)(biti)); lld=0.0; td2=0; td3=0; t1=clock(); for (j=fac; j>=1; j--) { jd=(double)(j); lld=lld+log(jd); ll=(int)(invc*lld+6.5); if (ll > l2) { ll=l2; } /* if */ t2=clock(); jii=(unsigned long long)(j); remii=0; rem2ii=0; for (i=1; i<=ll; i++) { remii=(unsigned long long)(v1[i])*jii+remii; vv1=remii & 0x00000000ffffffff; remii=remii >> 32; v1[i]=(unsigned long)(vv1); rem2ii=vv1+(unsigned long long)(v2[i])+rem2ii; v2[i]=(unsigned long)(rem2ii & 0x00000000ffffffff); rem2ii=rem2ii >> 32; } /* for i */ td2=td2+(clock()-t2); } /* for j */ td1=clock()-t1; td3=0; printf("\n"); printf("CPU time=%10.2f s %10.2f s %10.2f s\n", (double)(td1)/timetic,(double)(td2)/timetic,(double)(td3)/timetic); printf("\n"); printf("Calculation finished, output to %s %s.\n\n",fname1,fname2); outfile1=fopen(fname1,"w"); j=l2; while (v2[j] == 0) { j--; } /* while */ nn=8*j; fprintf(outfile1,"0."); ppp2(sss,v2[j]); k=0; while (sss[k] == '0') { k++; } /* while */ for (i=k; i<=8; i++) { sss2[i-k]=sss[i]; } /* for i */ sss2[i-k]='\0'; fprintf(outfile1,"%s",sss2); for (i=j-1; i>=1; i--) { ppp2(sss,v2[i]); fprintf(outfile1,"%s",sss); } /* for i */ for (i=8-k+1; i<=8; i++) { fprintf(outfile1,"0"); } /* for i */ fprintf(outfile1,"\n"); outfile2=fopen(fname2,"w"); fprintf(outfile2,"0."); ppp2(sss,v1[j]); for (i=k; i<=8; i++) { sss2[i-k]=sss[i]; } /* for i */ sss2[i-k]='\0'; fprintf(outfile2,"%s",sss2); for (i=j-1; i>=1; i--) { ppp2(sss,v1[i]); fprintf(outfile2,"%s",sss); } /* for i */ for (i=8-k+1; i<=8; i++) { fprintf(outfile2,"0"); } /* for i */ fprintf(outfile2,"\n"); fclose(outfile1); fclose(outfile2); } /* End */