/***********************************************************************/
/*                                                                     */
/*                                                                     */
/* Program:                                                            */
/* --------                                                            */
/*                                                                     */
/* ssehex3.c                                                           */
/*                                                                     */
/* This program is included in a package, consisting of 6 fast         */
/* programs, to calculate e=exp(1) hexadecimal. Programs to            */
/* convert from hexadecimal to decimal are also included.              */
/*                                                                     */
/*                                                                     */
/* Version:                                                            */
/* --------                                                            */
/*                                                                     */
/* First version: November 1998                                        */
/* Latest update: 2003-06-15                                           */
/*                                                                     */
/*                                                                     */
/* Author of the program:                                              */
/* ----------------------                                              */
/*                                                                     */
/* Stefan Spaennare, Lund, Sweden                                      */
/*                                                                     */
/* E-mail: stefan@spaennare.se                                         */
/*                                                                     */
/* Home page:    http://www.spaennare.se/index.html                    */
/* Program page: http://www.spaennare.se/ssprog.html                   */
/*                                                                     */
/* If you have problems with the program please let me know!           */
/*                                                                     */
/*                                                                     */
/* References:                                                         */
/* -----------                                                         */
/*                                                                     */
/* 1. The program now use a very fast FFT (fftsg_h.c) by Takuya Ooura: */  
/*                                                                     */
/* http://momonga.t.u-tokyo.ac.jp/~ooura/                              */
/*                                                                     */
/*                                                                     */
/* Copy-right:                                                         */
/* -----------                                                         */
/*                                                                     */
/* This program is free to copy if this information follows the        */
/* program.                                                            */
/*                                                                     */
/*                                                                     */
/* IMPORTANT!                                                          */
/* ----------                                                          */
/*                                                                     */
/* Please don't use the result from this program for important         */
/* purposes without doing an independent check of the result!          */
/* See also the notice below!                                          */
/*                                                                     */
/*                                                                     */
/* Function:                                                           */
/* ---------                                                           */
/*                                                                     */
/* How to calculate e=exp(1) hexadecimal and decimal:                  */
/*                                                                     */
/* First calculate the hexadecimal numbers a and b using one of the    */
/* ssehex.c, ssehex2.c or ssehex3.c. Here n=100000 is the number       */
/* of desired DECIMAL digits. Type the following:                      */
/*                                                                     */
/*>ssehex 100000                                                       */
/*                                                                     */
/* Then calculate e=exp(1) hexadecimal by e=a/b:                       */
/*                                                                     */
/*>hexediv ehex_a.dat ehex_b.dat e5.hex                                */
/*                                                                     */
/* Then convert e=exp(1) hexadecimal to decimal by:                    */
/*                                                                     */
/*>hex2dec e5.hex e5.dec                                               */
/*                                                                     */
/* or                                                                  */
/*                                                                     */
/*>fhex2dec e5.hex e5.dec                                              */
/*                                                                     */
/*                                                                     */
/* Compilation:                                                        */
/* ------------                                                        */
/*                                                                     */
/* On Linux computers just run "compall" to compile all six programs:  */
/*                                                                     */
/*>compall                                                             */
/*                                                                     */
/* The program works in UNIX, Linux and Windows (DOS using DJGPP)      */
/* It is recommended to optimize the program (i.e. use cc -O3 option   */
/* or similar option) when you compile.                                */
/*                                                                     */
/*>gcc -O3 -o program program.c -lm                                    */
/*                                                                     */
/* or (for those programs using FFT)                                   */
/*                                                                     */
/*>gcc -O3 -o program program.c fftsg_h.c -lm                                    */
/*                                                                     */
/* The time wraps around after 4295 seconds on some computers.         */
/*                                                                     */
/*                                                                     */
/* Hexadecimal numbers:                                                */
/* --------------------                                                */
/*                                                                     */
/* Hexadecimal numbers that can be converted to decimal with the       */
/* programs hex2dec.c and fhex2dec.c must be on the following          */
/* form (0 <= X <= 9):                                                 */
/*                                                                     */
/* X.A1B678F3EC95D6C4...                                               */
/*                                                                     */
/*                                                                     */
/* Algorithm and accuracy:                                             */
/* -----------------------                                             */
/*                                                                     */
/* The programs ssehex.c, ssehex2.c and ssehex3.c calculates           */
/* SUM 1/k! using the algorithm (C-like notation):                     */
/*                                                                     */
/* b=1;                                                                */
/* a=0;                                                                */
/*                                                                     */
/* for (i=n; i>=1; i--) {                                              */
/*    b=b*i;                                                           */
/*    a=a+b;                                                           */
/* }                                                                   */
/*                                                                     */
/* e=a/b;                                                              */
/*                                                                     */
/* Multiplications and divisions with the base (2^32) are made using   */
/* the shift operators (<< and >>) and the AND operator (&&) to be     */
/* fast. This is also the reason why the calculations are performed    */
/* with a hexadecimal (binary) base.                                   */
/*                                                                     */
/* The final computation of e=a/b is made with the separate program    */
/* hexediv.c using Newtons Iteration and FFT-multiplication            */
/* for division.                                                       */
/*                                                                     */
/* The programs use long long integer (64 bit) calculations and gives  */
/* probably correct result up to 2*10^9 digits of e=exp(1) (slightly   */
/* less than maxint = 2^31 digits). But it will take a VERY long       */
/* time! The result has however only been verified up to 10^7 digits.  */
/*                                                                     */
/* Note! If the final decimal result is expected to have n digits      */
/* The actual calculation is performed using slightly more digits.     */
/* All calculated digits are output from these programs, but only      */
/* the n desired decimal digits can be expected to be correct.         */
/*                                                                     */
/* WARNING!                                                            */
/* --------                                                            */
/*                                                                     */
/* This is valid for hexediv.c and fhex2dec.c:                         */
/*                                                                     */
/* If you want to make calculations with much more than 4 million      */
/* (2^22) digits the convolutions in the FFT multiplications can be    */
/* saturated which will give erroneous results or no convergence       */
/* at all. If this occur then set mulversion=2 which                   */
/* is 2 times slower than mulversion=1 but use the base 16             */
/* instead of 256. This means that the convolution will be             */
/* saturated for a much larger number of digits (> 100 millions).      */
/*                                                                     */
/*                                                                     */
/* Timing, benchmark and memory:                                       */
/* -----------------------------                                       */
/*                                                                     */
/* The CPU-times are given for a 1400 MHz Intel Celeron computer       */
/* (100 MHz memory buss) and 512 Mbyte memory. The cache memory is     */
/* 256 kbyte at full CPU speed. The operating system was Red Hat       */
/* Linux 9 with the gcc 3.22 C-compiler. The CPU load was on           */
/* average 1.00 (no other programs running).                           */
/*                                                                     */
/* The programs were compiled using the following lines:               */
/*                                                                     */
/* For ssehex.c, ssehex2.c, ssehex3.c and hex2dec.c:                   */
/*                                                                     */
/*>gcc -O3 -o program program.c -lm                                    */
/*                                                                     */
/* For hexediv.c and fhex2dec.c:                                       */
/*                                                                     */
/*>gcc -O3 -o program program.c fftsg_h.c -lm                          */
/*                                                                     */
/* How to compile see also the file "compall". TC below means          */
/* Time Complexity. The CPU-times are given for n=10^5=100000 and      */
/* and n=10^6=1000000 decimal digits.                                  */
/*                                                                     */
/*                                                                     */
/* Program       10^5       10^6         TC          Memory/(10^6)     */
/*---------------------------------------------------------------------*/
/* ssehex        3.46 s    791.00 s     O(n^2)         1.2 Mbyte       */ 
/* ssehex2       2.86 s    607.09 s     O(n^2)         1.2 Mbyte       */ 
/* ssehex3       3.78 s    707.71 s     O(n^2)         1.2 Mbyte       */ 
/* hexediv       5.32 s     62.24 s  O(n*log(n)^2)    21.0 Mbyte       */
/* hex2dec       1.35 s    266.02 s     O(n^2)         0.9 Mbyte       */
/* fhex2dec     12.48 s    157.51 s  O(n*log(n)^3)    36.0 Mbyte       */
/*---------------------------------------------------------------------*/
/*                                                                     */
/***********************************************************************/

/********************************************************************************/
/*                                                                              */
/* Notice                                                                       */
/* ======                                                                       */
/*                                                                              */
/* I make no warranties that this program is (1) free of errors, (2) consistent */
/* with any standard merchantability, or (3) meeting the requirements of a      */
/* particular application. This software shall not, partly or as a whole,       */
/* participate in a process, whose outcome can result in injury to a person or  */
/* loss of property. It is solely designed for analytical work. Permission to   */
/* use, copy and distribute is hereby granted without fee, providing that the   */
/* header above including this notice appears in all copies.                    */
/*                                                                              */
/*                                                            Stefan Spaennare  */
/*                                                                              */
/********************************************************************************/


#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <time.h>

double timetic=(double)(CLOCKS_PER_SEC);

char fname1[80]="ehex3_a.dat";
char fname2[80]="ehex3_b.dat";


char ppp1(s)
unsigned long s;
{
   char c;

   if ((s>=0) && (s<=9)) {
      c=s+48; 
   } 
   else {
     if (s==10) {
        c='A';
     } 
     else if (s==11) {
        c='B';
     }
     else if (s==12) {
        c='C';
     } 
     else if (s==13) {
        c='D';
     }
     else if (s==14) {
        c='E';
     }
     else if (s==15) {
        c='F';
     } /* if */
  } /* if */

  return(c);

} /* ppp1 */

void ppp2(sss,s)
char *sss;
unsigned long s;
{
   unsigned long temp;

   temp=s & 0xf0000000;
   temp=temp >> 28;
   sss[0]=ppp1(temp);

   temp=s & 0x0f000000;
   temp=temp >> 24;
   sss[1]=ppp1(temp);

   temp=s & 0x00f00000;
   temp=temp >> 20;
   sss[2]=ppp1(temp);

   temp=s & 0x000f0000;
   temp=temp >> 16;
   sss[3]=ppp1(temp);

   temp=s & 0x0000f000;
   temp=temp >> 12;
   sss[4]=ppp1(temp);

   temp=s & 0x00000f00;
   temp=temp >> 8;
   sss[5]=ppp1(temp);

   temp=s & 0x000000f0;
   temp=temp >> 4;
   sss[6]=ppp1(temp);

   temp=s & 0x0000000f;
   temp=temp >> 0;
   sss[7]=ppp1(temp);

   sss[8]='\0';

} /* ppp2 */


int main(argc,argv) 
int argc;
char *argv[];
{
   
   FILE *outfile1,*outfile2;

   int i,j,k,n,l1,l2,fac,ll,nn; 
   unsigned long biti;

   double lten,l2d,sum,jd,lld,invc,ltwo;
   double lteninv,ltwoinv;

   unsigned long long basii,jii,remii; 
   unsigned long long rem2ii,vv1; 

   clock_t t1,td1,t2,td2,td3;

   unsigned long *v1,*v2; 

   char sss[10];
   char sss2[10];

   if (argc < 2) {
      printf("Usage: %s ni\n",argv[0]);
      exit(0);
   } /* if */

   n=atoi(argv[1]);

   ltwo=log(2.0);
   lten=log(10.0);
   ltwoinv=1.0/ltwo;
   lteninv=1.0/lten;

   basii=4294967296LL;
   biti=32;


   l1=(int)((double)(n)*lten/(ltwo*(double)(biti)));
   l2=(int)(1.005*(double)(l1)+0.5)+5;
   nn=8*l2;

   v1=(unsigned long *)calloc(l2+1,sizeof(unsigned long));
   v2=(unsigned long *)calloc(l2+1,sizeof(unsigned long));

   l2d=(double)(l2)*ltwo*(double)(biti);

   i=1;
   sum=0.0;

   while (sum < l2d) {
      sum=sum+log((double)(i));
      i++;
   } /* while */

   fac=i-5;

   printf("\n");
   printf("e = exp(1)\n");
   printf("==========\n");
   printf("\n");
   printf("Hexadecimal (64 bit) method, numerically stable.\n");
   printf("\n");
   printf("Factorial=%8d; Length=%8d; Decimals=%8d; Bit=%4d\n",fac,l2,n,biti);
   printf("\n");
   printf("Calculating ...\n");


   for (i=1; i<=l2; i++) {
      v1[i]=0;
      v2[i]=0;
   } /* for i */

   v1[1]=1;

   invc=1.0/(ltwo*(double)(biti));

   lld=0.0;
   td2=0;
   td3=0;

   t1=clock();

   for (j=fac; j>=1; j--) {

      jd=(double)(j);
      lld=lld+log(jd);
      ll=(int)(invc*lld+6.5);

      if (ll > l2) {
         ll=l2;
      } /* if */

      t2=clock();

      jii=(unsigned long long)(j);                    
      remii=0;                                
      rem2ii=0;

      for (i=1; i<=ll; i++) {
         remii=(unsigned long long)(v1[i])*jii+remii; 
	 vv1=remii & 0x00000000ffffffff;                 
	 remii=remii >> 32;
	 v1[i]=(unsigned long)(vv1);
         rem2ii=vv1+(unsigned long long)(v2[i])+rem2ii;
         v2[i]=(unsigned long)(rem2ii & 0x00000000ffffffff);                 
	 rem2ii=rem2ii >> 32;
      } /* for i */

      td2=td2+(clock()-t2);

   } /* for j */

   td1=clock()-t1;

   td3=0;

   printf("\n");
   printf("CPU time=%10.2f s %10.2f s %10.2f s\n",
   (double)(td1)/timetic,(double)(td2)/timetic,(double)(td3)/timetic);
   printf("\n");
   printf("Calculation finished, output to %s %s.\n\n",fname1,fname2);


   outfile1=fopen(fname1,"w");

   j=l2;
   while (v2[j] == 0) {
      j--;
   } /* while */

   nn=8*j;

   fprintf(outfile1,"0.");

   ppp2(sss,v2[j]);

   k=0;
   while (sss[k] == '0') {
      k++;
   } /* while */

   for (i=k; i<=8; i++) {
      sss2[i-k]=sss[i];
   } /* for i */
   sss2[i-k]='\0';

   fprintf(outfile1,"%s",sss2);

   for (i=j-1; i>=1; i--) {
      ppp2(sss,v2[i]);
      fprintf(outfile1,"%s",sss);
   } /* for i */

   for (i=8-k+1; i<=8; i++) {
      fprintf(outfile1,"0");
   } /* for i */

   fprintf(outfile1,"\n");


   outfile2=fopen(fname2,"w");

   fprintf(outfile2,"0.");

   ppp2(sss,v1[j]);

   for (i=k; i<=8; i++) {
      sss2[i-k]=sss[i];
   } /* for i */
   sss2[i-k]='\0';

   fprintf(outfile2,"%s",sss2);

   for (i=j-1; i>=1; i--) {
      ppp2(sss,v1[i]);
      fprintf(outfile2,"%s",sss);
   } /* for i */

   for (i=8-k+1; i<=8; i++) {
      fprintf(outfile2,"0");
   } /* for i */

   fprintf(outfile2,"\n");
   
   fclose(outfile1);
   fclose(outfile2);
   

} /* End */