Why is Compaq Visual Fortran (CVF) running under WinXP so much faster than gfortran or Intel ifort on Win10Pro 64?

117 Views Asked by At

Computer: Intel i7-7820. OS: Windows 10 Pro 64-bit, and WinXP Pro running in Oracle VM VirtualBox. Fortran compilers: Compaq Visual Fortran (CVF) Pro 6.6.C; Intel Fortran ifort version 2024.0.2.27; gfortran version 7.3.0.

I’ve compiled the exact same source code with three compilers, each with full optimization, as a 32-bit executable. The code performs 50 timing trials of a user-specified number of complex fast Fourier transforms (FFTs) and then averages the number of FFTs per second. The CVF executable was built in WinXP Pro running within a VM, and was then run both in the XP OS (within the VM) and on the host machine. The gfortran and Intel executables were compiled in the Win10 Pro 64-bit OS as 32-bit code. I’m seeing over an order of magnitude difference in speed between the CVF-generated code and the Intel- and gfortran-generated code. Am I doing something stupid (I do that on a regular basis), or is this normal? How can I get CVF speeds with a newer Fortran compiler?

Speeds and executable file size:

CVF on XP within VM: 37774 FFT/sec, 312 KB
CVF on Win10 Pro 64-bit: 39451 FFT/sec, 312 KB
Intel ifort on Win10 Pro 64-bit: 2588 FFT/sec, 29 KB
gfortran on Win10 Pro 64-bit: 2421 FFT/sec, 419 KB

Source code:

c Serial FFT timing benchmark.
c
c Prompts for the number of FFTs per trial and for an output file
c name, fills the shared buffer Y with a test signal, and then runs
c NTRIALS timed trials of NUMFFT calls to FFTX each.  Every trial
c reports the CPU time (cpu_time) and the wall-clock time
c (date_and_time) together with the FFT rate derived from each.
c The per-trial rates are averaged at the end.  All results are
c written both to the console and to the output file (unit 10).
      program main
      implicit none
c
      character filename*100
      integer dtstart(8),dtend(8)
      integer n,k,numfft,ntrials,ndir,itrial,j,ios
      integer ncpu,ntime
      real y,pi,timestart,timeend,deltim,ratcpu,tt,rattime
      real sumratcpu,sumrattime,avecpu,avetime
c shared with FFTX: signal buffer, log2(length), length, FFT count
      common /ddata/y(16384),n,k,numfft
c
c number of timing trials
      ntrials=50
c
      write (*,4)
    4 format ('Enter number of ffts per trial')
      read (*,*) numfft
c a non-positive count would give meaningless (0/0) rates
      if (numfft.lt.1) stop 'number of ffts per trial must be >= 1'
      write (*,8)
    8 format ('Enter output file name (e.g. fftout.txt) ')
      read (*,*) filename
c
      open (unit=10,file=filename,
     &      form='formatted',status='unknown',iostat=ios)
      if (ios.ne.0) stop 'cannot open output file'
      write (10,67) numfft,ntrials
   67 format (' GNU Serial Processing  '/i6,4x,'Number of FFTs'/
     &i6,4x,'Number of trials')
c
c test signal: one sine cycle plus a 3x larger ten-cycle sine
c spread over the K = 2**N points of the buffer
      pi=3.141592653589793
      n=14
      k=2**n
      do j=1,k
        y(j)=sin(2.0*pi*j/float(k)) + 3.0*sin(20.0*pi*j/float(k))
      end do
c
c header line of the output file
      write (10,6)
    6 format ('total CPU time, clock time,  FFT rate')
c
      sumratcpu=0.0
      sumrattime=0.0
      ncpu=0
      ntime=0
      ndir=1
      do itrial=1,ntrials
        write (*,5) itrial
    5   format (//'ITRIAL: ',i5)
c
        call cpu_time(timestart)
        call date_and_time(values=dtstart)
        do j=1,numfft
          call fftx(n,k,ndir)
        end do
        call cpu_time(timeend)
        call date_and_time(values=dtend)
c
        deltim=timeend-timestart
c wall-clock seconds; VALUES(5..8) = hour, minute, second, millisec.
c The millisecond term must subtract dtstart(8), not dtstart(7).
        tt=(dtend(5)-dtstart(5))*3600 +
     &     (dtend(6)-dtstart(6))*60 +
     &     (dtend(7)-dtstart(7)) +
     &     (dtend(8)-dtstart(8))*0.001
c a trial that runs across midnight sees the hour wrap to zero
        if (tt.lt.0.0) tt=tt+86400.0
c
c a trial shorter than the timer resolution has no measurable rate:
c report 0 for it and leave it out of the average
        if (deltim.gt.0.0) then
          ratcpu=numfft/deltim
          sumratcpu=sumratcpu+ratcpu
          ncpu=ncpu+1
        else
          ratcpu=0.0
        end if
        if (tt.gt.0.0) then
          rattime=numfft/tt
          sumrattime=sumrattime+rattime
          ntime=ntime+1
        else
          rattime=0.0
        end if
c
        write (10,7) deltim,ratcpu,tt,rattime
        write (*,7) deltim,ratcpu,tt,rattime
    7   format (/'cpu time = ',6x,f10.3,'  fft/sec = ',f10.3/
     &'date_time time = ',f10.3,'  fft/sec = ',f10.3)
      end do
c
c averages over the trials that produced a measurable time
      avecpu=0.0
      if (ncpu.gt.0) avecpu=sumratcpu/ncpu
      avetime=0.0
      if (ntime.gt.0) avetime=sumrattime/ntime
      write (10,2) avecpu,avetime
      write (*,2) avecpu,avetime
    2 format ('average rate (cpu)= ',f10.3/
     &'average rate (date_time) = ',f10.3)
      close (unit=10)
c
      stop
      end
      SUBROUTINE FFTX(N,NB,NDIR)
C Radix-2 complex FFT of the real signal stored in COMMON /DDATA/.
C
C Arguments (inputs only; none of them is modified):
C   N     log2 of the transform length, 0 <= N <= 14
C   NB    transform length, must equal 2**N (at most 16384)
C   NDIR  +1  forward transform, twiddle factor exp(-i*pi/K)
C         -1  inverse transform, twiddle factor exp(+i*pi/K);
C             no 1/NB scaling is applied
C
C Data flow:
C   The first NB values of Y (COMMON /DDATA/) are copied into the
C   local complex work array A with zero imaginary part and the
C   transform is carried out in A.  Y is not modified.  A is local
C   and is neither returned nor stored, so the caller sees no
C   result from this routine.
C   NOTE(review): because nothing computed here is observable, an
C   optimizing compiler may be entitled to drop the work, which
C   would distort timings -- confirm against the generated code.
C
C Ordering of the NB points (dx = sample spacing):
C   index 1        <-> t = 0
C   index 2        <-> t = +1*dx
C   index NB/2+1   <-> t = +(NB/2)*dx
C   index NB/2+2   <-> t = -(NB/2-1)*dx
C   index NB       <-> t = -1*dx
C   Frequencies use the same layout: indices 1..NB/2+1 hold the
C   non-negative frequencies, NB/2+2..NB the negative ones.
C   With XMAX = NB*dx:  FMAX = 1/(2*dx) = NB/(2*XMAX),
C   DELTAF = 1/XMAX.
C
      IMPLICIT NONE
      INTEGER N,NB,NDIR
      INTEGER MAXLOG,MAXNB
      PARAMETER (MAXLOG=14,MAXNB=2**MAXLOG)
      INTEGER N11,N22,N33
      INTEGER J,K,KX,L,LPK,M,ME,NBD2
      REAL Y,PI
      COMPLEX A(MAXNB),U,W,T
      COMMON /DDATA/Y(MAXNB),N11,N22,N33
C
C Refuse sizes that would index outside A and Y.
      IF (N.LT.0 .OR. N.GT.MAXLOG) STOP 'FFTX: N out of range'
      IF (NB.NE.2**N) STOP 'FFTX: NB must equal 2**N'
C
C Load the real input as complex values.
      DO KX=1,NB
        A(KX)=CMPLX(Y(KX),0.0)
      END DO
C
C Bit-reversal reordering: J tracks the bit-reversed partner of L.
      NBD2=NB/2
      J=1
      DO L=1,NB-1
        IF (L.LT.J) THEN
          T=A(J)
          A(J)=A(L)
          A(L)=T
        END IF
        K=NBD2
        DO WHILE (K.LT.J)
          J=J-K
          K=K/2
        END DO
        J=J+K
      END DO
C
C Butterfly stages: stage M combines blocks of ME = 2**M points.
      PI=3.141592653589793
      DO M=1,N
        ME=2**M
        K=ME/2
        U=CMPLX(1.0,0.0)
        W=CMPLX(COS(PI/K),-NDIR*SIN(PI/K))
        DO J=1,K
          DO L=J,NB,ME
            LPK=L+K
            T=A(LPK)*U
            A(LPK)=A(L)-T
            A(L)=A(L)+T
          END DO
C advance the twiddle factor once per J, after its butterflies
          U=U*W
        END DO
      END DO
      RETURN
      END

I was expecting newer compilers to generate code comparable in speed to the old CVF compiler.

0

There are 0 best solutions below