|
|
Re: "-optc-O3" plus "-prof" gcc failure: msg#00186
lang.haskell.glasgow.bugs
|
Subject: |
Re: "-optc-O3" plus "-prof" gcc failure |
I fixed this in the HEAD a while back, the patch has now been merged
into the 6.4 branch for 6.4.2.
Cheers,
Simon
Johannes Ahlmann wrote:
1.
AMD64 Athlon 2800+, Debian testing/unstable 32bit, Linux 2.6.11.7 #1 x86_64
2.
gcc version 3.3.5 (Debian 1:3.3.5-8)
3.
ghc -O2 -optc-O3 -fglasgow-exts -optc-ffast-math -prof --make nbody.hs
Chasing modules from: nbody.hs
Compiling Main ( nbody.hs, nbody.o )
Prologue junk?: .type s5pB_ret, @function
s5pB_ret:
xorl %eax, %eax
xorl %ecx, %ecx
movl %eax, 16(%esp)
movl %ecx, 20(%esp)
4.
see attachment "verbose"
5.
Flags "-optc-O3" and "-prof" together caused gcc to fail. When "-optc-O3" was
removed, compilation worked fine.
6.
see attachment "nbody.hs"
------------------------------------------------------------------------
--
-- The Great Computer Language Shootout
-- http://shootout.alioth.debian.org/
--
-- Contributed by Christoph Bauer
-- Rewritten in Haskell from C by Don Stewart
-- Rewritten again by Chris Kuklewicz
--
-- -O2 -optc-O3 -fglasgow-exts -fexcess-precision -optc-ffast-math
--
-- This code actually runs faster than the previous entry!
--
import Debug.Trace
import System
import System.IO.Unsafe
import Monad
import Data.Bits
import Data.List
import Data.Array.MArray
import Data.Array.IO
import Data.Array.Base(unsafeRead,unsafeWrite)
import Text.Printf
import Data.IORef
default(Int)
-- Entry point: read the optional step count from the command line, balance
-- the system's momentum, and print the total energy before and after
-- running the simulation.
main = do
    args <- getArgs
    -- No argument means the default step count; otherwise parse the first one.
    let n = case args of
              []        -> 1000000
              (arg : _) -> read arg
    offsetMomentum
    printf "%.9f\n" =<< energy
    advance n
    printf "%.9f\n" =<< energy
-- Offsets of each field inside one body's record in the flat array:
-- position (x, y, z), then velocity (vx, vy, vz), then mass (m).
x, y, z, vx, vy, vz, m :: Int
x  = 0
y  = 1
z  = 2
vx = 3
vy = 4
vz = 5
m  = 6
-- Flat unboxed array of Doubles holding every body's record back to back.
type Bodies = IOUArray Int Double

-- The single global body array, created once from bodiesData.
-- NOINLINE keeps the unsafePerformIO allocation from being duplicated
-- by inlining.
{-# NOINLINE b #-}
b :: Bodies
b = unsafePerformIO (newListArray (0, length bodiesData - 1) bodiesData)
-- Read one slot of the global body array (no bounds check).
get :: Int -> IO Double
get = unsafeRead b

-- Write one slot of the global body array (no bounds check).
set :: Int -> Double -> IO ()
set = unsafeWrite b

-- Overwrite the array, slot by slot from index 0, with the values
-- of bodiesData.
resetB :: IO ()
resetB = zipWithM_ set [0 ..] bodiesData
-- Initial state of the five bodies, in order: sun, jupiter, saturn,
-- uranus, neptune.  Each mkB call takes mass, then position (x y z),
-- then velocity (vx vy vz).
bodiesData :: [Double]
bodiesData = concat
    [ -- sun: starts at the center, at rest
      mkB 1 0 0 0 0 0 0
    , -- jupiter
      mkB 9.54791938424326609e-04
          4.84143144246472090e+00
          (-1.16032004402742839e+00)
          (-1.03622044471123109e-01)
          1.66007664274403694e-03
          7.69901118419740425e-03
          (-6.90460016972063023e-05)
    , -- saturn
      mkB 2.85885980666130812e-04
          8.34336671824457987e+00
          4.12479856412430479e+00
          (-4.03523417114321381e-01)
          (-2.76742510726862411e-03)
          4.99852801234917238e-03
          2.30417297573763929e-05
    , -- uranus
      mkB 4.36624404335156298e-05
          1.28943695621391310e+01
          (-1.51111514016986312e+01)
          (-2.23307578892655734e-01)
          2.96460137564761618e-03
          2.37847173959480950e-03
          (-2.96589568540237556e-05)
    , -- neptune
      mkB 5.15138902046611451e-05
          1.53796971148509165e+01
          (-2.59193146099879641e+01)
          1.79258772950371181e-01
          2.68067772490389322e-03
          1.62824170038242295e-03
          (-9.51592254519715870e-05)
    ]
-- Build the eight-entry record for one body: position, velocity scaled by
-- days_per_year, mass scaled by solar_mass, and a trailing 0 that fills
-- the eighth slot.
mkB m x y z vx vy vz = position ++ velocity ++ [m * solar_mass, 0]
  where
    position = [x, y, z]
    velocity = map (* days_per_year) [vx, vy, vz]

-- Scale factors applied by mkB to masses and velocities respectively.
solar_mass, days_per_year :: Double
solar_mass = 4 * pi * pi
days_per_year = 365.24
-- Index of the last body, not the body count: the loops in this file run
-- while the index is <= nbodies, so five bodies (0 to 4) are processed.
nbodies = 4 -- that is 0 to 4
-- Start of body i's record: each record is 8 slots wide, so shift left by 3.
sh i = i `shiftL` 3

-- Index of the first position slot of body i (the record start itself).
pos i = sh i

-- Index of the first velocity slot of body i (record start OR-ed with 3).
vel i = sh i .|. 3
-- Read the mass of body i: slot 6 of its record in the global array.
mass i = get (sh i .|. massSlot)
  where
    massSlot = 6
-- Give the sun a small velocity so that the total momentum of all bodies
-- sums to zero
-- For every body after the sun, add that body's velocity to the sun's
-- velocity scaled by minus (body mass / sun mass).
offsetMomentum :: IO ()
offsetMomentum = do
    sunMass <- mass 0
    let sunVel = vel 0
        recoil i = do
            bodyMass <- mass i
            addScaled sunVel (negate (bodyMass / sunMass)) (vel i)
    mapM_ recoil [1 .. nbodies]
-- Total energy: for each body i, add its kinetic energy and then the
-- potential energy of every pair (i, j) with j > i.
energy :: IO Double
energy = overBodies 0 0
  where
    -- Outer loop over bodies; the running total is threaded through.
    overBodies i total
      | i > nbodies = return total
      | otherwise = do
          ke <- kineticE i
          total' <- overPairs (i + 1) i $! (total + ke)
          overBodies (i + 1) total'
    -- Inner loop over the partners j of body i, forcing the sum each step.
    overPairs j i total
      | j > nbodies = return total
      | otherwise = do
          pe <- potentialE i j
          overPairs (j + 1) i $! (total + pe)
-- Kinetic energy of body i: 0.5 * mass * |velocity|^2, read from the array.
kineticE i = do
    bodyMass <- mass i
    velX <- field vx
    velY <- field vy
    velZ <- field vz
    return $! 0.5 * bodyMass * (velX * velX + velY * velY + velZ * velZ)
  where
    -- Read the slot at the given field offset inside body i's record.
    field offset = get (offset .|. sh i)
-- Potential energy of the pair (i, j): minus the product of the two masses
-- divided by the distance between the bodies.
potentialE i j = do
    massI <- mass i
    massJ <- mass j
    dx <- delta x
    dy <- delta y
    dz <- delta z
    return $! ((-1) * massI * massJ / sqrt (dx * dx + dy * dy + dz * dz))
  where
    -- Coordinate of body i minus the same coordinate of body j
    -- (body i is read first).
    delta offset = do
        ci <- get (offset .|. sh i)
        cj <- get (offset .|. sh j)
        return (ci - cj)
-- Step size: advance moves each position by dt times the velocity and
-- scales every pairwise velocity change by dt.
dt = 0.01
-- In-place update of three consecutive slots starting at index i:
-- slot(i+k) becomes slot(i+k) + a * slot(j+k) for k = 0, 1, 2.
addScaled i a j = do
    bump i0 j0
    bump i1 j1
    bump i2 j2
  where
    i0 = i
    i1 = succ i0
    i2 = succ i1
    j0 = j
    j1 = succ j0
    j2 = succ j1
    -- Read the destination first, then the source, then store the sum.
    bump dst src = do
        old <- get dst
        new <- get src
        set dst (old + a * new)
-- In-place update of three consecutive slots starting at index i, adding
-- a * jx, a * jy and a * jz to them respectively.
addScaled3 i a jx jy jz = do
    bump i0 jx
    bump i1 jy
    bump i2 jz
  where
    i0 = i
    i1 = succ i0
    i2 = succ i1
    -- Add the scaled component to the value already stored in the slot.
    bump slot component = do
        old <- get slot
        set slot (a * component + old)
-- This is the main code. Essentially all the time is spent here
--
-- advance n runs the body below while n > 0 and then recurses with
-- (pred n), so it performs n simulation steps. Each step visits the
-- bodies in index order through updateVel, starting from body 0.
advance n = when (n>0) $ do
let {-# NOINLINE updateVel #-}
-- updateVel i: finish body i's velocity for this step, move body i, then
-- continue with body i+1; it does nothing once i exceeds nbodies.
updateVel i = when (i <= nbodies) $ do
-- i' turns a field offset into the index of that field in body i's record.
let i' = (.|. sh i)
-- Read body i's mass, position and velocity once, before the inner loop.
im <- get (i' m)
ix <- get (i' x)
iy <- get (i' y)
iz <- get (i' z)
ivx <- get (i' vx)
ivy <- get (i' vy)
ivz <- get (i' vz)
let {-# INLINE updateVel' #-}
-- updateVel' carries body i's velocity in three accumulators, forced with
-- seq on every call, while j runs over the remaining bodies. They are
-- written back to the array only once j passes nbodies.
updateVel' ivx ivy ivz j = ivx `seq` ivy `seq` ivz `seq`
if j > nbodies then do
set (i' vx) ivx
set (i' vy) ivy
set (i' vz) ivz
else do
let j' = (.|. sh j)
jm <- get (j' m)
-- Position of body i minus position of body j, per coordinate.
dx <- liftM (ix-) (get (j' x))
dy <- liftM (iy-) (get (j' y))
dz <- liftM (iz-) (get (j' z))
let distance = sqrt (dx*dx+dy*dy+dz*dz)
mag = dt / (distance * distance * distance)
-- Body j's velocity is updated in the array, scaled by body i's mass.
addScaled3 (vel j) ( im*mag) dx dy dz
-- Body i gets the opposite-signed change, scaled by body j's mass; it
-- stays in the accumulators rather than being written to the array.
let a = -jm*mag
ivx' = ivx+a*dx
ivy' = ivy+a*dy
ivz' = ivz+a*dz
updateVel' ivx' ivy' ivz' $! (j+1)
-- Pair body i only with later bodies: j starts at i+1.
updateVel' ivx ivy ivz $! (i+1)
-- Body i's velocity is now written back; add dt * velocity to its position.
addScaled (pos i) dt (vel i)
updateVel (i+1)
updateVel 0
advance (pred n)
------------------------------------------------------------------------
Glasgow Haskell Compiler, Version 6.4.1, for Haskell 98, compiled by GHC
version 6.4
Using package config file: /usr/lib/ghc-6.4.1/package.conf
Hsc static flags: -static -fscc-profiling
*** Chasing dependencies:
Chasing modules from: nbody.hs
Stable modules:
*** Compiling Main ( nbody.hs, interpreted ):
compile: input file nbody.hs
*** Checking old interface for Main:
Compiling Main ( nbody.hs, nbody.o )
*** Parser:
*** Renamer/typechecker:
*** Desugar:
Result size = 2576
*** Simplify:
Result size = 1776
Result size = 1721
Result size = 1721
*** Specialise:
Result size = 1731
*** Float out (not lambdas, not constants):
Result size = 1805
*** Float inwards:
Result size = 1805
*** Simplify:
Result size = 4502
Result size = 2363
Result size = 2280
Result size = 2270
Result size = 2270
*** Simplify:
Result size = 2248
Result size = 2248
*** Simplify:
Result size = 2262
Result size = 2256
Result size = 2256
*** Demand analysis:
Result size = 2256
*** Worker Wrapper binds:
Result size = 2469
*** GlomBinds:
*** Simplify:
Result size = 2847
Result size = 2092
Result size = 2077
Result size = 2077
*** Float out (not lambdas, constants):
Result size = 2849
*** Common sub-expression:
Result size = 2666
*** Float inwards:
Result size = 2666
*** Liberate case:
Result size = 2666
*** SpecConstr:
Result size = 2867
*** Simplify:
Result size = 2573
Result size = 2365
Result size = 2365
*** Tidy Core:
Result size = 2365
*** CorePrep:
Result size = 2936
*** Stg2Stg:
*** CodeGen:
*** CodeOutput:
*** C Compiler
gcc -x c /tmp/ghc9496.hc -o /tmp/ghc9496.raw_s -DDONT_WANT_WIN32_DLL_SUPPORT
-fno-defer-pop -fomit-frame-pointer -fno-builtin -DSTOLEN_X86_REGS=4 -v -S
-Wimplicit -O -D__GLASGOW_HASKELL__=604 -DPROFILING -O3 -ffast-math
-ffloat-store -I . -I /usr/lib/ghc-6.4.1/include
Reading specs from /usr/lib/gcc-lib/i486-linux/3.3.5/specs
Configured with: ../src/configure -v
--enable-languages=c,c++,java,f77,pascal,objc,ada,treelang --prefix=/usr
--mandir=/usr/share/man --infodir=/usr/share/info
--with-gxx-include-dir=/usr/include/c++/3.3 --enable-shared --with-system-zlib
--enable-nls --without-included-gettext --enable-__cxa_atexit
--enable-clocale=gnu --enable-debug --enable-java-gc=boehm
--enable-java-awt=xlib --enable-objc-gc i486-linux
Thread model: posix
gcc version 3.3.5 (Debian 1:3.3.5-8)
/usr/lib/gcc-lib/i486-linux/3.3.5/cc1 -quiet -v -I . -I
/usr/lib/ghc-6.4.1/include -D__GNUC__=3 -D__GNUC_MINOR__=3
-D__GNUC_PATCHLEVEL__=5 -DDONT_WANT_WIN32_DLL_SUPPORT -DSTOLEN_X86_REGS=4
-D__GLASGOW_HASKELL__=604 -DPROFILING /tmp/ghc9496.hc -quiet -dumpbase
ghc9496.hc -auxbase-strip /tmp/ghc9496.raw_s -O -O3 -Wimplicit -version
-fno-defer-pop -fomit-frame-pointer -fno-builtin -ffast-math -ffloat-store -o
/tmp/ghc9496.raw_s
GNU C version 3.3.5 (Debian 1:3.3.5-8) (i486-linux)
compiled by GNU C version 3.3.5 (Debian 1:3.3.5-8).
GGC heuristics: --param ggc-min-expand=98 --param ggc-min-heapsize=128431
ignoring nonexistent directory "/usr/i486-linux/include"
#include "..." search starts here:
#include <...> search starts here:
.
/usr/lib/ghc-6.4.1/include
/usr/local/include
/usr/lib/gcc-lib/i486-linux/3.3.5/include
/usr/include
End of search list.
*** Mangler
/usr/lib/ghc-6.4.1/ghc-asm /tmp/ghc9496.raw_s /tmp/ghc9496.s 4
Prologue junk?: .type s5pB_ret, @function
s5pB_ret:
xorl %eax, %eax
xorl %ecx, %ecx
movl %eax, 16(%esp)
movl %ecx, 20(%esp)
*** Deleting temp files
Deleting: /tmp/ghc9496.s /tmp/ghc9496.raw_s /tmp/ghc9496.hc
------------------------------------------------------------------------
_______________________________________________
Glasgow-haskell-bugs mailing list
Glasgow-haskell-bugs@xxxxxxxxxxx
http://www.haskell.org/mailman/listinfo/glasgow-haskell-bugs
| |