[Pw_forum] Problem in running GPU version of PW on big input

马璐 somnusfish at gmail.com
Wed Mar 5 14:53:43 CET 2014


Hi,
I’ve tested some small examples using pw-gpu.x, and it works well. However, when I try to run a big input with the environment variables set as follows
> export PHI_DGEMM_SPLIT=0.95
> export PHI_ZGEMM_SPLIT=0.95
I get the following messages:
> [root at node19 CHNO]# mpirun -np 2 ../bin/pw-gpu.x -in CHNO.scf.in > gup0.95.txt
> application called MPI_Abort(MPI_COMM_WORLD, 1) - process 0
> --------------------------------------------------------------------------
> mpirun noticed that the job aborted, but has no info as to the process
> that caused that situation.
> —————————————————————————————————————
When I reset the environment variables as below
> export PHI_DGEMM_SPLIT=0.5
> export PHI_ZGEMM_SPLIT=0.5
I get the following messages:
> [root at node19 CHNO]# mpirun -np 2 ../bin/pw-gpu.x -in CHNO.scf.in > gup0.5.txt
> --------------------------------------------------------------------------
> mpirun noticed that the job aborted, but has no info as to the process
> that caused that situation.
> —————————————————————————————————————
Then I set the environment variables as follows, and got this:
> export PHI_DGEMM_SPLIT=0.1
> export PHI_ZGEMM_SPLIT=0.1
> [root at node19 CHNO]# mpirun -np 2 ../bin/pw-gpu.x -in CHNO.scf.in > gup0.1.txt
> --------------------------------------------------------------------------
> mpirun noticed that the job aborted, but has no info as to the process
> that caused that situation.
> —————————————————————————————————————
I am wondering whether the input is too large for the GPU version of pw to run. Could anybody tell me the limit on the input size for which the GPU version of pw runs correctly?
Here is my input:
> &control
>     calculation='scf'
>     pseudo_dir = './',
>     tstress = .true.
>     tprnfor = .true.
>     outdir='./Out_data/',
>     prefix='CHNO',
>  /
>  &system
>      ibrav=  0, celldm(1)=1, nat=  218, ntyp= 4, nbnd= 500,
>      ecutwfc = 100.0,
>      occupations='smearing',
>      smearing='gaussian',
>      degauss=0.01,
>  /
>  &electrons
> 	diagonalization='david'
>    	mixing_mode = 'plain'
>    	conv_thr =  1.0d-6
> 	diago_thr_init=1.0e-2
>  /
>  &ions
>       ion_dynamics = 'bfgs',
> 	  trust_radius_min =1D-5,
>  /
> 
> CELL_PARAMETERS {cubic}
> 40       0      0
> 0        45     0
> 0        0      65
> ATOMIC_SPECIES
> H   1   H.pz-vbc.UPF
> C   6   C.pz-vbc.UPF
> N   7   N.pz-vbc.UPF
> O   8   O.pz-mt.UPF
> ATOMIC_POSITIONS {angstrom}
> H -2.11358 0.424689 9.1544 0 0 0
> H -2.40158 -0.550311 7.8704 0 0 0
> H -0.861581 -0.195311 8.2994 0 0 0
> H -1.12158 2.05569 7.7664 0 0 0
> H -3.82958 1.43769 6.7664 0 0 0
> H -3.58858 2.13269 8.3004 0 0 0
> H -3.81858 3.51269 6.2054 0 0 0
> H -0.559581 2.85369 5.7924 0 0 0
> H 0.0354187 1.08569 3.6734 0 0 0
> H 0.689419 3.08369 2.5734 0 0 0
> H 0.0474187 4.12569 3.8264 0 0 0
> H 1.80542 4.91369 5.4044 0 0 0
> H 4.21242 4.28669 6.0504 0 0 0
> H 2.37442 0.733689 2.5214 0 0 0
> H 6.20742 2.39669 5.2264 0 0 0
> H 4.58842 -0.415311 2.3944 0 0 0
> H 6.50642 0.424689 3.7264 0 0 0
> H -1.31058 0.261689 2.3194 0 0 0
> H -3.68258 1.71869 1.2894 0 0 0
> H -4.66658 -0.513311 0.928404 0 0 0
> H -2.31758 -1.39831 2.2354 0 0 0
> H -5.17058 -1.09931 3.2294 0 0 0
> H -5.07158 0.672689 3.1004 0 0 0
> H -3.73158 -0.234311 3.8144 0 0 0
> H -4.12358 1.31369 -1.0666 0 0 0
> H -1.59658 1.10169 -2.5326 0 0 0
> H -4.22258 2.17469 -3.2046 0 0 0
> H -2.77858 3.11869 -2.8046 0 0 0
> H -4.85558 1.79469 -5.5976 0 0 0
> H -3.68958 2.33769 -7.8536 0 0 0
> H -0.250581 3.15669 -3.8706 0 0 0
> H -1.38058 3.82569 -8.6786 0 0 0
> H 1.36742 4.31069 -5.3896 0 0 0
> H 0.790419 4.68669 -7.7626 0 0 0
> H -1.10158 -0.387311 -3.9696 0 0 0
> H -2.88558 -2.69831 -4.2416 0 0 0
> H 0.0124187 -2.83231 -4.2326 0 0 0
> H -0.772581 -2.64231 -2.6696 0 0 0
> H -1.70658 -4.71331 -2.9256 0 0 0
> H -1.33858 -4.90931 -4.5786 0 0 0
> H -3.17958 -0.715311 -6.1306 0 0 0
> H -2.32258 -2.12031 -8.5116 0 0 0
> H -3.57858 -0.860311 -8.4166 0 0 0
> H -0.0145813 -1.13931 -7.7466 0 0 0
> H 0.421419 1.52169 -8.9576 0 0 0
> H 2.42342 -0.507311 -9.4006 0 0 0
> H 0.987419 -0.844311 -10.3206 0 0 0
> H 2.74042 2.23969 -9.5276 0 0 0
> H 2.86842 2.63869 -11.2676 0 0 0
> H 1.09542 -0.494311 -6.2676 0 0 0
> H 3.06442 1.48169 -5.2976 0 0 0
> H 4.38542 -0.355311 -5.8676 0 0 0
> H 4.23742 -0.466311 -4.1496 0 0 0
> H 2.54742 -2.15031 -4.6466 0 0 0
> H 2.90742 -2.10331 -6.3556 0 0 0
> H 3.91642 -3.94531 -5.3436 0 0 0
> H 5.19742 -2.85331 -5.7566 0 0 0
> H 5.51742 -3.91131 -3.6326 0 0 0
> H 5.26142 -2.23931 -3.3476 0 0 0
> H 4.12342 -3.78331 -1.8386 0 0 0
> H 3.25242 -4.36531 -3.1136 0 0 0
> H 3.09742 -2.78631 -2.6566 0 0 0
> H 3.14342 1.65869 -2.9286 0 0 0
> H 0.616419 1.07569 -1.6266 0 0 0
> H 1.63842 2.25669 0.146404 0 0 0
> H 3.17042 2.43369 -0.662596 0 0 0
> H 3.29442 4.38769 -2.3716 0 0 0
> H 1.83142 6.43569 -2.8786 0 0 0
> H -0.914581 3.09269 0.251404 0 0 0
> H -0.792581 7.39269 -2.2556 0 0 0
> H -2.84858 4.65169 0.345404 0 0 0
> H -2.81058 6.78469 -0.915596 0 0 0
> H 0.503419 -0.131311 0.442404 0 0 0
> H 2.73742 -1.84731 1.2184 0 0 0
> H 1.52842 -3.18431 -0.488596 0 0 0
> H 0.965419 -4.87131 1.1534 0 0 0
> H -0.785581 -2.16131 -0.0185965 0 0 0
> H -0.847581 -3.17431 1.4354 0 0 0
> H -0.752581 -3.91531 -0.170596 0 0 0
> H 2.59542 -2.54131 3.4014 0 0 0
> H 0.729419 -1.02531 5.0444 0 0 0
> H 2.93642 -2.75131 5.8924 0 0 0
> H 3.19942 -1.09531 5.4984 0 0 0
> H 2.23142 -3.43631 8.2294 0 0 0
> H 1.52742 -2.14831 10.3494 0 0 0
> H 1.97842 1.25069 6.2714 0 0 0
> H 0.850419 0.566689 11.0584 0 0 0
> H 1.41842 3.20569 7.7034 0 0 0
> H 0.823419 2.86769 10.0874 0 0 0
> H -0.976581 -1.89731 6.0944 0 0 0
> H -1.36958 -4.81831 6.1274 0 0 0
> H -3.66058 -4.05131 6.6214 0 0 0
> H -3.20058 -2.40231 6.3804 0 0 0
> H -4.22858 -3.36331 4.4354 0 0 0
> H -2.59158 -3.05731 4.0394 0 0 0
> H -2.22858 -5.24631 3.8874 0 0 0
> H -3.40258 -5.75531 5.0844 0 0 0
> H -4.06258 -6.57031 3.0474 0 0 0
> H -5.12858 -5.24531 3.3424 0 0 0
> H -2.75058 -5.17231 1.6264 0 0 0
> H -3.99458 -4.08431 1.6864 0 0 0
> H -4.24058 -5.59131 1.0694 0 0 0
> C -1.82158 1.32969 7.3574 0 0 0
> C -3.19258 2.00069 7.2924 0 0 0
> C -1.35358 0.998689 5.9494 0 0 0
> C -0.450581 2.02669 3.8864 0 0 0
> C 0.518419 3.17769 3.5544 0 0 0
> C 1.88142 3.12469 4.1934 0 0 0
> C 2.99542 2.20069 3.9554 0 0 0
> C 2.36142 4.05269 5.0524 0 0 0
> C 4.12142 2.64369 4.7104 0 0 0
> C 3.19442 1.07169 3.1284 0 0 0
> C 5.37742 2.02069 4.6444 0 0 0
> C 4.44942 0.431689 3.0524 0 0 0
> C 5.54142 0.906689 3.8034 0 0 0
> C -1.72258 2.09269 3.0314 0 0 0
> C -3.02658 0.846689 1.2844 0 0 0
> C -3.86958 -0.389311 1.6604 0 0 0
> C -4.50358 -0.255311 3.0454 0 0 0
> C -2.41758 0.705689 -0.117596 0 0 0
> C -2.68158 1.01269 -2.5536 0 0 0
> C -3.22758 2.24369 -3.2786 0 0 0
> C -2.94458 2.34869 -4.7446 0 0 0
> C -1.73958 2.85969 -5.3866 0 0 0
> C -3.82958 2.11669 -5.7416 0 0 0
> C -2.01158 3.03069 -6.7736 0 0 0
> C -0.484581 3.29469 -4.9156 0 0 0
> C -1.12058 3.68269 -7.6396 0 0 0
> C 0.424419 3.94469 -5.7716 0 0 0
> C 0.0984187 4.15769 -7.1246 0 0 0
> C -2.99658 -0.304311 -3.2736 0 0 0
> C -1.92658 -2.22731 -4.4486 0 0 0
> C -0.817581 -3.00031 -3.7006 0 0 0
> C -1.00958 -4.51931 -3.6166 0 0 0
> C 0.303419 -5.19731 -3.2126 0 0 0
> C -1.77258 -2.17631 -5.9836 0 0 0
> C -2.58158 -1.15731 -8.0936 0 0 0
> C -1.57058 -0.133311 -8.6046 0 0 0
> C 0.831419 0.550689 -8.6566 0 0 0
> C 1.61242 -0.102311 -9.8236 0 0 0
> C 2.07742 0.880689 -10.8916 0 0 0
> C 1.80642 0.924689 -7.5276 0 0 0
> C 2.60942 0.495689 -5.2006 0 0 0
> C 3.74842 -0.551311 -5.1226 0 0 0
> C 3.26142 -1.99931 -5.3306 0 0 0
> C 4.34142 -3.07031 -5.1136 0 0 0
> C 4.82842 -3.18731 -3.6646 0 0 0
> C 1.76942 0.519689 -3.9216 0 0 0
> C 1.70042 1.08969 -1.5246 0 0 0
> C 2.08142 2.36469 -0.743596 0 0 0
> C 1.55542 3.66169 -1.3026 0 0 0
> C 0.252419 4.30369 -1.0926 0 0 0
> C 2.27042 4.53469 -2.0536 0 0 0
> C 0.257419 5.56769 -1.7576 0 0 0
> C -0.900581 3.99669 -0.335596 0 0 0
> C -0.827581 6.45769 -1.7136 0 0 0
> C -1.99158 4.88969 -0.268596 0 0 0
> C -1.96458 6.11069 -0.966596 0 0 0
> C 2.12842 -0.193311 -0.785596 0 0 0
> C 1.65542 -1.74931 1.1104 0 0 0
> C 1.11242 -3.05231 0.510404 0 0 0
> C -0.418581 -3.07931 0.437404 0 0 0
> C 1.05342 -1.55731 2.5044 0 0 0
> C 1.30742 -1.94331 4.9324 0 0 0
> C 2.51142 -1.84631 5.8724 0 0 0
> C 2.13642 -1.50131 7.2804 0 0 0
> C 1.79042 -0.197311 7.8324 0 0 0
> C 2.03742 -2.37431 8.3064 0 0 0
> C 1.47142 -0.357311 9.2104 0 0 0
> C 1.74042 1.10869 7.3084 0 0 0
> C 1.09742 0.724689 10.0204 0 0 0
> C 1.41842 2.20969 8.1204 0 0 0
> C 1.08442 2.01769 9.4724 0 0 0
> C 0.453419 -3.13631 5.3654 0 0 0
> C -1.45958 -3.95731 8.0704 0 0 0
> C -1.60558 -3.83931 6.5484 0 0 0
> C -3.03058 -3.45131 6.1284 0 0 0
> C -3.29158 -3.65631 4.6234 0 0 0
> C -3.16558 -5.13631 4.2194 0 0 0
> C -4.11758 -5.57331 3.1004 0 0 0
> N -1.79758 0.152689 8.2454 0 0 0
> N -0.785581 2.01669 5.2994 0 0 0
> N 3.65742 3.73869 5.4064 0 0 0
> N -1.94158 1.03369 2.2554 0 0 0
> N -3.17258 1.03369 -1.1716 0 0 0
> N -3.24758 2.44969 -6.9496 0 0 0
> N -1.95858 -0.886311 -3.8786 0 0 0
> N -2.55758 -1.31031 -6.6376 0 0 0
> N -0.284581 -0.339311 -8.2806 0 0 0
> N 2.60742 2.02669 -10.5156 0 0 0
> N 1.75642 0.247689 -6.3736 0 0 0
> N 3.73942 -3.56031 -2.7456 0 0 0
> N 2.29942 1.13269 -2.8616 0 0 0
> N 1.49942 5.64169 -2.3486 0 0 0
> N 1.35742 -0.604311 0.227404 0 0 0
> N 1.73542 -2.05531 3.5404 0 0 0
> N 1.64742 -1.70231 9.4504 0 0 0
> N -0.690581 -2.84831 5.9864 0 0 0
> N -3.74258 -5.05931 1.7704 0 0 0
> O -3.02258 3.26969 6.6894 0 0 0
> O -1.48058 -0.133311 5.4724 0 0 0
> O -2.43458 3.09369 3.0144 0 0 0
> O -3.08258 -1.55931 1.6744 0 0 0
> O -1.24458 0.347689 -0.245596 0 0 0
> O -4.12458 -0.794311 -3.2736 0 0 0
> O 0.590419 -5.22031 -1.9956 0 0 0
> O 1.01342 -5.66731 -4.1276 0 0 0
> O -0.968581 -2.88831 -6.5866 0 0 0
> O -1.95858 0.792689 -9.3116 0 0 0
> O 1.97942 0.620689 -12.0776 0 0 0
> O 2.59242 1.85669 -7.7156 0 0 0
> O 0.662419 -0.0153111 -3.8876 0 0 0
> O 3.16742 -0.778311 -1.0936 0 0 0
> O 1.53642 -4.11831 1.3264 0 0 0
> O 0.00041871 -0.931311 2.6384 0 0 0
> O 0.862419 -4.28931 5.2234 0 0 0
> O -0.753581 -3.32031 8.3804 0 0 0
> O -2.10358 -4.64431 8.4074 0 0 0
> 
> K_POINTS {gamma}
> 1 1 1 0 0 0

I sincerely need your help. Thank you for reading this.



More information about the users mailing list