<!--T:222-->
Most Fluent jobs should use the following <i>by node</i> script to minimize solution latency and maximize performance over as few nodes as possible. Very large jobs, however, might wait less in the queue if they use a <i>by core</i> script. On the other hand, the startup time of a job using many nodes can be significantly longer, offsetting some of the benefit. Also be aware that running large jobs over an unspecified number of potentially very many nodes makes them far more vulnerable to crashing if any of the compute nodes fail during the simulation. The scripts ensure Fluent uses shared memory for communication when run on a single node, or distributed memory (using MPI and the appropriate HPC interconnect) when run over multiple nodes. The two Narval tabs may provide a more robust alternative if Fluent hangs during the initial automatic mesh partitioning phase when the standard Intel-based scripts are used with the parallel solver. Another option is to perform the mesh partitioning manually in the Fluent GUI and then run the job again on the cluster with the Intel scripts; doing so lets you inspect the partition statistics and specify the partitioning method to obtain an optimal result. The number of mesh partitions should be an integral multiple of the number of cores. For optimal efficiency there should be at least 10,000 cells per core; in other words, requesting too many cores for the size of the problem will result in poor performance, since parallel scaling eventually drops off.
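The 10,000 cells per core guideline can be checked with simple arithmetic before requesting resources. The short script below is an illustrative sketch only; the cell count and file name are hypothetical placeholders:

{{File
|name=check-core-count.sh
|lang="bash"
|contents=
#!/bin/bash
# Hypothetical sizing check: estimate the maximum sensible core count
# for a mesh, assuming the guideline of at least 10,000 cells per core.
CELLS=1280000                 # replace with your actual mesh cell count
MAX_CORES=$((CELLS / 10000))  # 1,280,000 cells -> at most 128 cores
echo "Request at most $MAX_CORES cores for this mesh."
}}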
<!--T:2300-->
<tabs>
<tab name="Multinode (by node)">
{{File
|name=script-flu-bynode-intel.sh
|lang="bash"
|contents=
…
}}
</tab>
<tab name="Multinode (by core)"> | <tab name="Multinode (by core)"> | ||
{{File | {{File | ||
|name=script-flu-bycore.sh | |name=script-flu-bycore-intel.sh | ||
|lang="bash" | |lang="bash" | ||
|contents= | |contents= | ||
…
}}
</tab>
<tab name="Multinode (by node, narval)"> | |||
{{File | |||
|name=script-flu-bynode-openmpi.sh | |||
|lang="bash" | |||
|contents= | |||
#!/bin/bash | |||
<!--T:5302--> | |||
#SBATCH --account=def-group # Specify account name | |||
#SBATCH --time=00-03:00 # Specify time limit dd-hh:mm | |||
#SBATCH --nodes=1 # Specify number of compute nodes (1 or more) | |||
#SBATCH --ntasks-per-node=64 # Specify number of cores per node (narval 64) | |||
#SBATCH --mem=0 # Do not change (allocates all memory per compute node) | |||
#SBATCH --cpus-per-task=1 # Do not change | |||
<!--T:5733-->
rm -f cleanup* core*  # Remove leftover Fluent cleanup scripts and core files from previous runs
<!--T:5306-->
module load StdEnv/2023
module load ansys/2023R2  # only this version (applies to narval only)
<!--T:5735-->
export OPENMPI_ROOT=$EBROOTOPENMPI
# Build a machinefile listing each allocated host and its core count (host:ncores)
slurm_hl2hl.py --format ANSYS-FLUENT > /tmp/mf-$SLURM_JOB_ID
for i in `cat /tmp/mf-$SLURM_JOB_ID {{!}} uniq`; do echo "${i}:$(cat /tmp/mf-$SLURM_JOB_ID {{!}} grep $i {{!}} wc -l)" >> /tmp/machinefile-$SLURM_JOB_ID; done
NCORES=$((SLURM_NNODES * SLURM_NTASKS_PER_NODE * SLURM_CPUS_PER_TASK))  # total core count
<!--T:5310-->
# Specify 2d, 2ddp, 3d or 3ddp and replace sample with your journal filename …
if [ "$SLURM_NNODES" == 1 ]; then
  fluent -g 2ddp -t $NCORES -affinity=0 -i sample.jou
else
  fluent -g 2ddp -t $NCORES -affinity=0 -cnf=/tmp/machinefile-$SLURM_JOB_ID -mpi=openmpi -ssh -i sample.jou
fi
}}
</tab>
<tab name="Multinode (by core, narval)"> | |||
{{File | |||
|name=script-flu-bycore-openmpi.sh | |||
|lang="bash" | |||
|contents= | |||
#!/bin/bash | |||
<!--T:6302--> | |||
#SBATCH --account=def-group # Specify account name | |||
#SBATCH --time=00-03:00 # Specify time limit dd-hh:mm | |||
##SBATCH --nodes=2 # Optional (uncomment to specify number of compute nodes) | |||
#SBATCH --ntasks=16 # Specify total number of cores | |||
#SBATCH --mem-per-cpu=4G # Specify memory per core | |||
#SBATCH --cpus-per-task=1 # Do not change | |||
<!--T:6733-->
rm -f cleanup* core*  # Remove leftover Fluent cleanup scripts and core files from previous runs
<!--T:6306-->
module load StdEnv/2023
module load ansys/2023R2  # only this version (applies to narval only)
<!--T:6735-->
export OPENMPI_ROOT=$EBROOTOPENMPI
# Build a machinefile listing each allocated host and its core count (host:ncores)
slurm_hl2hl.py --format ANSYS-FLUENT > /tmp/mf-$SLURM_JOB_ID
for i in `cat /tmp/mf-$SLURM_JOB_ID {{!}} uniq`; do echo "${i}:$(cat /tmp/mf-$SLURM_JOB_ID {{!}} grep $i {{!}} wc -l)" >> /tmp/machinefile-$SLURM_JOB_ID; done
NCORES=$((SLURM_NTASKS * SLURM_CPUS_PER_TASK))  # total core count
<!--T:6310-->
# Specify 2d, 2ddp, 3d or 3ddp and replace sample with your journal filename …
if [ "$SLURM_NNODES" == 1 ]; then
  fluent -g 2ddp -t $NCORES -affinity=0 -i sample.jou
else
  fluent -g 2ddp -t $NCORES -affinity=0 -cnf=/tmp/machinefile-$SLURM_JOB_ID -mpi=openmpi -ssh -i sample.jou
fi
}}
</tab>
</tabs>
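Once a script has been customized (account, time limit, number of cores, and journal file name), it can be submitted to the queue with <code>sbatch</code>, for example <code>sbatch script-flu-bynode-openmpi.sh</code>.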