# Walkthrough: Intel Advisor offload modeling & GPU roofline on the N-body demo.
# Intended to be run step by step (interactively) on Intel DevCloud with the
# oneAPI 2022.3 toolchain. Requires: $APM set by setvars.sh (Advisor python dir).

# On DevCloud: access node with GPU support
#qsub -I -l nodes=1:gpu:ppn=2 -d .
qsub -I -l nodes=1:gen9:ppn=2 -d .

# Setup Intel oneAPI 2022.3 environment
source /opt/intel/oneapi/setvars.sh
which dpcpp icx sycl-ls advisor

# Check devices
sycl-ls

################
# OpenMP Nbody #
################

# Get Nbody code from https://github.com/fbaru-dev/nbody-demo
git clone https://github.com/fbaru-dev/nbody-demo.git

# Go to best optimized versions
pushd nbody-demo/ver8

# Build the executable (without AVX512 flags: OPTFLAGS=)
#make -j
make -j OPTFLAGS=

# Run without Advisor, showing that OpenMP is enabled
# (KMP_AFFINITY=verbose makes the OpenMP runtime print thread placement)
export KMP_AFFINITY=verbose
./nbody.x 64000
export KMP_AFFINITY=verbose,scatter
./nbody.x 64000
export KMP_AFFINITY=scatter

# Do Offload Advisor data collection targeting Gen9 GPU (takes about 7 minutes)
# * reduce the analysis to outer OpenMP loops: --markup omp
# * switch off the dependency analysis for OpenMP loop: --collect basic
time advisor-python "$APM/run_oa.py" --verbose 3 ./adv_oa_64000 --config gen9_gt2 --markup omp --collect basic --out-dir ./result_oa_64000 -- ./nbody.x 64000
#time advisor-python "$APM/run_oa.py" --verbose 3 ./adv_oa_64000.gen11 --config gen11_icl --markup omp --collect basic --out-dir ./result_oa_64000.gen11 -- ./nbody.x 64000

# Open the HTML result report
firefox ./result_oa_64000/report.html

# Move the "Gen9 GT2 configuration" sliders to virtually increase the target performance
# Download the file or open it and copy-paste into file scalers.toml
# or copy result_oa_64000/config.toml to file scalers.toml and edit it

# Re-do performance estimation with scaled target values in scalers.toml
# EU_count = 192
# L3_BW = 1024000000000
#cat scalers.toml
#time advisor-python "$APM/analyze.py" --verbose 3 ./adv_oa_64000 --config gen9_gt2 --config scalers.toml --out-dir ./result_oa_64000_scalers

# Open the new HTML result report
#firefox ./result_oa_64000_scalers/report.html

# Re-do performance estimation with scaled target values in --set-parameter flag
# (re-uses the existing adv_oa_64000 collection; no re-run of the app needed)
time advisor-python "$APM/analyze.py" --verbose 3 ./adv_oa_64000 --config gen9_gt2 --set-parameter "scale={EU_count=192,L3_BW=1024000000000}" --out-dir ./result_oa_64000_parameters

# Open the new HTML result report
firefox ./result_oa_64000_parameters/report.html

# Go back to main directory
popd

###############
# DPC++ Nbody #
###############

# Get oneAPI samples from https://github.com/oneapi-src/oneAPI-samples
git clone https://github.com/oneapi-src/oneAPI-samples.git

# Go to Nbody code
pushd oneAPI-samples/DirectProgramming/DPC++/N-BodyMethods/Nbody

# Build the executable (-p: safe to re-run if build/ already exists)
mkdir -p build
cd build
cmake ..
make -j VERBOSE=1

# Check available devices
sycl-ls

# Run with checking the device
# (SYCL_PI_TRACE=1 prints the plugin-interface calls, incl. device selection)
SYCL_PI_TRACE=1 src/nbody 64000
#SYCL_PI_TRACE=1 SYCL_DEVICE_FILTER=level_zero:gpu src/nbody 64000
SYCL_PI_TRACE=1 SYCL_DEVICE_FILTER=opencl:gpu src/nbody 64000

# Run Roofline analysis (survey + tripcounts)
time advisor --collect=survey --profile-gpu --project-dir=./adv_roof -- src/nbody 64000
time advisor --collect=tripcounts --stacks --flop --profile-gpu --enable-data-transfer-analysis --data-transfer=light --project-dir=./adv_roof -- src/nbody 64000

# Create Roofline report
time advisor --report=roofline --gpu --project-dir=./adv_roof --report-output=./adv_roof/roofline.html

# Open the new Roofline HTML report
firefox ./adv_roof/roofline.html

# GPU-to-GPU projection
# Possible values: gen11_icl gen12_dg1 gen12_tgl gen9_gt2 gen9_gt3e gen9_gt4e xehp_sdv_480eu xehpg_256xve xehpg_512xve
time advisor --collect=projection --profile-gpu --config=gen11_icl --project-dir=./adv_roof

# Open the new projection HTML reports
#firefox ./adv_roof/e000/report/report.html
firefox ./adv_roof/e000/report/advisor-report.html

# Go back to main directory
popd