OpenACC Tutorial - Profiling: Difference between revisions

Jump to navigation Jump to search
no edit summary
No edit summary
No edit summary
Line 117: Line 117:
== How to Enable Compiler Feedback  == <!--T:18-->
== How to Enable Compiler Feedback  == <!--T:18-->
* Edit the Makefile
* Edit the Makefile
CXX=pgc++
  CXX=nvc++
CXXFLAGS=-fast -Minfo=all,intensity,ccff LDFLAGS=${CXXFLAGS}
  CXXFLAGS=-fast -Minfo=all,intensity,ccff
  LDFLAGS=${CXXFLAGS}
* Rebuild
* Rebuild
</translate>
</translate>
Line 124: Line 125:
|make
|make
|result=
|result=
pgc++ CXXFLAGS=-fast -Minfo=all,intensity,ccff LDFLAGS=-fast -fast   -c -o main.o main.cpp
nvc++ -fast -Minfo=all,intensity,ccff  -c -o main.o main.cpp
"vector.h", line 30: warning: variable "vcoefs" was declared but never
initialize_vector(vector &, double):
          referenced
    20, include "vector.h"
    double *vcoefs=v.coefs;
           36, Intensity = 0.0
            ^
 
_Z17initialize_vectorR6vectord:
           37, Intensity = 0.0
               Memory set idiom, loop replaced by call to __c_mset8
               Memory set idiom, loop replaced by call to __c_mset8
_Z3dotRK6vectorS1_:
dot(const vector &, const vector &):
           27, Intensity = 1.00  
    21, include "vector_functions.h"
              Generated 3 alternate versions of the loop
           27, Intensity = 1.00
               Generated vector sse code for the loop
               Generated vector simd code for the loop containing reductions
               Generated 2 prefetch instructions for the loop
               FMA (fused multiply-add) instruction(s) generated
_Z6waxpbydRK6vectordS1_S1_:
waxpby(double, const vector &, double, const vector &, const vector &):
           39, Intensity = 1.00  
    21, include "vector_functions.h"
           39, Intensity = 1.00
               Loop not vectorized: data dependency
               Loop not vectorized: data dependency
               Loop unrolled 4 times
              Generated vector simd code for the loop
_Z26allocate_3d_poisson_matrixR6matrixi:
               Loop unrolled 2 times
              FMA (fused multiply-add) instruction(s) generated
allocate_3d_poisson_matrix(matrix &, int):
    22, include "matrix.h"
           43, Intensity = 0.0
           43, Intensity = 0.0
              Loop not fused: different loop trip count
           44, Intensity = 0.0
           44, Intensity = 0.0
               Loop not vectorized/parallelized: loop count too small
               Loop not vectorized/parallelized: loop count too small
           45, Intensity = 0.0
           45, Intensity = 0.0
              Loop unrolled 3 times (completely unrolled)
           57, Intensity = 0.0
           57, Intensity = 0.0
           59, Intensity = 0.0
           59, Intensity = 0.0
               Loop not vectorized: data dependency
               Loop not vectorized: data dependency
_Z6matvecRK6matrixRK6vectorS4_:
matvec(const matrix &, const vector &, const vector &):
    23, include "matrix_functions.h"
           29, Intensity = (num_rows*((row_end-row_start)*        2))/(num_rows+(num_rows+(num_rows+((row_end-row_start)+(row_end-row_start)))))
           29, Intensity = (num_rows*((row_end-row_start)*        2))/(num_rows+(num_rows+(num_rows+((row_end-row_start)+(row_end-row_start)))))
           33, Intensity = 1.00  
              FMA (fused multiply-add) instruction(s) generated
               Unrolled inner loop 4 times
           33, Intensity = 1.00
               Generated 2 prefetch instructions for the loop
               Loop not vectorized: non-stride-1 array reference
              Loop not vectorized: mixed data types
              Loop unrolled 2 times
               FMA (fused multiply-add) instruction(s) generated
main:
main:
     61, Intensity = 16.00 
     38, allocate_3d_poisson_matrix(matrix &, int) inlined, size=41 (inline) file main.cpp (29)
        Loop not vectorized/parallelized: potential early exits
          43, Intensity = 0.0
pgc++ CXXFLAGS=-fast -Minfo=all,intensity,ccff LDFLAGS=-fast main.o -o cg.x -fast
              Loop not fused: different loop trip count
          44, Intensity = 0.0
              Loop not vectorized/parallelized: loop count too small
          45, Intensity = 0.0
          57, Intensity = 0.0
              Loop not fused: function call before adjacent loop
          59, Intensity = 0.0
              Loop not vectorized: data dependency
    42, allocate_vector(vector &, unsigned int) inlined, size=3 (inline) file main.cpp (24)
    43, allocate_vector(vector &, unsigned int) inlined, size=3 (inline) file main.cpp (24)
    44, allocate_vector(vector &, unsigned int) inlined, size=3 (inline) file main.cpp (24)
    45, allocate_vector(vector &, unsigned int) inlined, size=3 (inline) file main.cpp (24)
    46, allocate_vector(vector &, unsigned int) inlined, size=3 (inline) file main.cpp (24)
    48, initialize_vector(vector &, double) inlined, size=5 (inline) file main.cpp (34)
          36, Intensity = 0.0
              Loop not vectorized/parallelized: not countable
    49, initialize_vector(vector &, double) inlined, size=5 (inline) file main.cpp (34)
          36, Intensity = 0.0
              Loop not vectorized/parallelized: not countable
    52, waxpby(double, const vector &, double, const vector &, const vector &) inlined, size=10 (inline) file main.cpp (33)
          39, Intensity = 0.0
              Memory copy idiom, loop replaced by call to __c_mcopy8
    53, matvec(const matrix &, const vector &, const vector &) inlined, size=19 (inline) file main.cpp (20)
          29, Intensity = [symbolic], and not printable, try the -Mpfi -Mpfo options
              Loop not fused: different loop trip count
          33, Intensity = 1.00
              Loop not vectorized: non-stride-1 array reference
              Loop not vectorized: mixed data types
              Loop unrolled 2 times
    54, waxpby(double, const vector &, double, const vector &, const vector &) inlined, size=10 (inline) file main.cpp (33)
          27, FMA (fused multiply-add) instruction(s) generated
          29, FMA (fused multiply-add) instruction(s) generated
          33, FMA (fused multiply-add) instruction(s) generated
          39, Intensity = 0.67
              Loop not fused: different loop trip count
              Loop not vectorized: data dependency
              Generated vector simd code for the loop
              Loop unrolled 4 times
              FMA (fused multiply-add) instruction(s) generated
    56, dot(const vector &, const vector &) inlined, size=9 (inline) file main.cpp (21)
          27, Intensity = 1.00
              Loop not fused: function call before adjacent loop
              Generated vector simd code for the loop containing reductions
    61, Intensity = 0.0
    62, waxpby(double, const vector &, double, const vector &, const vector &) inlined, size=10 (inline) file main.cpp (33)
          39, Intensity = 0.0
              Memory copy idiom, loop replaced by call to __c_mcopy8
    65, dot(const vector &, const vector &) inlined, size=9 (inline) file main.cpp (21)
          27, Intensity = 1.00
              Loop not fused: different loop trip count
              Generated vector simd code for the loop containing reductions
    67, waxpby(double, const vector &, double, const vector &, const vector &) inlined, size=10 (inline) file main.cpp (33)
          39, Intensity = 0.67
              Loop not fused: different loop trip count
              Loop not vectorized: data dependency
              Generated vector simd code for the loop
              Loop unrolled 4 times
    72, matvec(const matrix &, const vector &, const vector &) inlined, size=19 (inline) file main.cpp (20)
          29, Intensity = [symbolic], and not printable, try the -Mpfi -Mpfo options
              Loop not fused: different loop trip count
          33, Intensity = 1.00
              Loop not vectorized: non-stride-1 array reference
              Loop not vectorized: mixed data types
              Loop unrolled 2 times
    73, dot(const vector &, const vector &) inlined, size=9 (inline) file main.cpp (21)
          27, Intensity = 1.00
              Loop not fused: different loop trip count
              Generated vector simd code for the loop containing reductions
    77, waxpby(double, const vector &, double, const vector &, const vector &) inlined, size=10 (inline) file main.cpp (33)
          39, Intensity = 0.67
              Loop not fused: different loop trip count
              Loop not vectorized: data dependency
              Generated vector simd code for the loop
              Loop unrolled 4 times
    78, waxpby(double, const vector &, double, const vector &, const vector &) inlined, size=10 (inline) file main.cpp (33)
          39, Intensity = 0.67
              Loop not fused: function call before adjacent loop
              Loop not vectorized: data dependency
              Generated vector simd code for the loop
              Loop unrolled 4 times
    88, free_vector(vector &) inlined, size=2 (inline) file main.cpp (29)
    89, free_vector(vector &) inlined, size=2 (inline) file main.cpp (29)
    90, free_vector(vector &) inlined, size=2 (inline) file main.cpp (29)
    91, free_vector(vector &) inlined, size=2 (inline) file main.cpp (29)
    92, free_matrix(matrix &) inlined, size=5 (inline) file main.cpp (73)
nvc++ main.o -o cg.x -fast -Minfo=all,intensity,ccff
}}
}}
<translate>
<translate>
== Computational Intensity  == <!--T:19-->
== Computational Intensity  == <!--T:19-->
Computational Intensity of a loop is a measure of how much work is being done compared to memory operations.
Computational Intensity of a loop is a measure of how much work is being done compared to memory operations.
Bureaucrats, cc_docs_admin, cc_staff, rsnt_translations
2,837

edits

Navigation menu