OpenACC Tutorial - Profiling/fr: Difference between revisions

Jump to navigation Jump to search
Updating to match new version of source page
(Updating to match new version of source page)
(Updating to match new version of source page)
Line 31: Line 31:
|make  
|make  
|result=
|result=
pgc++ -fast  -c -o main.o main.cpp
nvc++   -c -o main.o main.cpp
"vector.h", line 30: warning: variable "vcoefs" was declared but never
nvc++ main.o -o cg.x
      referenced
      double *vcoefs=v.coefs;
                    ^
 
pgc++ main.o -o cg.x -fast
}}
}}


Line 99: Line 94:
======== Data collected at 100Hz frequency
======== Data collected at 100Hz frequency
}}
}}
==Renseignements sur le compilateur==
==Renseignements sur le compilateur==
Avant de travailler sur la routine, nous devons comprendre ce que fait le compilateur; posons-nous les questions suivantes :
Avant de travailler sur la routine, nous devons comprendre ce que fait le compilateur; posons-nous les questions suivantes :
Line 111: Line 107:
* ccff – ajout de renseignements aux fichiers objet pour utilisation future
* ccff – ajout de renseignements aux fichiers objet pour utilisation future


<div class="mw-translate-fuzzy">
== Obtenir les renseignements sur le compilateur  ==
== Obtenir les renseignements sur le compilateur  ==
* Éditez le Makefile.
* Éditez le Makefile.
Line 116: Line 113:
CXXFLAGS=-fast -Minfo=all,intensity,ccff
LDFLAGS=${CXXFLAGS}
CXXFLAGS=-fast -Minfo=all,intensity,ccff
LDFLAGS=${CXXFLAGS}
* Effectuez un nouveau build.
* Effectuez un nouveau build.
</div>
{{Command
{{Command
|make
|make
|result=
|result=
pgc++ CXXFLAGS=-fast -Minfo=all,intensity,ccff LDFLAGS=-fast -fast   -c -o main.o main.cpp
nvc++ -fast -Minfo=all,intensity,ccff  -c -o main.o main.cpp
"vector.h", line 30: warning: variable "vcoefs" was declared but never
initialize_vector(vector &, double):
          referenced
    20, include "vector.h"
    double *vcoefs=v.coefs;
           36, Intensity = 0.0
            ^
 
_Z17initialize_vectorR6vectord:
           37, Intensity = 0.0
               Memory set idiom, loop replaced by call to __c_mset8
               Memory set idiom, loop replaced by call to __c_mset8
_Z3dotRK6vectorS1_:
dot(const vector &, const vector &):
           27, Intensity = 1.00  
    21, include "vector_functions.h"
              Generated 3 alternate versions of the loop
           27, Intensity = 1.00
               Generated vector sse code for the loop
               Generated vector simd code for the loop containing reductions
               Generated 2 prefetch instructions for the loop
               FMA (fused multiply-add) instruction(s) generated
_Z6waxpbydRK6vectordS1_S1_:
waxpby(double, const vector &, double, const vector &, const vector &):
           39, Intensity = 1.00  
    21, include "vector_functions.h"
           39, Intensity = 1.00
               Loop not vectorized: data dependency
               Loop not vectorized: data dependency
               Loop unrolled 4 times
              Generated vector simd code for the loop
_Z26allocate_3d_poisson_matrixR6matrixi:
               Loop unrolled 2 times
              FMA (fused multiply-add) instruction(s) generated
allocate_3d_poisson_matrix(matrix &, int):
    22, include "matrix.h"
           43, Intensity = 0.0
           43, Intensity = 0.0
              Loop not fused: different loop trip count
           44, Intensity = 0.0
           44, Intensity = 0.0
               Loop not vectorized/parallelized: loop count too small
               Loop not vectorized/parallelized: loop count too small
           45, Intensity = 0.0
           45, Intensity = 0.0
              Loop unrolled 3 times (completely unrolled)
           57, Intensity = 0.0
           57, Intensity = 0.0
           59, Intensity = 0.0
           59, Intensity = 0.0
               Loop not vectorized: data dependency
               Loop not vectorized: data dependency
_Z6matvecRK6matrixRK6vectorS4_:
matvec(const matrix &, const vector &, const vector &):
    23, include "matrix_functions.h"
           29, Intensity = (num_rows*((row_end-row_start)*        2))/(num_rows+(num_rows+(num_rows+((row_end-row_start)+(row_end-row_start)))))
           29, Intensity = (num_rows*((row_end-row_start)*        2))/(num_rows+(num_rows+(num_rows+((row_end-row_start)+(row_end-row_start)))))
           33, Intensity = 1.00  
              FMA (fused multiply-add) instruction(s) generated
               Unrolled inner loop 4 times
           33, Intensity = 1.00
               Generated 2 prefetch instructions for the loop
               Loop not vectorized: non-stride-1 array reference
              Loop not vectorized: mixed data types
              Loop unrolled 2 times
               FMA (fused multiply-add) instruction(s) generated
main:
main:
     61, Intensity = 16.00 
     38, allocate_3d_poisson_matrix(matrix &, int) inlined, size=41 (inline) file main.cpp (29)
        Loop not vectorized/parallelized: potential early exits
          43, Intensity = 0.0
pgc++ CXXFLAGS=-fast -Minfo=all,intensity,ccff LDFLAGS=-fast main.o -o cg.x -fast
              Loop not fused: different loop trip count
          44, Intensity = 0.0
              Loop not vectorized/parallelized: loop count too small
          45, Intensity = 0.0
          57, Intensity = 0.0
              Loop not fused: function call before adjacent loop
          59, Intensity = 0.0
              Loop not vectorized: data dependency
    42, allocate_vector(vector &, unsigned int) inlined, size=3 (inline) file main.cpp (24)
    43, allocate_vector(vector &, unsigned int) inlined, size=3 (inline) file main.cpp (24)
    44, allocate_vector(vector &, unsigned int) inlined, size=3 (inline) file main.cpp (24)
    45, allocate_vector(vector &, unsigned int) inlined, size=3 (inline) file main.cpp (24)
    46, allocate_vector(vector &, unsigned int) inlined, size=3 (inline) file main.cpp (24)
    48, initialize_vector(vector &, double) inlined, size=5 (inline) file main.cpp (34)
          36, Intensity = 0.0
              Loop not vectorized/parallelized: not countable
    49, initialize_vector(vector &, double) inlined, size=5 (inline) file main.cpp (34)
          36, Intensity = 0.0
              Loop not vectorized/parallelized: not countable
    52, waxpby(double, const vector &, double, const vector &, const vector &) inlined, size=10 (inline) file main.cpp (33)
          39, Intensity = 0.0
              Memory copy idiom, loop replaced by call to __c_mcopy8
    53, matvec(const matrix &, const vector &, const vector &) inlined, size=19 (inline) file main.cpp (20)
          29, Intensity = [symbolic], and not printable, try the -Mpfi -Mpfo options
              Loop not fused: different loop trip count
          33, Intensity = 1.00
              Loop not vectorized: non-stride-1 array reference
              Loop not vectorized: mixed data types
              Loop unrolled 2 times
    54, waxpby(double, const vector &, double, const vector &, const vector &) inlined, size=10 (inline) file main.cpp (33)
          27, FMA (fused multiply-add) instruction(s) generated
          29, FMA (fused multiply-add) instruction(s) generated
          33, FMA (fused multiply-add) instruction(s) generated
          39, Intensity = 0.67
              Loop not fused: different loop trip count
              Loop not vectorized: data dependency
              Generated vector simd code for the loop
              Loop unrolled 4 times
              FMA (fused multiply-add) instruction(s) generated
    56, dot(const vector &, const vector &) inlined, size=9 (inline) file main.cpp (21)
          27, Intensity = 1.00
              Loop not fused: function call before adjacent loop
              Generated vector simd code for the loop containing reductions
    61, Intensity = 0.0
    62, waxpby(double, const vector &, double, const vector &, const vector &) inlined, size=10 (inline) file main.cpp (33)
          39, Intensity = 0.0
              Memory copy idiom, loop replaced by call to __c_mcopy8
    65, dot(const vector &, const vector &) inlined, size=9 (inline) file main.cpp (21)
          27, Intensity = 1.00
              Loop not fused: different loop trip count
              Generated vector simd code for the loop containing reductions
    67, waxpby(double, const vector &, double, const vector &, const vector &) inlined, size=10 (inline) file main.cpp (33)
          39, Intensity = 0.67
              Loop not fused: different loop trip count
              Loop not vectorized: data dependency
              Generated vector simd code for the loop
              Loop unrolled 4 times
    72, matvec(const matrix &, const vector &, const vector &) inlined, size=19 (inline) file main.cpp (20)
          29, Intensity = [symbolic], and not printable, try the -Mpfi -Mpfo options
              Loop not fused: different loop trip count
          33, Intensity = 1.00
              Loop not vectorized: non-stride-1 array reference
              Loop not vectorized: mixed data types
              Loop unrolled 2 times
    73, dot(const vector &, const vector &) inlined, size=9 (inline) file main.cpp (21)
          27, Intensity = 1.00
              Loop not fused: different loop trip count
              Generated vector simd code for the loop containing reductions
    77, waxpby(double, const vector &, double, const vector &, const vector &) inlined, size=10 (inline) file main.cpp (33)
          39, Intensity = 0.67
              Loop not fused: different loop trip count
              Loop not vectorized: data dependency
              Generated vector simd code for the loop
              Loop unrolled 4 times
    78, waxpby(double, const vector &, double, const vector &, const vector &) inlined, size=10 (inline) file main.cpp (33)
          39, Intensity = 0.67
              Loop not fused: function call before adjacent loop
              Loop not vectorized: data dependency
              Generated vector simd code for the loop
              Loop unrolled 4 times
    88, free_vector(vector &) inlined, size=2 (inline) file main.cpp (29)
    89, free_vector(vector &) inlined, size=2 (inline) file main.cpp (29)
    90, free_vector(vector &) inlined, size=2 (inline) file main.cpp (29)
    91, free_vector(vector &) inlined, size=2 (inline) file main.cpp (29)
    92, free_matrix(matrix &) inlined, size=5 (inline) file main.cpp (73)
nvc++ main.o -o cg.x -fast -Minfo=all,intensity,ccff
}}
}}
== Intensité computationnelle  ==
== Intensité computationnelle  ==
L'intensité computationnelle d'une boucle représente la quantité de travail accompli par la boucle par rapport aux opérations effectuées en mémoire.
L'intensité computationnelle d'une boucle représente la quantité de travail accompli par la boucle par rapport aux opérations effectuées en mémoire.
38,760

edits

Navigation menu