Bureaucrats, cc_docs_admin, cc_staff, rsnt_translations
2,837
edits
No edit summary |
No edit summary |
||
Line 117: | Line 117: | ||
== How to Enable Compiler Feedback == <!--T:18--> | == How to Enable Compiler Feedback == <!--T:18--> | ||
* Edit the Makefile | * Edit the Makefile | ||
CXX= | CXX=nvc++ | ||
CXXFLAGS=-fast -Minfo=all,intensity,ccff LDFLAGS=${CXXFLAGS} | CXXFLAGS=-fast -Minfo=all,intensity,ccff | ||
LDFLAGS=${CXXFLAGS} | |||
* Rebuild | * Rebuild | ||
</translate> | </translate> | ||
Line 124: | Line 125: | ||
|make | |make | ||
|result= | |result= | ||
nvc++ -fast -Minfo=all,intensity,ccff -c -o main.o main.cpp | |||
"vector.h" | initialize_vector(vector &, double): | ||
20, include "vector.h" | |||
36, Intensity = 0.0 | |||
Memory set idiom, loop replaced by call to __c_mset8 | Memory set idiom, loop replaced by call to __c_mset8 | ||
dot(const vector &, const vector &): | |||
27, Intensity = 1.00 | 21, include "vector_functions.h" | ||
27, Intensity = 1.00 | |||
Generated vector | Generated vector simd code for the loop containing reductions | ||
FMA (fused multiply-add) instruction(s) generated | |||
waxpby(double, const vector &, double, const vector &, const vector &): | |||
39, Intensity = 1.00 | 21, include "vector_functions.h" | ||
39, Intensity = 1.00 | |||
Loop not vectorized: data dependency | Loop not vectorized: data dependency | ||
Loop unrolled | Generated vector simd code for the loop | ||
Loop unrolled 2 times | |||
FMA (fused multiply-add) instruction(s) generated | |||
allocate_3d_poisson_matrix(matrix &, int): | |||
22, include "matrix.h" | |||
43, Intensity = 0.0 | 43, Intensity = 0.0 | ||
Loop not fused: different loop trip count | |||
44, Intensity = 0.0 | 44, Intensity = 0.0 | ||
Loop not vectorized/parallelized: loop count too small | Loop not vectorized/parallelized: loop count too small | ||
45, Intensity = 0.0 | 45, Intensity = 0.0 | ||
57, Intensity = 0.0 | 57, Intensity = 0.0 | ||
59, Intensity = 0.0 | 59, Intensity = 0.0 | ||
Loop not vectorized: data dependency | Loop not vectorized: data dependency | ||
matvec(const matrix &, const vector &, const vector &): | |||
23, include "matrix_functions.h" | |||
29, Intensity = (num_rows*((row_end-row_start)* 2))/(num_rows+(num_rows+(num_rows+((row_end-row_start)+(row_end-row_start))))) | 29, Intensity = (num_rows*((row_end-row_start)* 2))/(num_rows+(num_rows+(num_rows+((row_end-row_start)+(row_end-row_start))))) | ||
33, Intensity = 1.00 | FMA (fused multiply-add) instruction(s) generated | ||
33, Intensity = 1.00 | |||
Loop not vectorized: non-stride-1 array reference | |||
Loop not vectorized: mixed data types | |||
Loop unrolled 2 times | |||
FMA (fused multiply-add) instruction(s) generated | |||
main: | main: | ||
38, allocate_3d_poisson_matrix(matrix &, int) inlined, size=41 (inline) file main.cpp (29) | |||
43, Intensity = 0.0 | |||
Loop not fused: different loop trip count | |||
44, Intensity = 0.0 | |||
Loop not vectorized/parallelized: loop count too small | |||
45, Intensity = 0.0 | |||
57, Intensity = 0.0 | |||
Loop not fused: function call before adjacent loop | |||
59, Intensity = 0.0 | |||
Loop not vectorized: data dependency | |||
42, allocate_vector(vector &, unsigned int) inlined, size=3 (inline) file main.cpp (24) | |||
43, allocate_vector(vector &, unsigned int) inlined, size=3 (inline) file main.cpp (24) | |||
44, allocate_vector(vector &, unsigned int) inlined, size=3 (inline) file main.cpp (24) | |||
45, allocate_vector(vector &, unsigned int) inlined, size=3 (inline) file main.cpp (24) | |||
46, allocate_vector(vector &, unsigned int) inlined, size=3 (inline) file main.cpp (24) | |||
48, initialize_vector(vector &, double) inlined, size=5 (inline) file main.cpp (34) | |||
36, Intensity = 0.0 | |||
Loop not vectorized/parallelized: not countable | |||
49, initialize_vector(vector &, double) inlined, size=5 (inline) file main.cpp (34) | |||
36, Intensity = 0.0 | |||
Loop not vectorized/parallelized: not countable | |||
52, waxpby(double, const vector &, double, const vector &, const vector &) inlined, size=10 (inline) file main.cpp (33) | |||
39, Intensity = 0.0 | |||
Memory copy idiom, loop replaced by call to __c_mcopy8 | |||
53, matvec(const matrix &, const vector &, const vector &) inlined, size=19 (inline) file main.cpp (20) | |||
29, Intensity = [symbolic], and not printable, try the -Mpfi -Mpfo options | |||
Loop not fused: different loop trip count | |||
33, Intensity = 1.00 | |||
Loop not vectorized: non-stride-1 array reference | |||
Loop not vectorized: mixed data types | |||
Loop unrolled 2 times | |||
54, waxpby(double, const vector &, double, const vector &, const vector &) inlined, size=10 (inline) file main.cpp (33) | |||
27, FMA (fused multiply-add) instruction(s) generated | |||
29, FMA (fused multiply-add) instruction(s) generated | |||
33, FMA (fused multiply-add) instruction(s) generated | |||
39, Intensity = 0.67 | |||
Loop not fused: different loop trip count | |||
Loop not vectorized: data dependency | |||
Generated vector simd code for the loop | |||
Loop unrolled 4 times | |||
FMA (fused multiply-add) instruction(s) generated | |||
56, dot(const vector &, const vector &) inlined, size=9 (inline) file main.cpp (21) | |||
27, Intensity = 1.00 | |||
Loop not fused: function call before adjacent loop | |||
Generated vector simd code for the loop containing reductions | |||
61, Intensity = 0.0 | |||
62, waxpby(double, const vector &, double, const vector &, const vector &) inlined, size=10 (inline) file main.cpp (33) | |||
39, Intensity = 0.0 | |||
Memory copy idiom, loop replaced by call to __c_mcopy8 | |||
65, dot(const vector &, const vector &) inlined, size=9 (inline) file main.cpp (21) | |||
27, Intensity = 1.00 | |||
Loop not fused: different loop trip count | |||
Generated vector simd code for the loop containing reductions | |||
67, waxpby(double, const vector &, double, const vector &, const vector &) inlined, size=10 (inline) file main.cpp (33) | |||
39, Intensity = 0.67 | |||
Loop not fused: different loop trip count | |||
Loop not vectorized: data dependency | |||
Generated vector simd code for the loop | |||
Loop unrolled 4 times | |||
72, matvec(const matrix &, const vector &, const vector &) inlined, size=19 (inline) file main.cpp (20) | |||
29, Intensity = [symbolic], and not printable, try the -Mpfi -Mpfo options | |||
Loop not fused: different loop trip count | |||
33, Intensity = 1.00 | |||
Loop not vectorized: non-stride-1 array reference | |||
Loop not vectorized: mixed data types | |||
Loop unrolled 2 times | |||
73, dot(const vector &, const vector &) inlined, size=9 (inline) file main.cpp (21) | |||
27, Intensity = 1.00 | |||
Loop not fused: different loop trip count | |||
Generated vector simd code for the loop containing reductions | |||
77, waxpby(double, const vector &, double, const vector &, const vector &) inlined, size=10 (inline) file main.cpp (33) | |||
39, Intensity = 0.67 | |||
Loop not fused: different loop trip count | |||
Loop not vectorized: data dependency | |||
Generated vector simd code for the loop | |||
Loop unrolled 4 times | |||
78, waxpby(double, const vector &, double, const vector &, const vector &) inlined, size=10 (inline) file main.cpp (33) | |||
39, Intensity = 0.67 | |||
Loop not fused: function call before adjacent loop | |||
Loop not vectorized: data dependency | |||
Generated vector simd code for the loop | |||
Loop unrolled 4 times | |||
88, free_vector(vector &) inlined, size=2 (inline) file main.cpp (29) | |||
89, free_vector(vector &) inlined, size=2 (inline) file main.cpp (29) | |||
90, free_vector(vector &) inlined, size=2 (inline) file main.cpp (29) | |||
91, free_vector(vector &) inlined, size=2 (inline) file main.cpp (29) | |||
92, free_matrix(matrix &) inlined, size=5 (inline) file main.cpp (73) | |||
nvc++ main.o -o cg.x -fast -Minfo=all,intensity,ccff | |||
}} | }} | ||
<translate> | <translate> | ||
== Computational Intensity == <!--T:19--> | == Computational Intensity == <!--T:19--> | ||
The computational intensity of a loop is a measure of how much computational work it performs relative to the number of memory operations it issues. | The computational intensity of a loop is a measure of how much computational work it performs relative to the number of memory operations it issues.