Visible to Intel only — GUID: GUID-95976E75-18AF-4A0E-8C66-B6E62A4CF7E6
Visible to Intel only — GUID: GUID-95976E75-18AF-4A0E-8C66-B6E62A4CF7E6
GAP Message (Diagnostic ID 30538)
Message
Moving the block of code that consists of a function-call (line %d), if-condition (line %d), and an early return (line %d) to outside the loop may enable parallelization of the loop at line %d.
Advice
Move the function call and an associated return from inside the loop (perhaps by inserting them before the loop) to help parallelize the loop.
This kind of function call leading to a return inside a loop usually handles an error condition. If this error check can be done before the loop starts executing, without changing the program semantics, the compiler may be able to parallelize the loop, thus improving performance.
Example
Consider the following:
// Upper bound of the outer loop in process_nodes()
extern int num_nodes;

// One record holding the coordinates of a pair of cities
typedef struct TEST_STRUCT {
    // Coordinates of city1
    float latitude1;
    float longitude1;
    // Coordinates of city2
    float latitude2;
    float longitude2;
} test_struct;

extern int *mark_larger;
extern float *distances, **matrix;
extern test_struct **nodes;
extern test_struct ***files;
extern void init_node(test_struct *node, int i);
// Prototype must agree with the definition below, which takes an int;
// declaring it as (void) makes the listing fail with "conflicting types".
extern void process_nodes(int width);
float compute_max_distance(void);
extern int check_error_condition(int width);
#include <math.h>
#include <stdio.h>
// For each k in [0, num_nodes): reads the two coordinate pairs of
// files[0][k], computes a distance from them, fills matrix[k][0..width-1],
// and conditionally updates distances[k].
//
// width: forwarded to check_error_condition() and used as the trip count of
//        the inner loop over distances[] and matrix[k][].
//
// Returns nothing. Returns early, skipping all remaining iterations, the
// first time check_error_condition(width) is nonzero.
void process_nodes(int width)
{
// Scale factor applied to the computed angle.
// NOTE(review): presumably the Earth's radius in miles -- confirm.
float const R = 3964.0;
float temp, lat1, lat2, long1, long2, result, pat2;
// Snapshot num_nodes once so the loop bound is a local value.
int m, j, temp1 = num_nodes;
// Publishes files[0] through the global `nodes` pointer used below.
nodes = files[0];
// m is assigned here but never read in this function.
m = 1;
// Compiler-specific pragmas attached to the loop that follows.
#pragma loop count min(4)
#pragma parallel
for (int k=0; k < temp1; k++) {
// This call / if / early return inside the loop body is the pattern the
// diagnostic reports. width is not modified in the loop, so every
// iteration passes the same argument; whether the result can differ
// between iterations depends on check_error_condition(), not shown here.
if (check_error_condition(width)) {
return;
}
// sin()/cos() below take radians, so the coordinates are presumably
// stored in radians -- TODO confirm.
lat1 = nodes[k]->latitude1;
lat2 = nodes[k]->latitude2;
long1 = nodes[k]->longitude1;
long2 = nodes[k]->longitude2;
// Compute the distance between the two cities
temp = sin(lat1) * sin(lat2) + cos(lat1) * cos(lat2) *
cos(long1-long2);
result = 2.0 * R * atan(sqrt((1.0-temp)/(1.0+temp)));
// pat2 accumulates the sum of distances[0..width-1]; row k of matrix is
// filled with distances[k] + j, using distances[k] before it is updated.
// NOTE(review): distances[j] may already have been rewritten by an
// earlier k iteration (when that k < width), so the serial result can
// depend on iteration order -- confirm before relying on parallel
// execution.
pat2 = 0;
for(j=0; j<width; j++) {
pat2 += distances[j];
matrix[k][j] = distances[k]+j;
}
// Overwrite distances[k] with result + pat2 only when result exceeds
// the value currently stored there.
if (result > distances[k]) {
distances[k] = result + pat2;
}
}
}
In this case, the compiler is unable to parallelize the loop at line 38.
If you determine it is safe to do so, you can modify the above code as follows:
// Upper bound of the outer loop in process_nodes()
extern int num_nodes;

// One record holding the coordinates of a pair of cities
typedef struct TEST_STRUCT {
    // Coordinates of city1
    float latitude1;
    float longitude1;
    // Coordinates of city2
    float latitude2;
    float longitude2;
} test_struct;

extern int *mark_larger;
extern float *distances, **matrix;
extern test_struct **nodes;
extern test_struct ***files;
extern void init_node(test_struct *node, int i);
// Prototype must agree with the definition below, which takes an int;
// declaring it as (void) makes the listing fail with "conflicting types".
extern void process_nodes(int width);
float compute_max_distance(void);
extern int check_error_condition(int width);
#include <math.h>
#include <stdio.h>
// For each k in [0, num_nodes): reads the two coordinate pairs of
// files[0][k], computes a distance from them, fills matrix[k][0..width-1],
// and conditionally updates distances[k].
//
// width: forwarded to check_error_condition() and used as the trip count of
//        the inner loop over distances[] and matrix[k][].
//
// Returns nothing. Returns before the loop runs at all if
// check_error_condition(width) is nonzero.
void process_nodes(int width) {
// Scale factor applied to the computed angle.
// NOTE(review): presumably the Earth's radius in miles -- confirm.
float const R = 3964.0;
float temp, lat1, lat2, long1, long2, result, pat2;
// Snapshot num_nodes once so the loop bound is a local value.
int m, j, temp1 = num_nodes;
// Publishes files[0] through the global `nodes` pointer used below.
nodes = files[0];
// m is assigned here but never read in this function.
m = 1;
// Error check hoisted out of the loop: it runs exactly once, and it runs
// even when num_nodes <= 0 (a check placed inside the loop body would not
// be called in that case). The loop body now has no call-and-return exit.
if (check_error_condition(width)) {
return;
}
// Compiler-specific pragmas attached to the loop that follows.
#pragma loop count min(4)
#pragma parallel
for (int k=0; k < temp1; k++) {
// sin()/cos() below take radians, so the coordinates are presumably
// stored in radians -- TODO confirm.
lat1 = nodes[k]->latitude1;
lat2 = nodes[k]->latitude2;
long1 = nodes[k]->longitude1;
long2 = nodes[k]->longitude2;
// Compute the distance between the two cities
temp = sin(lat1) * sin(lat2) + cos(lat1) * cos(lat2) *
cos(long1-long2);
result = 2.0 * R * atan(sqrt((1.0-temp)/(1.0+temp)));
// pat2 accumulates the sum of distances[0..width-1]; row k of matrix is
// filled with distances[k] + j, using distances[k] before it is updated.
// NOTE(review): distances[j] may already have been rewritten by an
// earlier k iteration (when that k < width), so the serial result can
// depend on iteration order -- confirm before relying on parallel
// execution.
pat2 = 0;
for(j=0; j<width; j++) {
pat2 += distances[j];
matrix[k][j] = distances[k]+j;
}
// Overwrite distances[k] with result + pat2 only when result exceeds
// the value currently stored there.
if (result > distances[k]) {
distances[k] = result + pat2;
}
}
}
Verify
Confirm that the function call does not rely on any computation inside the loop and that restructuring the code as suggested above retains the original program semantics.