Visible to Intel only — GUID: GUID-4E6F1716-D044-49A6-86D6-E15F8F1899FF
DPCT1000
DPCT1001
DPCT1002
DPCT1003
DPCT1004
DPCT1005
DPCT1006
DPCT1007
DPCT1008
DPCT1009
DPCT1010
DPCT1011
DPCT1012
Message
Detailed Help
Suggestions to Fix
DPCT1013
DPCT1014
DPCT1015
DPCT1016
DPCT1017
DPCT1018
DPCT1019
DPCT1020
DPCT1021
DPCT1022
DPCT1023
DPCT1024
DPCT1025
DPCT1026
DPCT1027
DPCT1028
DPCT1029
DPCT1030
DPCT1031
DPCT1032
DPCT1033
DPCT1034
DPCT1035
DPCT1036
DPCT1037
DPCT1038
DPCT1039
DPCT1040
DPCT1041
DPCT1042
DPCT1043
DPCT1044
DPCT1045
DPCT1046
DPCT1047
DPCT1048
DPCT1049
DPCT1050
DPCT1051
DPCT1052
DPCT1053
DPCT1054
DPCT1055
DPCT1056
DPCT1057
DPCT1058
DPCT1059
DPCT1060
DPCT1061
DPCT1062
DPCT1063
DPCT1064
DPCT1065
DPCT1066
DPCT1067
DPCT1068
DPCT1069
DPCT1070
DPCT1071
DPCT1072
DPCT1073
DPCT1074
DPCT1075
DPCT1076
DPCT1077
DPCT1078
DPCT1079
DPCT1080
DPCT1081
DPCT1082
DPCT1083
DPCT1084
DPCT1085
DPCT1086
DPCT1087 [UPDATE]
DPCT1088
DPCT1089
DPCT1090
DPCT1091
DPCT1092
DPCT1093
DPCT1094
DPCT1095
DPCT1096
DPCT1097
DPCT1098
DPCT1099
DPCT1100
DPCT1101
DPCT1102
DPCT1103
DPCT1104
DPCT1105
DPCT1106
DPCT1107
DPCT1108
DPCT1109
DPCT1110
DPCT1111
DPCT1112
DPCT1113
DPCT1114
DPCT1115
DPCT1116
DPCT1117
DPCT1118
DPCT1119
DPCT1120
DPCT1121
DPCT1122
DPCT1123
DPCT1124
DPCT1125
DPCT1126
DPCT1127
DPCT1128
DPCT1129
DPCT1130
DPCT1131
DPCT1132
DPCT2001
DPCT3000
DPCT3001
DPCT3002
Visible to Intel only — GUID: GUID-4E6F1716-D044-49A6-86D6-E15F8F1899FF
DPCT1012
Message
Detected kernel execution time measurement pattern and generated an initial code for time measurements in SYCL. You can change the way time is measured depending on your goals.
Detailed Help
The generated code uses the CPU time to measure the kernel execution time. You can change the way time is measured depending on your requirements.
Suggestions to Fix
Review the logic and adjust it as needed.
For example, this original CUDA* code:
// Placeholder CUDA kernel for this example; its body is elided.
__global__ void kernel() {
...
}
// Times a single launch of kernel() using a pair of CUDA events.
void foo() {
cudaEvent_t start;
cudaEvent_t end;
cudaEventCreate(&start);
cudaEventCreate(&end);
// Record the start event, launch the kernel, then record the end event.
cudaEventRecord(start);
kernel<<<1, 1>>>();
cudaEventRecord(end, 0);
// Wait for the end event to complete before reading the elapsed time.
cudaEventSynchronize(end);
float time;
// Stores the elapsed time between the two events in `time`.
cudaEventElapsedTime(&time, start, end);
}
results in the following migrated SYCL* code:
// Placeholder kernel body for the migrated example; its contents are elided.
void kernel() {
...
}
// Migrated version of foo(): the event-based timing is replaced by host-side
// std::chrono::steady_clock timestamps taken around the kernel submission.
void foo() {
dpct::device_ext &dev_ct1 = dpct::get_current_device();
sycl::queue &q_ct1 = dev_ct1.default_queue();
dpct::event_ptr start;
// Host timestamp paired with `start`; it carries the actual start time.
std::chrono::time_point<std::chrono::steady_clock> start_ct1;
dpct::event_ptr end;
// Host timestamp paired with `end`.
std::chrono::time_point<std::chrono::steady_clock> end_ct1;
// Note: `start` is allocated here but never assigned or read afterwards in
// this function; only `end` receives an event from the queue.
start = new sycl::event();
end = new sycl::event();
/*
DPCT1012:0: Detected kernel execution time measurement pattern and generated
an initial code for time measurements in SYCL. You can change the way time is
measured depending on your goals.
*/
// Sample the host clock immediately before the kernel is submitted.
start_ct1 = std::chrono::steady_clock::now();
// `end` stores the event returned by the kernel submission.
*end = q_ct1.parallel_for(
sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)),
[=](sycl::nd_item<3> item_ct1) {
kernel();
});
/*
DPCT1012:1: Detected kernel execution time measurement pattern and generated
an initial code for time measurements in SYCL. You can change the way time is
measured depending on your goals.
*/
// Wait for the kernel's event, then sample the host clock again.
end->wait();
end_ct1 = std::chrono::steady_clock::now();
float time;
// Elapsed host time between the two samples, expressed in milliseconds.
time = std::chrono::duration<float, std::milli>(end_ct1 - start_ct1).count();
}
which can be manually rewritten to measure the time with SYCL event profiling instead of CPU timers:
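// Migrate the code with the `--enable-profiling` option to enable SYCL queue
// profiling in the helper functions; get_profiling_info() requires a queue
// that was created with profiling enabled.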
// Placeholder kernel body for the rewritten example; its contents are elided.
void kernel() {
...
}
// Rewritten version of foo(): measures the kernel with event profiling
// timestamps rather than host-side clocks.
// NOTE: per the SYCL 2020 specification, get_profiling_info() requires the
// queue to have been created with profiling enabled.
void foo() {
dpct::device_ext &dev_ct1 = dpct::get_current_device();
sycl::queue &q_ct1 = dev_ct1.default_queue();
dpct::event_ptr start;
dpct::event_ptr end;
start = new sycl::event();
end = new sycl::event();
// Barrier submitted before the kernel; its event marks the start of the
// timed region.
*start = q_ct1.ext_oneapi_submit_barrier();
q_ct1.parallel_for(
sycl::nd_range<3>(sycl::range<3>(1, 1, 1), sycl::range<3>(1, 1, 1)),
[=](sycl::nd_item<3> item_ct1) {
kernel();
});
// Barrier submitted after the kernel; its event marks the end of the timed
// region.
*end = q_ct1.ext_oneapi_submit_barrier();
// Wait for the end barrier before querying the profiling timestamps.
end->wait_and_throw();
float time;
// Difference between the end event's command_end timestamp and the start
// event's command_start timestamp. SYCL 2020 reports these timestamps in
// nanoseconds, so dividing by 1000000.0f yields milliseconds.
time =
(end->get_profiling_info<sycl::info::event_profiling::command_end>() -
start
->get_profiling_info<sycl::info::event_profiling::command_start>()) /
1000000.0f;
}