SYCL Sample Code With Explicit Data Movement
main.cpp File
#include <algorithm>
#include <array>
#include <numeric>
#include <vector>
#include <sycl/sycl.hpp>
#include <sycl/ext/intel/fpga_extensions.hpp>
using namespace sycl;
// the number of bins in the histogram is constant
constexpr int kNumBins = 10;
// Forward declare the kernel names in the global scope to reduce name mangling
class histogram;
int main(int argc, char* argv[]) {
// parse command line args
uint count = 1000000;
if (argc > 1) {
count = atoi(argv[1]);
}
// host input and output memory
std::vector<int> in_h(count);
std::array<int, kNumBins> bins_h = {0};
std::array<int, kNumBins> bins_ref_h = {0};
// generate random input and compute the expected result
std::generate(in_h.begin(), in_h.end(), [] { return rand() % 100; });
for (auto& x : in_h) { bins_ref_h[x % kNumBins]++; };
// the device selector
#ifdef FPGA_EMULATOR
ext::intel::fpga_emulator_selector selector;
#else
ext::intel::fpga_selector selector;
#endif
// create the device queue
queue q(selector);
// allocate memory on the device
int *in_d, *bins_d;
if ((in_d = malloc_device<int>(count, q)) == nullptr) {
std::cerr << "ERROR: could not allocate space for 'in_d'\n";
std::terminate();
}
if ((bins_d = malloc_device<int>(kNumBins, q)) == nullptr) {
std::cerr << "ERROR: could not allocate space for 'bins_d'\n";
std::terminate();
}
try {
// copy input to device (blocking, using .wait() on the returned event)
q.memcpy(in_d, in_h.data(), count*sizeof(int)).wait();
q.memcpy(bins_d, bins_h.data(), kNumBins*sizeof(int)).wait();
// launch the kernel
event kernel_event =
q.single_task<histogram>([=]() [[intel::kernel_args_restrict]] {
// inform the compiler that the pointer lives on the device
device_ptr<int> in(in_d);
device_ptr<int> bins(bins_d);
// store a local copy of the histogram to avoid read-accumulate-writes
// to global memory
[[intel::fpga_register]] int bins_local[kNumBins];
// initialize the local bins
#pragma unroll
for (uint i = 0; i < kNumBins; i++) {
bins_local[i] = 0;
}
// compute the histogram
[[intel::initiation_interval(1)]]
for (uint i = 0; i < count; i++) {
bins_local[in[i] % kNumBins]++;
}
// write back the local copy to global memory
#pragma unroll
for (uint i = 0; i < kNumBins; i++) {
bins[i] = bins_local[i];
}
});
// wait for the kernel to finish
kernel_event.wait();
// copy the output back from the device
q.memcpy(bins_h.data(), bins_d, kNumBins*sizeof(int)).wait();
} catch (exception const& e) {
std::cout << "Caught a synchronous SYCL exception: " << e.what() << "\n";
std::terminate();
}
// validate the results
bool passed = std::equal(bins_h.begin(), bins_h.end(), bins_ref_h.begin());
if (passed) {
printf("PASSED\n");
} else {
printf("FAILED\n");
}
// free the allocated device memory
sycl::free(in_d, q);
sycl::free(bins_d, q);
return passed;
}
BOARD=intel_a10gx_pac:pac_a10
fpga_emu: main.cpp
dpcpp -fintelfpga -DFPGA_EMULATOR main.cpp -o main.fpga_emu
report: main.cpp
dpcpp -fintelfpga -Xshardware -Xstarget=$(BOARD) -fsycl-link=early main.cpp -o main_report.a
fpga: main.cpp
dpcpp -fintelfpga -Xshardware -Xstarget=$(BOARD) -reuse-exe=main.fpga main.cpp -o main.fpga
clean:
rm -rf *.o *.a *.prj
Parent topic: Histogram Design Example Walkthrough