Developer Reference

Migrating OpenCL™ FPGA Designs to SYCL*

ID 767849
Date 5/08/2024
Public

SYCL Sample Code With Explicit Data Movement

main.cpp File

#include #include #include <numeric> #include <vector> #include <sycl/sycl.hpp> #include <sycl/ext/intel/fpga_extensions.hpp> using namespace sycl; // the number of bins in the histogram is constant constexpr int kNumBins = 10; // Forward declare the kernel names in the global scope to reduce name mangling class histogram; int main(int argc, char* argv[]) { // parse command line args uint count = 1000000; if (argc > 1) { count = atoi(argv[1]); } // host input and output memory std::vector<int> in_h(count); std::array<int, kNumBins> bins_h = {0}; std::array<int, kNumBins> bins_ref_h = {0}; // generate random input and compute the expected result std::generate(in_h.begin(), in_h.end(), [] { return rand() % 100; }); for (auto& x : in_h) { bins_ref_h[x % kNumBins]++; }; // the device selector #ifdef FPGA_EMULATOR ext::intel::fpga_emulator_selector selector; #else ext::intel::fpga_selector selector; #endif // create the device queue queue q(selector); // allocate memory on the device int *in_d, *bins_d; if ((in_d = malloc_device<int>(count, q)) == nullptr) { std::cerr << "ERROR: could not allocate space for 'in_d'\n"; std::terminate(); } if ((bins_d = malloc_device<int>(kNumBins, q)) == nullptr) { std::cerr << "ERROR: could not allocate space for 'bins_d'\n"; std::terminate(); } try { // copy input to device (blocking, using .wait() on the returned event) q.memcpy(in_d, in_h.data(), count*sizeof(int)).wait(); q.memcpy(bins_d, bins_h.data(), kNumBins*sizeof(int)).wait(); // launch the kernel event kernel_event = q.single_task<histogram>([=]() [[intel::kernel_args_restrict]] { // inform the compiler that the pointer lives on the device device_ptr<int> in(in_d); device_ptr<int> bins(bins_d); // store a local copy of the histogram to avoid read-accumulate-writes // to global memory [[intel::fpga_register]] int bins_local[kNumBins]; // initialize the local bins #pragma unroll for (uint i = 0; i < kNumBins; i++) { bins_local[i] = 0; } // compute the histogram [[intel::initiation_interval(1)]] for (uint i = 0; i < count; i++) { bins_local[in[i] % kNumBins]++; } // write back the local copy to global memory #pragma unroll for (uint i = 0; i < kNumBins; i++) { bins[i] = bins_local[i]; } }); // wait for the kernel to finish kernel_event.wait(); // copy the output back from the device q.memcpy(bins_h.data(), bins_d, kNumBins*sizeof(int)).wait(); } catch (exception const& e) { std::cout << "Caught a synchronous SYCL exception: " << e.what() << "\n"; std::terminate(); } // validate the results bool passed = std::equal(bins_h.begin(), bins_h.end(), bins_ref_h.begin()); if (passed) { printf("PASSED\n"); } else { printf("FAILED\n"); } // free the allocated device memory sycl::free(in_d, q); sycl::free(bins_d, q); return passed; }
BOARD=intel_a10gx_pac:pac_a10 fpga_emu: main.cpp dpcpp -fintelfpga -DFPGA_EMULATOR main.cpp -o main.fpga_emu report: main.cpp dpcpp -fintelfpga -Xshardware -Xstarget=$(BOARD) -fsycl-link=early main.cpp -o main_report.a fpga: main.cpp dpcpp -fintelfpga -Xshardware -Xstarget=$(BOARD) -reuse-exe=main.fpga main.cpp -o main.fpga clean: rm -rf *.o *.a *.prj