Developer Reference

Migrating OpenCL™ FPGA Designs to SYCL*

ID 767849
Date 5/08/2024
Public

SYCL Sample Code

main.cpp File

#include #include #include <numeric> #include <vector> #include <sycl/sycl.hpp> #include <sycl/ext/intel/fpga_extensions.hpp> using namespace sycl; // the number of bins in the histogram is constant constexpr int kNumBins = 10; // Forward declare the kernel names in the global scope to reduce name mangling class histogram; int main(int argc, char* argv[]) { // parse command line args uint count = 1000000; if (argc > 1) { count = atoi(argv[1]); } // host input and output memory std::vector<int> in_h(count); std::array<int, kNumBins> bins_h = {0}; std::array<int, kNumBins> bins_ref_h = {0}; // generate random input and compute the expected result std::generate(in_h.begin(), in_h.end(), [] { return rand() % 100; }); for (auto& x : in_h) { bins_ref_h[x % kNumBins]++; }; // the device selector #ifdef FPGA_EMULATOR ext::intel::fpga_emulator_selector selector; #else ext::intel::fpga_selector selector; #endif // create the device queue queue q(selector); try { // create SYCL buffers for inputs and outputs // providing host pointers (in this case, std::vectors and std::array) // allows the runtime to automatically migrate input and output data // to and from the device on demand buffer in_buf(in_h); buffer bins_buf(bins_h); // launch the kernel event kernel_event = q.submit([&](handler& h) { // get accessors to the SYCL buffers // 'no_init' tells the runtime that we don't care about the initial // contents of the output (z) and avoids copying the output from host to // device before launching the kernel. 
accessor in(in_buf, h, read_only); accessor bins(bins_buf, h, write_only, no_init); h.single_task<histogram>([=]() [[intel::kernel_args_restrict]] { // store a local copy of the histogram to avoid read-accumulate-writes // to global memory [[intel::fpga_register]] int bins_local[kNumBins]; // initialize the local bins #pragma unroll for (uint i = 0; i < kNumBins; i++) { bins_local[i] = 0; } // compute the histogram [[intel::initiation_interval(1)]] for (uint i = 0; i < count; i++) { bins_local[in[i] % kNumBins]++; } // write back the local copy to global memory #pragma unroll for (uint i = 0; i < kNumBins; i++) { bins[i] = bins_local[i]; } }); }); } catch (exception const& e) { std::cout << "Caught a synchronous SYCL exception: " << e.what() << "\n"; std::terminate(); } // Exiting the try-catch scope will cause the buffer destructors to be called // which will result in an implicit 'wait' on the kernel to finish (since // the kernel uses the buffers). // Therefore, at this point in the code, we know the kernel has finished // and the data has been transferred back to the host. // Since x_buf and y_buf are only accessed with 'read_only' buffers, // the runtime will not copy them back from the device. // validate the results bool passed = std::equal(bins_h.begin(), bins_h.end(), bins_ref_h.begin()); if (passed) { printf("PASSED\n"); } else { printf("FAILED\n"); } return passed; }

Makefile

# Board passed to the compiler through -Xstarget for the 'report' and 'fpga'
# targets.
BOARD=intel_a10gx_pac:pac_a10

# None of these targets names the file its recipe produces, so declare them
# phony: otherwise a stray file called e.g. 'clean' or 'fpga' in this directory
# would make the corresponding target silently do nothing.
.PHONY: fpga_emu report fpga clean

# Emulator build: defines FPGA_EMULATOR so main.cpp selects the emulator device.
fpga_emu: main.cpp
	icpx -fsycl -fintelfpga -DFPGA_EMULATOR main.cpp -o main.fpga_emu

# Hardware-targeted build stopped early by -fsycl-link=early, producing
# main_report.a.
report: main.cpp
	icpx -fsycl -fintelfpga -Xshardware -Xstarget=$(BOARD) -fsycl-link=early main.cpp -o main_report.a

# Full hardware build; -reuse-exe points at the executable from a previous
# build.
fpga: main.cpp
	icpx -fsycl -fintelfpga -Xshardware -Xstarget=$(BOARD) -reuse-exe=main.fpga main.cpp -o main.fpga

# Removes objects, archives and project directories. The main.fpga_emu and
# main.fpga executables are not matched by these patterns and are left in place.
clean:
	rm -rf *.o *.a *.prj