Video and Vision Processing Suite Intel® FPGA IP User Guide

ID 683329
Date 9/30/2021
Public

A newer version of this document is available. Customers should click here to go to the newest version.

Document Table of Contents

Warp IP Software Code Examples

UHD 60 Hz Workflow example

This example shows the workflow and basic warp software usage in C++ source code to generate and apply a 15-degree rotation warp. The example is for 3840x2160@60Hz video, which requires the processing to be split between two warp engines. The framebuffer and warp coefficient base addresses in the example are arbitrary. Actual values depend on your particular system design.
// Frame buffer base address handed to the channel configuration (arbitrary example value)
const uint32_t FRAMEBUF_BASE_ADDR	= 0x80000000;
// Base address to which the generated mesh, filter and fetch data is copied (arbitrary example value)
const uint32_t COEF_BASE_ADDR		= 0xa0000000;
// Warp IP base passed to intel_vvp_warp_init_instance()
intel_vvp_warp_base_t base = INTEL_VVP_WARP_BASE;
// Driver instance object passed to intel_vvp_warp_init_instance()
intel_vvp_warp_instance_t wrp0;

// Round a byte count up to the next multiple of 256 KB (warp data sizes must be 256 KB multiples)
auto align_256k = [](const uint32_t size_bytes)->uint32_t
{
	constexpr uint32_t kBlockBytes = 256u * 1024u;
	// Number of whole 256 KB blocks needed to hold size_bytes
	const uint32_t blocks = (size_bytes + kBlockBytes - 1u) / kBlockBytes;
	return blocks * kBlockBytes;
};

// Initialize driver instance
intel_vvp_warp_init_instance(&wrp0, base);

// This example splits processing between two warp engines, so more than one must be present
assert(wrp0.num_engines > 1);

// Create the channel used for the rest of the example
// NOTE(review): arguments are presumably (instance, input, engine 0, engine 1, output) - confirm against the driver API
intel_vvp_warp_channel_t* ch0 = intel_vvp_warp_create_double_channel(&wrp0, 0, 0, 1, 0);

// Fill in warp channel configuration structure.
// Value-initialize it first so that any member not explicitly assigned below is zero
// rather than indeterminate when the structure is handed to the driver.
intel_vvp_warp_channel_config_t cfg{};

cfg.ram_addr = FRAMEBUF_BASE_ADDR;	// Frame buffers base address
cfg.cs = ERGB_FULL;						// Video colourspace
cfg.scan = EMEGABLOCK;					// Scan pattern
cfg.width_input = 3840;					// Video dimensions
cfg.height_input = 2160;
cfg.width_output = 3840;
cfg.height_output = 2160;
cfg.bypass = 0;							// Disable warp bypass
cfg.lfr = 0;								// No low frame rate fallback

// Configure warp channel using the parameters above
intel_vvp_warp_configure_channel(ch0, &cfg);

// Set up the mesh generator for 3840x2160 input and output with a 15 degree rotation
WarpConfigurator configurator;
configurator.SetInputResolution(3840, 2160);
configurator.SetOutputResolution(3840, 2160);
configurator.Reset();
configurator.SetRotate(15.0f);

// Generate the mesh and place it directly into the mesh set consumed by the data generator
WarpMeshSet mesh_set{ configurator.GenerateMeshFromFixed() };

// Instantiate data generator
WarpDataGenerator data_generator;

// Obtain required hardware information
// (queried from the channel created and configured earlier)
WarpHwContextPtr hw = WarpDataHelper::GetHwContext(ch0);

// Context for data generation: hardware information plus the video dimensions
// NOTE(review): the two 3840x2160 pairs are presumably input and output width/height - confirm
WarpDataContext ctx{
	hw,
	3840, 2160,
	3840, 2160
};

// Generate warp data using provided hardware configuration and mesh
WarpDataPtr user_data = data_generator.GenerateData(ctx, mesh_set);

// This example fills in data for two engines, so the generated data must cover more than one.
// Check this before the engine count is used to size the allocation.
assert(user_data->_engines > 1);

// Allocate intel_vvp_warp_data_t with room for one intel_vvp_warp_engine_data_t per engine
const uint32_t warp_data_size = sizeof(intel_vvp_warp_data_t) + user_data->_engines * sizeof(intel_vvp_warp_engine_data_t);
intel_vvp_warp_data_t* warp_data = static_cast<intel_vvp_warp_data_t*>(malloc(warp_data_size));

// malloc() returns a null pointer on failure; do not write through it
assert(warp_data != nullptr);

warp_data->num_engines = user_data->_engines;
intel_vvp_warp_engine_data_t* engine_data = warp_data->engine_data;
const uint32_t mesh_stride = ctx._engine_mesh_stride / 4 - 1; // Mesh nodes in multiples of 4 less 1

// Processing is split between two engines
// 1st engine processes left half of the frame
{
	intel_vvp_warp_engine_data_t& eng = engine_data[0];
	const auto& src = *user_data->_engine_data[0];

	// Region covered by this engine: from the first horizontal position up to the per-engine count
	eng.start_h = 0;
	eng.start_v = 0;
	eng.end_h = ctx._engine_hblocks - 1;
	eng.end_v = ctx._vblocks_out - 1;
	eng.mesh_stride = mesh_stride;

	// Sizes in bytes of the generated tables for this engine
	const uint32_t mesh_data_size = src._mesh_entries * sizeof(mesh_entry_t);
	const uint32_t filter_data_size = src._filter_entries * sizeof(filter_entry_t);
	const uint32_t fetch_data_size = src._fetch_entries * sizeof(fetch_entry_t);

	// Point engine to the location of the mesh, filter and fetch data.
	// Tables are laid out back to back, each starting on a 256 KB boundary.
	eng.mesh_addr = COEF_BASE_ADDR;
	eng.filter_addr = eng.mesh_addr + align_256k(mesh_data_size);
	eng.fetch_addr = eng.filter_addr + align_256k(filter_data_size);

	// Transfer generated warp data to the calculated destination.
	// The integer addresses are widened to uintptr_t before the pointer conversion so the
	// cast is well-sized regardless of the pointer width of the build target.
	memcpy(reinterpret_cast<void*>(static_cast<uintptr_t>(eng.mesh_addr)), src._mesh_data, mesh_data_size);
	memcpy(reinterpret_cast<void*>(static_cast<uintptr_t>(eng.filter_addr)), src._filter_data, filter_data_size);
	memcpy(reinterpret_cast<void*>(static_cast<uintptr_t>(eng.fetch_addr)), src._fetch_data, fetch_data_size);
}
// 2nd engine - right half of the frame
{
	intel_vvp_warp_engine_data_t& eng = engine_data[1];
	const auto& src = *user_data->_engine_data[1];

	// Region covered by this engine: starts where the 1st engine's region ends
	eng.start_h = ctx._engine_hblocks;
	eng.start_v = 0;
	eng.end_h = ctx._hblocks_out - 1;
	eng.end_v = ctx._vblocks_out - 1;
	eng.mesh_stride = mesh_stride;

	// Sizes in bytes of the generated tables for this engine
	const uint32_t mesh_data_size = src._mesh_entries * sizeof(mesh_entry_t);
	const uint32_t filter_data_size = src._filter_entries * sizeof(filter_entry_t);
	const uint32_t fetch_data_size = src._fetch_entries * sizeof(fetch_entry_t);

	// Size of the 1st engine's fetch table; this engine's data starts right after it
	const uint32_t engine0_fetch_size = user_data->_engine_data[0]->_fetch_entries * sizeof(fetch_entry_t);

	// Point engine to the location of the mesh, filter and fetch data.
	// Tables are laid out back to back, each starting on a 256 KB boundary.
	eng.mesh_addr = engine_data[0].fetch_addr + align_256k(engine0_fetch_size);
	eng.filter_addr = eng.mesh_addr + align_256k(mesh_data_size);
	eng.fetch_addr = eng.filter_addr + align_256k(filter_data_size);

	// Transfer generated warp data to the calculated destination.
	// The integer addresses are widened to uintptr_t before the pointer conversion so the
	// cast is well-sized regardless of the pointer width of the build target.
	memcpy(reinterpret_cast<void*>(static_cast<uintptr_t>(eng.mesh_addr)), src._mesh_data, mesh_data_size);
	memcpy(reinterpret_cast<void*>(static_cast<uintptr_t>(eng.filter_addr)), src._filter_data, filter_data_size);
	memcpy(reinterpret_cast<void*>(static_cast<uintptr_t>(eng.fetch_addr)), src._fetch_data, fetch_data_size);
}

// Carry over the generator's skip megablock data and set the skip RAM page field to 0
warp_data->_skip_megablock_data = user_data->_skip_megablock_data;
warp_data->_skip_ram_page = 0;

// Apply warp by passing new warp data set to the driver
intel_vvp_warp_apply_transform(ch0, warp_data);

// Release allocated resources
// NOTE(review): warp_data is freed right after the call above - assumes the driver does not keep the pointer; confirm
free(warp_data);
intel_vvp_warp_free_channel(ch0);

Full HD up to UHD@30 Hz warp channel allocation

Input video streams in full HD and up to UHD@30Hz formats require a single warp engine for processing. The example shows how to allocate a warp channel for such use cases:
intel_vvp_warp_base_t base = INTEL_VVP_WARP_BASE;
intel_vvp_warp_instance_t wrp0;

// Bring up the driver instance for this IP
intel_vvp_warp_init_instance(&wrp0, base);

// A single-engine channel needs at least one engine
assert(wrp0.num_engines > 0);

// Selection passed to the channel allocator (all zero in this example)
uint32_t input = 0;
uint32_t engine = 0;
uint32_t output = 0;

intel_vvp_warp_channel_t* ch0 = intel_vvp_warp_create_channel(&wrp0, input, engine, output);

// ... application code using ch0 goes here ...

// Release the channel once it is no longer needed
intel_vvp_warp_free_channel(ch0);

Warp mesh usage

Define the required warp using the WarpMesh object. The example shows the simplest case of a 1:1 (unity) warp for a 3840x2160 video.

// Build a 1:1 (unity) mesh for 3840x2160 video: each node is set to its own grid position
intel_vvp_warp::WarpMesh mesh{3840, 2160};

for(uint32_t v = 0; v < mesh.GetVNodes(); ++v)
{
	// NOTE(review): assumes GetRow(v) points at GetHNodes() consecutive nodes of row v - confirm
	mesh_node_t* node = mesh.GetRow(v);

	// Advance node together with h so every node in the row is written,
	// not just the first one
	for(uint32_t h = 0; h < mesh.GetHNodes(); ++h, ++node)
	{
		// Coordinates carry 4 fractional bits, so the integer position is shifted left by 4
		node->_x = (h * mesh.GetStep()) << 4;
		node->_y = (v * mesh.GetStep()) << 4;
	}
}

Mesh coordinates use the least significant four bits as the fractional part for subpixel precision. In the example above, the fractional part is always 0. Store subpixel positions in the following way:

// Row pointer obtained the same way as in the unity mesh loop
mesh_node_t* node = mesh.GetRow(v);
…
// Scale by 16 (4 fractional bits) and round to the nearest step: 10.6 * 16 = 169.6 -> 170
float pos_x = 10.6f;
node->_x = static_cast<int32_t>(roundf(pos_x * 16.0f));