Intel® C++ Compiler Classic Developer Guide and Reference

ID 767249
Date 3/31/2023
Public

A newer version of this document is available. Customers should click here to go to the newest version.

Document Table of Contents

vector_variant

Specifies a vector variant function that corresponds to its original C/C++ scalar function. This vector variant function can be invoked under vector context at call sites.

Syntax

Windows* OS:

__declspec(vector_variant(clauses))

Linux* OS:

__attribute__((vector_variant(clauses)))

Arguments

clauses

Is the following:

An implements clause, in the form implements(<function declarator>) [, <simd-clauses>], where function declarator is the declarator of the original scalar function, and simd-clauses is one or more of the clauses allowed for the vector attribute. The simd-clauses are optional.

Description

This attribute provides a means for programmers to describe the association between the vector variant function and its corresponding scalar function. The compiler will use the vector variant to replace the scalar call for a vectorized loop.

The following are restrictions for this attribute:

  • A vector variant function can have only one vector_variant annotation.

  • A vector variant annotation can have only one implements clause.

  • A vector variant annotation applies to only one vector variant function. The annotation must not specify both the mask and nomask clauses; specify one or the other. The default is nomask.

  • A vector variant function should have the __regcall attribute.

If the user-defined vector variant function is a variant with mask, the mask argument should be the last argument.

Example

The following shows an example of a vector variant function:

#include <immintrin.h>
/*
 * Scalar reference function: returns the float that `a` points to plus the
 * integer `b`.
 * NOTE(review): noinline presumably keeps the call visible at the call site
 * so that a vector variant can be substituted for it -- confirm.
 */
__declspec(noinline)
float MyAdd(float* a, int b)
{
  const float base = *a;
  return base + b;
}
/*
 * Vector variant of MyAdd(float *a, int b) for a vector length of 8.
 *
 * v_a  - address of the first of eight consecutive floats (a is linear).
 * v_b  - the b values for lanes 0-3.
 * v_b2 - the b values for lanes 4-7.
 *
 * Returns the eight sums v_a[i] + b[i] packed into one __m256.
 */
__declspec(vector_variant(implements(MyAdd(float *a, int b)),
                          linear(a), vectorlength(8),
                          nomask, processor(core_2nd_gen_avx)))
__m256 __regcall MyAddVec(float* v_a, __m128i v_b, __m128i v_b2) {
  /* Join the two 128-bit halves of b: v_b in the low half, v_b2 in the high half. */
  __m256i t96 = _mm256_castsi128_si256(v_b);
  __m256i tmp = _mm256_insertf128_si256(t96, v_b2, 1);
  /* Convert the eight ints to floats so they can be added to the loaded values. */
  __m256  t95 = _mm256_cvtepi32_ps(tmp);
  /*
   * Use an unaligned load: nothing guarantees that v_a is 32-byte aligned,
   * and dereferencing it through a __m256 pointer would require that.
   */
  return _mm256_add_ps(_mm256_loadu_ps(v_a), t95);
}
float x[2000], y[2000];

/*
 * Stores MyAdd(&y[k], k) into x[k] for k = 0..1999 and returns
 * x[0] + x[1999].
 *
 * The parameter y shadows the file-scope array y; the loop reads from the
 * array passed by the caller. The loop is marked with "#pragma omp simd", so
 * the call to MyAdd occurs in a vector context.
 */
float foo(float y[]) {
#pragma omp simd
  for (int k=0; k< 2000; k++) {
    x[k] = MyAdd(&y[k], k);
  }
  return x[0] + x[1999];
}

If the vector return value occupies more than one vector register, wrap the registers in a structure and return that structure, as shown in the following example:

#include <immintrin.h>

/*
 * Pair of 256-bit double vectors, used as the return type of a vector
 * variant whose result does not fit in a single register.
 */
typedef struct {
  __m256d r1;  /* first 256-bit part of the result */
  __m256d r2;  /* second 256-bit part of the result */
} __m256dx2;

/*
 * Scalar reference function: returns the double that `a` points to plus the
 * integer `b`.
 * NOTE(review): noinline presumably keeps the call visible at the call site
 * so that a vector variant can be substituted for it -- confirm.
 */
__declspec(noinline)
double MyAdd(double* a, int b)
{
  const double base = *a;
  return base + b;
}

/*
 * Vector variant of MyAdd(double *a, int b) for a vector length of 8.
 *
 * v_a  - address of the first of eight consecutive doubles (a is linear).
 * v_b  - the b values for lanes 0-3.
 * v_b2 - the b values for lanes 4-7.
 *
 * Returns the eight sums v_a[i] + b[i]: lanes 0-3 in r1 and lanes 4-7 in r2.
 */
__declspec(vector_variant(implements(MyAdd(double *a, int b)),
                          linear(a), vectorlength(8),
                          nomask, processor(core_2nd_gen_avx)))
__m256dx2 __regcall MyAddVec(double* v_a, __m128i v_b, __m128i v_b2) {
  /* Widen each group of four ints to four doubles. */
  __m256d t1 = _mm256_cvtepi32_pd(v_b);
  __m256d t2 = _mm256_cvtepi32_pd(v_b2);
  __m256dx2 ret;
  /*
   * Add, to match the scalar MyAdd, which computes *a + b. Unaligned loads
   * are used because nothing guarantees that v_a is 32-byte aligned.
   */
  ret.r1 = _mm256_add_pd(_mm256_loadu_pd(v_a), t1);
  ret.r2 = _mm256_add_pd(_mm256_loadu_pd(v_a + 4), t2);
  return ret;
}

__declspec(align(32)) double x[2000], y[2000];

/*
 * Stores MyAdd(&y[k], k) into x[k] for k = 0..1999 and returns
 * x[0] + x[1999].
 *
 * The parameter y shadows the file-scope array y; the loop reads from the
 * array passed by the caller. The loop is marked with "#pragma omp simd", so
 * the call to MyAdd occurs in a vector context.
 */
double foo(double* y) {
#pragma omp simd
  for (int k = 0; k < 2000; ++k) {
    /* Index from the base pointer; element k is the k-th double after y. */
    x[k] = MyAdd(&y[k], k);
  }
  return x[0] + x[1999];
}

See Also