The conclusion is:
- NEON intrinsics support converting float32 values to int32 fixed-point values in a given Q format (number of fractional bits). The reverse conversion is also supported.
- The NEON conversion intrinsics already saturate (clip) float values outside of [-1, 1], so no separate clipping step is needed.
- The issue cost is 1 cycle per 32x2 vector on Cortex-A9.
Source code:
#include <stdio.h>
#include "arm_neon.h"
/*
 * Demonstrates a float32 -> Q-format int32 -> float32 round trip using the
 * NEON fixed-point conversion intrinsics.
 *
 * Four floats are loaded into a 128-bit vector, converted to signed 32-bit
 * fixed point with q fractional bits (vcvtq_n_s32_f32), stored and printed
 * in hexadecimal, then converted back to float (vcvtq_n_f32_s32) and printed
 * again. Takes no arguments and returns nothing; all output goes to stdout.
 */
void
test_float_int32_converter(void)
{
    /*
     * Number of fractional bits. Declared as an enumeration constant so it
     * is a true integer constant expression: the vcvtq_n_* intrinsics encode
     * this argument as an instruction immediate, and a `const int` object is
     * not a constant expression in C.
     */
    enum { q = 31 };
    float f[] = {-1.0f, -0.5f, 0.0f, 0.5f};
    int32_t i[] = {0, 0, 0, 0};
    float32x4_t vf = vld1q_f32(&f[0]);
    int32x4_t vi = vcvtq_n_s32_f32(vf, q);

    vst1q_s32(&i[0], vi);

    printf("Q=%d\n", q);
    printf("src float 32x4: \t%f,\t%f,\t%f,\t%f\n",
           f[0], f[1], f[2], f[3]);
    /* %x requires an unsigned int argument, so convert the signed lanes
       explicitly; the printed bit pattern is unchanged. */
    printf("dst int 32x4: \t%8.8x,\t%8.8x,\t%8.8x,\t%8.8x\n",
           (unsigned int)i[0], (unsigned int)i[1],
           (unsigned int)i[2], (unsigned int)i[3]);

    /* Convert back to float with the same Q value, overwriting f[]. */
    vf = vcvtq_n_f32_s32(vi, q);
    vst1q_f32(&f[0], vf);
    printf("dst float 32x4: \t%f,\t%f,\t%f,\t%f\n",
           f[0], f[1], f[2], f[3]);
}
|
build:
armcc --arm_linux_paths --arm_linux_config_file=/home/jxion/arm_linux_config.xml main.c --cpu=Cortex-A9 -o test.exe
|
Result on Pandaboard:
Q=31
src float 32x4: -1.000000, -0.500000, 0.000000, 0.500000
dst int 32x4: 80000000, c0000000, 00000000, 40000000
dst float 32x4: -1.000000, -0.500000, 0.000000, 0.500000
|
Q=31
src float 32x4: -1.200000, -0.500000, 1.000000, 1.500000
dst int 32x4: 80000000, c0000000, 7fffffff, 7fffffff
dst float 32x4: -1.000000, -0.500000, 1.000000, 1.000000