此为opencv中remap函数移植和分析,整理了双线性的插值部分的代码
尚未完全移植,但最困难的部分已经完成,而恰巧在这时,发现其实现并不是那么的令我满意,于是终止,改为自己实现。
考虑到以后可能会用到,暂且保存于此。
#include "./CCC.h" #ifndef __DCC_H__ #define __DCC_H__ #define saturate_cast_uchar(v) (uchar)((unsigned)(v) <= UCHAR_MAX ? (v) : (v) > 0 ? UCHAR_MAX : 0) #define saturate_cast_short(v) (short)((unsigned)((v) - SHRT_MIN) <= (unsigned)USHRT_MAX ? (v) : (v) > 0 ? SHRT_MAX : SHRT_MIN) #define INTER_REMAP_COEF_BITS 15 #define INTER_REMAP_COEF_SCALE 32768 //(1 << INTER_REMAP_COEF_BITS) #define SHIFT INTER_REMAP_COEF_BITS #define DELTA (1 << (INTER_REMAP_COEF_BITS - 1)) + ][][];//1024=tabsz2=INTER_TAB_SIZE2 ][] = (][])alignPtr(BilinearTab_iC4_buf, ); struct RemapVec_8u { int operator()(const Mat& _src, void* _dst, const short* XY, const ushort* FXY, const void* _wtab, int width) const { , sstep = (int)_src.step; ); ? (][][]; uchar* D = (uchar*)_dst; __m128i delta = _mm_set1_epi32(INTER_REMAP_COEF_SCALE / ); __m128i xy2ofs = _mm_set1_epi32(cn + (sstep << )); __m128i z = _mm_setzero_si128(); ) iofs0[], iofs1[]; ) { ; x += ) { __m128i xy0 = _mm_loadu_si128(()); __m128i xy1 = _mm_loadu_si128(( + )); __m128i v0, v1, v2, v3, a0, a1, b0, b1; unsigned i0, i1; xy0 = _mm_madd_epi16(xy0, xy2ofs); xy1 = _mm_madd_epi16(xy1, xy2ofs); _mm_store_si128((__m128i*)iofs0, xy0); _mm_store_si128((__m128i*)iofs1, xy1); i0 = *(]) + (*(]) << ); i1 = *(]) + (*(]) << ); v0 = _mm_unpacklo_epi32(_mm_cvtsi32_si128(i0), _mm_cvtsi32_si128(i1)); i0 = *(]) + (*(]) << ); i1 = *(]) + (*(]) << ); v1 = _mm_unpacklo_epi32(_mm_cvtsi32_si128(i0), _mm_cvtsi32_si128(i1)); v0 = _mm_unpacklo_epi8(v0, z); v1 = _mm_unpacklo_epi8(v1, z); a0 = _mm_unpacklo_epi32(_mm_loadl_epi64((__m128i*)(tabIJ + FXY[x] * )), _mm_loadl_epi64((__m128i*)(tabIJ + FXY[x + ] * ))); a1 = _mm_unpacklo_epi32(_mm_loadl_epi64((__m128i*)(tabIJ + FXY[x + ] * )), _mm_loadl_epi64((__m128i*)(tabIJ + FXY[x + ] * ))); b0 = _mm_unpacklo_epi64(a0, a1); b1 = _mm_unpackhi_epi64(a0, a1); v0 = _mm_madd_epi16(v0, b0); v1 = _mm_madd_epi16(v1, b1); v0 = _mm_add_epi32(_mm_add_epi32(v0, v1), delta); i0 = *(]) + (*(]) << ); i1 = *(]) + (*(]) << ); v2 = _mm_unpacklo_epi32(_mm_cvtsi32_si128(i0), _mm_cvtsi32_si128(i1)); i0 = *(]) + (*(]) << ); i1 = *(]) + (*(]) << ); v3 = _mm_unpacklo_epi32(_mm_cvtsi32_si128(i0), _mm_cvtsi32_si128(i1)); v2 = _mm_unpacklo_epi8(v2, z); v3 = _mm_unpacklo_epi8(v3, z); a0 = _mm_unpacklo_epi32(_mm_loadl_epi64((__m128i*)(tabIJ + FXY[x + ] * )), _mm_loadl_epi64((__m128i*)(tabIJ + FXY[x + ] * ))); a1 = _mm_unpacklo_epi32(_mm_loadl_epi64((__m128i*)(tabIJ + FXY[x + ] * )), _mm_loadl_epi64((__m128i*)(tabIJ + FXY[x + ] * ))); b0 = _mm_unpacklo_epi64(a0, a1); b1 = _mm_unpackhi_epi64(a0, a1); v2 = _mm_madd_epi16(v2, b0); v3 = _mm_madd_epi16(v3, b1); v2 = _mm_add_epi32(_mm_add_epi32(v2, v3), delta); v0 = _mm_srai_epi32(v0, INTER_REMAP_COEF_BITS); v2 = _mm_srai_epi32(v2, INTER_REMAP_COEF_BITS); v0 = _mm_packus_epi16(_mm_packs_epi32(v0, v2), z); _mm_storel_epi64((__m128i*)(D + x), v0); } } return x; } }; void remapBilinear1(Mat_<uchar> src, Mat_<uchar> dst, Mat& mapxy, Mat& mapa, short* tabIJ) { uchar* srcdata = (uchar*)src.data; uchar* dstdata = (uchar*)dst.data; vec2s* mapxydata = (vec2s*)mapxy.data; ushort* mapadata = (ushort*)mapa.data; , cols_ = cols - ; int i, j, io, jo, jprev, jprev_, joprev, joprev_, k; RemapVec_8u vecOp; , io = ; i < rows; ++i, io += cols) { uchar* D = dst.ptr<uchar>(i); bool prevInlier = false; jprev_ = ; joprev_ = io; , jo = io; j <= cols; ++j, ++jo) { ] < cols_ && (] < rows_ : !prevInlier; if (curInlier == prevInlier) continue; prevInlier = curInlier; jprev = jprev_; joprev = joprev_; jprev_ = j; joprev_ = jo; if (curInlier == false) { int len = vecOp(src, D, (short*)(mapxydata + joprev), mapadata + joprev, tabIJ, j - jprev); D += len; jprev += len; joprev += len; for (; jprev < j; jprev++, D++, ++joprev) { ]; ]; ; uchar* S = srcdata + sy*cols + sx; ] * w[] + S[] * w[] + S[cols] * w[] + S[cols + ] * w[]; *D = saturate_cast_uchar((val + DELTA) >> SHIFT); } } else { for (; jprev < j; ++jprev, ++D, ++joprev) { ]; ]; < || sy >= rows || sy + < )) { D[] = ; } else { int sx0, sx1, sy0, sy1; uchar v0, v1, v2, v3; ; sx0 = borderInterpolate(sx, cols, BORDER_CONSTANT); sx1 = borderInterpolate(sx + , cols, BORDER_CONSTANT); sy0 = borderInterpolate(sy, rows, BORDER_CONSTANT); sy1 = borderInterpolate(sy + , rows, BORDER_CONSTANT); v0 = sx0 >= && sy0 >= ? srcdata[sy0*cols + sx0] : ; v1 = sx1 >= && sy0 >= ? srcdata[sy0*cols + sx1] : ; v2 = sx0 >= && sy1 >= ? srcdata[sy1*cols + sx0] : ; v3 = sx1 >= && sy1 >= ? srcdata[sy1*cols + sx1] : ; ] + v1*w[] + v2*w[] + v3*w[]; D[] = saturate_cast_uchar((val + DELTA) >> SHIFT); } } } } } } void auremap(Mat_<uchar> src, Mat_<uchar> dst, Mat_<float> mapx, Mat_<float> mapy, int interpolation, int borderType = BORDER_CONSTANT, const Scalar& borderValue = Scalar()) { , cols_ = cols - ; , tabsz_= tabsz - , tabsz2 = ;//tabsz=INTER_TAB_SIZE, tabsz2=INTER_TAB_SIZE2 .f / tabsz, vy, v, tmp2; ][][], *tabIJ = tab2D[][]; ];//=8*tabsz //1.初化始化双线性插值表(在实际程序中应放到主循环外) #if 1 , io = -; i < tabsz; ++i) { tmp2 = i * scale; tab1D[++io] = .f - tmp2; //tab0=1, tab2=31/32, tab4=30/32, ..., tab62=01/32 tab1D[++io] = tmp2; //tab1=0, tab3=01/32, tab5=02/32, ..., tab63=31/32 } , io = ; i < tabsz; ++i, io += ) , jo = ; j < tabsz; ++j, jo += , tabIJ += ) { sum = ; , iio = ; ii < ; ++ii, iio += ) //tabi分别于tab0, tab1,...,tab63相乘后存放于tab2D, 存储顺序如(省略前缀tab1D和tab2D) { vy = tab1D[io + ii]; //0: 0*0=0, 0*1=1; 0*2=5, 0*3=6; ...; 0*62=124, 0*63=125; , jjo = iio; jj < ; ++jj, ++jjo)//1: 1*0=2, 1*1=3; 1*2=7, 1*3=8; ...; 1*62=126, 1*63=127; { //2: 2*0=128, 2*1=129; 2*2=132, 2*3=133; ...; 2*62=252, 2*63=253; v = vy*tab1D[jo + jj]; //3: 3*0=130, 3*1=131; 3*2=134, 3*3=135; ...; 3*62=254, 3*63=255; v = v*INTER_REMAP_COEF_SCALE; tmp1 = __round(v); sum += tabIJ[jjo] = saturate_cast_short(tmp1); ; } } if (sum != INTER_REMAP_COEF_SCALE) { diff = sum - INTER_REMAP_COEF_SCALE; mm = ; //int m1 = 1, m2 = 1; MM = ; //int M1 = 1, M2 = 1; , iio = ; ii < ; ++ii, iio +=) , jjo = iio + jj; jj < ; ++jj)//寻找最大最小值 tabIJ[jjo] < tabIJ[mm] ? mm = jjo : (tabIJ[jjo] > tabIJ[MM] ? MM = jjo : ); //if (tabIJ[jjo] < tabIJ[m1*2 + m2]) {m1 = ii; m2 = jj;} else if (tabIJ[jjo] > tabIJ[M1*2 + M2]) {M1 = ii; M2 = jj; } diff < ? tabIJ[MM] = (short)(tabIJ[MM] - diff): tabIJ[mm] = (short)(tabIJ[mm] - diff); //if (diff < 0) tabIJ[M1 * 2 + M2] = (short)(tabIJ[M1 * 2 + M2] - diff); else tabIJ[m1 * 2 + m2] = (short)(tabIJ[m1 * 2 + m2] - diff); } } #endif //2. tabIJ = tab2D[][]; float *mapxdata = (float*)mapx.data, *mapydata = (float*)mapy.data; Mat_<Vec2s> mapxy(rows, cols); vec2s* mapxydata = (vec2s*)mapxy.data; Mat_<ushort> mapa(rows, cols); ushort* mapadata = (ushort*)mapa.data; , io = ; i < rows; ++i, io += cols) { , jo = io; j < cols; ++j, ++jo) { int sx = cvRound(mapxdata[jo] * tabsz); int sy = cvRound(mapydata[jo] * tabsz); int v = (sy & tabsz_)*tabsz + (sx & tabsz_); mapxydata[jo].] = saturate_cast_short(sx >> INTER_BITS); mapxydata[jo].] = saturate_cast_short(sy >> INTER_BITS); mapadata[jo] = (ushort)v; } remapBilinear1(src, dst, mapxy, mapa, tabIJ); } } #endif