图像插值:OpenCV_remap

时间:2023-03-08 16:09:27

此为opencv中remap函数移植和分析,整理了双线性的插值部分的代码
尚未完全移植,但最困难的部分已经完成,而恰巧在这时,发现其实现并不是那么的令我满意,于是终止,改为自己实现。
考虑到以后可能会用到,暂且保存于此。

#include "./CCC.h"

#ifndef __DCC_H__
#define __DCC_H__

#define saturate_cast_uchar(v) (uchar)((unsigned)(v) <= UCHAR_MAX ? (v) : (v) > 0 ? UCHAR_MAX : 0)
#define saturate_cast_short(v) (short)((unsigned)((v) - SHRT_MIN) <= (unsigned)USHRT_MAX ? (v) : (v) > 0 ? SHRT_MAX : SHRT_MIN)

#define INTER_REMAP_COEF_BITS 15
#define INTER_REMAP_COEF_SCALE 32768 //(1 << INTER_REMAP_COEF_BITS)
#define SHIFT INTER_REMAP_COEF_BITS
#define DELTA (1 << (INTER_REMAP_COEF_BITS - 1))

 + ][][];//1024=tabsz2=INTER_TAB_SIZE2
][] = (][])alignPtr(BilinearTab_iC4_buf, );

struct RemapVec_8u
{
    int operator()(const Mat& _src, void* _dst, const short* XY, const ushort* FXY, const void* _wtab, int width) const
    {
        , sstep = (int)_src.step;

        );
         ? (][][];
        uchar* D = (uchar*)_dst;
        __m128i delta = _mm_set1_epi32(INTER_REMAP_COEF_SCALE / );
        __m128i xy2ofs = _mm_set1_epi32(cn + (sstep << ));
        __m128i z = _mm_setzero_si128();
        ) iofs0[], iofs1[];

        )
        {
            ; x += )
            {
                __m128i xy0 = _mm_loadu_si128(());
                __m128i xy1 = _mm_loadu_si128(( + ));
                __m128i v0, v1, v2, v3, a0, a1, b0, b1;
                unsigned i0, i1;

                xy0 = _mm_madd_epi16(xy0, xy2ofs);
                xy1 = _mm_madd_epi16(xy1, xy2ofs);
                _mm_store_si128((__m128i*)iofs0, xy0);
                _mm_store_si128((__m128i*)iofs1, xy1);

                i0 = *(]) + (*(]) << );
                i1 = *(]) + (*(]) << );
                v0 = _mm_unpacklo_epi32(_mm_cvtsi32_si128(i0), _mm_cvtsi32_si128(i1));
                i0 = *(]) + (*(]) << );
                i1 = *(]) + (*(]) << );
                v1 = _mm_unpacklo_epi32(_mm_cvtsi32_si128(i0), _mm_cvtsi32_si128(i1));
                v0 = _mm_unpacklo_epi8(v0, z);
                v1 = _mm_unpacklo_epi8(v1, z);

                a0 = _mm_unpacklo_epi32(_mm_loadl_epi64((__m128i*)(tabIJ + FXY[x] * )),
                    _mm_loadl_epi64((__m128i*)(tabIJ + FXY[x + ] * )));
                a1 = _mm_unpacklo_epi32(_mm_loadl_epi64((__m128i*)(tabIJ + FXY[x + ] * )),
                    _mm_loadl_epi64((__m128i*)(tabIJ + FXY[x + ] * )));
                b0 = _mm_unpacklo_epi64(a0, a1);
                b1 = _mm_unpackhi_epi64(a0, a1);
                v0 = _mm_madd_epi16(v0, b0);
                v1 = _mm_madd_epi16(v1, b1);
                v0 = _mm_add_epi32(_mm_add_epi32(v0, v1), delta);

                i0 = *(]) + (*(]) << );
                i1 = *(]) + (*(]) << );
                v2 = _mm_unpacklo_epi32(_mm_cvtsi32_si128(i0), _mm_cvtsi32_si128(i1));
                i0 = *(]) + (*(]) << );
                i1 = *(]) + (*(]) << );
                v3 = _mm_unpacklo_epi32(_mm_cvtsi32_si128(i0), _mm_cvtsi32_si128(i1));
                v2 = _mm_unpacklo_epi8(v2, z);
                v3 = _mm_unpacklo_epi8(v3, z);

                a0 = _mm_unpacklo_epi32(_mm_loadl_epi64((__m128i*)(tabIJ + FXY[x + ] * )),
                    _mm_loadl_epi64((__m128i*)(tabIJ + FXY[x + ] * )));
                a1 = _mm_unpacklo_epi32(_mm_loadl_epi64((__m128i*)(tabIJ + FXY[x + ] * )),
                    _mm_loadl_epi64((__m128i*)(tabIJ + FXY[x + ] * )));
                b0 = _mm_unpacklo_epi64(a0, a1);
                b1 = _mm_unpackhi_epi64(a0, a1);
                v2 = _mm_madd_epi16(v2, b0);
                v3 = _mm_madd_epi16(v3, b1);
                v2 = _mm_add_epi32(_mm_add_epi32(v2, v3), delta);

                v0 = _mm_srai_epi32(v0, INTER_REMAP_COEF_BITS);
                v2 = _mm_srai_epi32(v2, INTER_REMAP_COEF_BITS);
                v0 = _mm_packus_epi16(_mm_packs_epi32(v0, v2), z);
                _mm_storel_epi64((__m128i*)(D + x), v0);
            }
        }
        return x;
    }
};

void remapBilinear1(Mat_<uchar> src, Mat_<uchar> dst, Mat& mapxy, Mat& mapa, short* tabIJ)
{
    uchar* srcdata = (uchar*)src.data;
    uchar* dstdata = (uchar*)dst.data;
    vec2s* mapxydata = (vec2s*)mapxy.data;
    ushort* mapadata = (ushort*)mapa.data;
    , cols_ = cols - ;
    int i, j, io, jo, jprev, jprev_, joprev, joprev_, k;

    RemapVec_8u vecOp;

    , io = ; i < rows; ++i, io += cols)
    {
        uchar* D = dst.ptr<uchar>(i);

        bool prevInlier = false;

        jprev_ = ; joprev_ = io;
        , jo = io; j <= cols; ++j, ++jo)
        {
            ] < cols_ && (] < rows_ : !prevInlier;

            if (curInlier == prevInlier) continue;
            prevInlier = curInlier;

            jprev = jprev_; joprev = joprev_;
            jprev_ = j; joprev_ = jo;

            if (curInlier == false)
            {
                int len = vecOp(src, D, (short*)(mapxydata + joprev), mapadata + joprev, tabIJ, j - jprev);
                D += len;
                jprev += len;
                joprev += len;

                for (; jprev < j; jprev++, D++, ++joprev)
                {
                    ];
                    ];
                    ;
                    uchar* S = srcdata + sy*cols + sx;
                    ] * w[] + S[] * w[] + S[cols] * w[] + S[cols + ] * w[];
                    *D = saturate_cast_uchar((val + DELTA) >> SHIFT);
                }
            }
            else
            {
                for (; jprev < j; ++jprev, ++D, ++joprev)
                {
                    ];
                    ];
                     <  || sy >= rows || sy +  < ))
                    {
                        D[] = ;
                    }
                    else
                    {
                        int sx0, sx1, sy0, sy1;
                        uchar v0, v1, v2, v3;
                        ;

                        sx0 = borderInterpolate(sx, cols, BORDER_CONSTANT);
                        sx1 = borderInterpolate(sx + , cols, BORDER_CONSTANT);
                        sy0 = borderInterpolate(sy, rows, BORDER_CONSTANT);
                        sy1 = borderInterpolate(sy + , rows, BORDER_CONSTANT);
                        v0 = sx0 >=  && sy0 >=  ? srcdata[sy0*cols + sx0] : ;
                        v1 = sx1 >=  && sy0 >=  ? srcdata[sy0*cols + sx1] : ;
                        v2 = sx0 >=  && sy1 >=  ? srcdata[sy1*cols + sx0] : ;
                        v3 = sx1 >=  && sy1 >=  ? srcdata[sy1*cols + sx1] : ;

                        ] + v1*w[] + v2*w[] + v3*w[];
                        D[] = saturate_cast_uchar((val + DELTA) >> SHIFT);
                    }
                }
            }
        }
    }
}

void auremap(Mat_<uchar> src, Mat_<uchar> dst, Mat_<float> mapx, Mat_<float> mapy, int interpolation, int borderType = BORDER_CONSTANT, const Scalar& borderValue = Scalar())
{
    , cols_ = cols - ;
    , tabsz_= tabsz - , tabsz2 = ;//tabsz=INTER_TAB_SIZE, tabsz2=INTER_TAB_SIZE2
    .f / tabsz, vy, v, tmp2;
    ][][], *tabIJ = tab2D[][];
    ];//=8*tabsz

    //1.初化始化双线性插值表(在实际程序中应放到主循环外)
#if 1
    , io = -; i < tabsz; ++i)
    {
        tmp2 = i * scale;
        tab1D[++io] = .f - tmp2;    //tab0=1, tab2=31/32, tab4=30/32, ..., tab62=01/32
        tab1D[++io] = tmp2;            //tab1=0, tab3=01/32, tab5=02/32, ..., tab63=31/32
    }
    , io = ; i < tabsz; ++i, io += )
        , jo = ; j < tabsz; ++j, jo += , tabIJ += )
        {
            sum = ;
            , iio = ; ii < ; ++ii, iio += )    //tabi分别于tab0, tab1,...,tab63相乘后存放于tab2D, 存储顺序如(省略前缀tab1D和tab2D)
            {
                vy = tab1D[io + ii];                        //0: 0*0=0, 0*1=1; 0*2=5, 0*3=6; ...; 0*62=124, 0*63=125;
                , jjo = iio; jj < ; ++jj, ++jjo)//1: 1*0=2, 1*1=3; 1*2=7, 1*3=8; ...; 1*62=126, 1*63=127;
                {                                            //2: 2*0=128, 2*1=129; 2*2=132, 2*3=133; ...; 2*62=252, 2*63=253;
                    v = vy*tab1D[jo + jj];                    //3: 3*0=130, 3*1=131; 3*2=134, 3*3=135; ...; 3*62=254, 3*63=255;
                    v = v*INTER_REMAP_COEF_SCALE;
                    tmp1 = __round(v);
                    sum += tabIJ[jjo] = saturate_cast_short(tmp1);
                    ;
                }
            }

            if (sum != INTER_REMAP_COEF_SCALE)
            {
                diff = sum - INTER_REMAP_COEF_SCALE;
                mm = ; //int m1 = 1, m2 = 1;
                MM = ; //int M1 = 1, M2 = 1;
                , iio = ; ii < ; ++ii, iio +=)
                    , jjo = iio + jj; jj < ; ++jj)//寻找最大最小值
                        tabIJ[jjo] < tabIJ[mm] ? mm = jjo : (tabIJ[jjo] > tabIJ[MM] ? MM = jjo : ); //if (tabIJ[jjo] < tabIJ[m1*2 + m2]) {m1 = ii; m2 = jj;} else if (tabIJ[jjo] > tabIJ[M1*2 + M2]) {M1 = ii; M2 = jj; }

                diff <  ? tabIJ[MM] = (short)(tabIJ[MM] - diff): tabIJ[mm] = (short)(tabIJ[mm] - diff); //if (diff < 0) tabIJ[M1 * 2 + M2] = (short)(tabIJ[M1 * 2 + M2] - diff); else tabIJ[m1 * 2 + m2] = (short)(tabIJ[m1 * 2 + m2] - diff);
            }
        }
#endif

    //2.
    tabIJ = tab2D[][]; float *mapxdata = (float*)mapx.data, *mapydata = (float*)mapy.data;
    Mat_<Vec2s> mapxy(rows, cols); vec2s* mapxydata = (vec2s*)mapxy.data;
    Mat_<ushort> mapa(rows, cols); ushort* mapadata = (ushort*)mapa.data;
    , io = ; i < rows; ++i, io += cols)
    {
        , jo = io; j < cols; ++j, ++jo)
        {
            int sx = cvRound(mapxdata[jo] * tabsz);
            int sy = cvRound(mapydata[jo] * tabsz);
            int v = (sy & tabsz_)*tabsz + (sx & tabsz_);

            mapxydata[jo].] = saturate_cast_short(sx >> INTER_BITS);
            mapxydata[jo].] = saturate_cast_short(sy >> INTER_BITS);
            mapadata[jo] = (ushort)v;
        }
        remapBilinear1(src, dst, mapxy, mapa, tabIJ);
    }
}

#endif