I'm trying to implement the spectral residual approach for saliency detection, described in this paper: http://www.klab.caltech.edu/~xhou/papers/cvpr07.pdf
我正在努力实现对saliency检测的光谱剩余方法,本文描述:http://www.klab.caltech.edu/~xhou/papers/cvpr07.pdf。
There is a reference implementation in Matlab Code, taken from their website: http://www.klab.caltech.edu/~xhou/projects/spectralResidual/spectralresidual.html
在Matlab代码中有一个参考实现,从他们的网站:http://www.klab.caltech.edu/~xhou/projects/ spectralresidu.html。
clear
clc
%% Read image from file
inImg = im2double(rgb2gray(imread('yourImage.jpg')));
inImg = imresize(inImg, 64/size(inImg, 2));
%% Spectral Residual
myFFT = fft2(inImg);
myLogAmplitude = log(abs(myFFT));
myPhase = angle(myFFT);
mySpectralResidual = myLogAmplitude - imfilter(myLogAmplitude, fspecial('average', 3),'replicate');
saliencyMap = abs(ifft2(exp(mySpectralResidual + i*myPhase))).^2;
%% After Effect
saliencyMap = mat2gray(imfilter(saliencyMap, fspecial('gaussian', [10, 10], 2.5)));
imshow(saliencyMap);
I've tried to translate it to C++ with CImg. Where I fail is here:
我试着用CImg把它翻译成c++。我失败的地方是:
myPhase = angle(myFFT);
and here
这里
saliencyMap = abs(ifft2(exp(mySpectralResidual + i*myPhase))).^2;
Here's my code:
这是我的代码:
#include <CImg.h>
#include <iostream>
using namespace cimg_library;
int main() {
CImg<unsigned char> image("img2.jpg");
CImg<float> mask(3,3,1,1,1.0/9.0);
image.resize(64,64);
CImgList<float> myFFT = image.get_FFT();
const CImg<float> MyLogAmplitude = ((myFFT[0].get_pow(2) + myFFT[1].get_pow(2)).get_sqrt()).get_log(); //Magnitude
const CImg<float> MyPhase = myFFT[0].get_atan2(myFFT[1]);
const CImg<float> A = MyLogAmplitude.get_convolve(mask);
const CImg<float> MySpectralResidual = MyLogAmplitude-A;
CImgList<float> tmp = CImgList<float>(MyResidual.get_exp(),MyPhase);
CImgList<float> MySaliencyMap = tmp.get_FFT(true);
CImgDisplay draw_disp0(MySaliencyMap,"Image");
while (!draw_disp0.is_closed()) {
draw_disp0.wait();
}
return 0;
}
Anybody seen an obvious mistake?
有人看到一个明显的错误吗?
2 个解决方案
#1
1
I think I can see two mistakes in your code :
我想在你们的代码中我可以看到两个错误:
-
First, the atan2() call for MyPhase has arguments inverted. Should be written as
首先,atan2()调用MyPhase的参数是倒转的。应该写成
const CImg MyPhase = myFFT[1].get_atan2(myFFT[0]);
const CImg [1] . [0];
(but this is probably not much of an issue here).
(但这可能并不是什么大问题)。
- Second, and it is more serious, you are doing the inverse FFT on a pair of complex values coded as (amplitude,phase), which is not what CImg expects there, as the FFT() function supposes you input a (real,imaginary) pair of images. That probably makes a huge difference in the result.
- 其次,更严重的是,你在对一个编码为(振幅,相位)的复杂值进行反FFT,这不是CImg期望的,因为FFT()函数假定您输入了一个(真实的,虚构的)图像。这可能会对结果产生巨大的影响。
#2
1
Actually I had the same problem. Here is the problem solving code. I edited some to create a rectangle around the object salient. This code works for me.
其实我也有同样的问题。这是解决问题的代码。我编辑了一些,以创建一个围绕对象突出的矩形。这段代码对我有效。
#include "highgui.h"
#include "opencv2/imgproc/imgproc.hpp"
#include "cv.h"
#include <stdlib.h>
#include <stdio.h>
#include <string>
#include <iostream>
using namespace cv;
using namespace std;
// function Fourier transform
void fft2(IplImage *src, IplImage *dst);
int main()
{
string imagePath = "inputgambar/34.jpg";
//string imageSave = "saliency/42.jpg";
//string imageRectangular = "rectangular/42.jpg";
IplImage *ImageAsli, *ImageSaliency, *src, *ImageRe, *ImageIm, *Fourier, *Inverse, *LogAmplitude, *Sine, *Cosine;
IplImage *Saliency, *Residual;
IplImage *tmp1, *tmp2, *tmp3;
Mat gambarSave, threshold_output;
vector<vector<Point> > contours;
vector<Vec4i> hierarchy;
double minNum = 0, maxNum = 0, scale, shift, rata2, nilaiThreshold, Lebar, gantiPixel;
// load image Asli
ImageAsli = cvLoadImage(imagePath.c_str());
cvNamedWindow("ImageAsli", CV_WINDOW_NORMAL);
cvShowImage("ImageAsli", ImageAsli);
cvMoveWindow("ImageAsli",0,100);
// Load image, jadikan single channel/gray
//inputImage = cvLoadImage(imagePath.c_str());
src = cvLoadImage(imagePath.c_str(),0);
Lebar = src->width;
gantiPixel = 64/Lebar;
// Fourier , punya 2 channel, Real dan Imajiner
Fourier = cvCreateImage(cvGetSize(src), IPL_DEPTH_64F, 2);
Inverse = cvCreateImage(cvGetSize(src), IPL_DEPTH_64F, 2);
// Real , Imajiner spektrum
ImageRe = cvCreateImage (cvGetSize (src), IPL_DEPTH_64F, 1);
ImageIm = cvCreateImage (cvGetSize (src), IPL_DEPTH_64F, 1);
// log amplitude
LogAmplitude = cvCreateImage (cvGetSize (src), IPL_DEPTH_64F, 1);
// Sinus, Cosinus spektrum
Sine = cvCreateImage (cvGetSize (src), IPL_DEPTH_64F, 1);
Cosine = cvCreateImage(cvGetSize(src), IPL_DEPTH_64F, 1);
// spectral residual
Residual = cvCreateImage (cvGetSize (src), IPL_DEPTH_64F, 1);
// Saliency
Saliency = cvCreateImage (cvGetSize (src), src-> depth, src-> nChannels);
// Temporary space
tmp1 = cvCreateImage(cvGetSize(src), IPL_DEPTH_64F, 1);
tmp2 = cvCreateImage(cvGetSize(src), IPL_DEPTH_64F, 1);
tmp3 = cvCreateImage(cvGetSize(src), IPL_DEPTH_64F, 1);
//
scale = 1.0/255.0;
cvConvertScale (src, tmp1, 1, 0);
//
fft2 (tmp1, Fourier);
// Real dan Imajiner ditaruh di ImageRe ImageIm
cvSplit (Fourier, ImageRe, ImageIm, 0, 0);
// Magnitude/Amplitudo Fourier di tmp3
cvPow( ImageRe, tmp1, 2.0);
cvPow( ImageIm, tmp2, 2.0);
cvAdd( tmp1, tmp2, tmp3);
cvPow( tmp3, tmp3, 0.5 );
// logAmplitude , sin, cosin
cvLog (tmp3, LogAmplitude);
cvDiv (ImageIm, tmp3, Sine);
cvDiv (ImageRe, tmp3, Cosine);
// smoothing (1/(3×3)) * ones(3), mean filter pada logAmplitude ditempatkan pada tmp3
cvSmooth (LogAmplitude, tmp3, CV_BLUR, 3, 3);
// Spectral Residual = LogAmp-tmp3
cvSub (LogAmplitude, tmp3, Residual);
/************************************************************************ /
inverse Fourier Transform --> exp (Residual + i * Phase)
Euler's formula:
exp(r + i * T) = exp(r) * (cos(T) + i * sin(T)) = exp(r) * cos(T) + i * exp(r) * sin(T)
Sin (T) = ImageIm / LogAmplitude; cos(T) = ImageRe / LogAmplitude;
/************************************************************************/
cvExp(Residual, Residual);
cvMul(Residual, Cosine, tmp1);
cvMul(Residual, Sine, tmp2);
// Merging Residual, Saliency 1 channel => Fourier 2 channel
cvMerge (tmp1, tmp2, 0, 0, Fourier);
// Inverse Fourier transform
cvDFT (Fourier, Inverse, CV_DXT_INV_SCALE);
cvSplit (Inverse, tmp1, tmp2, 0,0);
// tmp3 = kuadrat akar tmp1 tmp2
cvPow (tmp1, tmp1, 2);
cvPow (tmp2, tmp2, 2);
cvAdd (tmp1, tmp2, tmp3);
// Gaussian filter 7x7 kernel
cvSmooth (tmp3, tmp3, CV_GAUSSIAN, 7, 7);
//CoreCVminmaxloc
cvMinMaxLoc (tmp3, & minNum, & maxNum, NULL, NULL);
scale = 255 / (maxNum - minNum);
shift =-minNum * scale;
// End of Saliency
cvConvertScale(tmp3, Saliency, scale, shift);
//deteksi proto objek
CvScalar rataan = cvAvg(Saliency);
nilaiThreshold = 3* (rataan .val[0]);
//cout << nilaiThreshold ;
gambarSave = Mat(Saliency);
//imwrite(imageSave.c_str(), gambarSave);
//resize(gambarSave, gambarSave, Size(), gantiPixel, gantiPixel, CV_INTER_AREA);
//ImageSaliency = cvCreateImage(cvSize(Saliency-> width * gantiPixel, Saliency-> height *gantiPixel), Saliency -> depth, Saliency -> nChannels);
//cvResize(Saliency, ImageSaliency, CV_INTER_AREA);
cvNamedWindow("Saliency", CV_WINDOW_NORMAL);
cvShowImage("Saliency", Saliency);
cvMoveWindow("Saliency",0,500);
/// Detect edges using Threshold
threshold( gambarSave, threshold_output, nilaiThreshold, 255, THRESH_BINARY );
/// Find contours
findContours( threshold_output, contours, hierarchy, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE, Point(0, 0) );
/// Find the rotated rectangles
vector<RotatedRect> minRect( contours.size() );
for( int i = 0; i < contours.size(); i++ )
{ minRect[i] = minAreaRect( Mat(contours[i]) );
}
/// Draw rotated rects
for( int i = 0; i< contours.size(); i++ )
{
// rotated rectangle
Point2f rect_points[4]; minRect[i].points( rect_points );
for( int j = 0; j < 4; j++ )
line( gambarSave, rect_points[j], rect_points[(j+1)%4], Scalar(100), 2, 8 );
}
//imwrite(imageRectangular.c_str(), gambarSave);
/// Show in a window
namedWindow( "Rectangular", CV_WINDOW_AUTOSIZE );
imshow( "Rectangular", gambarSave );
cvMoveWindow("Rectangular",480,100);
cvWaitKey(0);
//Release images
cvReleaseImage(&src);
cvReleaseImage(&ImageIm);
cvReleaseImage(&ImageRe);
cvReleaseImage(&Fourier);
cvReleaseImage(&Inverse);
cvReleaseImage(&LogAmplitude);
cvReleaseImage(&Sine);
cvReleaseImage(&Cosine);
cvReleaseImage(&Saliency);
cvReleaseImage(&Residual);
cvReleaseImage(&tmp1);
cvReleaseImage(&tmp2);
cvReleaseImage(&tmp3);
cvReleaseImage(&ImageAsli);
cvDestroyAllWindows();
return 0;
}
//Fourier transform
void fft2(IplImage *src, IplImage *dst)
{
IplImage *image_Re = 0, *image_Im = 0, *Fourier = 0;
//1 channel ImageRe, ImageIm
image_Re = cvCreateImage(cvGetSize(src), IPL_DEPTH_64F, 1);
image_Im = cvCreateImage(cvGetSize(src), IPL_DEPTH_64F, 1);
//2 channels (image_Re, image_Im)
Fourier = cvCreateImage(cvGetSize(src), IPL_DEPTH_64F, 2);
/************************************************* ***********************/
// isi nilai image_Re
cvConvertScale(src, image_Re, 1, 0);
// nilai initial Imajiner di Set 0
cvZero(image_Im);
// Join real and imaginary parts and stock them in Fourier image
cvMerge(image_Re, image_Im, 0, 0, Fourier);
// forward Fourier transform
cvDFT(Fourier, dst, CV_DXT_FORWARD);
cvReleaseImage(&image_Re);
cvReleaseImage(&image_Im);
cvReleaseImage(&Fourier);
}
http://i60.tinypic.com/5xvmhi.png
http://i60.tinypic.com/5xvmhi.png
#1
1
I think I can see two mistakes in your code :
我想在你们的代码中我可以看到两个错误:
-
First, the atan2() call for MyPhase has arguments inverted. Should be written as
首先,atan2()调用MyPhase的参数是倒转的。应该写成
const CImg MyPhase = myFFT[1].get_atan2(myFFT[0]);
const CImg [1] . [0];
(but this is probably not much of an issue here).
(但这可能并不是什么大问题)。
- Second, and it is more serious, you are doing the inverse FFT on a pair of complex values coded as (amplitude,phase), which is not what CImg expects there, as the FFT() function supposes you input a (real,imaginary) pair of images. That probably makes a huge difference in the result.
- 其次,更严重的是,你在对一个编码为(振幅,相位)的复杂值进行反FFT,这不是CImg期望的,因为FFT()函数假定您输入了一个(真实的,虚构的)图像。这可能会对结果产生巨大的影响。
#2
1
Actually I had the same problem. Here is the problem solving code. I edited some to create a rectangle around the object salient. This code works for me.
其实我也有同样的问题。这是解决问题的代码。我编辑了一些,以创建一个围绕对象突出的矩形。这段代码对我有效。
#include "highgui.h"
#include "opencv2/imgproc/imgproc.hpp"
#include "cv.h"
#include <stdlib.h>
#include <stdio.h>
#include <string>
#include <iostream>
using namespace cv;
using namespace std;
// function Fourier transform
void fft2(IplImage *src, IplImage *dst);
int main()
{
string imagePath = "inputgambar/34.jpg";
//string imageSave = "saliency/42.jpg";
//string imageRectangular = "rectangular/42.jpg";
IplImage *ImageAsli, *ImageSaliency, *src, *ImageRe, *ImageIm, *Fourier, *Inverse, *LogAmplitude, *Sine, *Cosine;
IplImage *Saliency, *Residual;
IplImage *tmp1, *tmp2, *tmp3;
Mat gambarSave, threshold_output;
vector<vector<Point> > contours;
vector<Vec4i> hierarchy;
double minNum = 0, maxNum = 0, scale, shift, rata2, nilaiThreshold, Lebar, gantiPixel;
// load image Asli
ImageAsli = cvLoadImage(imagePath.c_str());
cvNamedWindow("ImageAsli", CV_WINDOW_NORMAL);
cvShowImage("ImageAsli", ImageAsli);
cvMoveWindow("ImageAsli",0,100);
// Load image, jadikan single channel/gray
//inputImage = cvLoadImage(imagePath.c_str());
src = cvLoadImage(imagePath.c_str(),0);
Lebar = src->width;
gantiPixel = 64/Lebar;
// Fourier , punya 2 channel, Real dan Imajiner
Fourier = cvCreateImage(cvGetSize(src), IPL_DEPTH_64F, 2);
Inverse = cvCreateImage(cvGetSize(src), IPL_DEPTH_64F, 2);
// Real , Imajiner spektrum
ImageRe = cvCreateImage (cvGetSize (src), IPL_DEPTH_64F, 1);
ImageIm = cvCreateImage (cvGetSize (src), IPL_DEPTH_64F, 1);
// log amplitude
LogAmplitude = cvCreateImage (cvGetSize (src), IPL_DEPTH_64F, 1);
// Sinus, Cosinus spektrum
Sine = cvCreateImage (cvGetSize (src), IPL_DEPTH_64F, 1);
Cosine = cvCreateImage(cvGetSize(src), IPL_DEPTH_64F, 1);
// spectral residual
Residual = cvCreateImage (cvGetSize (src), IPL_DEPTH_64F, 1);
// Saliency
Saliency = cvCreateImage (cvGetSize (src), src-> depth, src-> nChannels);
// Temporary space
tmp1 = cvCreateImage(cvGetSize(src), IPL_DEPTH_64F, 1);
tmp2 = cvCreateImage(cvGetSize(src), IPL_DEPTH_64F, 1);
tmp3 = cvCreateImage(cvGetSize(src), IPL_DEPTH_64F, 1);
//
scale = 1.0/255.0;
cvConvertScale (src, tmp1, 1, 0);
//
fft2 (tmp1, Fourier);
// Real dan Imajiner ditaruh di ImageRe ImageIm
cvSplit (Fourier, ImageRe, ImageIm, 0, 0);
// Magnitude/Amplitudo Fourier di tmp3
cvPow( ImageRe, tmp1, 2.0);
cvPow( ImageIm, tmp2, 2.0);
cvAdd( tmp1, tmp2, tmp3);
cvPow( tmp3, tmp3, 0.5 );
// logAmplitude , sin, cosin
cvLog (tmp3, LogAmplitude);
cvDiv (ImageIm, tmp3, Sine);
cvDiv (ImageRe, tmp3, Cosine);
// smoothing (1/(3×3)) * ones(3), mean filter pada logAmplitude ditempatkan pada tmp3
cvSmooth (LogAmplitude, tmp3, CV_BLUR, 3, 3);
// Spectral Residual = LogAmp-tmp3
cvSub (LogAmplitude, tmp3, Residual);
/************************************************************************ /
inverse Fourier Transform --> exp (Residual + i * Phase)
Euler's formula:
exp(r + i * T) = exp(r) * (cos(T) + i * sin(T)) = exp(r) * cos(T) + i * exp(r) * sin(T)
Sin (T) = ImageIm / LogAmplitude; cos(T) = ImageRe / LogAmplitude;
/************************************************************************/
cvExp(Residual, Residual);
cvMul(Residual, Cosine, tmp1);
cvMul(Residual, Sine, tmp2);
// Merging Residual, Saliency 1 channel => Fourier 2 channel
cvMerge (tmp1, tmp2, 0, 0, Fourier);
// Inverse Fourier transform
cvDFT (Fourier, Inverse, CV_DXT_INV_SCALE);
cvSplit (Inverse, tmp1, tmp2, 0,0);
// tmp3 = kuadrat akar tmp1 tmp2
cvPow (tmp1, tmp1, 2);
cvPow (tmp2, tmp2, 2);
cvAdd (tmp1, tmp2, tmp3);
// Gaussian filter 7x7 kernel
cvSmooth (tmp3, tmp3, CV_GAUSSIAN, 7, 7);
//CoreCVminmaxloc
cvMinMaxLoc (tmp3, & minNum, & maxNum, NULL, NULL);
scale = 255 / (maxNum - minNum);
shift =-minNum * scale;
// End of Saliency
cvConvertScale(tmp3, Saliency, scale, shift);
//deteksi proto objek
CvScalar rataan = cvAvg(Saliency);
nilaiThreshold = 3* (rataan .val[0]);
//cout << nilaiThreshold ;
gambarSave = Mat(Saliency);
//imwrite(imageSave.c_str(), gambarSave);
//resize(gambarSave, gambarSave, Size(), gantiPixel, gantiPixel, CV_INTER_AREA);
//ImageSaliency = cvCreateImage(cvSize(Saliency-> width * gantiPixel, Saliency-> height *gantiPixel), Saliency -> depth, Saliency -> nChannels);
//cvResize(Saliency, ImageSaliency, CV_INTER_AREA);
cvNamedWindow("Saliency", CV_WINDOW_NORMAL);
cvShowImage("Saliency", Saliency);
cvMoveWindow("Saliency",0,500);
/// Detect edges using Threshold
threshold( gambarSave, threshold_output, nilaiThreshold, 255, THRESH_BINARY );
/// Find contours
findContours( threshold_output, contours, hierarchy, CV_RETR_EXTERNAL, CV_CHAIN_APPROX_SIMPLE, Point(0, 0) );
/// Find the rotated rectangles
vector<RotatedRect> minRect( contours.size() );
for( int i = 0; i < contours.size(); i++ )
{ minRect[i] = minAreaRect( Mat(contours[i]) );
}
/// Draw rotated rects
for( int i = 0; i< contours.size(); i++ )
{
// rotated rectangle
Point2f rect_points[4]; minRect[i].points( rect_points );
for( int j = 0; j < 4; j++ )
line( gambarSave, rect_points[j], rect_points[(j+1)%4], Scalar(100), 2, 8 );
}
//imwrite(imageRectangular.c_str(), gambarSave);
/// Show in a window
namedWindow( "Rectangular", CV_WINDOW_AUTOSIZE );
imshow( "Rectangular", gambarSave );
cvMoveWindow("Rectangular",480,100);
cvWaitKey(0);
//Release images
cvReleaseImage(&src);
cvReleaseImage(&ImageIm);
cvReleaseImage(&ImageRe);
cvReleaseImage(&Fourier);
cvReleaseImage(&Inverse);
cvReleaseImage(&LogAmplitude);
cvReleaseImage(&Sine);
cvReleaseImage(&Cosine);
cvReleaseImage(&Saliency);
cvReleaseImage(&Residual);
cvReleaseImage(&tmp1);
cvReleaseImage(&tmp2);
cvReleaseImage(&tmp3);
cvReleaseImage(&ImageAsli);
cvDestroyAllWindows();
return 0;
}
//Fourier transform
void fft2(IplImage *src, IplImage *dst)
{
IplImage *image_Re = 0, *image_Im = 0, *Fourier = 0;
//1 channel ImageRe, ImageIm
image_Re = cvCreateImage(cvGetSize(src), IPL_DEPTH_64F, 1);
image_Im = cvCreateImage(cvGetSize(src), IPL_DEPTH_64F, 1);
//2 channels (image_Re, image_Im)
Fourier = cvCreateImage(cvGetSize(src), IPL_DEPTH_64F, 2);
/************************************************* ***********************/
// isi nilai image_Re
cvConvertScale(src, image_Re, 1, 0);
// nilai initial Imajiner di Set 0
cvZero(image_Im);
// Join real and imaginary parts and stock them in Fourier image
cvMerge(image_Re, image_Im, 0, 0, Fourier);
// forward Fourier transform
cvDFT(Fourier, dst, CV_DXT_FORWARD);
cvReleaseImage(&image_Re);
cvReleaseImage(&image_Im);
cvReleaseImage(&Fourier);
}
http://i60.tinypic.com/5xvmhi.png
http://i60.tinypic.com/5xvmhi.png