/* The original guided filter is just converted version of He et al.'s matlab code The detailed description of the code and the algorithm is presented in "http://research.microsoft.com/en-us/um/people/kahe/eccv10/index.html" The fast guided filter is approximated filter to reduce the computational complexity of filtering. We only apply the filter to sampled points. The similar works are presented in the paper, J.-Y. Kim, L.-S. Kim, S.-H. Hwang, "An advanced contrast enhancement using partially overlapped subblock histogram equalization," IEEE Trans. Circuits and Syst. Video Technol. 11 (4) (2001) 475-484. Also, at each window, the gussian weight is applied. The guided filter with shiftable window is the enhanced algorithm of original guided image filtering by shifting the filter window. Last updated: 2013-02-06 Author: Jin-Hwan, Kim. */ #include "dehazing.h" /* Function: CalcAcoeff Description: calculate the coefficent "a" of guided filter (intrinsic function for guide filtering) Parameters: pfSigma - Sigma + eps * eye(3) --> see the paper and original matlab code pfCov - Cov of image nIdx - location of image(index). Return: pfA1, pfA2, pfA3 - coefficient of "a" at each color channel */ void dehazing::CalcAcoeff(float* pfSigma, float* pfCov, float* pfA1, float* pfA2, float* pfA3, int nIdx) { float fDet; float fOneOverDeterminant; float pfInvSig[9]; int nIdx9 = nIdx*9; // a_k = (sum_i(I_i*p_i-mu_k*p_k)/(abs(omega)*(sigma_k^2+epsilon)) fDet = ( (pfSigma[nIdx9]*(pfSigma[nIdx9+4] * pfSigma[nIdx9+8] - pfSigma[nIdx9+5] * pfSigma[nIdx9+7])) - ( pfSigma[nIdx9+1]*(pfSigma[nIdx9+3] * pfSigma[nIdx9+8] - pfSigma[nIdx9+5] * pfSigma[nIdx9+6])) + ( pfSigma[nIdx9+2]*(pfSigma[nIdx9+3] * pfSigma[nIdx9+7] - pfSigma[nIdx9+4] * pfSigma[nIdx9+6])) ); fOneOverDeterminant = 1.0f/fDet; pfInvSig [0] = (( pfSigma[nIdx9+4]*pfSigma[nIdx9+8] ) - (pfSigma[nIdx9+5]*pfSigma[nIdx9+7]))*fOneOverDeterminant; pfInvSig [1] = -(( pfSigma[nIdx9+1]*pfSigma[nIdx9+8] ) - (pfSigma[nIdx9+2]*pfSigma[nIdx9+7]))*fOneOverDeterminant; pfInvSig [2] = (( pfSigma[nIdx9+1]*pfSigma[nIdx9+5] ) - (pfSigma[nIdx9+2]*pfSigma[nIdx9+4]))*fOneOverDeterminant; pfInvSig [3] = -(( pfSigma[nIdx9+3]*pfSigma[nIdx9+8] ) - (pfSigma[nIdx9+5]*pfSigma[nIdx9+6]))*fOneOverDeterminant; pfInvSig [4] = (( pfSigma[nIdx9]*pfSigma[nIdx9+8] ) - (pfSigma[nIdx9+2]*pfSigma[nIdx9+6]))*fOneOverDeterminant; pfInvSig [5] = -(( pfSigma[nIdx9]*pfSigma[nIdx9+5] ) - (pfSigma[nIdx9+2]*pfSigma[nIdx9+3]))*fOneOverDeterminant; pfInvSig [6] = (( pfSigma[nIdx9+3]*pfSigma[nIdx9+7] ) - (pfSigma[nIdx9+4]*pfSigma[nIdx9+6]))*fOneOverDeterminant; pfInvSig [7] = -(( pfSigma[nIdx9]*pfSigma[nIdx9+7] ) - (pfSigma[nIdx9+1]*pfSigma[nIdx9+6]))*fOneOverDeterminant; pfInvSig [8] = (( pfSigma[nIdx9]*pfSigma[nIdx9+4] ) - (pfSigma[nIdx9+1]*pfSigma[nIdx9+3]))*fOneOverDeterminant; int nIdx3 = nIdx*3; pfA1[nIdx] = pfCov[nIdx3]*pfInvSig[0]+pfCov[nIdx3+1]*pfInvSig[3]+pfCov[nIdx3+2]*pfInvSig[6]; pfA2[nIdx] = pfCov[nIdx3]*pfInvSig[1]+pfCov[nIdx3+1]*pfInvSig[4]+pfCov[nIdx3+2]*pfInvSig[7]; pfA3[nIdx] = pfCov[nIdx3]*pfInvSig[2]+pfCov[nIdx3+1]*pfInvSig[5]+pfCov[nIdx3+2]*pfInvSig[8]; } /* Function: BoxFilter Description: cummulative function for calculating the integral image (It may apply other arraies.) Parameters: pfInArray - input array nR - radius of filter window nWid - width of array nHei - height of array Return: fOutArray - output array (integrated array) */ void dehazing::BoxFilter(float* pfInArray, int nR, int nWid, int nHei, float*& fOutArray) { float* pfArrayCum = new float[nWid*nHei]; //cumulative sum over Y axis for ( int nX = 0; nX < nWid; nX++ ) pfArrayCum[nX] = pfInArray[nX]; for ( int nIdx = nWid; nIdx fNormV1 fNormV1 = 1.0f/m_nGBlockSize; fMeanI = 0.0f; // fNormV1 --> create perpendicular vector(m_pfSmallPk_p) // m_pfSmallPk_p == m_pfSmallY - fMeanI; sseMean = _mm_setzero_ps(); sseY = _mm_setzero_ps(); // Mean value for ( nYb = 0 ; nYb < m_nGBlockSize; nYb++ ) { for ( nXb = 0; nXb < m_nGBlockSize; nXb+=4 ) { sseY = _mm_loadu_ps((m_pfSmallY+(nIdxA+nYb*nW+nXb))); sseMean = _mm_add_ps( sseMean, sseY ); } } for ( nI = 0 ; nI < 4; nI++ ) { fMeanI += *((float*)(&sseMean)+nI); } fMeanI = fMeanI/(m_nGBlockSize*m_nGBlockSize); sseMean = _mm_set1_ps(fMeanI); sseY = _mm_setzero_ps(); ssePk_p; // m_pfSmallY - fMeanI for ( nYb = 0 ; nYb < m_nGBlockSize; nYb++ ) { for ( nXb = 0; nXb < m_nGBlockSize; nXb+=4 ) { sseY = _mm_loadu_ps(m_pfSmallY+(nIdxA+nYb*nW+nXb)); ssePk_p = _mm_sub_ps( sseY, sseMean ); for ( nI = 0 ; nI < 4; nI++) { m_pfSmallPk_p[nYb*m_nGBlockSize+nXb+nI] = *((float*)(&ssePk_p)+nI); } } } // p vector normalize -> m_pfSmallNormPk // m_pfSmallPk_p^2 fDenom=0.0f; sseDenom =_mm_setzero_ps(); for ( nIdxB = 0 ; nIdxB < m_nGBlockSize*m_nGBlockSize; nIdxB+=4 ) { ssePk_p = _mm_loadu_ps(m_pfSmallPk_p+nIdxB); sseDenom = _mm_add_ps( sseDenom, _mm_mul_ps( ssePk_p, ssePk_p )); } for ( nI = 0 ; nI < 4; nI++ ) { fDenom += *((float*)(&sseDenom)+nI); } sseDenom = _mm_set1_ps(sqrt(fDenom)); sseNormPk; fAk = 0.0f; fBk = 0.0f; sseAk = _mm_setzero_ps(); sseBk = _mm_setzero_ps(); sseP; // a = q'norm_p // b = q'norm_v1 // Exception handling (denominator == 0) if(fDenom == 0) { // a = 0; fAk = 0; sseNormV1 = _mm_set1_ps(fNormV1); for ( nYb = 0 ; nYb < m_nGBlockSize; nYb++ ) { for ( nXb = 0; nXb < m_nGBlockSize; nXb+=4 ) { sseP = _mm_loadu_ps(m_pfSmallTrans+(nIdxA+nYb*nW+nXb)); sseBk = _mm_add_ps( _mm_mul_ps( sseP , sseNormV1 ), sseBk ); } } // b = q'norm_v1 for ( nI = 0 ; nI < 4; nI++ ) fBk += *((float*)(&sseBk)+nI); } else { // m_pfSmallNormPk for ( nIdxB = 0 ; nIdxB < m_nGBlockSize*m_nGBlockSize; nIdxB+=4 ) { ssePk_p = _mm_loadu_ps(m_pfSmallPk_p+nIdxB); sseNormPk = _mm_div_ps(ssePk_p, sseDenom); for ( nI = 0 ; nI < 4; nI++ ) { m_pfSmallNormPk[nIdxB+nI] = *((float*)(&sseNormPk)+nI); } } // a = q'norm_p // b = q'norm_v1 sseNormV1 = _mm_set1_ps(fNormV1); for ( nYb = 0 ; nYb < m_nGBlockSize; nYb++ ) { for ( nXb = 0; nXb < m_nGBlockSize; nXb+=4 ) { sseP = _mm_loadu_ps(m_pfSmallTrans+(nIdxA+nYb*nW+nXb)); sseNormPk = _mm_loadu_ps(m_pfSmallNormPk+(nYb*m_nGBlockSize+nXb)); sseAk = _mm_add_ps( _mm_mul_ps( sseP , sseNormPk ), sseAk ); sseBk = _mm_add_ps( _mm_mul_ps( sseP , sseNormV1 ), sseBk ); } } for ( nI = 0 ; nI < 4; nI++ ) { fAk += *((float*)(&sseAk)+nI); fBk += *((float*)(&sseBk)+nI); } } sseGauss; sseInteg; sseBkV1 = _mm_set1_ps(fBk*fNormV1); sseAk = _mm_set1_ps(fAk); // Gaussian weighting // Weighted_denom for ( nYb = 0 ; nYb < m_nGBlockSize; nYb++ ) { for ( nXb = 0; nXb < m_nGBlockSize; nXb+=4 ) { sseNormPk = _mm_loadu_ps(m_pfSmallNormPk+(nYb*m_nGBlockSize+nXb)); sseGauss = _mm_loadu_ps(m_pfGuidedLUT+(nYb*m_nGBlockSize+nXb)); sseInteg = _mm_mul_ps(_mm_add_ps( _mm_mul_ps(sseNormPk, sseAk), sseBkV1 ), sseGauss); // m_pfSmallInteg // m_pfSmallDenom for ( nI = 0 ; nI < 4; nI++ ) { m_pfSmallInteg[nIdxA+nYb*nW+nXb+nI] += *((float*)(&sseInteg)+nI); m_pfSmallDenom[nIdxA+nYb*nW+nXb+nI] += *((float*)(&sseGauss)+nI); } } } } } // m_pfSmallTransR = m_pfSmallInteg / m_pfSmallDenom for( nIdxA = 0 ; nIdxA < nW*nH; nIdxA+=4 ) { sseInteg = _mm_loadu_ps(m_pfSmallInteg+nIdxA); sseDenom = _mm_loadu_ps(m_pfSmallDenom+nIdxA); sseQ = _mm_div_ps(sseInteg, sseDenom); for( nI = 0; nI < 4; nI++) m_pfSmallTransR[nIdxA+nI] = *((float*)(&sseQ)+nI); } } /* Function: FastGuidedFilter (original sized image) Description: Transmission refinement based on guided fitlering, but we approximate the filtering using partial window. In addtion, we analyze the guided filter using projection method. SSE (SIMD) is applied. * Limitation - In this code, we do not consider the border line of filtering, hence we will update the code. The user may regulate the window size that the image size divides into the window size. Parameters: nW - width of down-sampled image nH - height of down-sampled image (hidden) m_pnYImg - down-sampled image m_pnTransmission - down-sampled initial transmission Return: m_pfTransmissionR - down-sampled refined transmission */ void dehazing::FastGuidedFilter() { int nStep = m_nStepSize; // step size int nEPBlockSizeL = m_nGBlockSize; int nHeiL = m_nHei; int nWidL = m_nWid; float* pfYL = m_pfY; float* pfIntegL = m_pfInteg; float* pfDenomL = m_pfDenom; int* pnYImgL = m_pnYImg; float* pfPk_pL = m_pfPk_p; float* pfNormPkL = m_pfNormPk; float* pfGuidedLUTL = m_pfGuidedLUT; float* pfTransmissionL = m_pfTransmission; int nWstep = nEPBlockSizeL/nStep; int nHstep = nEPBlockSizeL/nStep; float fDenom, fNormV1, fMeanI; float fAk = 0.0f; // fAk : a in "a*I+b", fBk : b in "a*I+b" float fBk = 0.0f; int nIdxA, nYa, nXa, nYb, nXb, nI, nIdxB; // indexing // SIMD is applied __m128 sseMean, sseY, ssePk_p, sseDenom, sseInteg, sseNormPk, sseAk, sseBk, sseP, sseNormV1, sseGauss, sseBkV1, sseQ; // Initialization for ( nYb = 0 ; nYb < nHeiL; nYb++ ) { for ( nXb = 0; nXb < nWidL; nXb++ ) { pfYL[nYb*nWidL+nXb]=(float)pnYImgL[nYb*nWidL+nXb]; pfIntegL[nYb*nWidL+nXb]=0; pfDenomL[nYb*nWidL+nXb]=0; } } // sampling points for ( nYa = 0 ; nYa < (nHeiL-nEPBlockSizeL)/nHstep+1 ; nYa++ ) { for ( nXa = 0 ; nXa < (nWidL-nEPBlockSizeL)/nWstep+1 ; nXa++ ) { nIdxA=nYa*nHstep*nWidL+nXa*nWstep; // 1 vector normalize -> fNormV1 fNormV1 = 1.0f/nEPBlockSizeL; fMeanI = 0.0f; // Create p vector(m_pfSmallPk_p) which is perpendicular to fNormV1 // p vector == m_pfSmallY - fMeanI sseMean = _mm_setzero_ps(); sseY = _mm_setzero_ps(); // Mean value for ( nYb = 0 ; nYb < nEPBlockSizeL; nYb++ ) { for ( nXb = 0; nXb < nEPBlockSizeL; nXb+=4 ) { sseY = _mm_loadu_ps((pfYL+(nIdxA+nYb*nWidL+nXb))); sseMean = _mm_add_ps( sseMean, sseY ); } } for ( nI = 0 ; nI < 4; nI++ ) { fMeanI += *((float*)(&sseMean)+nI); } fMeanI = fMeanI/(nEPBlockSizeL*nEPBlockSizeL); sseMean = _mm_set1_ps(fMeanI); sseY = _mm_setzero_ps(); // m_pfSmallY - fMeanI for ( nYb = 0 ; nYb < nEPBlockSizeL; nYb++ ) { for ( nXb = 0; nXb < nEPBlockSizeL; nXb+=4 ) { sseY = _mm_loadu_ps(pfYL+(nIdxA+nYb*nWidL+nXb)); ssePk_p = _mm_sub_ps( sseY, sseMean ); for ( nI = 0 ; nI < 4; nI++) { pfPk_pL[nYb*nEPBlockSizeL+nXb+nI] = *((float*)(&ssePk_p)+nI); } } } // p vector normalize -> m_pfSmallNormPk // m_pfSmallPk_p^2 fDenom=0.0f; sseDenom =_mm_setzero_ps(); for ( nIdxB = 0 ; nIdxB < nEPBlockSizeL*nEPBlockSizeL; nIdxB+=4 ) { ssePk_p = _mm_loadu_ps(pfPk_pL+nIdxB); sseDenom = _mm_add_ps( sseDenom, _mm_mul_ps( ssePk_p, ssePk_p )); } for ( nI = 0 ; nI < 4; nI++ ) { fDenom += *((float*)(&sseDenom)+nI); } sseDenom = _mm_set1_ps(sqrt(fDenom)); sseAk = _mm_setzero_ps(); sseBk = _mm_setzero_ps(); // a = q'norm_p // b = q'norm_v1 // Exception handling (denominator == 0) // a = 0; fAk = 0.0f; fBk = 0.0f; if(fDenom == 0) { sseNormV1 = _mm_set1_ps(fNormV1); for ( nYb = 0 ; nYb < nEPBlockSizeL; nYb++ ) { for ( nXb = 0; nXb < nEPBlockSizeL; nXb+=4 ) { sseP = _mm_loadu_ps(pfTransmissionL+(nIdxA+nYb*nWidL+nXb)); sseBk = _mm_add_ps( _mm_mul_ps( sseP , sseNormV1 ), sseBk ); } } // b = q'norm_v1 for ( nI = 0 ; nI < 4; nI++ ) fBk += *((float*)(&sseBk)+nI); } else { // m_pfSmallNormPk for ( nIdxB = 0 ; nIdxB < nEPBlockSizeL*nEPBlockSizeL; nIdxB+=4 ) { ssePk_p = _mm_loadu_ps(pfPk_pL+nIdxB); sseNormPk = _mm_div_ps(ssePk_p, sseDenom); for ( int nI = 0 ; nI < 4; nI++ ) { pfNormPkL[nIdxB+nI] = *((float*)(&sseNormPk)+nI); } } sseNormV1 = _mm_set1_ps(fNormV1); // a = q'norm_p // b = q'norm_v1 for ( nYb = 0 ; nYb < nEPBlockSizeL; nYb++ ) { for ( nXb = 0; nXb < nEPBlockSizeL; nXb+=4 ) { sseP = _mm_loadu_ps(pfTransmissionL+(nIdxA+nYb*nWidL+nXb)); sseNormPk = _mm_loadu_ps(pfNormPkL+(nYb*nEPBlockSizeL+nXb)); sseAk = _mm_add_ps( _mm_mul_ps( sseP , sseNormPk ), sseAk ); sseBk = _mm_add_ps( _mm_mul_ps( sseP , sseNormV1 ), sseBk ); } } for ( nI = 0 ; nI < 4; nI++ ) { fAk += *((float*)(&sseAk)+nI); fBk += *((float*)(&sseBk)+nI); } } sseBkV1 = _mm_set1_ps(fBk*fNormV1); sseAk = _mm_set1_ps(fAk); // Gaussian weighting // Weighted_denom for ( nYb = 0 ; nYb < nEPBlockSizeL; nYb++ ) { for ( nXb = 0; nXb < nEPBlockSizeL; nXb+=4 ) { sseNormPk = _mm_loadu_ps(pfNormPkL+(nYb*nEPBlockSizeL+nXb)); sseGauss = _mm_loadu_ps(pfGuidedLUTL+(nYb*nEPBlockSizeL+nXb)); sseInteg = _mm_mul_ps(_mm_add_ps( _mm_mul_ps(sseNormPk, sseAk), sseBkV1 ), sseGauss); // m_pfSmallInteg // m_pfSmallDenom for ( nI = 0 ; nI < 4; nI++ ) { pfIntegL[nIdxA+nYb*nWidL+nXb+nI] += *((float*)(&sseInteg)+nI); pfDenomL[nIdxA+nYb*nWidL+nXb+nI] += *((float*)(&sseGauss)+nI); } } } } } // m_pfSmallTransR = m_pfSmallInteg / m_pfSmallDenom for( int nIdx = 0 ; nIdx < nWidL*nHeiL; nIdx+=4 ) { sseInteg = _mm_loadu_ps(pfIntegL+nIdx); sseDenom = _mm_loadu_ps(pfDenomL+nIdx); sseQ = _mm_div_ps(sseInteg, sseDenom); for( int nI = 0; nI < 4; nI++) m_pfTransmissionR[nIdx+nI] = *((float*)(&sseQ)+nI); } } /* Function: GuidedFilterShiftableWindow Description: It is a modified guided filter to reduce blur artifacts of original one. The algorithm shift the window which has the minimum error. The other scheme is the same with original one. Parameters: fEps - the epsilon (the same with original guided filter). Return: m_pfSmallTransR - refined transmission. */ void dehazing::GuidedFilterShiftableWindow(float fEps) { int nY, nX; int nH, nW; int nYmin, nXmin, nYmax, nXmax; int nYminOpt, nXminOpt, nYmaxOpt, nXmaxOpt; float fMeanI, fMeanR, fMeanG, fMeanB; float fActualBlock; float fMeant; float ftrans; float fa1, fa2, fa3, fb; float fCovRp, fCovGp, fCovBp; float afSigma[9], afInv[9]; float fVRR, fVRG, fVRB, fVGB, fVBB, fVGG; float fMRt, fMGt, fMBt; float fR, fG, fB; float fDet; int x1, x2, y1, y2; int nItersX, nItersY; float fVariance, fOptVariance; float *pfImageY = new float[m_nWid*m_nHei]; float *pfSqImageY = new float[m_nWid*m_nHei]; float *pfSumTrans = new float[m_nWid*m_nHei]; float *pfIntImageY = new float[m_nWid*m_nHei]; float *pfSqIntImageY= new float[m_nWid*m_nHei]; float *pfintN = new float[m_nWid*m_nHei]; float *pfones = new float[m_nWid*m_nHei]; float *pfWeight = new float[m_nWid*m_nHei]; float *pfWeiSum = new float[m_nWid*m_nHei]; float *pfVarImage = new float[m_nWid*m_nHei]; int *pnN = new int [m_nWid*m_nHei]; int *pnVarN = new int [m_nWid*m_nHei]; int nMax = 0; float fMaxvar=0; float grey; bool bIterationFlag; float fMB, fMG, fMR, fMBS, fMGS, fMRS; int nSizes = 31; int nNewX, nNewY, nOptNewX, nOptNewY; float inverseweight; bIterationFlag = true; // initialize for(nX=0; nX pfVarImage[nNewX+nNewY*m_nWid]*inverseweight) { fOptVariance = pfVarImage[nNewX+nNewY*m_nWid]*inverseweight; nYminOpt = nYmin; nXminOpt = nXmin; nYmaxOpt = nYmax; nXmaxOpt = nXmax; nOptNewX = nNewX; nOptNewY = nNewY; } } } fActualBlock = (float)(nYmaxOpt-nYminOpt)*(nXmaxOpt-nXminOpt); if(bIterationFlag) { pnVarN[nOptNewX+nOptNewY*m_nWid]++; totalsum++; } fVRR=0; fVRG=0; fVRB=0; fVGB=0; fVBB=0; fVGG=0; fMeanR=0; fMeanG=0; fMeanB=0; fMeant=0; fMRt=0; fMGt=0; fMBt=0; // guided filtering at optimal block for(nH=nYminOpt; nH