教你怎么用python删除相似度高的图片

1. 前言

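因为输入是视频，切完帧之后都是连续图片，所以我的目录结构如下（根据下文的切帧代码整理）：

frame_output/
├── 1/    # 第1段视频的帧：3.jpg、6.jpg、9.jpg ……
└── 2/    # 第2段视频的帧：3.jpg、6.jpg、9.jpg ……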

其中frame_output是视频切帧后的保存路径，1和2两个文件夹分别存放两段视频切帧后的图片。

2. 切帧代码如下：

				?

									#encoding:utf-8

									import os

									import sys

									import cv2

									# Split every video found under ``video_path`` into frames, keeping one frame
									# out of every three, and save them as
									# frame_output/<video number>/<frame index>.jpg.
									video_path = '/home/pythonfile/video/'  # absolute path; holds the videos to split

									# frame_output (created next to this script) receives the extracted frames
									out_frame_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'frame_output')
									os.makedirs(out_frame_path, exist_ok=True)
									print('out_frame_path', out_frame_path)

									# os.listdir() returns names in arbitrary order; sorting keeps the mapping
									# "video -> numbered output folder" the same on every run.
									list1 = sorted(os.listdir(video_path))
									print('list', list1)

									files = [os.path.join(video_path, name) for name in list1]
									print('files',files)

									for k,file in enumerate(files):
									    # Frames of the k-th video go to frame_output/<k+1>/
									    frame_dir = os.path.join(out_frame_path, '%d'%(k+1))
									    os.makedirs(frame_dir, exist_ok=True)

									    cap = cv2.VideoCapture(file)
									    print('start prossing NO.%d video' % (k + 1))
									    try:
									        j = 0  # 1-based index of the frame just read
									        while True:
									            ret, frame = cap.read()
									            if not ret:
									                # No more frames (or the file could not be decoded)
									                break
									            j += 1
									            # Keep one frame out of every three
									            if j % 3 == 0:
									                cv2.imwrite(os.path.join(frame_dir, '%d.jpg'%j), frame)
									    finally:
									        # Release the capture even if reading or writing raised
									        cap.release()
									    print('prossed NO.%d video'%(k+1))

3. 删除相似度高的图片

				?

									# coding: utf-8

									import os

									import cv2

									# from skimage.measure import compare_ssim

									# from skimage.metrics import _structural_similarity

									from skimage.metrics import structural_similarity as ssim

									def delete(filename1):
									    """Remove the file at path ``filename1`` from disk."""
									    os.unlink(filename1)

									def list_all_files(root):
									    """Collect the entries under ``root``, mirroring the directory nesting.

									    Every regular file directly inside ``root`` is returned as
									    ``os.path.join(root, name)``; every sub-directory is returned as a nested
									    list produced by a recursive call. Entries that are neither a file nor a
									    directory are skipped.

									    Names are visited in sorted order, because ``os.listdir()`` itself
									    returns them in arbitrary order.

									    Parameters
									    ----------
									    root : str
									        Directory to scan.

									    Returns
									    -------
									    list
									        File paths (str) and nested lists (one per sub-directory).
									    """
									    files = []
									    for name in sorted(os.listdir(root)):
									        # isdir()/isfile() need the joined path, not the bare entry name
									        element = os.path.join(root, name)
									        if os.path.isdir(element):
									            files.append(list_all_files(element))
									        elif os.path.isfile(element):
									            files.append(element)
									    return files

									def ssim_compare(img_files):
									    """Mark near-duplicate neighbouring images for deletion.

									    Walks ``img_files`` as consecutive pairs, computes the structural
									    similarity (SSIM) of each pair and, when it is above 0.9, appends the
									    second path of the pair to the module-level list ``imgs_n``.

									    Parameters
									    ----------
									    img_files : list of str
									        Image paths, already in the order in which they should be compared.

									    Returns
									    -------
									    int
									        Number of images that were appended to ``imgs_n``.
									    """
									    count = 0
									    # Pairing the list with its one-step shift yields exactly the adjacent
									    # pairs, so a list with fewer than two entries is never indexed out of
									    # range (it simply produces no pairs).
									    for current, following in zip(img_files, img_files[1:]):
									        if not os.path.exists(current):
									            print('not exist', current)
									            break
									        img = cv2.imread(current)
									        img1 = cv2.imread(following)
									        if img is None or img1 is None:
									            # cv2.imread() signals an unreadable file by returning None
									            # rather than raising; skip the pair instead of crashing in SSIM.
									            print('unreadable', current, following)
									            continue
									        # Structural similarity of the two frames.
									        # NOTE(review): newer scikit-image releases replace `multichannel`
									        # with `channel_axis` — confirm against the installed version.
									        ssim_value = ssim(img,img1,multichannel=True)
									        if ssim_value > 0.9:
									            # Count the image and remember it for deletion
									            count += 1
									            imgs_n.append(following)
									            print('big_ssim:',current, following, ssim_value)
									    return count

									if __name__ == '__main__':
									    # Scan each frame folder under `path`, mark frames that are too similar to
									    # their predecessor, then delete all marked frames at the end.
									    path = '/home/dj/pythonfile/frame_output/'
									    img_path = path
									    # Filled by ssim_compare() with the paths of the images to delete
									    imgs_n = []
									    # One nested list per sub-directory, holding full paths
									    all_files = list_all_files(path)
									    print('1',len(all_files))

									    def _frame_number(image):
									        """Return the integer frame index encoded in '<index>.jpg'."""
									        return int(os.path.basename(image)[:-4])

									    for files in all_files:
									        if not isinstance(files, list):
									            # A loose file directly under `path`, not a frame folder
									            continue
									        # Filter BEFORE sorting: only '<digits>.jpg' names can be turned into
									        # a frame number, anything else would make int() raise.
									        img_files = [
									            img for img in files
									            if isinstance(img, str)
									            and img.endswith('.jpg')
									            and os.path.basename(img)[:-4].isdecimal()
									        ]
									        if not img_files:
									            continue
									        # Numeric order (3, 6, 9, ...) rather than string order (12 < 3)
									        img_files.sort(key=_frame_number)
									        count = ssim_compare(img_files)
									        print(os.path.dirname(img_files[0]),"路径下删除的图片数量为：",count)

									    for image in imgs_n:
									        delete(image)

4. `from skimage.measure import compare_ssim` 导入出错的解决方法：

将

				?

									from skimage.measure import compare_ssim

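改为（自 scikit-image 0.16 起，compare_ssim 已更名为 skimage.metrics.structural_similarity；也可以像第3节的代码那样直接导入公开函数：from skimage.metrics import structural_similarity as ssim）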

				?

									from skimage.metrics import _structural_similarity

5. structural_similarity.py的源码

				?

									from warnings import warn

									import numpy as np

									from scipy.ndimage import uniform_filter, gaussian_filter

									from ..util.dtype import dtype_range

									from ..util.arraycrop import crop

									from .._shared.utils import warn, check_shape_equality

									__all__ = ['structural_similarity']

									def structural_similarity(im1, im2,

									                          *,

									                          win_size=None, gradient=False, data_range=None,

									                          multichannel=False, gaussian_weights=False,

									                          full=False, **kwargs):

									    """
									    Compute the mean structural similarity index between two images.

									    Parameters
									    ----------
									    im1, im2 : ndarray
									        Images. Any dimensionality with same shape.
									    win_size : int or None, optional
									        The side-length of the sliding window used in comparison. Must be an
									        odd value. If `gaussian_weights` is True, this is ignored and the
									        window size will depend on `sigma`. When left as None it defaults
									        to 7, or to the Gaussian filter size when `gaussian_weights` is
									        True.
									    gradient : bool, optional
									        If True, also return the gradient with respect to im2.
									    data_range : float, optional
									        The data range of the input image (distance between minimum and
									        maximum possible values). By default, this is estimated from the image
									        data-type.
									    multichannel : bool, optional
									        If True, treat the last dimension of the array as channels. Similarity
									        calculations are done independently for each channel then averaged.
									    gaussian_weights : bool, optional
									        If True, each patch has its mean and variance spatially weighted by a
									        normalized Gaussian kernel of width sigma=1.5.
									    full : bool, optional
									        If True, also return the full structural similarity image.

									    Other Parameters
									    ----------------
									    use_sample_covariance : bool
									        If True, normalize covariances by N-1 rather than, N where N is the
									        number of pixels within the sliding window. Defaults to True.
									    K1 : float
									        Algorithm parameter, K1 (small constant, see [1]_). Defaults to 0.01.
									    K2 : float
									        Algorithm parameter, K2 (small constant, see [1]_). Defaults to 0.03.
									    sigma : float
									        Standard deviation for the Gaussian when `gaussian_weights` is True.
									        Defaults to 1.5.

									    Returns
									    -------
									    mssim : float
									        The mean structural similarity index over the image.
									    grad : ndarray
									        The gradient of the structural similarity between im1 and im2 [2]_.
									        This is only returned if `gradient` is set to True.
									    S : ndarray
									        The full SSIM image.  This is only returned if `full` is set to True.

									    Raises
									    ------
									    ValueError
									        If `K1`, `K2` or `sigma` is negative, if `win_size` is larger than
									        any dimension of `im1`, or if `win_size` is even.

									    Notes
									    -----
									    To match the implementation of Wang et. al. [1]_, set `gaussian_weights`
									    to True, `sigma` to 1.5, and `use_sample_covariance` to False.

									    .. versionchanged:: 0.16
									        This function was renamed from ``skimage.measure.compare_ssim`` to
									        ``skimage.metrics.structural_similarity``.

									    References
									    ----------
									    .. [1] Wang, Z., Bovik, A. C., Sheikh, H. R., & Simoncelli, E. P.
									       (2004). Image quality assessment: From error visibility to
									       structural similarity. IEEE Transactions on Image Processing,
									       13, 600-612.
									       https://ece.uwaterloo.ca/~z70wang/publications/ssim.pdf,
									       :DOI:`10.1109/TIP.2003.819861`

									    .. [2] Avanaki, A. N. (2009). Exact global histogram specification
									       optimized for structural similarity. Optical Review, 16, 613-621.
									       :arxiv:`0901.0065`
									       :DOI:`10.1007/s10043-009-0119-z`
									    """

									    # Shape validation is delegated to a helper imported from elsewhere
									    # (presumably raises when the shapes differ — confirm in its definition).
									    check_shape_equality(im1, im2)

									    if multichannel:

									        # loop over channels: re-invoke this function once per channel with
									        # multichannel=False, forwarding every other option unchanged
									        args = dict(win_size=win_size,

									                    gradient=gradient,

									                    data_range=data_range,

									                    multichannel=False,

									                    gaussian_weights=gaussian_weights,

									                    full=full)

									        # K1 / K2 / sigma / use_sample_covariance travel through kwargs
									        args.update(kwargs)

									        # The last axis is taken as the channel axis
									        nch = im1.shape[-1]

									        # One SSIM value per channel; averaged after the loop
									        mssim = np.empty(nch)

									        if gradient:

									            G = np.empty(im1.shape)

									        if full:

									            S = np.empty(im1.shape)

									        for ch in range(nch):

									            ch_result = structural_similarity(im1[..., ch],

									                                              im2[..., ch], **args)

									            # The shape of ch_result depends on the gradient/full flags, so
									            # unpack it accordingly
									            if gradient and full:

									                mssim[..., ch], G[..., ch], S[..., ch] = ch_result

									            elif gradient:

									                mssim[..., ch], G[..., ch] = ch_result

									            elif full:

									                mssim[..., ch], S[..., ch] = ch_result

									            else:

									                mssim[..., ch] = ch_result

									        mssim = mssim.mean()

									        if gradient and full:

									            return mssim, G, S

									        elif gradient:

									            return mssim, G

									        elif full:

									            return mssim, S

									        else:

									            return mssim

									    # Single-channel path. Algorithm constants are taken from kwargs with
									    # their defaults. Note the checks below reject only negative values, so
									    # zero passes despite the "must be positive" wording.
									    K1 = kwargs.pop('K1', 0.01)

									    K2 = kwargs.pop('K2', 0.03)

									    sigma = kwargs.pop('sigma', 1.5)

									    if K1 < 0:

									        raise ValueError("K1 must be positive")

									    if K2 < 0:

									        raise ValueError("K2 must be positive")

									    if sigma < 0:

									        raise ValueError("sigma must be positive")

									    use_sample_covariance = kwargs.pop('use_sample_covariance', True)

									    # `truncate` is only defined (and only used) when gaussian_weights is True
									    if gaussian_weights:

									        # Set to give an 11-tap filter with the default sigma of 1.5 to match

									        # Wang et. al. 2004.

									        truncate = 3.5

									    if win_size is None:

									        if gaussian_weights:

									            # set win_size used by crop to match the filter size

									            r = int(truncate * sigma + 0.5)  # radius as in ndimage

									            win_size = 2 * r + 1

									        else:

									            win_size = 7   # backwards compatibility

									    # The window must fit inside every dimension of the image
									    if np.any((np.asarray(im1.shape) - win_size) < 0):

									        raise ValueError(

									            "win_size exceeds image extent.  If the input is a multichannel "

									            "(color) image, set multichannel=True.")

									    if not (win_size % 2 == 1):

									        raise ValueError('Window size must be odd.')

									    # Without an explicit data_range, derive it from im1's dtype limits
									    if data_range is None:

									        if im1.dtype != im2.dtype:

									            warn("Inputs have mismatched dtype.  Setting data_range based on "

									                 "im1.dtype.", stacklevel=2)

									        dmin, dmax = dtype_range[im1.dtype.type]

									        data_range = dmax - dmin

									    ndim = im1.ndim

									    # Choose the local-averaging filter: Gaussian (parameterised by sigma and
									    # truncate) or uniform box (parameterised by win_size)
									    if gaussian_weights:

									        filter_func = gaussian_filter

									        filter_args = {'sigma': sigma, 'truncate': truncate}

									    else:

									        filter_func = uniform_filter

									        filter_args = {'size': win_size}

									    # ndimage filters need floating point data

									    im1 = im1.astype(np.float64)

									    im2 = im2.astype(np.float64)

									    # Number of samples in one window (win_size along each of ndim axes)
									    NP = win_size ** ndim

									    # filter has already normalized by NP

									    if use_sample_covariance:

									        cov_norm = NP / (NP - 1)  # sample covariance

									    else:

									        cov_norm = 1.0  # population covariance to match Wang et. al. 2004

									    # compute (weighted) means

									    ux = filter_func(im1, **filter_args)

									    uy = filter_func(im2, **filter_args)

									    # compute (weighted) variances and covariances

									    uxx = filter_func(im1 * im1, **filter_args)

									    uyy = filter_func(im2 * im2, **filter_args)

									    uxy = filter_func(im1 * im2, **filter_args)

									    # E[x*x] - E[x]^2 (and the cross term), rescaled by cov_norm
									    vx = cov_norm * (uxx - ux * ux)

									    vy = cov_norm * (uyy - uy * uy)

									    vxy = cov_norm * (uxy - ux * uy)

									    # Constants C1 and C2 scale with the data range
									    R = data_range

									    C1 = (K1 * R) ** 2

									    C2 = (K2 * R) ** 2

									    # Numerator factors (A1, A2) and denominator factors (B1, B2) of the
									    # per-pixel SSIM expression
									    A1, A2, B1, B2 = ((2 * ux * uy + C1,

									                       2 * vxy + C2,

									                       ux ** 2 + uy ** 2 + C1,

									                       vx + vy + C2))

									    D = B1 * B2

									    # Per-pixel SSIM map
									    S = (A1 * A2) / D

									    # to avoid edge effects will ignore filter radius strip around edges

									    pad = (win_size - 1) // 2

									    # compute (weighted) mean of ssim

									    mssim = crop(S, pad).mean()

									    if gradient:

									        # The following is Eqs. 7-8 of Avanaki 2009.

									        grad = filter_func(A1 / D, **filter_args) * im1

									        grad += filter_func(-S / B2, **filter_args) * im2

									        grad += filter_func((ux * (A2 - A1) - uy * (B2 - B1) * S) / D,

									                            **filter_args)

									        grad *= (2 / im1.size)

									        if full:

									            return mssim, grad, S

									        else:

									            return mssim, grad

									    else:

									        if full:

									            return mssim, S

									        else:

									            return mssim

到此这篇关于教你怎么用python删除相似度高的图片的文章就介绍到这了，更多相关python删除相似度高的图片内容，请搜索服务器之家以前的文章或继续浏览下面的相关文章，希望大家以后多多支持服务器之家！

原文链接：https://blog.csdn.net/DJames23/article/details/116430898