Python 使用命令参数(判断哈希值)删除相同文件

时间:2025-03-10 07:02:45
"""Find and optionally delete duplicate files below a directory.

Usage: python <script> <directory>

Steps:
1. Walk the directory given as the first command-line argument and group
   every file by its size in bytes.
2. Print the size groups in descending or ascending order (user's choice).
3. On request, hash same-sized files with MD5 and print the groups of files
   whose content is identical, giving every listed file a number.
4. On request, delete the files whose numbers the user enters and report the
   total number of bytes removed.
"""
import os
import sys
from os.path import join, getsize
import hashlib
import unicodedata

# Files are hashed in pieces of this size so large files are never loaded
# into memory in one go.
_CHUNK_SIZE = 1024 * 1024


def getFileMD5(filePath) -> str:
    """Return the MD5 hex digest of the file at *filePath*.

    The file is opened in binary mode, read chunk by chunk and closed again
    before returning.

    Raises:
        OSError: if the file cannot be opened or read.
    """
    digest = hashlib.md5()
    with open(filePath, "rb") as stream:
        for chunk in iter(lambda: stream.read(_CHUNK_SIZE), b""):
            digest.update(chunk)
    return digest.hexdigest()


def is_number(s) -> bool:
    """Return True if *s* can be interpreted as a number.

    Two checks are tried in order:
    * ``float(s)`` succeeds (this also accepts strings such as "-1", "1e3",
      "nan" and "inf");
    * *s* is a single Unicode character with a numeric value according to
      ``unicodedata.numeric`` (for example "½").

    Anything else, including non-string values such as ``None``, gives False.
    """
    try:
        float(s)
        return True
    except (TypeError, ValueError):
        # TypeError: s is not a string/number at all; ValueError: not a float.
        pass
    try:
        # unicodedata looks the character up in the Unicode character
        # database; it raises TypeError unless s is exactly one character.
        unicodedata.numeric(s)
        return True
    except (TypeError, ValueError):
        pass
    return False


def _collect_by_size(root_dir):
    """Return a dict mapping file size in bytes to the paths of that size."""
    by_size = {}
    for current_dir, _subdirs, names in os.walk(root_dir):
        for name in names:
            path = join(current_dir, name)
            try:
                size = getsize(path)
            except OSError:
                # Broken symlink, or the file vanished/was unreadable.
                continue
            by_size.setdefault(size, []).append(path)
    return by_size


def _find_duplicates(paths):
    """Group *paths* by MD5 digest, keeping only digests shared by 2+ files."""
    by_digest = {}
    for path in paths:
        try:
            digest = getFileMD5(path)
        except OSError:
            # A file that cannot be read cannot be proven to be a duplicate.
            continue
        by_digest.setdefault(digest, []).append(path)
    return {digest: group for digest, group in by_digest.items() if len(group) > 1}


def _ask(valid_answers, error_message):
    """Read lines from stdin until one of *valid_answers* is entered."""
    while True:
        answer = input()
        if answer in valid_answers:
            return answer
        print(error_message)


def _parse_numbers(text):
    """Return the distinct positive integers found in space-separated *text*.

    Tokens that are not numbers, not whole numbers, or not positive are
    ignored. Order of first appearance is kept so deletions happen in the
    order the user typed them, and repeats are dropped so no file is removed
    twice.
    """
    numbers = []
    for token in text.split():
        if not is_number(token):
            continue
        try:
            value = int(token)
        except ValueError:
            # Numeric but not an integer literal, e.g. "1.5" or "nan".
            continue
        if value > 0 and value not in numbers:
            numbers.append(value)
    return numbers


def main():
    """Run the interactive duplicate-file workflow described in the module docstring."""
    # Step 1: the directory comes from the first command-line argument.
    if len(sys.argv) < 2 or not sys.argv[1]:
        print('Directory is not specified')
        return

    by_size = _collect_by_size(sys.argv[1])

    # Step 2: print the files grouped by size, in the order the user picks.
    print('Size sorting options:')
    print('1. Descending')
    print('2. Ascending')
    option = _ask(('1', '2'), 'Wrong option')
    sizes = sorted(by_size, reverse=(option == '1'))

    for size in sizes:
        print(size)
        for path in by_size[size]:
            print(path)
        print('')

    # Step 3: group same-sized files by hash and number every duplicate.
    print('Check for duplicates?')
    if _ask(('yes', 'no'), 'Wrong') == 'no':
        return

    numbered = {}  # file number -> (path, size in bytes)
    for size in sizes:
        if len(by_size[size]) < 2:
            # A file with a unique size cannot have a duplicate; skip hashing.
            continue
        duplicates = _find_duplicates(by_size[size])
        if not duplicates:
            continue
        print(size)
        for digest, group in duplicates.items():
            print(digest)
            for path in group:
                number = len(numbered) + 1
                numbered[number] = (path, size)
                print('{}. {}'.format(number, path))
        print('')

    if not numbered:
        # Nothing was numbered, so there is nothing the user could select.
        return

    # Step 4: ask whether to delete, then delete the selected files.
    print('Delete files?')
    if _ask(('yes', 'no'), 'Wrong') == 'no':
        return

    print('Enter file numbers to delete:')
    while True:
        requested = _parse_numbers(input('(注意以空格隔开序号)'))
        if not requested:
            print('请按规格输入')
            continue
        selected = [number for number in requested if number in numbered]
        if not selected:
            print('请输入正确的序号')
            continue

        freed = 0
        for number in selected:
            path, size = numbered[number]
            try:
                os.remove(path)
            except OSError as err:
                # Report and carry on so one failure does not hide the total.
                print('Could not delete {}: {}'.format(path, err), file=sys.stderr)
                continue
            freed += size
        print('删除的文件总大小为:' + str(freed))
        break


if __name__ == '__main__':
    main()