Python 使用命令参数(判断哈希值)删除相同文件
import os
import sys
from os.path import join,getsize
import hashlib
# Compute the MD5 digest of a file.
def getFileMD5(filePath):
    """Return the hexadecimal MD5 digest of the file at *filePath*.

    The file is opened in binary mode and read in fixed-size chunks, so
    large files are not loaded into memory in one piece.  The handle is
    closed by the ``with`` block even if reading fails.

    The digest is returned rather than printed: the caller in this file
    assigns the result of this function and uses it as a grouping key.

    Args:
        filePath: Path of the file to hash.

    Returns:
        The 32-character lowercase hexadecimal MD5 digest.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    digest = hashlib.md5()
    with open(filePath, "rb") as f:
        # iter() with a sentinel keeps calling read() until it returns b"".
        for chunk in iter(lambda: f.read(65536), b""):
            digest.update(chunk)
    return digest.hexdigest()
# Check whether a value looks like a number (adapted from a blog post whose
# author the original writer could no longer recall).
def is_number(s):
    """Return True if *s* can be interpreted as a number, otherwise False.

    Two checks are tried in order:

    1. ``float(s)`` - accepts anything ``float`` can parse, which includes
       strings such as ``"1e3"``, ``"inf"`` and ``"nan"``.
    2. ``unicodedata.numeric(s)`` - accepts a single Unicode character that
       carries a numeric value (for example a CJK numeral or a vulgar
       fraction).

    Both checks treat ``TypeError`` the same as ``ValueError`` so that a
    value of an unsupported type (e.g. ``None``) yields False instead of
    raising from the first check only.

    Args:
        s: The value to test, normally a string.

    Returns:
        bool: True when either check succeeds, False otherwise.
    """
    # Imported locally because the module is only needed by this function.
    import unicodedata

    try:
        float(s)
        return True
    except (TypeError, ValueError):
        # Not parseable by float(); fall through to the Unicode check.
        pass
    try:
        # numeric() raises TypeError for non-str or multi-character input and
        # ValueError for a character that has no numeric value.
        unicodedata.numeric(s)
        return True
    except (TypeError, ValueError):
        return False
def _hash_file(path, chunk_size=65536):
    """Return the hexadecimal MD5 digest of the file at *path*.

    The file is read in chunks of *chunk_size* bytes and is always closed.
    """
    digest = hashlib.md5()
    with open(path, 'rb') as stream:
        for chunk in iter(lambda: stream.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()


def _group_by_size(root_dir):
    """Return a dict mapping a size in bytes to the files of that size.

    Every file below *root_dir* (found with ``os.walk``) is included; paths
    are built by joining the walked directory with the file name.
    """
    by_size = {}
    for folder, _dirs, names in os.walk(root_dir):
        for name in names:
            path = join(folder, name)
            by_size.setdefault(getsize(path), []).append(path)
    return by_size


def _ask(valid_answers, error_message):
    """Read lines from standard input until one is in *valid_answers*.

    *error_message* is printed after every rejected line.  The accepted
    answer is returned.
    """
    while True:
        answer = input()
        if answer in valid_answers:
            return answer
        print(error_message)


def _find_duplicates(files_by_size, sizes):
    """Group same-size files by MD5 and keep only real duplicates.

    Args:
        files_by_size: Mapping of size -> list of paths.
        sizes: The sizes to examine, in the order they should be reported.

    Returns:
        A list of ``(size, [(md5, [path, ...]), ...])`` tuples.  Only hash
        groups holding at least two files are kept, and sizes without any
        such group are omitted.
    """
    duplicates = []
    for size in sizes:
        paths = files_by_size[size]
        if len(paths) < 2:
            # A file with a unique size cannot have a duplicate, so it is
            # not worth hashing.
            continue
        by_hash = {}
        for path in paths:
            by_hash.setdefault(_hash_file(path), []).append(path)
        groups = [(digest, same) for digest, same in by_hash.items()
                  if len(same) > 1]
        if groups:
            duplicates.append((size, groups))
    return duplicates


def _print_duplicates(duplicates):
    """Print the duplicate groups and number every listed file.

    For each size the size is printed, then each hash followed by its files
    as ``<number>.<path>``, then an empty line.  Numbers start at 1 and run
    across all groups.

    Returns:
        A dict mapping each printed number to ``(path, size)``.
    """
    numbered = {}
    for size, groups in duplicates:
        print(size)
        for digest, paths in groups:
            print(digest)
            for path in paths:
                number = len(numbered) + 1
                numbered[number] = (path, size)
                print(str(number) + '.', end='')
                print(path)
        print('')
    return numbered


def _parse_selection(text):
    """Return the distinct positive integers in *text*, in input order.

    *text* is split on whitespace.  Tokens that are not integers, and
    integers that are not greater than zero, are ignored.  Repeated numbers
    are kept once so that no file is selected twice.
    """
    chosen = []
    for token in text.split():
        try:
            number = int(token)
        except ValueError:
            continue
        if number > 0 and number not in chosen:
            chosen.append(number)
    return chosen


def _delete_selected(numbered):
    """Ask which numbered files to delete, delete them, report the total.

    The prompt is repeated until the input contains at least one positive
    integer that is also a key of *numbered*.  Numbers that are not keys of
    *numbered* are ignored.

    Args:
        numbered: Mapping of number -> ``(path, size)``; must not be empty,
            otherwise no input could ever be accepted.

    Returns:
        The summed size in bytes of the deleted files.
    """
    print('Enter file numbers to delete:')
    while True:
        chosen = _parse_selection(input('(注意以空格隔开序号)'))
        if not chosen:
            print('请按规格输入')
            continue
        targets = [number for number in chosen if number in numbered]
        if not targets:
            print('请输入正确的序号')
            continue
        freed = 0
        for number in targets:
            path, size = numbered[number]
            os.remove(path)
            freed += size
        print('删除的文件总大小为:' + str(freed))
        return freed


def _main(argv):
    """Run the interactive duplicate-file finder.

    Args:
        argv: Command-line arguments; ``argv[1]`` is the directory to scan.

    Returns:
        Process exit status: 1 when no directory was given, otherwise 0.
    """
    # The directory comes from the command line; a missing or empty argument
    # is reported once instead of raising IndexError or looping forever.
    if len(argv) < 2 or argv[1] == '':
        print('Directory is not specified')
        return 1
    files_by_size = _group_by_size(argv[1])

    # Step 2: list every file grouped by size, ascending or descending.
    print('Size sorting options:')
    print('1. Descending')
    print('2. Ascending')
    order = _ask(('1', '2'), 'Wrong option')
    sizes = sorted(files_by_size, reverse=(order == '1'))
    for size in sizes:
        print(size)
        for path in files_by_size[size]:
            print(path)
        print('')

    # Step 3: group by hash value and number the duplicate files in order.
    print('Check for duplicates?')
    if _ask(('yes', 'no'), 'Wrong') == 'no':
        return 0
    numbered = _print_duplicates(_find_duplicates(files_by_size, sizes))
    if not numbered:
        # Nothing was listed, so there is nothing that could be selected.
        return 0

    # Step 4: ask whether to delete files, and delete the selected ones.
    print('Delete files?')
    if _ask(('yes', 'no'), 'Wrong') == 'yes':
        _delete_selected(numbered)
    return 0


if __name__ == '__main__':
    sys.exit(_main(sys.argv))