glob 库是最简单的模块之一，内容非常少。用它可以查找符合特定规则的文件路径名，跟使用 Windows 下的文件搜索差不多。查找文件只用到三个匹配符：“*” 匹配 0 个或多个字符；“?” 匹配单个字符；“[]” 匹配指定范围内的字符，如 [0-9] 匹配数字。参考文章，原文：https://blog.csdn.net/lanchunhui/article/details/70170813
import glob
import pprint

# A relative pattern is resolved against the current working directory.
# "*" matches the entries in it (glob skips names starting with ".").
print(glob.glob(r"*"))

# The raw string keeps the Windows backslashes literal; this lists everything
# that sits directly on the Administrator desktop.
user_info = glob.glob(r"C:\Users\Administrator\Desktop\*")
pprint.pprint(user_info)
获取后缀为.lnk的文件
# Shortcut (.lnk) files that sit directly on the Administrator desktop.
lnk_pattern = r"C:\Users\Administrator\Desktop\*.lnk"
user_info = glob.glob(lnk_pattern)
获取后缀为.jpg文件
# .jpg files that sit directly on the Administrator desktop.
desktop_jpg_pattern = r"C:\Users\Administrator\Desktop\*.jpg"
user_info = glob.glob(desktop_jpg_pattern)

# A "*" in the directory position matches any single folder under the user
# profile, so this finds .jpg files exactly one level below it.
# NOTE: this rebinds user_info, discarding the desktop-only result above.
profile_jpg_pattern = r"C:\Users\Administrator\*\*.jpg"
user_info = glob.glob(profile_jpg_pattern)
['C:\\Users\\Administrator\\Desktop\\Google Chrome.lnk', 'C:\\Users\\Administrator\\Desktop\\Postman.lnk', 'C:\\Users\\Administrator\\Desktop\\RedisDesktopManager.lnk', 'C:\\Users\\Administrator\\Desktop\\Visual Studio Code.lnk', 'C:\\Users\\Administrator\\Desktop\\有道云笔记.lnk', 'C:\\Users\\Administrator\\Links\\2345Downloads.lnk', 'C:\\Users\\Administrator\\Links\\Desktop.lnk', 'C:\\Users\\Administrator\\Links\\Downloads.lnk', 'C:\\Users\\Administrator\\Links\\RecentPlaces.lnk']
['mod_study_project', 'redis_cli_project', 'redis_project'] ['C:\\Users\\Administrator\\Desktop\\Book7.xlsx', 'C:\\Users\\Administrator\\Desktop\\desktop.ini', 'C:\\Users\\Administrator\\Desktop\\down_excel.txt', 'C:\\Users\\Administrator\\Desktop\\Google Chrome.lnk', 'C:\\Users\\Administrator\\Desktop\\inmemorytozip', 'C:\\Users\\Administrator\\Desktop\\KdApiSearchDemo.php', 'C:\\Users\\Administrator\\Desktop\\Postman.lnk', 'C:\\Users\\Administrator\\Desktop\\python', 'C:\\Users\\Administrator\\Desktop\\python.zip', 'C:\\Users\\Administrator\\Desktop\\redis 和其他数据库的对比.xlsx', 'C:\\Users\\Administrator\\Desktop\\RedisDesktopManager.lnk', 'C:\\Users\\Administrator\\Desktop\\shopping', 'C:\\Users\\Administrator\\Desktop\\Visual Studio Code.lnk', 'C:\\Users\\Administrator\\Desktop\\vscode_workspace', 'C:\\Users\\Administrator\\Desktop\\webserver', 'C:\\Users\\Administrator\\Desktop\\我的文件', 'C:\\Users\\Administrator\\Desktop\\新建文本文档.txt', 'C:\\Users\\Administrator\\Desktop\\有道云笔记.lnk', 'C:\\Users\\Administrator\\Desktop\\有道云笔记网页剪报.url', 'C:\\Users\\Administrator\\Desktop\\项目 注意']
# Count the JPEG files under INPUT_DATA/./data/.
#
# On Windows, glob matching is case-insensitive, so listing both the lower- and
# upper-case extensions would count the same files twice. Only the lower-case
# variants are used there.
import glob
import os

if os.name == 'nt':
    extensions = ['jpg', 'jpeg']
else:
    extensions = ['jpg', 'jpeg', 'JPG', 'JPEG']

# NOTE(review): INPUT_DATA is not defined in this snippet; presumably it is the
# root data directory defined elsewhere — confirm before running.
dir_name = './data/'
file_list = []
for extension in extensions:
    file_glob = os.path.join(INPUT_DATA, dir_name, "*." + extension)
    file_list.extend(glob.glob(file_glob))

print("Size of this file_list is :", len(file_list))
参考文章 原文:https://blog.csdn.net/qq_17753903/article/details/82180227