想着用python绘制某目录的树状图,一开始想到了用graphviz,因为去年离职的时候整理文档,用graphviz画过代码调用结构图。graphviz有一门自己的语言DOT,DOT很简单,无非就是加点、加边、设置属性这几样东西,而且有python接口。
我在ubuntu下,先要安装graphviz软件,官网有deb包,然后python安装pygraphviz模块。
目标功能是输入一个路径,输出该路径下的目录结构以及文件和文件夹的大小
# -*- coding:utf-8 -*-
import pygraphviz as pgv
import os
from os.path import getsize, join
import sys
import Queue
# Python 2-only workaround: sys is reloaded first so that
# sys.setdefaultencoding can be called, then the interpreter-wide default
# codec is switched to UTF-8.
# NOTE(review): presumably needed for non-ASCII path names in labels -- confirm.
reload(sys)
sys.setdefaultencoding('utf8')
class DrawDirectorTree():
    """Draw a directory tree as a Graphviz graph (through pygraphviz).

    Every regular file and directory below the target path becomes one node
    whose label is the full path, a newline, and the size in bytes.  For a
    directory the size is the total of all files below it.
    """

    @classmethod
    def _safe_getsize(cls, path):
        """Return the size of ``path`` in bytes, or 0 if it cannot be read."""
        try:
            return getsize(path)
        except OSError:
            # Dangling symlink, permission problem, or the entry vanished
            # between listing and stat: count it as empty instead of aborting.
            return 0

    @classmethod
    def _collect_dir_sizes(cls, top):
        """Map every walked directory below ``top`` to its total size in bytes.

        The tree is walked once, bottom-up, so each file is stat'ed exactly
        once and a parent simply adds up the totals of its sub-directories.
        """
        totals = {}
        for root, dirs, files in os.walk(top, topdown=False):
            total = sum(cls._safe_getsize(join(root, name)) for name in files)
            # Sub-directories that os.walk did not enter (symlinks,
            # unreadable directories) have no entry and contribute 0.
            total += sum(totals.get(join(root, name), 0) for name in dirs)
            totals[root] = total
        return totals

    @classmethod
    def _make_label(cls, path, dir_sizes):
        """Build the node label for ``path``: path, newline, size in bytes."""
        size = dir_sizes.get(path)
        if size is None:
            # Not a walked directory: a file, or a directory that is shown
            # as a leaf because it was not entered.
            size = cls._safe_getsize(path)
        return path + "\n" + str(size)

    @classmethod
    def getdirsize(cls, dir):
        """Return the total size in bytes of all files below ``dir``.

        :param dir: directory to measure; walked recursively with os.walk
        :return: summed size of the files found; entries whose size cannot
            be read are counted as 0
        """
        total = 0
        for root, _dirs, files in os.walk(dir):
            total += sum(cls._safe_getsize(join(root, name)) for name in files)
        return total

    @classmethod
    def draw_director_tree(cls, input_path,
                           dot_path='director_tree.dot',
                           png_path='director_tree.png'):
        """Traverse a directory breadth-first and render it as a tree image.

        The DOT source is printed to standard output and written to
        ``dot_path``; the picture laid out by ``dot`` is written to
        ``png_path``.

        :param input_path: directory to draw
        :param dot_path: file that receives the DOT source
        :param png_path: file that receives the rendered image
        :return: None (also when ``input_path`` is not a directory, in which
            case only an error message is printed)
        """
        # Local import keeps this block self-contained; a plain deque is
        # enough for a single-threaded FIFO.
        from collections import deque

        # isdir() is already False for a path that does not exist.
        if not os.path.isdir(input_path):
            print("Input_path Error!")
            return None

        # Directory totals are computed once up front instead of per node.
        dir_sizes = cls._collect_dir_sizes(input_path)

        tree_graph = pgv.AGraph(directed=True, strict=True)
        tree_graph.node_attr['style'] = 'filled'
        tree_graph.node_attr['shape'] = 'square'
        tree_graph.add_node(cls._make_label(input_path, dir_sizes))

        pending = deque([input_path])
        while pending:
            parent = pending.popleft()
            try:
                children = os.listdir(parent)
            except OSError:
                # Unreadable or vanished directory: keep its node, skip its
                # contents.
                continue
            parent_label = cls._make_label(parent, dir_sizes)
            for child in children:
                full_child = join(parent, child)
                # Only regular files and directories are drawn.
                if not (os.path.isfile(full_child) or os.path.isdir(full_child)):
                    continue
                child_label = cls._make_label(full_child, dir_sizes)
                tree_graph.add_node(child_label)
                tree_graph.add_edge(parent_label, child_label)
                # Descend only into directories the size walk entered; this
                # leaves symlinked directories as leaves and so cannot loop
                # through a link that points back at an ancestor.
                if full_child in dir_sizes:
                    pending.append(full_child)

        tree_graph.graph_attr['epsilon'] = '0.001'
        print(tree_graph.string())  # print dot file to standard output
        tree_graph.write(dot_path)
        tree_graph.layout('dot')  # layout with dot
        tree_graph.draw(png_path)  # write to file
# Demo invocation: draws the tree of a hard-coded directory when this script is executed.
DrawDirectorTree.draw_director_tree('/home/aron/workspace/python_space/')
运行会生成一个dot文件,就是有向图的结构文件,还有存为一个png图
事实上效果不太好,小的目录可以搞定,大的目录就耗时太长了,主要是计算文件夹尺寸本身就很耗时,而且无法生成巨大的结构图。
之后发现networkx模块的使用方式几乎与graphviz一样,add_node、add_edge,就写了一版networkx的。
能够解决稍大一点的目录,主要是生成网络图片比graphviz快一点,能展示稍微复杂一点的网络结构。
# -*- coding:utf-8 -*-
import os
from os.path import getsize, join
import sys
import Queue
import networkx as nx
import matplotlib.pyplot as plt
# Python 2-only workaround: sys is reloaded first so that
# sys.setdefaultencoding can be called, then the interpreter-wide default
# codec is switched to UTF-8.
# NOTE(review): presumably needed for non-ASCII path names in labels -- confirm.
reload(sys)
sys.setdefaultencoding('utf8')
class DirectorTreeNetworkx():
    """Draw a directory tree as a networkx directed graph.

    Every regular file and directory below the target path becomes one node
    whose label is the full path, a newline, and the size in bytes.  For a
    directory the size is the total of all files below it.
    """

    @classmethod
    def _safe_getsize(cls, path):
        """Return the size of ``path`` in bytes, or 0 if it cannot be read."""
        try:
            return getsize(path)
        except OSError:
            # Dangling symlink, permission problem, or the entry vanished
            # between listing and stat: count it as empty instead of aborting.
            return 0

    @classmethod
    def _collect_dir_sizes(cls, top):
        """Map every walked directory below ``top`` to its total size in bytes.

        The tree is walked once, bottom-up, so each file is stat'ed exactly
        once and a parent simply adds up the totals of its sub-directories.
        """
        totals = {}
        for root, dirs, files in os.walk(top, topdown=False):
            total = sum(cls._safe_getsize(join(root, name)) for name in files)
            # Sub-directories that os.walk did not enter (symlinks,
            # unreadable directories) have no entry and contribute 0.
            total += sum(totals.get(join(root, name), 0) for name in dirs)
            totals[root] = total
        return totals

    @classmethod
    def _make_label(cls, path, dir_sizes):
        """Build the node label for ``path``: path, newline, size in bytes."""
        size = dir_sizes.get(path)
        if size is None:
            # Not a walked directory: a file, or a directory that is shown
            # as a leaf because it was not entered.
            size = cls._safe_getsize(path)
        return path + "\n" + str(size)

    @classmethod
    def getdirsize(cls, dir):
        """Return the total size in bytes of all files below ``dir``.

        :param dir: directory to measure; walked recursively with os.walk
        :return: summed size of the files found; entries whose size cannot
            be read are counted as 0
        """
        total = 0
        for root, _dirs, files in os.walk(dir):
            total += sum(cls._safe_getsize(join(root, name)) for name in files)
        return total

    @classmethod
    def draw_director_tree(cls, input_path):
        """Traverse a directory breadth-first and show it as a network plot.

        The graph is drawn with ``nx.draw`` and displayed with ``plt.show``.

        :param input_path: directory to draw
        :return: None (also when ``input_path`` is not a directory, in which
            case only an error message is printed)
        """
        # Local import keeps this block self-contained; a plain deque is
        # enough for a single-threaded FIFO.
        from collections import deque

        # isdir() is already False for a path that does not exist.
        if not os.path.isdir(input_path):
            print("Input_path Error!")
            return None

        # Directory totals are computed once up front instead of per node.
        dir_sizes = cls._collect_dir_sizes(input_path)

        tree_graph = nx.DiGraph()
        tree_graph.add_node(cls._make_label(input_path, dir_sizes))

        pending = deque([input_path])
        while pending:
            parent = pending.popleft()
            try:
                children = os.listdir(parent)
            except OSError:
                # Unreadable or vanished directory: keep its node, skip its
                # contents.
                continue
            parent_label = cls._make_label(parent, dir_sizes)
            for child in children:
                full_child = join(parent, child)
                # Only regular files and directories are drawn.
                if not (os.path.isfile(full_child) or os.path.isdir(full_child)):
                    continue
                child_label = cls._make_label(full_child, dir_sizes)
                tree_graph.add_node(child_label)
                tree_graph.add_edge(parent_label, child_label)
                # Descend only into directories the size walk entered; this
                # leaves symlinked directories as leaves and so cannot loop
                # through a link that points back at an ancestor.
                if full_child in dir_sizes:
                    pending.append(full_child)

        nx.draw(tree_graph)
        plt.show()
这个也就玩一玩,真正做目录结构展示还得想别的办法,同时不能期望在获取结构的同时计算文件夹大小。
上面的代码计算文件夹大小的策略也有点问题:存在重复计算,其实算一次就够了。同时还可以先构建完结构,再自底向上地计算目录大小,这样就避免了每计算一层目录都要walk一遍求和,下层文件的大小只需计算一次。