如下所示:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
|
# -*- coding: UTF-8 -*-
import jieba.posseg
import tensorflow as tf
import pandas as pd
import csv
import math
"""
1. Requires an input CSV file whose rows are (ID, text).
2. Produces rows of (ID, segmented text).
"""
# Command-line flags: path of the input CSV and path of the result CSV.
flags = tf.app.flags
flags.DEFINE_string(
    "train_file_address",
    "D:/NLPWORD/cut_word_test/hzytest.csv",
    "添加训练数据文件",
)
flags.DEFINE_string(
    "result_file_address",
    "D:/NLPWORD/cut_word_test/hzytest_result.csv",
    "生成结果数据文件",
)
FLAGS = flags.FLAGS
def cut_word(train_data):
    """
    Segment the text column of each row and write the results to a CSV file.

    For every row of ``train_data`` the second column is segmented with
    ``jieba.posseg.cut``; the tokens are joined with a trailing space after
    each one and written, together with the first column's value, as one row
    of the file at ``FLAGS.result_file_address``. Rows whose text cell is
    missing (NaN / None) are skipped.

    :param train_data: pandas DataFrame; column 0 is written out as the ID and
        column 1 is the text to segment
    :return: None -- the results are written to the output CSV file
    """
    jieba.load_userdict("newdict.txt")
    # newline="" is required by the csv module; without it every row is
    # followed by an empty line on platforms that translate "\n" to "\r\n".
    with open(FLAGS.result_file_address, "w", encoding="utf8", newline="") as csvfile:
        writer = csv.writer(csvfile)
        for row in train_data.index:
            record = train_data.loc[row].values
            text = record[1]
            if not isinstance(text, str):
                # pd.isnull handles NaN as well as None, where math.isnan
                # would raise TypeError on non-float values.
                if pd.isnull(text):
                    continue
                # Non-text cells (e.g. numbers) must be converted before
                # being handed to jieba, which expects a string.
                text = str(text)
            # Keep the original output format: every token is followed by a space.
            line = "".join(token.word + " " for token in jieba.posseg.cut(text))
            writer.writerow([record[0], line])
def main(_):
    """
    Load the input CSV named by ``FLAGS.train_file_address`` and segment it.

    :param _: argument passed in by the caller; ignored
    """
    cut_word(pd.read_csv(FLAGS.train_file_address))
# Start the program through tf.app.run only when this file is executed as a script.
if __name__ == "__main__":
    tf.app.run(main)
|
以上这篇 Python 处理 CSV 中的空值方法就是小编分享给大家的全部内容了,希望能给大家一个参考,也希望大家多多支持服务器之家。
原文链接:https://blog.csdn.net/baidu_15113429/article/details/78615213