文件名称:中文分词开源项目 JAVA中文分词
文件大小:382KB
文件格式:RAR
更新时间:2012-09-04 13:51:46
java,JAVA中文分词,项目
import WordSegment.*;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.awt.*;
import java.io.File;
import java.util.Vector;
import javax.swing.*;
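/**
 * Swing demo window for the Mini word segmenter. The menus let the user load
 * a dictionary, switch between forward (FMM) and backward (BMM) maximum
 * matching, train a dictionary from a corpus file and save it; the button
 * segments the sentence typed into the text field.
 *
 * @author Truman
 */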
public class WordSegDemoFrame extends JFrame implements ActionListener {
// Algorithm identifiers accepted by setAlgo(int).
final static int ALGO_FMM = 1;
final static int ALGO_BMM = 2;
// Menu bar installed on the frame by initFrame().
private JMenuBar menuBar = new JMenuBar();
// "File" menu entries: load a dictionary / exit.
private JMenuItem openDicItem, closeItem;
// "Algorithm" menu entries; initFrame() puts them in one ButtonGroup.
private JRadioButtonMenuItem fmmItem, bmmItem;
// "Training" menu entries and the "About" entry of the help menu.
private JMenuItem openTrainFileItem, saveDicItem, aboutItem;
// Button that triggers segmentation of the text in tfInput.
private JButton btSeg;
// Single-line input for the sentence to segment.
private JTextField tfInput;
// Area that receives the segmentation output.
private JTextArea taOutput;
// NOTE(review): not referenced in the visible part of this file.
private JPanel panel;
// Status labels: infoDic reports dictionary state, infoAlgo the active algorithm.
JLabel infoDic, infoAlgo;
// Segmenter instance; created in the constructor.
private WordSegment seger;
// Trainer used by trainDic(File) and saveDic().
private DicTrainer trainer = new DicTrainer();
/**
 * Builds the window: the menu bar with its four menus, the input row at the
 * top, the output area in the centre and the two status labels at the
 * bottom. This frame is registered as the action listener of every menu
 * item and of the segment button.
 */
private void initFrame()
{
    setTitle("Mini分词器");
    setDefaultCloseOperation(EXIT_ON_CLOSE);
    setJMenuBar(menuBar);

    // File menu: load a dictionary, separator, exit.
    JMenu fileMenu = new JMenu("文件");
    openDicItem = fileMenu.add("载入词典");
    fileMenu.addSeparator();
    closeItem = fileMenu.add("退出");

    // Algorithm menu: two mutually exclusive radio items, forward matching preselected.
    JMenu algorithmMenu = new JMenu("分词算法");
    fmmItem = new JRadioButtonMenuItem("正向最大匹配", true);
    bmmItem = new JRadioButtonMenuItem("逆向最大匹配", false);
    algorithmMenu.add(fmmItem);
    algorithmMenu.add(bmmItem);
    ButtonGroup algorithms = new ButtonGroup();
    algorithms.add(fmmItem);
    algorithms.add(bmmItem);

    // Training menu: train from a corpus file, save the resulting dictionary.
    JMenu trainMenu = new JMenu("训练语料");
    openTrainFileItem = trainMenu.add("载入并训练语料");
    saveDicItem = trainMenu.add("保存词典");
    // Saving is disabled until trainDic(File) enables it.
    saveDicItem.setEnabled(false);

    // Help menu: about dialog.
    JMenu helpMenu = new JMenu("帮助");
    aboutItem = helpMenu.add("关于Word Segment Demo");

    JMenu[] menus = {fileMenu, algorithmMenu, trainMenu, helpMenu};
    for (JMenu menu : menus)
    {
        menuBar.add(menu);
    }

    // Top row: sentence input plus the segment button.
    tfInput = new JTextField("", 30);
    btSeg = new JButton("分词");
    JPanel topPanel = new JPanel(new FlowLayout());
    topPanel.add(tfInput);
    topPanel.add(btSeg);

    // Centre: output area.
    taOutput = new JTextArea();
    JPanel centerPanel = new JPanel(new GridLayout());
    centerPanel.add(taOutput);

    // Bottom: status labels for dictionary and algorithm.
    infoDic = new JLabel();
    infoAlgo = new JLabel();
    JPanel bottomPanel = new JPanel();
    bottomPanel.add(infoDic);
    bottomPanel.add(infoAlgo);

    Container content = this.getContentPane();
    content.add(topPanel, BorderLayout.NORTH);
    content.add(centerPanel, BorderLayout.CENTER);
    content.add(bottomPanel, BorderLayout.SOUTH);

    // Every control reports to actionPerformed of this frame.
    AbstractButton[] controls = {
        openDicItem, closeItem, openTrainFileItem, saveDicItem,
        aboutItem, fmmItem, bmmItem, btSeg
    };
    for (AbstractButton control : controls)
    {
        control.addActionListener(this);
    }
}
/**
 * Creates the demo window: builds the UI, creates the segmenter, loads the
 * dictionary file "dic.dat" and selects the FMM algorithm.
 */
public WordSegDemoFrame()
{
    super();
    // The UI must exist first: loadDic and setAlgo write to its status labels.
    this.initFrame();
    this.seger = new WordSegment();
    this.loadDic("dic.dat");
    this.setAlgo(ALGO_FMM);
}
/**
 * Passes the dictionary file name to the segmenter and reports it in the
 * dictionary status label.
 *
 * @param fileName dictionary file name handed to the segmenter's SetDic
 */
private void loadDic(String fileName)
{
    seger.SetDic(fileName);
    StringBuilder status = new StringBuilder("词典 ");
    status.append(fileName).append("已载入");
    infoDic.setText(status.toString());
}
/**
 * Installs the segmentation strategy identified by {@code type} on the
 * segmenter and shows its short name in the algorithm status label.
 *
 * @param type either ALGO_FMM or ALGO_BMM
 * @throws IllegalArgumentException if {@code type} is neither constant
 */
private void setAlgo(int type)
{
    String algo;
    switch (type)
    {
    case ALGO_FMM:
        seger.setStrategy(new FMM());
        algo = "FMM";
        break;
    case ALGO_BMM:
        seger.setStrategy(new BMM());
        algo = "BMM";
        break;
    default:
        // Without this branch an unknown value left the strategy untouched
        // while the label was overwritten with "分词算法:null".
        throw new IllegalArgumentException("Unknown segmentation algorithm type: " + type);
    }
    infoAlgo.setText("分词算法:" + algo);
}
/**
 * Shows an open-file dialog and returns the user's selection.
 *
 * @return the selected file, or {@code null} if the dialog was not approved
 *         or the selection is not a readable regular file (an error dialog
 *         is shown in the latter case)
 */
private File openFile()
{
    JFileChooser fileDialog = new JFileChooser();
    if (fileDialog.showOpenDialog(this) != JFileChooser.APPROVE_OPTION)
    {
        return null;
    }

    File selected = fileDialog.getSelectedFile();
    boolean readable = selected.isFile() && selected.canRead();
    if (!readable)
    {
        JOptionPane.showMessageDialog(this,
                "Could not open file: " + selected,
                "Error opening file",
                JOptionPane.ERROR_MESSAGE);
        return null;
    }
    return selected;
}
/**
 * Runs the trainer on the given file, hands the trainer's dictionary to the
 * segmenter, enables the "save dictionary" menu item and updates the
 * dictionary status label.
 *
 * @param f corpus file; its absolute path is passed to the trainer
 */
private void trainDic(File f)
{
    String corpusPath = f.getAbsolutePath();
    trainer.Train(corpusPath);
    seger.SetDic(trainer.getDic());
    // A trained dictionary now exists, so saving becomes available.
    saveDicItem.setEnabled(true);
    infoDic.setText("训练完成,新的词典已载入");
}
/**
 * Shows a save dialog and, if the user approves it, asks the trainer to save
 * its dictionary to the chosen path and reports that path in the dictionary
 * status label. Does nothing when the dialog is not approved.
 */
private void saveDic()
{
    JFileChooser saveDialog = new JFileChooser();
    if (saveDialog.showSaveDialog(this) == JFileChooser.APPROVE_OPTION)
    {
        String target = saveDialog.getSelectedFile().getAbsolutePath();
        trainer.SaveDic(target);
        infoDic.setText("词典已保存到" + target);
    }
}
/* (non-Javadoc)
* @see java.awt.event.ActionListener#actionPerformed(java.awt.event.ActionEvent)
*/
public void actionPerformed(ActionEvent e) {
if(e.getSource() == openDicItem)
{
File dicFile = openFile();
if(dicFile == null)
return;
loadDic(dicFile.getAbsolutePath());
saveDicItem.setEnabled(false);
return;
}
if(e.getSource() == closeItem)
{
dispose();
System.exit(0);
return;
}
if(e.getSource() == openTrainFileItem)
{
File trainFile = openFile();
if(trainFile == null)
return;
else
trainDic(trainFile);
return;
}
if(e.getSource() == saveDicItem)
{
saveDic();
return;
}
if(e.getSource() == aboutItem)
{
JOptionPane.showMessageDialog(this, "作者:Truman\nemail: trumanhe@gmail.com", "关于Mini分词器",
JOptionPane.INFORMATION_MESSAGE);
return;
}
if(e.getSource() == fmmItem)
{
setAlgo(ALGO_FMM);
return;
}
if(e.getSource() == bmmItem)
{
setAlgo(ALGO_BMM);
return;
}
if(e.getSource() == btSeg)
{
String sentence = tfInput.getText();
Vector vec = seger.Segment(sentence);
taOutput.setText("");
for(int i=0;i
【文件预览】:
Mini分词
----Mini分词器文档.pdf(155KB)
----bin()
--------WordSegment()
--------dic.dat(731KB)
--------WordSegDemoFrame.class(7KB)
----run.bat(29B)
----语料示例.txt(420B)
----Copy (2) of New Folder()
----src()
--------WordSegment()
--------WordSegDemoFrame.java(6KB)
----Copy (3) of New Folder()
----Copy of New Folder()
----计算机编程网.txt(268B)
----New Folder()