文件名称:"结巴"分词的Node.js版本nodejieba.zip
文件大小:4.12MB
文件格式:NONE
更新时间:2022-08-07 21:29:39
开源项目
NodeJieba "结巴"分词的Node.js版本

Introduction
NodeJieba只是CppJieba简单包装而成的node扩展,用来进行中文分词。
详见 NodeJiebaBlog

Install
npm install nodejieba
因为npm速度很慢而且经常因为墙的原因出现莫名其妙的问题,在此强烈建议使用cnpm,命令如下:
npm --registry=http://r.cnpmjs.org install nodejieba

默认分词算法

初始化
var segment = require("nodejieba");
segment.loadDict("./node_modules/nodejieba/dict/jieba.dict.utf8", "./node_modules/nodejieba/dict/hmm_model.utf8");

阻塞式调用
var wordList = segment.cutSync("阻塞模式分词");
if (wordList.constructor == Array) // just for tutorial, this is always true
{
    wordList.forEach(function(word) {
        console.log(word);
    });
}

非阻塞式调用
segment.cut("非阻塞模式分词", function(wordList) {
    wordList.forEach(function(word) {
        console.log(word);
    });
});

初始化
var segment = require("nodejieba");
segment.queryLoadDict("./node_modules/nodejieba/dict/jieba.dict.utf8", "./node_modules/nodejieba/dict/hmm_model.utf8");

阻塞式调用
var wordList = segment.queryCutSync("阻塞模式分词");
if (wordList.constructor == Array) // just for tutorial, this is always true
{
    wordList.forEach(function(word) {
        console.log(word);
    });
}

非阻塞式调用
segment.queryCut("非阻塞模式分词", function(wordList) {
    wordList.forEach(function(word) {
        console.log(word);
    });
});

具体用法可以参考 test/segment.js 和 test/query_segment.js。

Testing
在node v0.10.2下测试通过
http://cppjieba-webdemo.herokuapp.com/ (chrome is suggested)

Thanks
Jieba中文分词

标签:nodejieba
【文件预览】:
nodejieba-master
----.travis.yml(552B)
----index.js(2KB)
----README_EN.md(4KB)
----package.json(940B)
----deps()
--------limonp()
--------cppjieba()
----test()
--------testdata()
--------test.js(7KB)
--------load_dict_test.js(360B)
--------demo.js(1KB)
--------load_dict_demo.js(509B)
----.npmignore(182B)
----typescript_demo.ts(872B)
----tsconfig.json(5KB)
----dict()
--------hmm_model.utf8(508KB)
--------jieba.dict.utf8(4.84MB)
--------user.dict.utf8(49B)
--------idf.utf8(5.72MB)
--------stop_words.utf8(9KB)
----.gitignore(115B)
----lib()
--------nodejieba.cpp(9KB)
--------nodejieba.h(492B)
--------index.cpp(2KB)
--------utils.h(2KB)
----types()
--------index.d.ts(848B)
----README.md(6KB)
----ChangeLog.md(6KB)
----binding.gyp(790B)