本文实例为大家分享了英文单词统计器php 实现,供大家参考,具体内容如下
程序开始运行, 按"浏览"钮选择一个英文文档, 再按"统计 Statistics"钮, 即可得到按字母顺序列出的所有单词,及其出现的次数
用于测试的数据文档: data.txt
驱动程序:word.php
output.php 和 StringTokenizer.php 必须与 word.php 放在同一个文件夹中
1. words_statistics_PHP.png
2. word.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
|
<html>
<style>
td{
background-color:#CF6;
width:100px;
margin:5px;
}
</style>
<body>
<?php
/**
 * English word statistics page.
 *
 * The user picks an English text file with the "browse" button and presses
 * the "Statistics" button; the page then lists every distinct word in
 * alphabetical order together with the number of times it occurs.
 *
 * Author: Tongchun Xu
 * Open Source China community
 * Completion date: 10 June, 2016
 */
require("StringTokenizer.php");
require("output.php");

// Only process an upload when the form was actually submitted; a plain GET
// request has no 'submit' key at all.
if (isset($_POST['submit'])) {
    // Treat a missing upload entry the same way as "no file was uploaded".
    $error = isset($_FILES["file"]["error"]) ? $_FILES["file"]["error"] : UPLOAD_ERR_NO_FILE;
    if ($error > 0) {
        echo "Error: " . $error . "<br />";
    } else {
        // Read the whole upload in one call. Unlike fread() with filesize(),
        // this also works for an empty file and leaves no handle open.
        $str = file_get_contents($_FILES["file"]["tmp_name"]);
        if ($str === false) {
            die("Unable to open file!");
        }

        // Characters that separate words in the uploaded text.
        $delim = "?\\,. /:!\"()\t\n\r\f%";
        $st = new StringTokenizer($str, $delim);
        echo '找到字符串: ' . $st->countTokens();

        // Insert every token into the sorted list; repeated words only
        // increase the occurrence count of the existing entry.
        $list = new LinkedList();
        while ($st->hasMoreTokens()) {
            $list->orderInsert($st->nextToken());
        }
        $list->words_count();
        $list->traversal();
    }
}
?>
<h2>英文文档单词统计 Statistics on English words </h2>
<p>程序开始运行, 按"浏览"钮选择一个英文文档, 再按"统计 Statistics"钮,
即可得到按字母顺序列出的所有单词,及其出现的次数 </p>
<form action="word.php" method="post"
enctype="multipart/form-data">
<label for="file">英文文档名 File Name:</label>
<input type="file" name="file" id="file" />
<input type="submit" name="submit" value="统计 Statistics" />
</form>
</body>
</html>
|
3. output.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
|
<meta charset= "utf-8" />
<?
/**
* The class LinkedList allows an application to store strings in
* alphabetical order by calling orderInsert().
* 此处定义的 LinkedList 类,可以调用它的 方法 orderInsert(),来以字母
* 大小的顺序储存 英文字符串。
* 同时记录 英文单词出现的次数
* 作者: 许同春 author Tongchun Xu
* @开源中国 Open Source, China community
* 完成日期:2016年6月10日 completion date: 10 June, 2016
*/
/**
 * A single cell of a singly linked list of words.
 *
 * Each cell carries one string, the number of times that string has been
 * seen, and a reference to the cell that follows it (null at the tail).
 */
class Node
{
    /** The stored string. */
    public $data;

    /** How many times the stored string has occurred. */
    public $frequency;

    /** The following cell, or null when this is the last one. */
    public $next;

    /**
     * @param mixed     $data      the string to store
     * @param Node|null $next      the cell that follows this one
     * @param int       $frequency initial occurrence count
     */
    function __construct($data, $next = null, $frequency = 1)
    {
        $this->frequency = $frequency;
        $this->next      = $next;
        $this->data      = $data;
    }
}
/**
 * Singly linked list of distinct strings with occurrence counts.
 *
 * The list starts with a sentinel head node that carries no real data.
 * orderInsert() keeps the entries in strcmp() order and counts repeated
 * strings instead of storing them twice; insertLast() appends without
 * looking at the ordering.
 */
class LinkedList
{
    /** Sentinel head node; its data is a placeholder and is never reported. */
    private $head;

    /** Unused; declared only so that existing reads of ->first keep working. */
    public $first = null;

    /**
     * Creates an empty list consisting of the sentinel head only.
     */
    public function __construct()
    {
        $this->head = new Node("dummy 傀儡");
    }

    /**
     * @return bool true when no string has been stored yet
     */
    public function isEmpty()
    {
        return $this->head->next === null;
    }

    /**
     * Stores $data at its sorted position, or, when an identical string is
     * already stored, increments that entry's occurrence count.
     *
     * @param string $data the string to record
     */
    public function orderInsert($data)
    {
        $existing = $this->find($data);
        if ($existing !== null) {
            $existing->frequency++;
            return;
        }

        // Advance to the last node whose successor still sorts before $data.
        $prev = $this->head;
        while ($prev->next !== null && strcmp($data, $prev->next->data) > 0) {
            $prev = $prev->next;
        }
        $prev->next = new Node($data, $prev->next);
    }

    /**
     * Appends $data after the current last node, ignoring sort order.
     *
     * @param string $data the string to append
     */
    public function insertLast($data)
    {
        // Starting from the sentinel makes the empty list a non-special case.
        $tail = $this->head;
        while ($tail->next !== null) {
            $tail = $tail->next;
        }
        $tail->next = new Node($data);
    }

    /**
     * Looks up the node that stores exactly $value.
     *
     * The comparison is byte-wise (strcmp), so numeric-looking strings such
     * as "1e3" and "1000" are treated as different words.
     *
     * @param string $value the string to search for
     * @return Node|null the matching node, or null when there is none
     *                   (including when the list is empty)
     */
    public function find($value)
    {
        for ($node = $this->head->next; $node !== null; $node = $node->next) {
            if (strcmp($node->data, $value) === 0) {
                return $node;
            }
        }
        return null;
    }

    /**
     * Prints every stored string with its occurrence count as an HTML table,
     * eleven cells per row, or a notice when the list is empty.
     */
    public function traversal()
    {
        if ($this->isEmpty()) {
            echo "链表为空!";
            return;
        }

        echo "输出结果:<table><tr>";
        $n = 0;
        for ($p = $this->head->next; $p !== null; $p = $p->next) {
            // The strings come from an uploaded file, so escape them before
            // placing them in HTML; ENT_SUBSTITUTE keeps non-UTF-8 words
            // visible instead of turning them into an empty string.
            $word = htmlspecialchars($p->data, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
            echo "<td>" . $word . "<br>出现次数:" . $p->frequency . "</td>";
            $n++;
            // Start a new row after every 11th cell, but only when another
            // cell follows, so no empty trailing row is produced.
            if ($n % 11 === 0 && $p->next !== null) {
                echo "</tr><tr>";
            }
        }
        echo "</tr></table>";
    }

    /**
     * Prints how many distinct strings are stored, or a notice when the
     * list is empty.
     */
    public function words_count()
    {
        if ($this->isEmpty()) {
            echo "<br>没有储存字符串 <br>";
            return;
        }

        // Count every node after the sentinel, including the first one.
        $counter = 0;
        for ($p = $this->head->next; $p !== null; $p = $p->next) {
            $counter++;
        }
        echo "***共有单词 " . $counter . " 个***";
    }
}
?>
|
4. StringTokenizer.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
|
<?php
/**
* The string tokenizer class allows an application to break a string into tokens.
*
* @author Azeem Michael
* @example The following is one example of the use of the tokenizer. The code:
* <code>
* <?php
* $str = "this is:@\t\n a test!";
* $delim = " !@:'\t\n\0"; // remove these chars
* $st = new StringTokenizer($str, $delim);
* echo 'Total tokens: '.$st->countTokens().'<br/>';
* while ($st->hasMoreTokens()) {
* echo $st->nextToken() . '<br/>';
* }
* prints the following output:
* Total tokens: 4
* this
* is
* a
* test
* ?>
* </code>
*/
class StringTokenizer {
    /**
     * All tokens of the input string, in order of appearance.
     * @var array
     */
    private $tokens = array();

    /**
     * Index into $tokens of the token that nextToken() returns next.
     * @var int
     */
    private $position = 0;

    /**
     * Constructs a string tokenizer for the specified string.
     *
     * The string is split completely here. strtok() keeps a single global
     * cursor, so splitting lazily would let two tokenizers (or any other
     * strtok() caller) disturb each other between calls.
     *
     * @param string $str   String to tokenize
     * @param string $delim The set of delimiters (the characters that separate tokens)
     *                      specified at creation time, default to " \n\r\t\0"
     */
    public function __construct($str, $delim = " \n\r\t\0") {
        for ($tok = strtok($str, $delim); $tok !== false; $tok = strtok($delim)) {
            $this->tokens[] = $tok;
        }
    }

    /**
     * Kept so the class keeps its destructor method; there is nothing to
     * release. (The former unset($this) is a fatal error on PHP 7.1+.)
     */
    public function __destruct() {
    }

    /**
     * Calculates the number of times that this tokenizer's nextToken method can
     * still be called before it runs out of tokens. Does not move the
     * internal pointer.
     * @return int - number of remaining tokens
     */
    public function countTokens() {
        return count($this->tokens) - $this->position;
    }

    /**
     * Tests if there are more tokens available from this tokenizer's string. It
     * does not move the internal pointer in any way. To move the internal pointer
     * to the next element call nextToken()
     * @return boolean - true if has more tokens, false otherwise
     */
    public function hasMoreTokens() {
        return $this->position < count($this->tokens);
    }

    /**
     * Returns the next token from this string tokenizer and advances the internal
     * pointer by one.
     * @return string|false - next element in the tokenized string, or false
     *                        when no tokens are left
     */
    public function nextToken() {
        if (!$this->hasMoreTokens()) {
            return false;
        }
        return $this->tokens[$this->position++];
    }
}
?>
|
以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持服务器之家。