PHP 检测文件内容编码的方法

时间:2023-03-08 15:39:31

核心用到的是 mb_convert_encoding 函数,示例代码如下:

<?php
header("Content-type: text/html; charset=utf-8");

/**
 * Detect which of a list of candidate encodings a byte string is valid in.
 *
 * Candidates are tried in the order given and the first one that accepts
 * $str wins, so order matters: pure ASCII input is reported as 'ASCII'
 * even though the same bytes are also valid UTF-8 and GBK.
 *
 * @param string $str       Raw bytes to inspect.
 * @param array  $encodings Candidate mbstring encoding names, tried in order.
 * @return string|false The first candidate $str is valid in, or false when
 *                      no candidate matches or $str is not a string.
 */
function get_encoding($str = "", array $encodings = array('ASCII', 'UTF-8', 'GBK')) {
    // Non-string input can never be "encoded" in anything; report no match
    // instead of letting it be coerced to a string and mis-detected.
    if (!is_string($str)) {
        return false;
    }

    foreach ($encodings as $encoding) {
        // mb_check_encoding() is the dedicated validity test; it avoids
        // building two temporary converted copies of the whole input.
        if (mb_check_encoding($str, $encoding)) {
            return $encoding;
        }
    }

    return false;
}
/**
 * Report whether a string begins with the UTF-8 byte order mark.
 *
 * @param string $str Raw bytes to inspect.
 * @return bool True when the first three bytes are EF BB BF, false otherwise
 *              (including when $str is shorter than three bytes).
 */
function utf8_has_bom($str) {
    // Binary-safe comparison limited to the three BOM bytes.
    return strncmp($str, "\xEF\xBB\xBF", 3) === 0;
}
// $filename = 'C:\Users\Administrator\Desktop\test.txt';
// $filename = 'C:\Users\Administrator\Desktop\ansi.txt';
// $filename = 'C:\Users\Administrator\Desktop\Unicode.txt';
$filename = 'C:\Users\Administrator\Desktop\my.txt';

// Read the raw bytes. file_get_contents() returns false when the file is
// missing or unreadable, so stop here instead of analysing a boolean.
$str = file_get_contents($filename);
if ($str === false) {
    exit("Unable to read file: " . $filename . PHP_EOL);
}

// Detect the encoding.
$encode = get_encoding($str);
var_dump($encode);

// get_encoding() returns false when no candidate matched; passing that on
// as the source encoding would make mb_convert_encoding() fail.
if ($encode === false) {
    exit("Unable to detect the encoding of: " . $filename . PHP_EOL);
}

// Convert to "UTF-8" unless the content is already UTF-8.
if ($encode !== 'UTF-8') {
    $str = mb_convert_encoding($str, "UTF-8", $encode);
}
var_dump($str);

参考文档:https://gist.github.com/welefen/7746175