// 1: Encode a mixed Chinese/ASCII string as a comma-separated list of codes.
//    Single-byte ASCII characters are stored as their decimal code via asc();
//    every other character is stored as whatever unicode_encode() returns.
$str = "官网制作:网巢网络";
$arr_chr = [];
foreach (math($str) as $value) {
    // Dispatch on the byte value instead of a CJK-range regex: asc() yields
    // nothing for bytes >= 128, so any non-ASCII character that falls outside
    // U+4E00..U+9FA5 (e.g. full-width punctuation) would otherwise be lost.
    if (strlen($value) === 1 && ord($value) < 128) {
        $arr_chr[] = asc($value);
    } else {
        $arr_chr[] = unicode_encode($value);
    }
}
// Join the per-character codes into one string.
$strs = implode(",", $arr_chr);

// 2: Parse that string back into the original text.
$arr_chr = explode(",", $strs);
foreach ($arr_chr as $value) {
    if (is_numeric($value)) {
        // Numeric entries are ASCII codes; chr() expects an integer.
        echo chr((int) $value);
    } else {
        // Everything else is handed to unicode_decode().
        echo unicode_decode($value);
    }
}
//函数
/**
 * Split a string into an array of characters, keeping each multi-byte
 * character together as one element.
 *
 * Matching is done on raw bytes. NUL bytes and byte sequences that do not fit
 * the selected pattern are not matched and therefore do not appear in the
 * result.
 *
 * @param string $string Text to split.
 * @param string $code   'UTF-8' selects the UTF-8 byte pattern; any other value
 *                       selects a pattern of one ASCII byte or a pair of bytes
 *                       in the 0xA1-0xFF range (presumably GBK/GB2312 text —
 *                       confirm with callers).
 * @return array List of characters in input order; empty when nothing matches.
 */
function math($string, $code = 'UTF-8')
{
    // Single-quoted so the \xNN escapes reach PCRE intact; without the /u
    // modifier PCRE compares them against individual bytes.
    if ($code == 'UTF-8') {
        $pa = '/[\x01-\x7f]'
            . '|[\xc2-\xdf][\x80-\xbf]'
            . '|\xe0[\xa0-\xbf][\x80-\xbf]'
            . '|[\xe1-\xef][\x80-\xbf][\x80-\xbf]'
            . '|\xf0[\x90-\xbf][\x80-\xbf][\x80-\xbf]'
            . '|[\xf1-\xf7][\x80-\xbf][\x80-\xbf][\x80-\xbf]/';
    } else {
        $pa = '/[\x01-\x7f]|[\xa1-\xff][\xa1-\xff]/';
    }

    // preg_match_all() returns false on error and 0 when nothing matched.
    if (!preg_match_all($pa, $string, $t_string)) {
        return [];
    }

    // Index 0 already holds the list of full matches.
    return $t_string[0];
}
/**
 * Return the ASCII code of the first byte of $s.
 *
 * Only values below 128 are ASCII. Chinese characters have no ASCII code, so
 * for any other leading byte the function returns null.
 *
 * @param string $s Character to look up.
 * @return int|null Code in the range 0-127, or null for a non-ASCII byte.
 */
function asc($s) {
    $byte = ord($s);

    return $byte < 128 ? $byte : null;
}
/**
 * Encode a UTF-8 string using \uXXXX escapes for every non-ASCII character.
 *
 * ASCII characters (below U+0080) are copied through unchanged. Every other
 * UTF-16 code unit is written as a backslash, 'u' and four lowercase hex
 * digits, e.g. \u56fe\u7247. Characters above U+FFFF therefore appear as two
 * escapes (a surrogate pair).
 *
 * @param string $name UTF-8 text to encode.
 * @return string Encoded text, or '' when iconv() cannot convert the input.
 */
function unicode_encode($name)
{
    // Request big-endian explicitly: the loop below reads each unit as
    // high byte followed by low byte.
    $utf16 = iconv('UTF-8', 'UTF-16BE', $name);
    if ($utf16 === false) {
        return '';
    }

    $str = '';
    $len = strlen($utf16);
    for ($i = 0; $i < $len - 1; $i += 2) {
        $high = ord($utf16[$i]);
        $low = ord($utf16[$i + 1]);
        if ($high === 0 && $low < 0x80) {
            // Plain ASCII: keep the character itself.
            $str .= $utf16[$i + 1];
        } else {
            // %02x zero-pads each byte so the escape always has four digits.
            $str .= sprintf('\\u%02x%02x', $high, $low);
        }
    }

    return $str;
}
/**
 * Decode \uXXXX escapes in a string back to UTF-8 characters.
 *
 * Each run of consecutive escapes (a backslash, a lowercase 'u' and four hex
 * digits) is decoded as one UTF-16BE sequence, so a surrogate pair written as
 * two escapes becomes a single character. All other text, including
 * punctuation and whitespace, is left exactly as it was.
 *
 * @param string $name Text that may contain \uXXXX escapes.
 * @return string Decoded UTF-8 text. A run that iconv() cannot convert is left
 *                in its escaped form.
 */
function unicode_decode($name)
{
    $decoded = preg_replace_callback(
        // Four backslashes in the PHP literal become one literal backslash
        // in the compiled pattern.
        '/(?:\\\\u[0-9a-fA-F]{4})+/',
        function ($match) {
            // Strip the \u markers, leaving only hex digits, then turn the
            // digits into raw big-endian bytes.
            $utf16 = pack('H*', str_replace('\\u', '', $match[0]));
            $utf8 = iconv('UTF-16BE', 'UTF-8', $utf16);

            return $utf8 === false ? $match[0] : $utf8;
        },
        $name
    );

    // preg_replace_callback() returns null on a PCRE error; fall back to the
    // unmodified input in that case.
    return $decoded === null ? $name : $decoded;
}