"%uXXXX" is a non-standard scheme for URL-encoding Unicode characters. Apparently it was proposed but never really used. As such, there's hardly any standard function that can decode it into an actual UTF-8 sequence. Show It's not too difficult to do it yourself though:
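This converts the %uXXXX notation to HTML entity notation (&#xXXXX;), which can then be decoded to actual UTF-8 by html_entity_decode(), as the following snippet shows: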
PHP - convert unicode to character [duplicate]$string = '%u05E1%u05E2'; $string = preg_replace('/%u([0-9A-F]+)/', '&#x$1;', $string); echo html_entity_decode($string, ENT_COMPAT, 'UTF-8'); echo chr(hexdec("05E1")); var_dump(hexdec("%u05E1") == hexdec("05E1")); //true PHP Unicode Support in PHP Converting Unicode characters to their numeric value and/or HTML entities using PHPif (!function_exists('mb_internal_encoding')) { function mb_internal_encoding($encoding = NULL) { return ($from_encoding === NULL) ? iconv_get_encoding() : iconv_set_encoding($encoding); } } if (!function_exists('mb_convert_encoding')) { function mb_convert_encoding($str, $to_encoding, $from_encoding = NULL) { return iconv(($from_encoding === NULL) ? mb_internal_encoding() : $from_encoding, $to_encoding, $str); } } if (!function_exists('mb_chr')) { function mb_chr($ord, $encoding = 'UTF-8') { if ($encoding === 'UCS-4BE') { return pack("N", $ord); } else { return mb_convert_encoding(mb_chr($ord, 'UCS-4BE'), $encoding, 'UCS-4BE'); } } } if (!function_exists('mb_ord')) { function mb_ord($char, $encoding = 'UTF-8') { if ($encoding === 'UCS-4BE') { list(, $ord) = (strlen($char) === 4) ? @unpack('N', $char) : @unpack('n', $char); return $ord; } else { return mb_ord(mb_convert_encoding($char, 'UCS-4BE', $encoding), 'UCS-4BE'); } } } if (!function_exists('mb_htmlentities')) { function mb_htmlentities($string, $hex = true, $encoding = 'UTF-8') { return preg_replace_callback('/[\x{80}-\x{10FFFF}]/u', function ($match) use ($hex) { return sprintf($hex ? '&#x%X;' : '&#%d;', mb_ord($match[0])); }, $string); } } if (!function_exists('mb_html_entity_decode')) { function mb_html_entity_decode($string, $flags = null, $encoding = 'UTF-8') { return html_entity_decode($string, ($flags === NULL) ? 
ENT_COMPAT | ENT_HTML401 : $flags, $encoding); } } echo "Get string from numeric DEC value\n"; var_dump(mb_chr(50319, 'UCS-4BE')); var_dump(mb_chr(271)); echo "\nGet string from numeric HEX value\n"; var_dump(mb_chr(0xC48F, 'UCS-4BE')); var_dump(mb_chr(0x010F)); echo "\nGet numeric value of character as DEC string\n"; var_dump(mb_ord('ď', 'UCS-4BE')); var_dump(mb_ord('ď')); echo "\nGet numeric value of character as HEX string\n"; var_dump(dechex(mb_ord('ď', 'UCS-4BE'))); var_dump(dechex(mb_ord('ď'))); echo "\nEncode / decode to DEC based HTML entities\n"; var_dump(mb_htmlentities('tchüß', false)); var_dump(mb_html_entity_decode('tchüß')); echo "\nEncode / decode to HEX based HTML entities\n"; var_dump(mb_htmlentities('tchüß')); var_dump(mb_html_entity_decode('tchüß')); Get string from numeric DEC value string(4) "ď" string(2) "ď" Get string from numeric HEX value string(4) "ď" string(2) "ď" Get numeric value of character as DEC int int(50319) int(271) Get numeric value of character as HEX string string(4) "c48f" string(3) "10f" Encode / decode to DEC based HTML entities string(15) "tchüß" string(7) "tchüß" Encode / decode to HEX based HTML entities string(15) "tchüß" string(7) "tchüß" Utf8_encode5a6fc3ab PHP Unicode Support in PHP Converting Unicode characters to “\uxxxx” format using PHPif (!function_exists('codepoint_encode')) { function codepoint_encode($str) { return substr(json_encode($str), 1, -1); } } if (!function_exists('codepoint_decode')) { function codepoint_decode($str) { return json_decode(sprintf('"%s"', $str)); } } echo "\nUse JSON encoding / decoding\n"; var_dump(codepoint_encode("我好")); var_dump(codepoint_decode('\u6211\u597d')); Use JSON encoding / decoding string(12) "\u6211\u597d" string(6) "我好" How to Convert Arabic Characters to Unicode Using PHP<?php include('Arabic.php'); $Arabic = new Arabic('ArGlyphs'); $text = 'بسم الله الرحمن الرحيم'; $text = $Arabic->utf8Glyphs($text); echo $text; ?> json_encode('بهروز') // returns 
"\u0628\u0647\u0631\u0648\u0632" $string = 'بب'; // \u0628\u0628 $bidiString = fribidi_log2vis($string, FRIBIDI_LTR, FRIBIDI_CHARSET_UTF8); json_encode($bidiString); // \ufe90\ufe91 require_once('utf8.inc'); // http://hsivonen.iki.fi/php-utf8/ require_once('tcpdf.php'); // http://www.tcpdf.org/ $t = new TCPDF(); $text = 'بب'; $t->utf8Bidi(utf8ToUnicode($text)); // will return an array like array(0 => 65168, 1 => 65169) Unicode character in PHP stringstring str = "\u1000"; $unicodeChar = '\u1000'; echo json_decode('"'.$unicodeChar.'"'); echo mb_convert_encoding('က', 'UTF-8', 'HTML-ENTITIES'); echo mb_convert_encoding("\x10\x00", 'UTF-8', 'UTF-16BE'); $unicodeChar = "\u{1000}"; <?php echo("\x48\x65\x6C\x6C\x6F\x20\x57\x6F\x72\x6C\x64\x21"); ?> <?php header('content-type:text/html;charset=utf-16be'); echo("\x30\xA2"); ?> <?php header('content-type:text/html;charset=utf-16le'); echo("\xA2\x30"); ?> <?php header('content-type:text/html;charset=utf-8'); echo("\xE3\x82\xA2"); ?> function unicodeString($str, $encoding=null) { if (is_null($encoding)) $encoding = ini_get('mbstring.internal_encoding'); return preg_replace_callback('/\\\\u([0-9a-fA-F]{4})/u', create_function('$match', 'return mb_convert_encoding(pack("H*", $match[1]), '.var_export($encoding, true).', "UTF-16BE");'), $str); } function unicodeString($str, $encoding=null) { if (is_null($encoding)) $encoding = ini_get('mbstring.internal_encoding'); return preg_replace_callback('/\\\\u([0-9a-fA-F]{4})/u', function($match) use ($encoding) { return mb_convert_encoding(pack('H*', $match[1]), $encoding, 'UTF-16BE'); }, $str); } $str = unicodeString("\u1000"); html_entity_decode('エ', 0, 'UTF-8'); $str = utf8_chr( 0x1000 ); $str = utf8_chr( '\u1000' ); $str = utf8_chr( 4096 ); // Unicode Character 'HAIR SPACE' (U+200A) $htmlEntityChar = " "; $realChar = html_entity_decode($htmlEntityChar); $phpChar = "\xE2\x80\x8A"; echo 'Proof: '; var_dump($realChar === $phpChar); // bool(true) function str_encode_utf8binary($str) { /** 
@author Krinkle 2018 */ $output = ''; foreach (str_split($str) as $octet) { $ordInt = ord($octet); // Convert from int (base 10) to hex (base 16), for PHP \x syntax $ordHex = base_convert($ordInt, 10, 16); $output .= '\x' . $ordHex; } return $output; } function str_convert_html_to_utf8binary($str) { return str_encode_utf8binary(html_entity_decode($str)); } function str_convert_json_to_utf8binary($str) { return str_encode_utf8binary(json_decode($str)); } // Example for raw string: Unicode Character 'INFINITY' (U+221E) echo str_encode_utf8binary('∞') . "\n"; // \xe2\x88\x9e // Example for HTML: Unicode Character 'HAIR SPACE' (U+200A) echo str_convert_html_to_utf8binary(' ') . "\n"; // \xe2\x80\x8a // Example for JSON: Unicode Character 'HAIR SPACE' (U+200A) echo str_convert_json_to_utf8binary('"\u200a"') . "\n"; // \xe2\x80\x8a Unicode character in PHP string$unicodeChar = "\u{1000}"; $unicodeChar = '\u1000'; echo json_decode('"'.$unicodeChar.'"'); echo mb_convert_encoding('က', 'UTF-8', 'HTML-ENTITIES'); echo mb_convert_encoding("\x10\x00", 'UTF-8', 'UTF-16BE'); <?php echo("\x48\x65\x6C\x6C\x6F\x20\x57\x6F\x72\x6C\x64\x21"); ?> <?php header('content-type:text/html;charset=utf-16be'); echo("\x30\xA2"); ?> <?php header('content-type:text/html;charset=utf-16le'); echo("\xA2\x30"); ?> <?php header('content-type:text/html;charset=utf-8'); echo("\xE3\x82\xA2"); ?> Next Lesson PHP TutorialHow to convert Unicode to string in PHP?$string = '%u05E1%u05E2'; $string = preg_replace('/%u([0-9A-F]+)/', '&#x$1;', $string); echo html_entity_decode($string, ENT_COMPAT, 'UTF-8'); This converts the %uXXXX notation to HTML entity notation &#xXXXX; , which can be decoded to actual UTF-8 by html_entity_decode .
How to get Unicode of character in PHP? In PHP, we can use the mb_ord() function to get the Unicode code point value of a given character. This function is available in PHP 7.2 or higher (it is part of the mbstring extension). The mb_ord() function complements the mb_chr() function, which performs the reverse conversion from a code point to a character.
How to print Unicode characters in PHP? The best way is to tell the browser that UTF-8 is being used by sending the corresponding HTTP header: header("content-type: text/html; charset=UTF-8"); Then you can leave the rest of your code as-is and don't have to HTML-encode characters as entities or add other workarounds.
Is UTF-8 the same as Unicode? UTF-8 is a Unicode character encoding method. This means that UTF-8 takes the code point for a given Unicode character and translates it into a sequence of one to four bytes. It also does the reverse, reading in bytes and converting them back to code points (characters).
|