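When outputting untrusted data into the HTML body (for example, between <body> </body> or <div> </div>), escape the 5 XML-significant characters (&, <, >, ", '); OWASP additionally recommends escaping the forward slash (/). In PHP, htmlentities() takes care of the first five characters, and the slash is handled separately with str_replace():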
/**
 * Escapes a string for output inside HTML element content.
 *
 * htmlentities() with ENT_QUOTES converts every character that has a named
 * entity, including both double and single quotes. The forward slash is then
 * replaced with a numeric entity as well, since it can help close an HTML tag.
 *
 * @param string $string Untrusted text, expected to be UTF-8 encoded.
 * @return string The escaped text.
 */
function makeHTMLSafe($string) {
    $string = htmlentities($string, ENT_QUOTES, 'UTF-8');
    // Replace the slash with its numeric entity. The previous code replaced
    // '/' with '/', which left the slash unescaped. This runs after
    // htmlentities() so the '&' of the new entity is not escaped again.
    $string = str_replace('/', '&#x2F;', $string);
    return $string;
}
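In addition, when tainted data is placed inside an HTML attribute (for example, the href= of an <a> tag), the following characters must also be escaped ([space] % * + , - / ; < = > ^ |) by converting them to HTML numeric entities: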
/**
 * Replaces characters that are risky inside an HTML attribute value with
 * hexadecimal numeric entities.
 *
 * The characters covered are: space % * + , - / ; < = > ^ |
 * This function does not touch '&' or quote characters.
 *
 * @param string $string Text destined for an HTML attribute value.
 * @return string The text with each listed character replaced by "&#xHH;".
 */
function makeHTMLAttributeSafe($string) {
    // ASCII codes of: space % * + , - / ; < = > ^ |
    $codePoints = array(32, 37, 42, 43, 44, 45, 47, 59, 60, 61, 62, 94, 124);

    $entityFor = array();
    foreach ($codePoints as $codePoint) {
        // Two-digit, lowercase hexadecimal entity, e.g. 32 => "&#x20;".
        $entityFor[chr($codePoint)] = sprintf('&#x%02x;', $codePoint);
    }

    // strtr() with an array works in a single pass, so the ';' inside an
    // entity it has just produced is not replaced again.
    return strtr($string, $entityFor);
}
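Validate UTF-8 - text that is not well-formed UTF-8 should not be passed on to the HTML output. If your application works in UTF-8, check the input first; the function below stops the script when a non-empty string is not valid UTF-8, and otherwise returns the string unchanged: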
/**
 * Terminates the script if a non-empty string is not valid UTF-8.
 *
 * @param string $string Text to check.
 * @return string The same text, unchanged, when it is empty or valid UTF-8.
 */
function assertValidUTF8($string) {
    // An empty string has nothing to validate.
    if (!strlen($string)) {
        return $string;
    }

    // With the "u" modifier the subject is treated as UTF-8, and preg_match()
    // does not report a match when the subject is not valid UTF-8. The "s"
    // modifier lets "." match a leading newline too.
    $matched = preg_match('/^.{1}/us', $string);
    if (!$matched) {
        exit;
    }

    return $string;
}
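The u modifier makes the regular expression work in Unicode (UTF-8) mode. By asking it to match a single character, ., we make the engine check that the string is valid Unicode.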
, - . , .
For more detail, see the OWASP guidance on preventing XSS.