/* NOTE(review): the opening of this header comment (presumably the licence notice) lies above the visible text; make sure it and the PHP open tag are still present in the file. */

/*
    How to use?

    $g2x = new Garbage2xhtml();

    // Set some options
    $g2x->remove_forbidden_tags = false;
    $g2x->allowed_tags = array('p' => array('align'), 'br' => array(), 'img' => array('src', 'alt'));

    echo $g2x->Process($user_text);
*/

if (!defined('ENABLE_DEBUG')) {
    define('ENABLE_DEBUG', false);
}

if (!function_exists('debugLog')) {
    /**
     * Append a debug entry to $GLOBALS['_debug_log'] when ENABLE_DEBUG is strictly true.
     *
     * Only defined here when no other debugLog() function already exists.
     *
     * @param string $section Short label of the emitting component
     * @param string $message Free-form message
     * @return void
     */
    function debugLog($section, $message)
    {
        if (defined('ENABLE_DEBUG') && ENABLE_DEBUG === true) {
            if (empty($GLOBALS['_debug_log'])) {
                $GLOBALS['_debug_log'] = array();
            }

            $GLOBALS['_debug_log'][] = array($section, $message);
        }
    }
}

/**
 * Turns arbitrary user-supplied markup into a restricted, well-nested XHTML fragment.
 *
 * Every tag found in the input is either rewritten from scratch (name, allowed
 * attributes, escaped values) or dropped / escaped. Accepted tags are swapped
 * for "[[g2x-<md5>]]" placeholders while the rest of the text is escaped, and
 * are put back at the very end of Process().
 *
 * NOTE(review): the markup and entity literals of this file had been stripped
 * (closing-tag strings, "&amp;", "&lt;", "&gt;", the <img> regex, the arrow and
 * guillemet entities). They were reconstructed from the surrounding logic and
 * should be confirmed against the upstream distribution.
 */
class Garbage2xhtml
{
    // Allowed tags and attributes
    // Minimal set (default)
    var $allowed_tags = array(
        // 'tag' => array of allowed attributes
        'strong' => array(),
        'ul'     => array(),
        'li'     => array(),
        'ol'     => array(),
        'p'      => array(),
        'em'     => array(),
        'h3'     => array(),
        'h4'     => array(),
        'h5'     => array(),
        'pre'    => array(),
        'a'      => array('href'),
        'img'    => array('src')
    );

    // Complete xHTML set (uncomment to enable)
    /*
    var $allowed_tags = array(
        // Inline
        'strong'     => array('class'),
        'em'         => array('class'),
        'sup'        => array('class'),
        'sub'        => array('class'),
        'span'       => array('class'),
        'abbr'       => array('class', 'title'),
        'acronym'    => array('class', 'title'),
        'a'          => array('class', 'href'),
        'img'        => array('class', 'src'),
        'code'       => array('class'),
        'cite'       => array('class'),
        'del'        => array('class'),
        'ins'        => array('class'),
        'kbd'        => array('class'),
        'samp'       => array('class'),
        // Block
        'p'          => array('class'),
        'blockquote' => array('class'),
        'ul'         => array('class'),
        'li'         => array('class'),
        'ol'         => array('class'),
        'h3'         => array('class'),
        'h4'         => array('class'),
        'h5'         => array('class'),
        'pre'        => array('class'),
        'dl'         => array('class'),
        'dt'         => array('class'),
        'dd'         => array('class'),
        'hr'         => array('class'),
        // Table elements
        'table'      => array('class'),
        'caption'    => array('class'),
        'col'        => array('class'),
        'colgroup'   => array('class'),
        'thead'      => array('class'),
        'tbody'      => array('class'),
        'tfoot'      => array('class'),
        'tr'         => array('class'),
        'td'         => array('class', 'rowspan', 'colspan'),
        'th'         => array('class', 'rowspan', 'colspan'),
    );
    */

    // If this option is set to false, all non-allowed tags will be
    // converted to entities instead of being removed
    var $remove_forbidden_tags = true;

    // Internal use only
    var $opened_tags = array();
    var $text = '';
    var $start_pos = false;
    var $end_pos = false;
    var $mytags = array();

    /**
     * Clean a chunk of user markup.
     *
     * @param string $string Raw user input
     * @return string The cleaned fragment; tags left open by the input are closed at the end
     */
    function Process($string)
    {
        $string = (string) $string;

        debugLog('g2x', 'Processing text ('.strlen($string).' car.)');

        // Start from a clean state so the same instance can be reused safely
        $this->opened_tags = array();
        $this->mytags = array();
        $this->start_pos = false;
        $this->end_pos = false;

        // Handling non standard line breaks
        $this->text = preg_replace('/<(br\s*\/?)>/i', '<br />', $string);
        $this->text = preg_replace('/\r/', '', $this->text);

        // Handling non standard img tags
        $this->text = preg_replace('/<img([^>]*[^\/])>/i', '<img\\1 />', $this->text);

        // Escape every ampersand, then restore the ones that start a real entity
        $this->text = str_replace('&', '&amp;', $this->text);
        $this->text = $this->cleanEntities($this->text);

        // Clean stupid use of minus signs (named entities keep the output
        // independent of the encoding this file is saved in)
        $this->text = preg_replace('/<+-+/', '&larr;', $this->text);
        $this->text = preg_replace('/-+>+/', '&rarr;', $this->text);
        $this->text = preg_replace('/<<+/', '&laquo;', $this->text);
        $this->text = preg_replace('/>>+/', '&raquo;', $this->text);

        // Browse tags
        while ($tag = $this->getNextTag()) {
            // If tag isn't valid and already deleted skip to next
            if ($tag === true) {
                continue;
            }

            $is_closing = !empty($tag['close']);
            $is_selfclose = !empty($tag['selfclose']);

            if ($is_closing) {
                // A closing tag never carries attributes
                $new_tag = '</'.$tag['name'].'>';

                // Close every tag opened after the one being closed, innermost
                // first, so that the resulting markup stays properly nested
                $implicit = '';
                $last = $this->getLastOpenedTag();

                while ($last !== null && $last != $tag['name']) {
                    $implicit .= '</'.$last.'>';

                    if ($this->isBlockElement($last)) {
                        $implicit .= "\n";
                    }

                    $this->closeLastTag();
                    $last = $this->getLastOpenedTag();
                }

                $new_tag = $implicit.$new_tag;
            } else {
                $attrs = '';

                if (!empty($tag['attributes'])) {
                    foreach ($tag['attributes'] as $name => $value) {
                        $attrs .= ' '.$name.'="'.$value.'"';
                    }
                }

                $new_tag = '<'.$tag['name'].$attrs;

                if ($is_selfclose) {
                    $new_tag .= ' /';
                }

                $new_tag .= '>';
            }

            // Park the checked tag behind a placeholder so that it is not
            // scanned again and survives the final escaping of < and >
            $id = md5($new_tag);
            $this->mytags[$id] = $new_tag;
            $this->ClearCurrentTag('[[g2x-'.$id.']]');

            // selfclose tags are like <br /> and others
            // If tag is not a selfclose tag, check opening and closing
            if (!$is_selfclose) {
                if ($is_closing) {
                    $this->closeLastTag();
                } else {
                    $this->openTag($tag['name']);
                }
            }
        }

        // In case of some ugly code is still present
        $this->text = strtr($this->text, array('<' => '&lt;', '>' => '&gt;'));
        $this->text = trim($this->text);

        // Take back the tags to their original state. strtr() does this in a
        // single pass: text brought in by one placeholder (e.g. an attribute
        // value that itself looks like a placeholder) is never expanded again.
        if (!empty($this->mytags)) {
            $placeholders = array();

            foreach ($this->mytags as $id => $markup) {
                $placeholders['[[g2x-'.$id.']]'] = $markup;
            }

            $this->text = strtr($this->text, $placeholders);
        }

        $this->closeOpenedTags();
        #$this->cleanCode();

        return $this->text;
    }

    /**
     * Locate and analyse the next "<...>" sequence of $this->text.
     *
     * @return array|bool false when no "<" is left; true when the sequence was
     *                    rejected and already removed/escaped; otherwise an
     *                    array with the keys 'name', 'attributes' and
     *                    optionally 'close' or 'selfclose'
     */
    function getNextTag()
    {
        $pos = strpos($this->text, '<');

        if ($pos === false) {
            return false;
        }

        $this->start_pos = $pos + 1;
        $rest = substr($this->text, $this->start_pos);
        $close = strpos($rest, '>');

        if ($close === false) {
            // A "<" that is never closed is plain text: escape this single
            // character and keep everything that follows it
            $this->end_pos = $pos;
            $this->ClearCurrentTag('&lt;');
            return true;
        }

        $this->end_pos = $close + $this->start_pos;

        // Another "<" before the closing ">": drop the whole sequence
        $garbage = strpos($rest, '<');

        if ($garbage !== false && ($garbage + $this->start_pos) < $this->end_pos) {
            $this->ClearCurrentTag('');
            return true;
        }

        $tag = substr($this->text, $this->start_pos, $this->end_pos - $this->start_pos);
        $tag = stripslashes($tag);
        $orig_tag = substr($this->text, $this->start_pos - 1, $this->end_pos - $this->start_pos + 2);

        $datas = array();

        if (preg_match('/^\//', $tag)) {
            $tag = strtolower(trim(substr($tag, 1)));

            if (!$this->tagIsOpen($tag)) {
                // Tag closed but never opened? to the bin!
                $this->ClearCurrentTag();
                return true;
            }

            $datas['close'] = true;
        }

        if (preg_match('/\/$/', $tag)) {
            // A closing tag written with a trailing slash is still a closing
            // tag: flagging it as selfclose would leave the open-tag stack untouched
            if (empty($datas['close'])) {
                $datas['selfclose'] = true;
            }

            $tag = preg_replace('/\s*\/$/', '', $tag);
        }

        $datas['attributes'] = array();

        // Normalise single-quoted values to double quotes
        $tag = preg_replace("/='([^']+)'/", '="\\1"', $tag);

        // Getting all attributes
        if (preg_match_all('/([a-zA-Z]+)\s*=\s*("([^"]+)")/', $tag, $match, PREG_SET_ORDER)) {
            foreach ($match as $m) {
                $attr_name = strtolower(trim($m[1]));

                if ($this->isAttrSecure($m[3])) {
                    $datas['attributes'][$attr_name] = $this->cleanAttribute($m[3]);
                }

                $tag = str_replace($m[0], '', $tag);
            }
        }

        // Clean unrecognized garbage attributes ("s" flag: also across line breaks)
        $tag = preg_replace('/^([a-zA-Z0-9]+).*$/is', '\\1', $tag);
        $tag = trim($tag);

        if (preg_match('/^([a-zA-Z0-9]+)$/', $tag)) {
            $tag = strtolower($tag);

            if (!isset($this->allowed_tags[$tag])) {
                // Tag not allowed ? to trash
                if ($this->remove_forbidden_tags) {
                    $this->ClearCurrentTag();
                } else {
                    $this->ClearCurrentTag(htmlspecialchars($orig_tag));
                }

                return true;
            }

            $datas['name'] = $tag;

            // Keep only allowed attributes
            foreach ($datas['attributes'] as $attr => $value) {
                if (!in_array($attr, $this->allowed_tags[$tag], true)) {
                    unset($datas['attributes'][$attr]);
                }
            }

            // An img element always gets an alt attribute
            if ($datas['name'] == 'img' && !isset($datas['attributes']['alt'])) {
                $datas['attributes']['alt'] = '';
            }

            return $datas;
        }

        $this->ClearCurrentTag();
        return true;
    }

    /**
     * Reject attribute values that start with a script scheme or a numeric
     * entity, or that end in ".js".
     *
     * The check is case-insensitive and ignores whitespace/control characters,
     * so "JavaScript:" or a scheme split by a tab or line break is refused too.
     *
     * @param string $attr Raw attribute value
     * @return bool true when the value may be kept
     */
    function isAttrSecure($attr)
    {
        $compact = preg_replace('/[\x00-\x20]+/', '', $attr);

        if (preg_match('/(^javascript:|^&#[0-9x]+|^vbscript:|\.js$)/i', $compact)) {
            return false;
        }

        return true;
    }

    /**
     * Escape an attribute value for output between double quotes.
     *
     * htmlspecialchars() escapes every "&", so an entity written inside a
     * value comes out as literal text rather than as an entity.
     *
     * @param string $attr Raw attribute value
     * @return string Escaped value
     */
    function cleanAttribute($attr)
    {
        $attr = str_replace('&amp;', '&', $attr);
        $attr = $this->cleanEntities($attr);
        return htmlspecialchars($attr, ENT_QUOTES);
    }

    /**
     * Tell whether a tag name is in this class's list of block-level elements.
     *
     * @param string $tag Lower-case tag name
     * @return bool
     */
    function isBlockElement($tag)
    {
        return preg_match('/^(h[1-6]|p|object|pre|blockquote|div|dl|fieldset|form|ol|ul|table|address|hr)$/', $tag) === 1;
    }

    /**
     * Name of the most recently opened tag that is still open.
     *
     * @return string|null null when no tag is open
     */
    function getLastOpenedTag()
    {
        return isset($this->opened_tags[0]) ? $this->opened_tags[0] : null;
    }

    /**
     * Push a tag name on top of the open-tag stack.
     *
     * @param string $tag Lower-case tag name
     * @return void
     */
    function openTag($tag)
    {
        array_unshift($this->opened_tags, $tag);
    }

    /**
     * Tell whether a tag of this name is currently open.
     *
     * @param string $tag Lower-case tag name
     * @return bool
     */
    function tagIsOpen($tag)
    {
        return in_array($tag, $this->opened_tags, true);
    }

    /**
     * Pop the most recently opened tag from the stack.
     *
     * @return bool Always true
     */
    function closeLastTag()
    {
        array_shift($this->opened_tags);
        return true;
    }

    /**
     * Append a closing tag to $this->text for every tag still open, innermost
     * first, then empty the stack. Block elements are followed by a line feed.
     *
     * @return void
     */
    function closeOpenedTags()
    {
        foreach ($this->opened_tags as $tag) {
            $this->text .= '</'.$tag.'>';

            if ($this->isBlockElement($tag)) {
                $this->text .= "\n";
            }
        }

        $this->opened_tags = array();
    }

    /**
     * Collapse runs of two or more spaces/tabs in $this->text into one space.
     *
     * @return void
     */
    function cleanCode()
    {
        $this->text = preg_replace('/[ \t]{2,}/', ' ', $this->text);
    }

    /**
     * Undo the escaping of ampersands that start a named or numeric entity,
     * e.g. "&amp;eacute;" becomes "&eacute;". Names may contain digits (sup2).
     *
     * @param string $str Text whose ampersands were escaped
     * @return string
     */
    function cleanEntities($str)
    {
        return preg_replace('/&amp;(#[0-9a-fx]+|[a-z][a-z0-9]*);/i', '&\\1;', $str);
    }

    /**
     * Replace the sequence delimited by start_pos/end_pos with $value.
     *
     * Everything located before the sequence, and $value itself, is made
     * tag-free: stray "<" and ">" become entities, the same way Process()
     * treats whatever is left after the last tag.
     *
     * @param string $value Replacement text (empty string removes the sequence)
     * @return void
     */
    function ClearCurrentTag($value = '')
    {
        $before = substr($this->text, 0, $this->start_pos - 1);
        $after = substr($this->text, $this->end_pos + 1);

        // Against ugly code
        $head = strtr($before.$value, array('<' => '&lt;', '>' => '&gt;'));

        $this->text = $head.$after;
        $this->start_pos = false;
        $this->end_pos = false;
    }
}