remove_forbidden_tags = false; $g2x->allowed_tags = array('p' => array('align'), 'br' => array(), 'img' => array('src', 'alt')); echo $g2x->Process($user_text); */

/**
 * Turns arbitrary ("garbage") user markup into a restricted XHTML subset.
 *
 * Only the tags listed in $allowed_tags survive, and each of them keeps only
 * the attributes listed for it. Every accepted tag is first swapped for a
 * "[[g2x-<id>]]" placeholder; whatever angle brackets remain afterwards are
 * turned into square brackets, and finally the placeholders are replaced by
 * the rebuilt tags.
 *
 * The code deliberately sticks to PHP 4 era syntax (var, array()) so it keeps
 * running wherever the original class did.
 */
class Garbage2xhtml
{
    // Allowed tags and attributes
    // Minimal set (default)
    var $allowed_tags = array(
        // 'tag' => array of allowed attributes
        'strong' => array(),
        'ul'     => array(),
        'li'     => array(),
        'ol'     => array(),
        'p'      => array(),
        'em'     => array(),
        'h3'     => array(),
        'h4'     => array(),
        'h5'     => array(),
        'pre'    => array(),
        'a'      => array('href'),
        'img'    => array('src')
    );

    // Complete xHTML set (uncomment to enable)
    /*
    var $allowed_tags = array(
        // Inline
        'strong'     => array('class'),
        'em'         => array('class'),
        'sup'        => array('class'),
        'sub'        => array('class'),
        'span'       => array('class'),
        'abbr'       => array('class', 'title'),
        'acronym'    => array('class', 'title'),
        'a'          => array('class', 'href'),
        'img'        => array('class', 'src'),
        'code'       => array('class'),
        'cite'       => array('class'),
        'del'        => array('class'),
        'ins'        => array('class'),
        'kbd'        => array('class'),
        'samp'       => array('class'),
        // Block
        'p'          => array('class'),
        'blockquote' => array('class'),
        'ul'         => array('class'),
        'li'         => array('class'),
        'ol'         => array('class'),
        'h3'         => array('class'),
        'h4'         => array('class'),
        'h5'         => array('class'),
        'pre'        => array('class'),
        'dl'         => array('class'),
        'dt'         => array('class'),
        'dd'         => array('class'),
        'hr'         => array('class'),
        // Table elements
        'table'      => array('class'),
        'caption'    => array('class'),
        'col'        => array('class'),
        'colgroup'   => array('class'),
        'thead'      => array('class'),
        'tbody'      => array('class'),
        'tfoot'      => array('class'),
        'tr'         => array('class'),
        'td'         => array('class', 'rowspan', 'colspan'),
        'th'         => array('class', 'rowspan', 'colspan'),
    );
    */

    // When true, non-allowed tags are deleted. When false they are kept
    // visible as text by converting them to HTML entities.
    var $remove_forbidden_tags = true;

    // Internal use only
    var $opened_tags = array(); // placeholder id => tag name, for tags opened and not yet closed
    var $text = '';             // working copy of the string being processed
    var $start_pos = false;     // offset just after the '<' of the tag being examined
    var $end_pos = false;       // offset of the last character replaced by ClearCurrentTag()
    var $mytags = array();      // placeholder id => rebuilt tag markup

    /**
     * Sanitizes a string and returns the resulting markup.
     *
     * @param string $string Raw user supplied text.
     * @return string The text with only allowed tags/attributes left.
     */
    function Process($string)
    {
        // Start from a clean slate so that one instance can be reused: tags
        // left open by a previous call must not validate a stray closing tag.
        $this->opened_tags = array();
        $this->mytags = array();
        $this->start_pos = false;
        $this->end_pos = false;

        $input = (string) $string;

        $normalized = preg_replace(
            array(
                // Handling non standard line breaks (<br>, <BR/>, ...)
                '/<(br\s*\/?)>/i',
                // Handling void tags written without the trailing slash
                // (<img src="x">, <hr>); already self-closed ones are left alone
                '/<((?:img|hr)(?:\s[^>]*[^\/>\s])?)\s*>/i',
            ),
            array(
                "<br />\n",
                '<\\1 />',
            ),
            $input
        );

        // preg_replace() yields null on a PCRE failure; the tag loop below is
        // what actually sanitizes, so fall back to the raw input in that case.
        $this->text = ($normalized === null) ? $input : $normalized;

        // Browse tags
        while ($tag = $this->getNextTag()) {
            // The tag was not valid and has already been dealt with
            if ($tag === true) {
                continue;
            }

            // Attributes
            $attrs = '';
            if (!empty($tag['attributes'])) {
                foreach ($tag['attributes'] as $name => $value) {
                    $attrs .= ' ' . $name . '="' . $value . '"';
                }
            }

            // Rebuild the tag from its validated parts only
            $new_tag = '<';
            if (!empty($tag['close'])) {
                $new_tag .= '/';
            }
            $new_tag .= $tag['name'] . $attrs;
            if (!empty($tag['selfclose'])) {
                $new_tag .= ' /';
            }
            $new_tag .= '>';

            // The id has to be unique per occurrence, not per markup:
            // opened_tags is keyed by it, and clearOpenedTags() removes every
            // placeholder carrying the id of an unclosed tag.
            $id = md5(count($this->mytags) . ':' . $new_tag);
            $this->mytags[$id] = $new_tag;

            // Checked tags are parked as [[g2x-id]] so they can be told apart
            // from markup that has not been checked
            $this->ClearCurrentTag('[[g2x-' . $id . ']]');

            // Self-closing tags (such as <br />) need no open/close tracking
            if (empty($tag['selfclose'])) {
                if (!empty($tag['close'])) {
                    $this->closeTag($tag['name']);
                } else {
                    $this->openTag($tag['name'], $id);
                }
            }
        }

        // Delete opened tags which are not closed
        $this->clearOpenedTags();

        // In case some ugly code is still present
        $this->text = trim(strtr($this->text, '<>', '[]'));

        // Take back the tags to their original state. A single strtr() pass
        // leaves unknown placeholders untouched and never rescans replaced text.
        $restore = array();
        foreach ($this->mytags as $id => $markup) {
            $restore['[[g2x-' . $id . ']]'] = $markup;
        }
        if (!empty($restore)) {
            $this->text = strtr($this->text, $restore);
        }

        return $this->text;
    }

    /**
     * Finds the next '<' in the working text and analyses what follows it.
     *
     * @return mixed false when no '<' is left; true when something was found
     *               and already removed or neutralized; otherwise an array
     *               with the keys 'name' and 'attributes' plus, when they
     *               apply, 'close' and 'selfclose'.
     */
    function getNextTag()
    {
        $pos = strpos($this->text, '<');
        if ($pos === false) {
            return false;
        }

        $this->start_pos = $pos + 1;
        $close = strpos($this->text, '>', $this->start_pos);
        $next_open = strpos($this->text, '<', $this->start_pos);

        // A '<' that is never closed, or that meets another '<' before its
        // '>', is not a tag. Only that one character is neutralized, so the
        // surrounding text and any real tag after it are preserved.
        if ($close === false || ($next_open !== false && $next_open < $close)) {
            $this->end_pos = $pos;
            $this->ClearCurrentTag('[');
            return true;
        }

        $this->end_pos = $close;
        $length = $this->end_pos - $this->start_pos;

        // Raw tag including its brackets, used when it must be shown as text
        $orig_tag = substr($this->text, $pos, $length + 2);

        // stripslashes() is kept from the original implementation
        // NOTE(review): presumably there for magic-quoted input - confirm
        $tag = stripslashes((string) substr($this->text, $this->start_pos, $length));

        $datas = array('attributes' => array());

        $is_closing = (substr($tag, 0, 1) === '/');
        if ($is_closing) {
            $tag = (string) substr($tag, 1);
            $datas['close'] = true;
        }

        if (preg_match('/\/$/', $tag)) {
            $datas['selfclose'] = true;
            $tag = preg_replace('/\s*\/$/', '', $tag);
        }

        // Single quoted values are rewritten with double quotes
        $tag = preg_replace("/='([^']+)'/", '="\\1"', $tag);

        // Getting all attributes
        if (preg_match_all('/([a-zA-Z]+)\s*=\s*("([^"]+)")/', $tag, $match, PREG_SET_ORDER)) {
            foreach ($match as $m) {
                $attr_name = strtolower(trim($m[1]));
                if ($this->isAttrSecure($m[3])) {
                    $datas['attributes'][$attr_name] = $m[3];
                }
                $tag = str_replace($m[0], '', $tag);
            }
        }

        // Clean unrecognized garbage attributes: keep only the leading name.
        // Digits are part of the name, otherwise h3/h4/h5 would become "h".
        $tag = trim(preg_replace('/^([a-z][a-z0-9]*).*$/is', '\\1', $tag));

        if (!preg_match('/^[a-zA-Z0-9]+$/', $tag)) {
            // Nothing that looks like a tag name (comments, doctypes, ...)
            $this->ClearCurrentTag();
            return true;
        }

        // Normalize the name before any lookup so that </P> or </b> match the
        // "p" / "strong" entries recorded when the tag was opened
        $tag = $this->CleanOutdatedTag(strtolower($tag));

        $forbidden = !array_key_exists($tag, $this->allowed_tags);
        $orphan_close = $is_closing && !$this->TagIsOpen($tag);

        if ($forbidden || $orphan_close) {
            // Tag not allowed, or closed but not opened? to trash
            if ($this->remove_forbidden_tags) {
                $this->ClearCurrentTag();
            } else {
                $this->ClearCurrentTag(htmlspecialchars($orig_tag));
            }
            return true;
        }

        $datas['name'] = $tag;

        if ($is_closing) {
            // A closing tag never carries attributes
            $datas['attributes'] = array();
        } else {
            // Keep only allowed attributes
            foreach ($datas['attributes'] as $attr => $value) {
                if (!in_array($attr, $this->allowed_tags[$tag], true)) {
                    unset($datas['attributes'][$attr]);
                }
            }
        }

        return $datas;
    }

    /**
     * Best-effort blacklist check of an attribute value.
     *
     * Rejects values that start with "javascript:", "vbscript:" or a numeric
     * character reference, and values that end with ".js". The check ignores
     * letter case and ASCII control characters, and is applied both to the
     * value as written and to its entity-decoded form.
     *
     * @param string $attr Attribute value.
     * @return bool false when the value is rejected, true otherwise.
     */
    function isAttrSecure($attr)
    {
        $raw = (string) $attr;
        $candidates = array($raw, html_entity_decode($raw, ENT_QUOTES));

        foreach ($candidates as $candidate) {
            // Tabs, newlines and other control characters can be used to
            // split a scheme name ("java<TAB>script:") - drop them first
            $candidate = trim(preg_replace('/[\x00-\x1F\x7F]+/', '', $candidate));

            if (preg_match('/^(?:javascript:|vbscript:|&#[0-9x]+)|\.js$/i', $candidate)) {
                return false;
            }
        }

        return true;
    }

    /**
     * Records a tag as opened.
     *
     * @param string $tag Tag name.
     * @param string $id  Placeholder id of the opening tag.
     */
    function openTag($tag, $id)
    {
        $this->opened_tags[$id] = $tag;
    }

    /**
     * Tells whether a tag with this name is currently open.
     *
     * @param string $tag Tag name.
     * @return bool
     */
    function TagIsOpen($tag)
    {
        // One day we will check opened hierarchy...
        return in_array($tag, $this->opened_tags, true);
    }

    /**
     * Forgets the most recently opened tag with the given name.
     *
     * @param string $tag Tag name.
     * @return bool true when a matching opened tag was found.
     */
    function closeTag($tag)
    {
        foreach (array_reverse($this->opened_tags, true) as $key => $name) {
            if ($name === $tag) {
                unset($this->opened_tags[$key]);
                return true;
            }
        }

        return false;
    }

    /**
     * Removes from the text the placeholders of tags that were never closed.
     */
    function clearOpenedTags()
    {
        foreach ($this->opened_tags as $key => $name) {
            $this->text = str_replace('[[g2x-' . $key . ']]', '', $this->text);
        }
    }

    /**
     * Replaces the span between start_pos - 1 and end_pos (inclusive) of the
     * working text with $value, then resets both positions.
     *
     * @param string $value Replacement text (empty to delete the span).
     */
    function ClearCurrentTag($value = '')
    {
        $before = substr($this->text, 0, $this->start_pos - 1) . $value;

        // Against ugly code: no raw angle bracket is left behind the cursor
        $before = strtr($before, '<>', '[]');

        $after = (string) substr($this->text, $this->end_pos + 1);

        $this->text = $before . $after;
        $this->start_pos = false;
        $this->end_pos = false;
    }

    /**
     * Maps presentational tag names to their replacements.
     *
     * @param string $tag Lower-case tag name.
     * @return string 'em' for 'i', 'strong' for 'b', the name itself otherwise.
     */
    function CleanOutdatedTag($tag)
    {
        switch ($tag) {
            case 'i':
                return 'em';
            case 'b':
                return 'strong';
            default:
                return $tag;
        }
    }
}
?>