remove_forbidden_tags = false;
$g2x->allowed_tags = array('p' => array('align'), 'br' => array(), 'img' => array('src', 'alt'));
echo $g2x->Process($user_text);
*/
/**
 * Whitelist-based cleaner that turns untrusted HTML fragments into a
 * restricted xHTML subset.
 *
 * Every tag found in the input is either rewritten in a canonical form
 * (lower-case name, double-quoted whitelisted attributes) or neutralised.
 * Vetted tags are temporarily swapped for "[[g2x-<id>]]" placeholders so that
 * any "<" or ">" still present at the end can be turned into "[" and "]"
 * before the vetted tags are put back.
 */
class Garbage2xhtml
{
    // Allowed tags and attributes: 'tag' => array of allowed attribute names
    // Minimal set (default)
    var $allowed_tags = array(
        'strong' => array(),
        'ul'     => array(),
        'li'     => array(),
        'ol'     => array(),
        'p'      => array(),
        'em'     => array(),
        'h3'     => array(),
        'h4'     => array(),
        'h5'     => array(),
        'pre'    => array(),
        'a'      => array('href'),
        'img'    => array('src')
    );

    // Complete xHTML set (uncomment to enable)
    /*
    var $allowed_tags = array(
        // Inline
        'strong'=> array('class'),
        'em' => array('class'),
        'sup' => array('class'),
        'sub' => array('class'),
        'span' => array('class'),
        'abbr' => array('class', 'title'),
        'acronym' => array('class', 'title'),
        'a' => array('class', 'href'),
        'img' => array('class', 'src'),
        'code' => array('class'),
        'cite' => array('class'),
        'del' => array('class'),
        'ins' => array('class'),
        'kbd' => array('class'),
        'samp' => array('class'),
        // Block
        'p' => array('class'),
        'blockquote' => array('class'),
        'ul' => array('class'),
        'li' => array('class'),
        'ol' => array('class'),
        'h3' => array('class'),
        'h4' => array('class'),
        'h5' => array('class'),
        'pre' => array('class'),
        'dl' => array('class'),
        'dt' => array('class'),
        'dd' => array('class'),
        'hr' => array('class'),
        // Table elements
        'table' => array('class'),
        'caption' => array('class'),
        'col' => array('class'),
        'colgroup' => array('class'),
        'thead' => array('class'),
        'tbody' => array('class'),
        'tfoot' => array('class'),
        'tr' => array('class'),
        'td' => array('class', 'rowspan', 'colspan'),
        'th' => array('class', 'rowspan', 'colspan'),
    );
    */

    // true: non-allowed tags are removed.
    // false: non-allowed tags are kept as text, entity-encoded with htmlspecialchars().
    var $remove_forbidden_tags = true;

    // Internal use only
    var $opened_tags = array(); // placeholder id => tag name, for tags opened and not yet closed
    var $text = '';             // working copy of the string being processed
    var $start_pos = false;     // offset just after the "<" of the tag being examined
    var $end_pos = false;       // offset of the ">" of the tag being examined
    var $mytags = array();      // placeholder id => canonical tag markup

    /**
     * Cleans a string and returns the resulting markup.
     *
     * @param string $string Untrusted text / HTML.
     * @return string Text in which only whitelisted tags and attributes remain.
     */
    function Process($string)
    {
        // Start from a clean state so one instance can process several
        // strings without tags left open by a previous call leaking in.
        $this->opened_tags = array();
        $this->mytags = array();
        $this->start_pos = false;
        $this->end_pos = false;

        // Normalise <br>, <br/>, <img ...> and <img .../> to the xHTML
        // self-closing form so they are not mistaken for unclosed tags.
        $string = (string) $string;
        $normalized = preg_replace('/<(br|img)\b([^>]*?)\s*\/?>/i', '<\\1\\2 />', $string);
        // preg_replace() returns null on failure: fall back to the raw input
        // instead of silently discarding the whole text.
        $this->text = ($normalized === null) ? $string : $normalized;

        // Browse tags
        while ($tag = $this->getNextTag())
        {
            // Tag was invalid and has already been dealt with: skip to next
            if ($tag === true)
                continue;

            // Attributes
            $attrs = '';
            if (!empty($tag['attributes']))
            {
                foreach ($tag['attributes'] as $name => $value)
                    $attrs .= ' '.$name.'="'.$value.'"';
            }

            // Canonical form of the tag
            $new_tag = '<';
            if (!empty($tag['close']))
                $new_tag .= '/';
            $new_tag .= $tag['name'].$attrs;
            if (!empty($tag['selfclose']))
                $new_tag .= ' /';
            $new_tag .= '>';

            // Swap the tag for a placeholder that contains no "<" or ">",
            // which tells checked tags apart from unchecked markup.
            // The running count makes the id unique per occurrence, so two
            // identical tags are tracked (and cleared) independently.
            $id = md5(count($this->mytags).':'.$new_tag);
            $this->mytags[$id] = $new_tag;
            $this->ClearCurrentTag('[[g2x-'.$id.']]');

            // Self-closing tags (like <br />) need no open/close bookkeeping
            if (empty($tag['selfclose']))
            {
                if (!empty($tag['close']))
                    $this->closeTag($tag['name']);
                else
                    $this->openTag($tag['name'], $id);
            }
        }

        // Delete opened tags which are not closed
        $this->clearOpenedTags();

        // Any "<" or ">" still present is unchecked: neutralise it
        $this->text = strtr($this->text, '<>', '[]');
        $this->text = trim($this->text);

        // Put the checked tags back. strtr() with an array replaces in a
        // single pass, so inserted markup is never rescanned and
        // placeholder-looking text with an unknown id is left untouched.
        $restore = array();
        foreach ($this->mytags as $id => $markup)
            $restore['[[g2x-'.$id.']]'] = $markup;
        if (!empty($restore))
            $this->text = strtr($this->text, $restore);

        return $this->text;
    }

    /**
     * Examines the first "<" remaining in $this->text.
     *
     * @return mixed false when no "<" is left; true when the tag was invalid
     *               and has already been removed or neutralised; otherwise an
     *               array with 'name', 'attributes' and optionally 'close' /
     *               'selfclose', with start_pos / end_pos left pointing at the
     *               tag so the caller can replace it via ClearCurrentTag().
     */
    function getNextTag()
    {
        $open = strpos($this->text, '<');
        if ($open === false)
            return false;

        $this->start_pos = $open + 1;
        $close = strpos($this->text, '>', $this->start_pos);
        $next_open = strpos($this->text, '<', $this->start_pos);

        // A "<" that is not closed before the next "<" (or before the end of
        // the text) is not a tag. Neutralise that single character only, so
        // the text that follows it is preserved.
        if ($close === false || ($next_open !== false && $next_open < $close))
        {
            $this->end_pos = $open;
            $this->ClearCurrentTag('[');
            return true;
        }
        $this->end_pos = $close;

        $orig_tag = substr($this->text, $open, $close - $open + 1);
        $tag = (string) substr($this->text, $this->start_pos, $close - $this->start_pos);
        $tag = stripslashes($tag);

        $datas = array('attributes' => array());

        // Closing tag?
        if ($tag !== '' && $tag[0] === '/')
        {
            $datas['close'] = true;
            $tag = (string) substr($tag, 1);
        }

        // Self-closing tag? (a trailing slash on a closing tag is just dropped)
        if (preg_match('/\/$/', $tag))
        {
            if (empty($datas['close']))
                $datas['selfclose'] = true;
            $tag = preg_replace('/\s*\/$/', '', $tag);
        }

        // Single-quoted values are rewritten with double quotes
        $tag = preg_replace("/='([^']+)'/", '="\\1"', $tag);

        // Getting all attributes
        if (preg_match_all('/([a-zA-Z]+)\s*=\s*("([^"]+)")/', $tag, $match, PREG_SET_ORDER))
        {
            foreach ($match as $m)
            {
                $attr_name = strtolower(trim($m[1]));
                if ($this->isAttrSecure($m[3]))
                    $datas['attributes'][$attr_name] = $m[3];
                $tag = str_replace($m[0], '', $tag);
            }
        }

        // Keep the tag name only and drop unrecognized garbage after it.
        // Digits are part of the name, otherwise "h3" would be cut to "h".
        $tag = preg_replace('/^([a-z][a-z0-9]*).*$/is', '\\1', $tag);
        $tag = trim($tag);

        if (!preg_match('/^([a-zA-Z0-9]+)$/', $tag))
        {
            // Not something that looks like a tag name: to trash
            $this->ClearCurrentTag();
            return true;
        }

        // Normalise the name BEFORE any lookup, so that </B>, </b> and
        // </strong> all match a tag opened as <b>.
        $tag = strtolower($tag);
        $tag = $this->CleanOutdatedTag($tag);

        $forbidden = !array_key_exists($tag, $this->allowed_tags);
        $orphan = !empty($datas['close']) && !$this->TagIsOpen($tag);
        if ($forbidden || $orphan)
        {
            // Tag not allowed, or closed but never opened? to trash
            if ($this->remove_forbidden_tags)
                $this->ClearCurrentTag();
            else
                $this->ClearCurrentTag(htmlspecialchars($orig_tag));
            return true;
        }

        $datas['name'] = $tag;

        if (!empty($datas['close']))
        {
            // A closing tag carries no attributes
            $datas['attributes'] = array();
        }
        else
        {
            // Keep only allowed attributes
            $allowed_attrs = (array) $this->allowed_tags[$tag];
            foreach ($datas['attributes'] as $attr => $value)
            {
                if (!in_array($attr, $allowed_attrs, true))
                    unset($datas['attributes'][$attr]);
            }
        }

        return $datas;
    }

    /**
     * Tells whether an attribute value may be written to the output.
     *
     * @param string $attr Raw attribute value (without the quotes).
     * @return bool false for script URLs and other rejected values.
     */
    function isAttrSecure($attr)
    {
        $value = trim($attr);

        // Values starting with a digit or "x", or ending in ".js", are rejected.
        // NOTE(review): the first rule also rejects benign values such as
        // colspan="2" or href="x.html" - confirm this is intended.
        if (preg_match('/^[0-9x]|\.[jJ][sS]$/', $value))
            return false;

        // Browsers decode character references and ignore whitespace and
        // control characters inside a URL scheme, so check what the browser
        // would see rather than the raw text.
        // First terminate numeric references that lack their ";" ...
        $probe = (string) preg_replace('/&#(?:x[0-9a-f]+(?![0-9a-f;])|[0-9]+(?![0-9;]))/i', '$0;', $value);
        // ... then decode (HTML5 table when available: &colon;, &Tab;, ...)
        $flags = defined('ENT_HTML5') ? (ENT_QUOTES | ENT_HTML5) : ENT_QUOTES;
        $probe = html_entity_decode($probe, $flags, 'UTF-8');
        $probe = (string) preg_replace('/[\x00-\x20\x7f]+/', '', $probe);

        if (preg_match('/^(javascript|vbscript):/i', $probe))
            return false;

        return true;
    }

    /**
     * Records a tag as opened.
     *
     * @param string $tag Tag name.
     * @param string $id  Placeholder id standing for that tag in the text.
     */
    function openTag($tag, $id)
    {
        $this->opened_tags[$id] = $tag;
    }

    /**
     * Tells whether a tag with this name is currently recorded as opened.
     *
     * @param string $tag Tag name.
     * @return bool
     */
    function TagIsOpen($tag)
    {
        // One day we will check opened hierarchy...
        return in_array($tag, $this->opened_tags, true);
    }

    /**
     * Marks the most recently opened tag with this name as closed.
     *
     * @param string $tag Tag name.
     * @return bool true when a matching opened tag was found.
     */
    function closeTag($tag)
    {
        // Walk from the latest opened tag back to the first one
        $tags = array_reverse($this->opened_tags, true);
        foreach ($tags as $key => $t)
        {
            if ($t == $tag)
            {
                unset($this->opened_tags[$key]);
                return true;
            }
        }
        return false;
    }

    /**
     * Removes from the text the placeholders of tags that were opened but
     * never closed.
     */
    function clearOpenedTags()
    {
        foreach ($this->opened_tags as $key => $t)
        {
            $this->text = str_replace('[[g2x-'.$key.']]', '', $this->text);
        }
    }

    /**
     * Replaces the span delimited by start_pos / end_pos (the current tag,
     * brackets included) with $value, then resets both positions.
     *
     * @param string $value Replacement text; empty to delete the tag.
     */
    function ClearCurrentTag($value = '')
    {
        $text = substr($this->text, 0, $this->start_pos - 1);
        $text .= $value;
        // Against ugly code: nothing before the cursor may keep a "<" or ">"
        $text = strtr($text, '<>', '[]');
        $text .= substr($this->text, $this->end_pos + 1);
        $this->text = $text;
        $this->start_pos = false;
        $this->end_pos = false;
    }

    /**
     * Maps presentational tag names to their replacement.
     *
     * @param string $tag Lower-case tag name.
     * @return string "em" for "i", "strong" for "b", otherwise $tag unchanged.
     */
    function CleanOutdatedTag($tag)
    {
        switch ($tag)
        {
            case 'i':
                return 'em';
            case 'b':
                return 'strong';
            default:
                return $tag;
        }
    }
}
?>