.
*/
/*
Usage example:
$g2x = new Garbage2xhtml();
// Set some options
$g2x->remove_forbidden_tags = false;
$g2x->allowed_tags = array('p' => array('align'), 'br' => array(), 'img' => array('src', 'alt'));
echo $g2x->Process($user_text);
*/
// Debug logging stays off unless the including script defined ENABLE_DEBUG beforehand.
defined('ENABLE_DEBUG') || define('ENABLE_DEBUG', false);
if (!function_exists('debugLog'))
{
    /**
     * Append a debug entry to the global debug log.
     *
     * Entries are stored as array($section, $message) pairs in
     * $GLOBALS['_debug_log']. Nothing is recorded unless the ENABLE_DEBUG
     * constant is defined and strictly equal to true.
     *
     * @param mixed $section Label of the subsystem emitting the message
     * @param mixed $message Message to record
     * @return void
     */
    function debugLog($section, $message)
    {
        $enabled = defined('ENABLE_DEBUG') && ENABLE_DEBUG === true;

        if (!$enabled)
        {
            return;
        }

        // Start a fresh log when none exists yet (or when it holds an empty value)
        if (empty($GLOBALS['_debug_log']))
        {
            $GLOBALS['_debug_log'] = array();
        }

        $entry = array($section, $message);
        $GLOBALS['_debug_log'][] = $entry;
    }
}
/**
 * Turns untrusted "tag soup" into well-formed, whitelisted xHTML.
 *
 * The text is scanned tag by tag. Every tag that passes the whitelist is
 * rebuilt from scratch (lower-cased name, allowed attributes only, escaped
 * values) and temporarily swapped for a [[g2x-<md5>]] placeholder; whatever
 * is left over is escaped, and the placeholders are finally expanded again.
 * Tags that are still open at the end of the text are closed automatically.
 */
class Garbage2xhtml
{
    // Allowed tags and attributes
    // Minimal set (default)
    var $allowed_tags = array(
        // 'tag' => array of allowed attributes
        'strong'=> array(),
        'ul'    => array(),
        'li'    => array(),
        'ol'    => array(),
        'p'     => array(),
        'em'    => array(),
        'h3'    => array(),
        'h4'    => array(),
        'h5'    => array(),
        'pre'   => array(),
        'a'     => array('href'),
        'img'   => array('src')
    );

    // Complete xHTML set (uncomment to enable)
    /*
    var $allowed_tags = array(
        // Inline
        'strong'=> array('class'),
        'em' => array('class'),
        'sup' => array('class'),
        'sub' => array('class'),
        'span' => array('class'),
        'abbr' => array('class', 'title'),
        'acronym' => array('class', 'title'),
        'a' => array('class', 'href'),
        'img' => array('class', 'src'),
        'code' => array('class'),
        'cite' => array('class'),
        'del' => array('class'),
        'ins' => array('class'),
        'kbd' => array('class'),
        'samp' => array('class'),
        // Block
        'p' => array('class'),
        'blockquote' => array('class'),
        'ul' => array('class'),
        'li' => array('class'),
        'ol' => array('class'),
        'h3' => array('class'),
        'h4' => array('class'),
        'h5' => array('class'),
        'pre' => array('class'),
        'dl' => array('class'),
        'dt' => array('class'),
        'dd' => array('class'),
        'hr' => array('class'),
        // Table elements
        'table' => array('class'),
        'caption' => array('class'),
        'col' => array('class'),
        'colgroup' => array('class'),
        'thead' => array('class'),
        'tbody' => array('class'),
        'tfoot' => array('class'),
        'tr' => array('class'),
        'td' => array('class', 'rowspan', 'colspan'),
        'th' => array('class', 'rowspan', 'colspan'),
    );
    */

    // If this option is set to false, forbidden opening tags are shown as
    // escaped text (entities) instead of being removed
    var $remove_forbidden_tags = true;

    // Internal use only
    var $opened_tags = array(); // stack of open tag names, most recent first
    var $text = '';             // working copy of the text being processed
    var $start_pos = false;     // offset just after the '<' of the current tag
    var $end_pos = false;       // offset of the '>' of the current tag
    var $mytags = array();      // md5 id => rebuilt tag markup

    /**
     * Clean a piece of user-supplied markup.
     *
     * @param string $string Raw text, possibly containing arbitrary HTML
     * @return string Text containing only whitelisted, properly nested tags
     */
    function Process($string)
    {
        debugLog('g2x', 'Processing text ('.strlen($string).' car.)');

        // Start from a clean slate so the instance can be reused
        $this->opened_tags = array();
        $this->mytags = array();

        // Bare line-break tags become plain newlines; carriage returns are dropped.
        // NOTE(review): a caller that whitelists 'br' only gets <br> tags
        // carrying attributes through - confirm this is the intended behaviour.
        $this->text = preg_replace('/<(br\s*\/?)>/i', "\n", $string);
        $this->text = str_replace("\r", '', $this->text);

        // Escape every ampersand, then restore the ones that were part of an entity
        $this->text = str_replace('&', '&amp;', $this->text);
        $this->text = $this->cleanEntities($this->text);

        // Clean stupid use of minus signs
        $this->text = preg_replace('/<+-+/', '←', $this->text);
        $this->text = preg_replace('/-+>+/', '→', $this->text);
        $this->text = preg_replace('/<<+/', '«', $this->text);
        $this->text = preg_replace('/>>+/', '»', $this->text);

        // Browse tags
        while ($tag = $this->getNextTag())
        {
            // Tag was invalid and has already been dealt with: skip to next
            if ($tag === true)
                continue;

            // Attributes
            $attrs = '';

            if (!empty($tag['attributes']))
            {
                foreach ($tag['attributes'] as $name => $value)
                    $attrs .= ' '.$name.'="'.$value.'"';
            }

            // Rebuild the tag from its validated parts
            $new_tag = '<';

            if (!empty($tag['close']))
                $new_tag .= '/';

            $new_tag .= $tag['name'].$attrs;

            if (!empty($tag['selfclose']))
                $new_tag .= ' /';

            $new_tag .= '>';

            // If closing tag, close every tag opened after it first,
            // innermost first so that the nesting stays valid
            if (!empty($tag['close']))
            {
                $closers = '';
                $last = $this->getLastOpenedTag();

                while ($last !== null && $last != $tag['name'])
                {
                    $closers .= '</'.$last.'>';

                    if ($this->isBlockElement($last))
                        $closers .= "\n";

                    $this->closeLastTag();
                    $last = $this->getLastOpenedTag();
                }

                $new_tag = $closers.$new_tag;
            }

            // Park the validated markup behind a placeholder so that the
            // scanner does not see its angle brackets again
            $id = md5($new_tag);
            $this->mytags[$id] = $new_tag;
            $this->ClearCurrentTag('[[g2x-'.$id.']]');

            // Self-closing tags (<img />, <hr /> and others) never enter the stack
            if (empty($tag['selfclose']))
            {
                if (!empty($tag['close']))
                    $this->closeLastTag();
                else
                    $this->openTag($tag['name']);
            }
        }

        // Whatever angle bracket is left at this point is plain text
        $this->text = strtr($this->text, array('<' => '&lt;', '>' => '&gt;'));
        $this->text = trim($this->text);

        // Take back the tags to their original state. Only placeholders
        // created during this run are expanded; look-alikes typed by the
        // user are left untouched.
        $placeholders = array();

        foreach ($this->mytags as $id => $markup)
            $placeholders['[[g2x-'.$id.']]'] = $markup;

        if (!empty($placeholders))
            $this->text = strtr($this->text, $placeholders);

        $this->closeOpenedTags();
        #$this->cleanCode();

        return $this->text;
    }

    /**
     * Locate and analyse the next tag in the working text.
     *
     * @return mixed false when no '<' is left; true when something was
     *               found but already discarded or escaped; otherwise an
     *               array with 'name', 'attributes' and optionally 'close'
     *               / 'selfclose' (start_pos / end_pos then still point at
     *               the tag so the caller can replace it)
     */
    function getNextTag()
    {
        $pos = strpos($this->text, '<');

        if ($pos === false)
            return false;

        $this->start_pos = $pos + 1;
        $close = strpos($this->text, '>', $this->start_pos);
        $garbage = strpos($this->text, '<', $this->start_pos);

        // A '<' that is never closed, or that is followed by another '<'
        // before any '>', is not a tag: escape that single character and
        // leave the rest of the text alone.
        if ($close === false || ($garbage !== false && $garbage < $close))
        {
            $this->end_pos = $this->start_pos - 1;
            $this->ClearCurrentTag('&lt;');
            return true;
        }

        $this->end_pos = $close;

        $tag = substr($this->text, $this->start_pos, $this->end_pos - $this->start_pos);
        $tag = stripslashes($tag);
        $orig_tag = substr($this->text, $this->start_pos - 1, $this->end_pos - $this->start_pos + 2);

        $datas = array();

        if (preg_match('/^\//', $tag))
        {
            $tag = strtolower(trim(substr($tag, 1)));

            if (!$this->tagIsOpen($tag))
            {
                // Tag closed but never opened? to the bin!
                $this->ClearCurrentTag();
                return true;
            }

            $datas['close'] = true;
        }

        if (preg_match('/\/$/', $tag))
        {
            // A closing tag cannot be self-closing as well, otherwise it
            // would never be popped from the stack
            if (empty($datas['close']))
                $datas['selfclose'] = true;

            $tag = preg_replace('/\s*\/$/', '', $tag);
        }

        $datas['attributes'] = array();

        // Normalise single-quoted values to double quotes
        $tag = preg_replace("/='([^']+)'/", '="\\1"', $tag);

        // Getting all attributes
        if (preg_match_all('/([a-zA-Z]+)\s*=\s*("([^"]+)")/', $tag, $match, PREG_SET_ORDER))
        {
            foreach ($match as $m)
            {
                $attr_name = strtolower(trim($m[1]));

                if ($this->isAttrSecure($m[3]))
                    $datas['attributes'][$attr_name] = $this->cleanAttribute($m[3]);

                $tag = str_replace($m[0], '', $tag);
            }
        }

        // Clean unrecognized garbage attributes
        $tag = preg_replace('/^([a-zA-Z0-9]+).*$/i', '\\1', $tag);
        $tag = trim($tag);

        if (!preg_match('/^([a-zA-Z0-9]+)$/', $tag))
        {
            $this->ClearCurrentTag();
            return true;
        }

        $tag = strtolower($tag);

        if (!isset($this->allowed_tags[$tag]))
        {
            // Tag not allowed ? to trash
            if ($this->remove_forbidden_tags)
            {
                $this->ClearCurrentTag();
            }
            else
            {
                // The text already went through ampersand normalisation, so
                // only brackets and quotes are escaped here (no double encoding)
                $this->ClearCurrentTag(strtr($orig_tag, array('<' => '&lt;', '>' => '&gt;', '"' => '&quot;')));
            }

            return true;
        }

        $datas['name'] = $tag;

        // Keep only allowed attributes
        foreach ($datas['attributes'] as $attr => $value)
        {
            if (!in_array($attr, $this->allowed_tags[$tag], true))
                unset($datas['attributes'][$attr]);
        }

        // Elements that cannot have content (<img>, <hr>, ...) are always
        // written self-closed, whatever the author typed
        if (empty($datas['close']) && $this->isEmptyElement($tag))
            $datas['selfclose'] = true;

        // xHTML requires an alt attribute on images
        if ($datas['name'] == 'img' && !isset($datas['attributes']['alt']))
            $datas['attributes']['alt'] = '';

        return $datas;
    }

    /**
     * Tell whether an attribute value is safe to keep.
     *
     * The value is checked after decoding entities and removing whitespace
     * and control characters, since browsers ignore those when resolving a
     * URL scheme ("JaVaScript:", "java&#9;script:", "&#106;avascript:").
     *
     * @param string $attr Raw attribute value
     * @return bool false if the value must be dropped
     */
    function isAttrSecure($attr)
    {
        $flags = defined('ENT_HTML5') ? (ENT_QUOTES | ENT_HTML5) : ENT_QUOTES;
        $probe = html_entity_decode((string) $attr, $flags, 'UTF-8');
        $probe = preg_replace('/[\x00-\x20]+/', '', $probe);

        if (preg_match('/^(?:javascript|vbscript):|\.js$/i', $probe))
            return false;

        // NOTE(review): legacy rule kept as is - it rejects every value
        // starting with a digit or a lower-case "x" (so colspan="2" or
        // href="x.html" are dropped). Confirm whether this is intended.
        if (preg_match('/^[0-9x]+/', $probe))
            return false;

        return true;
    }

    /**
     * Escape an attribute value for output between double quotes.
     *
     * Existing entities are preserved, bare ampersands, angle brackets and
     * both kinds of quotes are escaped. Already clean values come back
     * unchanged.
     *
     * @param string $attr Attribute value
     * @return string Escaped value
     */
    function cleanAttribute($attr)
    {
        $attr = str_replace('&', '&amp;', $attr);
        $attr = $this->cleanEntities($attr);

        return strtr($attr, array(
            '<' => '&lt;',
            '>' => '&gt;',
            '"' => '&quot;',
            "'" => '&#039;'
        ));
    }

    /**
     * @param string $tag Lower-case tag name
     * @return bool true if the closing tag should be followed by a newline
     */
    function isBlockElement($tag)
    {
        if (preg_match('/^(h[1-6]|p|object|pre|blockquote|div|dl|fieldset|form|ol|ul|table|address|hr)$/', $tag))
            return true;

        return false;
    }

    /**
     * @param string $tag Lower-case tag name
     * @return bool true for xHTML elements that never have content
     */
    function isEmptyElement($tag)
    {
        $empty = array(
            'area', 'base', 'basefont', 'br', 'col', 'frame', 'hr',
            'img', 'input', 'isindex', 'link', 'meta', 'param'
        );

        return in_array($tag, $empty, true);
    }

    /**
     * @return string|null Name of the most recently opened tag, null if none
     */
    function getLastOpenedTag()
    {
        return isset($this->opened_tags[0]) ? $this->opened_tags[0] : null;
    }

    /**
     * Push a tag name on top of the stack of open tags.
     *
     * @param string $tag Lower-case tag name
     */
    function openTag($tag)
    {
        array_unshift($this->opened_tags, $tag);
    }

    /**
     * @param string $tag Lower-case tag name
     * @return bool true if the tag is somewhere in the stack of open tags
     */
    function tagIsOpen($tag)
    {
        return in_array($tag, $this->opened_tags, true);
    }

    /**
     * Pop the most recently opened tag from the stack.
     *
     * @return bool Always true
     */
    function closeLastTag()
    {
        array_shift($this->opened_tags);
        return true;
    }

    /**
     * Append the closing tags of everything still open, innermost first,
     * and empty the stack.
     */
    function closeOpenedTags()
    {
        foreach ($this->opened_tags as $tag)
        {
            $this->text .= '</'.$tag.'>';

            if ($this->isBlockElement($tag))
                $this->text .= "\n";
        }

        $this->opened_tags = array();
    }

    /**
     * Collapse runs of spaces and tabs into a single space (currently unused).
     */
    function cleanCode()
    {
        $this->text = preg_replace('/[ \t]{2,}/', ' ', $this->text);
    }

    /**
     * Undo the escaping of ampersands that belong to an entity:
     * "&amp;eacute;" becomes "&eacute;" again, "&amp; " stays as it is.
     *
     * @param string $str Text in which every "&" was replaced by "&amp;"
     * @return string
     */
    function cleanEntities($str)
    {
        return preg_replace('/&amp;(#[0-9]+|#x[0-9a-f]+|[a-z][a-z0-9]*);/i', '&\\1;', $str);
    }

    /**
     * Replace the span start_pos-1 .. end_pos of the working text (the
     * current tag, brackets included) with $value and reset both positions.
     *
     * Any angle bracket found before the replaced span or in $value is turned
     * into a square bracket so that it cannot be mistaken for a tag later on.
     *
     * @param string $value Replacement text
     */
    function ClearCurrentTag($value = '')
    {
        $text = substr($this->text, 0, $this->start_pos - 1);
        $text .= $value;

        // Against ugly code
        $text = strtr($text, '<>', '[]');

        $text .= substr($this->text, $this->end_pos + 1);

        $this->text = $text;
        $this->start_pos = false;
        $this->end_pos = false;
    }
}
?>