* @license BSD License */
class Parser
{
    /**
     * Pipe-separated names of the tags parseInline leaves unescaped even when raw HTML is disabled.
     *
     * @var string
     */
    private $_commonWhiteList = 'kbd|b|i|strong|em|sup|sub|br|code|del|a|hr|small';

    /**
     * Pipe-separated names (regex fragments) of block-level HTML tags.
     *
     * @var string
     */
    private $_blockHtmlTags = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|form|fieldset|iframe|hr|legend|article|section|nav|aside|hgroup|header|footer|figcaption|svg|script|noscript';

    /**
     * Additional whitelists keyed by block type; parseHtml hands the matching entry to parseInline.
     *
     * @var mixed
     * @access private
     */
    private $_specialWhiteList = array(
        'table' => 'table|tbody|thead|tfoot|tr|td|th',
    );

    /**
     * Footnotes collected during the current run, keyed by their 1-based id.
     *
     * @var array
     */
    private $_footnotes;

    /**
     * Raw-HTML switch, set through enableHtml().
     *
     * @var bool
     */
    private $_html = false;

    /**
     * Flag set through enableLine().
     *
     * @var bool
     */
    private $_line = false;

    /**
     * Block parser table. Each entry is array(name, priority[, callable]); makeHtml() sorts it by
     * priority and resolves entries without a callable to the method "parseBlock" . ucfirst(name).
     *
     * @var array
     */
    private $blockParsers = array(
        array('code', 10),
        array('shtml', 20),
        array('pre', 30),
        array('ahtml', 40),
        array('shr', 50),
        array('list', 60),
        array('math', 70),
        array('html', 80),
        array('footnote', 90),
        array('definition', 100),
        array('quote', 110),
        array('table', 120),
        array('sh', 130),
        array('mh', 140),
        array('dhr', 150),
        array('default', 9999),
    );

    /**
     * Recorded blocks; each entry is array(type, first line, last line, value).
     *
     * @var array
     */
    private $_blocks;

    /**
     * Type of the block currently being built.
     *
     * @var string
     */
    private $_current;

    /**
     * Index of the current entry in $_blocks.
     *
     * @var int
     */
    private $_pos;

    /**
     * Reference definitions, looked up by name for reference-style links and images.
     *
     * @var array
     */
    private $_definitions;

    /**
     * Callbacks registered through hook(), grouped by hook type.
     *
     * @var array
     */
    private $_hooks = array();

    /**
     * Placeholder key => protected string, filled by makeHolder().
     *
     * @var array
     */
    private $_holders;

    /**
     * Per-run token embedded in every placeholder key.
     *
     * @var string
     */
    private $_uniqid;

    /**
     * Running counter appended to placeholder keys.
     *
     * @var int
     */
    private $_id;

    /**
     * Block parser name => callable, built by makeHtml().
     *
     * @var array
     */
    private $_parsers = array();

    /**
     * Convert a text to HTML.
     *
     * Resets the per-run state, orders the block parsers by priority, resolves each parser name
     * to a callable and then runs initText(), parse(), makeFootnotes() and optimizeLines() in
     * that order. The result is passed through the 'makeHtml' hook before being returned.
     *
     * @param mixed $text
     * @return string
     */
    public function makeHtml($text)
    {
        $this->_footnotes = array();
        $this->_definitions = array();
        $this->_holders = array();
        $this->_uniqid = md5(uniqid());
        $this->_id = 0;

        usort($this->blockParsers, function ($a, $b) {
            // A comparator must report equal priorities as 0; otherwise both
            // orderings of a tie claim to be "greater" and the result is undefined.
            if ($a[1] == $b[1]) {
                return 0;
            }

            return $a[1] < $b[1] ? -1 : 1;
        });

        foreach ($this->blockParsers as $parser) {
            list($name) = $parser;

            if (isset($parser[2])) {
                // an explicit callable was supplied with the table entry
                $this->_parsers[$name] = $parser[2];
            } else {
                $this->_parsers[$name] = array($this, 'parseBlock' . ucfirst($name));
            }
        }

        $text = $this->initText($text);
        $html = $this->parse($text);
        $html = $this->makeFootnotes($html);
        $html = $this->optimizeLines($html);

        return $this->call('makeHtml', $html);
    }

    /**
     * Switch raw-HTML handling on or off.
     *
     * @param bool $html
     */
    public function enableHtml($html = true)
    {
        $this->_html = $html;
    }

    /**
     * Set the line flag.
     *
     * @param bool $line
     */
    public function enableLine($line = true)
    {
        $this->_line = $line;
    }

    /**
     * Register a callback for a hook type; callbacks of one type are kept in registration order.
     *
     * @param $type
     * @param $callback
     */
    public function hook($type, $callback)
    {
        $this->_hooks[$type][] = $callback;
    }

    /**
     * Store a string under a fresh placeholder key and return that key.
     *
     * The key is "\r" + the per-run token + the running counter + "\r".
     *
     * @param $str
     * @return string
     */
    public function makeHolder($str)
    {
        $key = "\r" . $this->_uniqid . $this->_id . "\r";
        $this->_id++;
        $this->_holders[$key] = $str;

        return $key;
    }

    /**
     * Normalise the input: replace tabs and drop carriage returns.
     *
     * @param $text
     * @return mixed
     */
    private function initText($text)
    {
        $text = str_replace(array("\t", "\r"), array(' ', ''), $text);

        return $text;
    }

    /**
     * @param $html
     * @return string
     */
    private function makeFootnotes($html)
    {
        if (count($this->_footnotes) > 0) {
            $html .= '
' . htmlspecialchars($matches[3]) . '
'
);
},
$text
);
// mathjax
$text = preg_replace_callback(
"/(^|[^\\\])(\\$+)(.+?)\\2/",
function ($matches) {
return $matches[1] . $this->makeHolder(
$matches[2] . htmlspecialchars($matches[3]) . $matches[2]
);
},
$text
);
// escape
$text = preg_replace_callback(
"/\\\(.)/u",
function ($matches) {
$prefix = preg_match("/^[-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|]$/", $matches[1]) ? '' : '\\';
$escaped = htmlspecialchars($matches[1]);
$escaped = str_replace('$', '$', $escaped);
return $this->makeHolder($prefix . $escaped);
},
$text
);
// link
$text = preg_replace_callback(
"/<(https?:\/\/.+|(?:mailto:)?[_a-z0-9-\.\+]+@[_\w-]+(?:\.[a-z]{2,})+)>/i",
function ($matches) {
$url = $this->cleanUrl($matches[1]);
$link = $this->call('parseLink', $url);
return $this->makeHolder(
"{$link}"
);
},
$text
);
// encode unsafe tags
$text = preg_replace_callback(
"/<(\/?)([a-z0-9-]+)(\s+[^>]*)?>/i",
function ($matches) use ($whiteList) {
if ($this->_html || false !== stripos(
'|' . $this->_commonWhiteList . '|' . $whiteList . '|', '|' . $matches[2] . '|'
)) {
return $this->makeHolder($matches[0]);
} else {
return $this->makeHolder(htmlspecialchars($matches[0]));
}
},
$text
);
if ($this->_html) {
$text = preg_replace_callback("//", function ($matches) {
return $this->makeHolder($matches[0]);
}, $text);
}
$text = str_replace(array('<', '>'), array('<', '>'), $text);
// footnote
$text = preg_replace_callback(
"/\[\^((?:[^\]]|\\\\\]|\\\\\[)+?)\]/",
function ($matches) {
$id = array_search($matches[1], $this->_footnotes);
if (false === $id) {
$id = count($this->_footnotes) + 1;
$this->_footnotes[$id] = $this->parseInline($matches[1], '', false);
}
return $this->makeHolder(
"{$id}"
);
},
$text
);
// image
$text = preg_replace_callback(
"/!\[((?:[^\]]|\\\\\]|\\\\\[)*?)\]\(((?:[^\)]|\\\\\)|\\\\\()+?)\)/",
function ($matches) {
$escaped = htmlspecialchars($this->escapeBracket($matches[1]));
$url = $this->escapeBracket($matches[2]);
list($url, $title) = $this->cleanUrl($url, true);
$title = empty($title) ? $escaped : " title=\"{$title}\"";
return $this->makeHolder(
""
);
},
$text
);
$text = preg_replace_callback(
"/!\[((?:[^\]]|\\\\\]|\\\\\[)*?)\]\[((?:[^\]]|\\\\\]|\\\\\[)+?)\]/",
function ($matches) {
$escaped = htmlspecialchars($this->escapeBracket($matches[1]));
$result = isset($this->_definitions[$matches[2]]) ?
"_definitions[$matches[2]]}\" alt=\"{$escaped}\" title=\"{$escaped}\">"
: $escaped;
return $this->makeHolder($result);
},
$text
);
// link
$text = preg_replace_callback(
"/\[((?:[^\]]|\\\\\]|\\\\\[)+?)\]\(((?:[^\)]|\\\\\)|\\\\\()+?)\)/",
function ($matches) {
$escaped = $this->parseInline(
$this->escapeBracket($matches[1]), '', false, false
);
$url = $this->escapeBracket($matches[2]);
list($url, $title) = $this->cleanUrl($url, true);
$title = empty($title) ? '' : " title=\"{$title}\"";
return $this->makeHolder("{$escaped}");
},
$text
);
$text = preg_replace_callback(
"/\[((?:[^\]]|\\\\\]|\\\\\[)+?)\]\[((?:[^\]]|\\\\\]|\\\\\[)+?)\]/",
function ($matches) {
$escaped = $this->parseInline(
$this->escapeBracket($matches[1]), '', false
);
$result = isset($this->_definitions[$matches[2]]) ?
"_definitions[$matches[2]]}\">{$escaped}"
: $escaped;
return $this->makeHolder($result);
},
$text
);
// emphasis markers: asterisks, underscores and double tildes
$text = $this->parseInlineCallback($text);
$text = preg_replace(
"/<([_a-z0-9-\.\+]+@[^@]+\.[a-z]{2,})>/i",
"\\1",
$text
);
// autolink url
if ($enableAutoLink) {
$text = preg_replace_callback(
"/(^|[^\"])(https?:\/\/(www\.)?[-a-zA-Z0-9@:%._\+~#=]{1,256}\b([-a-zA-Z0-9@:%_\+.~#?&\/=]*)|(?:mailto:)?[_a-z0-9-\.\+]+@[_\w-]+(?:\.[a-z]{2,})+)($|[^\"])/",
function ($matches) {
$url = $this->cleanUrl($matches[2]);
$link = $this->call('parseLink', $matches[2]);
return "{$matches[1]}{$link}{$matches[5]}";
},
$text
);
}
$text = $this->call('afterParseInlineBeforeRelease', $text);
$text = $this->releaseHolder($text, $clearHolders);
$text = $this->call('afterParseInline', $text);
return $text;
}
/**
* @param $text
* @return mixed
*/
private function parseInlineCallback($text)
{
$text = preg_replace_callback(
"/(\*{3})(.+?)\\1/",
function ($matches) {
return '' .
$this->parseInlineCallback($matches[2]) .
'';
},
$text
);
$text = preg_replace_callback(
"/(\*{2})(.+?)\\1/",
function ($matches) {
return '' .
$this->parseInlineCallback($matches[2]) .
'';
},
$text
);
$text = preg_replace_callback(
"/(\*)(.+?)\\1/",
function ($matches) {
return '' .
$this->parseInlineCallback($matches[2]) .
'';
},
$text
);
$text = preg_replace_callback(
"/(\s+|^)(_{3})(.+?)\\2(\s+|$)/",
function ($matches) {
return $matches[1] . '' .
$this->parseInlineCallback($matches[3]) .
'' . $matches[4];
},
$text
);
$text = preg_replace_callback(
"/(\s+|^)(_{2})(.+?)\\2(\s+|$)/",
function ($matches) {
return $matches[1] . '' .
$this->parseInlineCallback($matches[3]) .
'' . $matches[4];
},
$text
);
$text = preg_replace_callback(
"/(\s+|^)(_)(.+?)\\2(\s+|$)/",
function ($matches) {
return $matches[1] . '' .
$this->parseInlineCallback($matches[3]) .
'' . $matches[4];
},
$text
);
$text = preg_replace_callback(
"/(~{2})(.+?)\\1/",
function ($matches) {
return ''
. $str . '
';
}
/**
 * Render an indented code block.
 *
 * Drops the first four characters of every line, HTML-escapes what is left,
 * passes the lines through markLines() and joins them with newlines.
 *
 * @param array $lines lines of the block
 * @param mixed $value block value (not used by this method)
 * @param int $start index of the first line, forwarded to markLines()
 * @return string empty string when the joined text is empty or whitespace only
 */
private function parsePre(array $lines, $value, $start)
{
    $escaped = array();

    foreach ($lines as $index => $raw) {
        $escaped[$index] = htmlspecialchars(substr($raw, 4));
    }

    $body = implode("\n", $this->markLines($escaped, $start));

    if (preg_match("/^\s*$/", $body)) {
        return '';
    }

    return '' . $body . '
';
}
/**
 * Render a block by passing its lines through markLines() unchanged.
 *
 * The lines are joined with newlines and surrounding whitespace is trimmed.
 *
 * @param array $lines lines of the block
 * @param mixed $value block value (not used by this method)
 * @param int $start index of the first line, forwarded to markLines()
 * @return string
 */
private function parseAhtml(array $lines, $value, $start)
{
    $marked = $this->markLines($lines, $start);
    $joined = implode("\n", $marked);

    return trim($joined);
}
/**
 * Render a block whose first and last line are discarded.
 *
 * The remaining lines go through markLines() with the start index moved
 * forward by one, are joined with newlines and trimmed.
 *
 * @param array $lines lines of the block, including the two outer lines
 * @param mixed $value block value (not used by this method)
 * @param int $start index of the first line of the block
 * @return string
 */
private function parseShtml(array $lines, $value, $start)
{
    $inner = array_slice($lines, 1, -1);
    $marked = $this->markLines($inner, $start + 1);

    return trim(implode("\n", $marked));
}
/**
* parseMath
*
* @param array $lines
* @param mixed $value
* @param int $start
* @param int $end
* @return string
*/
private function parseMath(array $lines, $value, $start, $end)
{
return '' . $this->markLine($start, $end) . htmlspecialchars(implode("\n", $lines)) . '
'; } /** * parseSh * * @param array $lines * @param int $num * @param int $start * @param int $end * @return string */ private function parseSh(array $lines, $num, $start, $end) { $line = $this->markLine($start, $end) . $this->parseInline(trim($lines[0], '# ')); return preg_match("/^\s*$/", $line) ? '' : "' . $this->parse($str, true, $start) . ''; } /** * parseList * * @param array $lines * @param mixed $value * @param int $start * @return string */ private function parseList(array $lines, $value, $start) { $html = ''; list($space, $type, $tab) = $value; $rows = array(); $suffix = ''; $last = 0; foreach ($lines as $key => $line) { if (preg_match("/^(\s{" . $space . "})((?:[0-9]+\.?)|\-|\+|\*)(\s+)(.*)$/i", $line, $matches)) { if ($type == 'ol' && $key == 0) { $start = intval($matches[2]); if ($start != 1) { $suffix = ' start="' . $start . '"'; } } $rows[] = [$matches[4]]; $last = count($rows) - 1; } else { $rows[$last][] = preg_replace("/^\s{" . ($tab + $space) . "}/", '', $line); } } foreach ($rows as $row) { $html .= "
";
}, $str);
$str = preg_replace("/\n/", "
", $str);
return preg_match("/^\s*$/", $str) ? '' : ($inline ? $str : "
{$str}
");
}

/**
 * Attach the lines of a footnote definition to its registered footnote.
 *
 * Looks the note up in $this->_footnotes; when it is found, the leading
 * "[^...]:" marker is removed from the first line and the lines replace
 * the stored entry. Nothing is stored for a note that is not registered.
 *
 * @param array $lines lines of the definition
 * @param array $value block value; index 1 holds the note to look up
 * @return string always an empty string
 */
private function parseFootnote(array $lines, array $value)
{
    list(, $note) = $value;
    $index = array_search($note, $this->_footnotes);

    if (false !== $index) {
        // NOTE(review): inside double quotes "\\]" and "\\[" reach PCRE as \] and \[,
        // i.e. plain brackets rather than backslash-escaped ones — confirm this is intended.
        $lines[0] = preg_replace("/^\[\^((?:[^\]]|\\]|\\[)+?)\]:/", '', $lines[0]);
        $this->_footnotes[$index] = $lines;
    }

    return '';
}

/**
 * Definition blocks produce no output of their own.
 *
 * @return string always an empty string
 */
private function parseDefinition()
{
    return '';
}

/**
 * Render the lines of an HTML block.
 *
 * Every line is run through parseInline() with the extra whitelist registered
 * for $type in $_specialWhiteList (or '' when there is none); the result goes
 * through markLines() and is joined with newlines.
 *
 * @param array $lines lines of the block
 * @param string $type key looked up in $_specialWhiteList
 * @param int $start index of the first line, forwarded to markLines()
 * @return string
 */
private function parseHtml(array $lines, $type, $start)
{
    $whiteList = isset($this->_specialWhiteList[$type]) ? $this->_specialWhiteList[$type] : '';

    foreach ($lines as &$line) {
        $line = $this->parseInline($line, $whiteList);
    }
    // release the reference so $line no longer aliases the last element
    unset($line);

    return implode("\n", $this->markLines($lines, $start));
}

/**
 * Sanitise a URL and optionally split off a title.
 *
 * With $parseTitle, everything after the first space is treated as the title:
 * it is stripped of surrounding spaces and quotes and HTML-escaped. Quotes,
 * angle brackets and whitespace are then removed from the URL, a bare e-mail
 * address gets a "mailto:" prefix, and a URL whose scheme is neither http,
 * https nor mailto is replaced by '#'.
 *
 * @param string $url
 * @param bool $parseTitle
 *
 * @return mixed array($url, $title) when $parseTitle is true, otherwise the URL string
 */
private function cleanUrl($url, $parseTitle = false)
{
    $title = null;
    $url = trim($url);

    if ($parseTitle) {
        $pos = strpos($url, ' ');

        if ($pos !== false) {
            $title = htmlspecialchars(trim(substr($url, $pos + 1), ' "\''));
            $url = substr($url, 0, $pos);
        }
    }

    $url = preg_replace("/[\"'<>\s]/", '', $url);

    if (preg_match("/^(mailto:)?[_a-z0-9-\.\+]+@[_\w-]+(?:\.[a-z]{2,})+$/i", $url, $matches)) {
        if (empty($matches[1])) {
            $url = 'mailto:' . $url;
        }
    }

    if (preg_match("/^\w+:/i", $url) && !preg_match("/^(https?|mailto):/i", $url)) {
        // Rejected scheme. Fall through to the common return so that callers which
        // destructure the result still receive a (url, title) pair instead of a bare string.
        $url = '#';
    }

    return $parseTitle ? [$url, $title] : $url;
}

/**
 * Turn backslash-escaped square brackets and parentheses back into the plain characters.
 *
 * @param $str
 * @return mixed
 */
private function escapeBracket($str)
{
    return str_replace(
        array('\[', '\]', '\(', '\)'),
        array('[', ']', '(', ')'),
        $str
    );
}

/**
 * Open a new block and make it the current one.
 *
 * The block is recorded as array(type, start, start, value), i.e. it initially
 * spans the single line $start.
 *
 * @param mixed $type
 * @param mixed $start
 * @param mixed $value
 * @return $this
 */
private function startBlock($type, $start, $value = null)
{
    $this->_pos++;
    $this->_current = $type;

    $this->_blocks[$this->_pos] = array($type, $start, $start, $value);

    return $this;
}

/**
 * Reset the current block type to 'normal'.
 *
 * @return $this
 */
private function endBlock()
{
    $this->_current = 'normal';

    return $this;
}

/**
 * Tell whether the current block has the given type and, if $value is given, the given value.
 *
 * Both comparisons are loose (==).
 *
 * @param mixed $type
 * @param mixed $value
 * @return bool
 */
private function isBlock($type, $value = null)
{
    return $this->_current == $type
        && (null === $value ? true : $this->_blocks[$this->_pos][3] == $value);
}

/**
 * Return the record of the current block.
 *
 * @return array|null null when no block exists at the current position
 */
private function getBlock()
{
    return isset($this->_blocks[$this->_pos]) ? $this->_blocks[$this->_pos] : null;
}

/**
 * Update the last line and/or the value of the current block; null leaves a field untouched.
 *
 * @param mixed $to
 * @param mixed $value
 * @return $this
 */
private function setBlock($to = null, $value = null)
{
    if (null !== $to) {
        $this->_blocks[$this->_pos][2] = $to;
    }

    if (null !== $value) {
        $this->_blocks[$this->_pos][3] = $value;
    }

    return $this;
}

/**
 * Split the last $step lines off the current block into a new block of the given type.
 *
 * The current block's end is pulled back by $step. If it still covers at least
 * one line, the new block is stored in the next slot; otherwise the new block
 * overwrites the current slot. With no block recorded yet (position below 0),
 * a block starting at line 0 is opened instead.
 *
 * @param mixed $step
 * @param mixed $type
 * @param mixed $value
 * @return $this
 */
private function backBlock($step, $type, $value = null)
{
    if ($this->_pos < 0) {
        return $this->startBlock($type, 0, $value);
    }

    $last = $this->_blocks[$this->_pos][2];
    $this->_blocks[$this->_pos][2] = $last - $step;

    if ($this->_blocks[$this->_pos][1] <= $this->_blocks[$this->_pos][2]) {
        // the shortened block is not empty, so keep it and move to a new slot
        $this->_pos++;
    }

    $this->_current = $type;
    $this->_blocks[$this->_pos] = array(
        $type,
        $last - $step + 1,
        $last,
        $value,
    );

    return $this;
}

/**
 * Merge the current block into the previous one.
 *
 * The previous block is extended to the current block's last line and becomes
 * the current block; the merged record is removed. Does nothing when there is
 * no previous block.
 *
 * @return $this
 */
private function combineBlock()
{
    if ($this->_pos < 1) {
        return $this;
    }

    $prev = $this->_blocks[$this->_pos - 1];
    $current = $this->_blocks[$this->_pos];

    $prev[2] = $current[2];
    $this->_blocks[$this->_pos - 1] = $prev;
    $this->_current = $prev[0];
    unset($this->_blocks[$this->_pos]);
    $this->_pos--;

    return $this;
}
}