You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
389 lines
9.4 KiB
389 lines
9.4 KiB
<?php |
|
/**
 * @copyright Copyright (c) 2014 Carsten Brandt
 * @license https://github.com/cebe/markdown/blob/master/LICENSE
 * @link https://github.com/cebe/markdown#readme
 */
|
|
|
namespace cebe\markdown; |
|
use ReflectionMethod; |
|
|
|
/**
 * A generic parser for markdown-like languages.
 *
 * @author Carsten Brandt <mail@cebe.cc>
 */
|
abstract class Parser
{
    /**
     * @var integer the maximum nesting level for language elements.
     * Once `parseBlocks()` / `parseInline()` have recursed this deep, the remaining
     * input is emitted as a plain `text` node instead of being parsed further.
     */
    public $maximumNestingLevel = 32;

    /**
     * @var array stack describing the current context the parser is in.
     * The innermost element comes first: `renderAbsy()` pushes the type of the node being
     * rendered and `parseInline()` pushes the name of the inline handler being executed.
     * TODO remove in favor of absy
     */
    protected $context = [];

    /**
     * @var array these are "escapeable" characters. When using one of these prefixed with a
     * backslash, the character will be outputted without the backslash and is not interpreted
     * as markdown.
     */
    protected $escapeCharacters = [
        '\\', // backslash
    ];

    /**
     * @var integer current recursion depth shared by `parseBlocks()` and `parseInline()`.
     */
    private $_depth = 0;

    /**
     * @var array|null cached result of `blockTypes()`; null until first requested.
     */
    private $_blockTypes;

    /**
     * @var array inline markers that occur in the text currently being parsed, grouped by
     * their first character: `[firstChar => [marker => handlerMethod, ...], ...]`.
     * Inside each group the longest marker comes first.
     */
    private $_inlineMarkers = [];


    /**
     * Parses the given text considering the full language.
     *
     * This includes parsing block elements as well as inline elements.
     * `prepare()` is called before and `cleanup()` after the run, also when the input
     * turns out to be blank.
     *
     * @param string $text the text to parse
     * @return string parsed markup
     */
    public function parse($text)
    {
        $this->prepare();

        if (ltrim($text) === '') {
            // keep prepare()/cleanup() paired even though there is nothing to parse
            $this->cleanup();
            return '';
        }

        // normalize all line ending flavours to "\n"
        $text = str_replace(["\r\n", "\n\r", "\r"], "\n", $text);

        $this->prepareMarkers($text);

        $absy = $this->parseBlocks(explode("\n", $text));
        $markup = $this->renderAbsy($absy);

        $this->cleanup();
        return $markup;
    }

    /**
     * Parses a paragraph without block elements (block elements are ignored).
     *
     * `prepare()` is called before and `cleanup()` after the run, also when the input
     * turns out to be blank.
     *
     * @param string $text the text to parse
     * @return string parsed markup
     */
    public function parseParagraph($text)
    {
        $this->prepare();

        if (ltrim($text) === '') {
            // keep prepare()/cleanup() paired even though there is nothing to parse
            $this->cleanup();
            return '';
        }

        // normalize all line ending flavours to "\n"
        $text = str_replace(["\r\n", "\n\r", "\r"], "\n", $text);

        $this->prepareMarkers($text);

        $absy = $this->parseInline($text);
        $markup = $this->renderAbsy($absy);

        $this->cleanup();
        return $markup;
    }

    /**
     * This method will be called before `parse()` and `parseParagraph()`.
     * You can override it to do some initialization work.
     */
    protected function prepare()
    {
    }

    /**
     * This method will be called after `parse()` and `parseParagraph()`.
     * You can override it to do cleanup.
     */
    protected function cleanup()
    {
    }


    // block parsing


    /**
     * Returns the block element types available in this parser.
     *
     * Every protected method whose name starts with `identify` contributes one type: the
     * lower-cased remainder of the method name. The list is sorted and cached on first use.
     *
     * @return array a list of block element types available.
     */
    protected function blockTypes()
    {
        if ($this->_blockTypes === null) {
            // detect block types via "identify" functions
            $reflection = new \ReflectionClass($this);
            $types = [];
            foreach ($reflection->getMethods(\ReflectionMethod::IS_PROTECTED) as $method) {
                $name = $method->getName();
                if (strncmp($name, 'identify', 8) === 0) {
                    $types[] = strtolower(substr($name, 8));
                }
            }
            // drop falsy names (e.g. a method called just "identify")
            $types = array_filter($types);
            sort($types);

            $this->_blockTypes = $types;
        }
        return $this->_blockTypes;
    }

    /**
     * Given a set of lines and an index of a current line it uses the registered block types to
     * detect the type of this line.
     *
     * The `identify*` methods are asked in the order returned by `blockTypes()`;
     * the first one returning a truthy value wins.
     *
     * @param array $lines
     * @param integer $current
     * @return string name of the block type in lower case
     */
    protected function detectLineType($lines, $current)
    {
        $line = $lines[$current];
        foreach ($this->blockTypes() as $blockType) {
            if ($this->{'identify' . $blockType}($line, $lines, $current)) {
                return $blockType;
            }
        }
        // consider the line a normal paragraph if no other block type matches
        return 'paragraph';
    }

    /**
     * Parse block elements by calling `detectLineType()` to identify them
     * and call consume function afterwards.
     *
     * @param array $lines the lines to convert into blocks
     * @return array list of block nodes; a single `text` node holding the unparsed input
     * when the maximum nesting level has been reached.
     */
    protected function parseBlocks($lines)
    {
        if ($this->_depth >= $this->maximumNestingLevel) {
            // maximum depth is reached, do not parse input
            return [['text', implode("\n", $lines)]];
        }
        $this->_depth++;

        $blocks = [];

        // convert lines to blocks
        for ($i = 0, $count = count($lines); $i < $count; $i++) {
            if (rtrim($lines[$i]) === '') {
                continue; // skip empty lines
            }
            // identify a blocks beginning and parse the content;
            // the consumer reports the last line it used, the loop continues right after it
            list($block, $i) = $this->parseBlock($lines, $i);
            if ($block !== false) {
                $blocks[] = $block;
            }
        }

        $this->_depth--;

        return $blocks;
    }

    /**
     * Parses the block at current line by identifying the block type and parsing the content
     *
     * @param $lines
     * @param $current
     * @return array Array of two elements, the first element contains the block,
     * the second contains the next line index to be parsed.
     */
    protected function parseBlock($lines, $current)
    {
        // identify block type for this line
        $blockType = $this->detectLineType($lines, $current);

        // call consume method for the detected block type to consume further lines
        return $this->{'consume' . $blockType}($lines, $current);
    }

    /**
     * Renders a list of nodes by dispatching each one to its `render<Type>()` method.
     *
     * While a node is being rendered its type is the first element of `$context`.
     *
     * @param array $blocks list of nodes, each having its type at index 0
     * @return string the concatenated output of all nodes
     */
    protected function renderAbsy($blocks)
    {
        $output = '';
        foreach ($blocks as $block) {
            array_unshift($this->context, $block[0]);
            $output .= $this->{'render' . $block[0]}($block);
            array_shift($this->context);
        }
        return $output;
    }

    /**
     * Consume lines for a paragraph
     *
     * Collects lines starting at `$current` up to (not including) the next blank line and
     * parses their inline content.
     *
     * @param $lines
     * @param $current
     * @return array the paragraph block and the index of the last consumed line
     */
    protected function consumeParagraph($lines, $current)
    {
        // consume until newline
        $content = [];
        for ($i = $current, $count = count($lines); $i < $count; $i++) {
            if (ltrim($lines[$i]) === '') {
                break;
            }
            $content[] = $lines[$i];
        }
        $block = [
            'paragraph',
            'content' => $this->parseInline(implode("\n", $content)),
        ];
        // $i points at the first line that was NOT consumed
        return [$block, $i - 1];
    }

    /**
     * Render a paragraph block
     *
     * @param $block
     * @return string
     */
    protected function renderParagraph($block)
    {
        return '<p>' . $this->renderAbsy($block['content']) . "</p>\n";
    }


    // inline parsing


    /**
     * Returns a map of inline markers to the corresponding parser methods.
     *
     * This array defines handler methods for inline markdown markers.
     * When a marker is found in the text, the handler method is called with the text
     * starting at the position of the marker.
     *
     * Note that markers starting with whitespace may slow down the parser,
     * you may want to use [[renderText]] to deal with them.
     *
     * You may override this method to define a set of markers and parsing methods.
     * The default implementation looks for protected methods starting with `parse` that
     * also have an `@marker` annotation in PHPDoc.
     *
     * @return array a map of markers to parser methods
     */
    protected function inlineMarkers()
    {
        $markers = [];
        // detect "parse" functions
        $reflection = new \ReflectionClass($this);
        foreach ($reflection->getMethods(\ReflectionMethod::IS_PROTECTED) as $method) {
            $methodName = $method->getName();
            if (strncmp($methodName, 'parse', 5) !== 0) {
                continue;
            }
            preg_match_all('/@marker ([^\s]+)/', $method->getDocComment(), $matches);
            foreach ($matches[1] as $match) {
                $markers[$match] = $methodName;
            }
        }
        return $markers;
    }

    /**
     * Prepare markers that are used in the text to parse
     *
     * Add all markers that are present in markdown.
     * Check is done to avoid iterations in parseInline(), good for huge markdown files.
     * Markers are grouped by their first character and every group is ordered by
     * descending marker length, so that `parseInline()` always tries the longest marker first.
     *
     * @param string $text
     */
    protected function prepareMarkers($text)
    {
        $this->_inlineMarkers = [];
        foreach ($this->inlineMarkers() as $marker => $method) {
            // numeric marker strings come back from the array as integer keys
            $marker = (string) $marker;
            if ($marker === '' || strpos($text, $marker) === false) {
                continue;
            }
            $this->_inlineMarkers[$marker[0]][$marker] = $method;
        }

        // put the longest marker first; two distinct markers of equal length can never
        // match at the same position, so their relative order does not matter
        foreach ($this->_inlineMarkers as $char => $group) {
            uksort($group, function ($a, $b) {
                return strlen((string) $b) - strlen((string) $a);
            });
            $this->_inlineMarkers[$char] = $group;
        }
    }

    /**
     * Parses inline elements of the language.
     *
     * The text is scanned for the first characters of the prepared markers. Text between
     * markers becomes `text` nodes; at a marker the matching handler is called with the
     * remaining text and has to return `[node, consumedLength]`. A marker character that no
     * handler matches is emitted as a one-character `text` node.
     *
     * @param string $text the inline text to parse.
     * @return array list of inline nodes; the last node is always a `text` node holding the
     * (possibly empty) remainder.
     */
    protected function parseInline($text)
    {
        if ($this->_depth >= $this->maximumNestingLevel) {
            // maximum depth is reached, do not parse input
            return [['text', $text]];
        }
        $this->_depth++;

        $markers = implode('', array_keys($this->_inlineMarkers));

        $paragraph = [];

        // compare against '' instead of using empty(): a marker set of just "0" is valid
        while ($markers !== '' && ($pos = strcspn($text, $markers)) < strlen($text)) {

            // add the text up to next marker to the paragraph
            if ($pos > 0) {
                $paragraph[] = ['text', substr($text, 0, $pos)];
                $text = substr($text, $pos);
            }

            $parsed = false;
            foreach ($this->_inlineMarkers[$text[0]] as $marker => $method) {
                $marker = (string) $marker;
                if (strncmp($text, $marker, strlen($marker)) === 0) {
                    // parse the marker
                    array_unshift($this->context, $method);
                    list($output, $offset) = $this->$method($text);
                    array_shift($this->context);

                    $paragraph[] = $output;
                    // cast: substr() yields false instead of '' at the end of the string before PHP 8
                    $text = (string) substr($text, $offset);
                    $parsed = true;
                    break;
                }
            }
            if (!$parsed) {
                // no handler took the marker character, emit it literally and move on
                $paragraph[] = ['text', $text[0]];
                $text = (string) substr($text, 1);
            }
        }

        $paragraph[] = ['text', $text];

        $this->_depth--;

        return $paragraph;
    }

    /**
     * Parses escaped special characters.
     *
     * If the backslash is followed by one of the `$escapeCharacters`, that character alone is
     * emitted and two bytes are consumed; otherwise the backslash itself is emitted.
     *
     * @marker \
     */
    protected function parseEscape($text)
    {
        if (isset($text[1]) && in_array($text[1], $this->escapeCharacters, true)) {
            return [['text', $text[1]], 2];
        }
        return [['text', $text[0]], 1];
    }

    /**
     * This function renders plain text sections in the markdown text.
     * It can be used to work on normal text sections for example to highlight keywords or
     * do special escaping.
     *
     * @param array $block a `text` node, the text is stored at index 1
     * @return string the text, returned unchanged by this default implementation
     */
    protected function renderText($block)
    {
        return $block[1];
    }
}
|
|
|