PHP code
/*
GMLP - The Goofy Markup Language Processor; version 1.4.4
This code is an algorithm to support the translating[1] of any markup
language to any other markup language through the use of markup language
definition files. But it also can be an "any input text to any output text"
processor.[2]
Copyright 2017-2018, Andova Begarin.
Copyright 2013-2021, G.A.Jennings.
License: The (MIT|PHP|Apache|Mozilla) License.
See DOC/GMLP.TXT for test text and documentation. See also INDEX.PHP.
See also TESTRE.PHP and MDTESTRE.PHP.
(Its name is "Markup Language" because it converts input text formatted
like Markdown and produces HTML output like Markdown.pl and its variants.
"Processor" because the input format is not hard-coded but defined as PHP
data. And "Goofy" because the code is a bit bloated and confusing.)
[1] I use the word "translate" but actually what happens is conversion (or
perhaps even more accurately substitution), and I interchange the two words
at times depending on context.
[2] Which means it is a PHP Code/Data way to do what SED and AWK already do.
*/
/* Environment constants; each one is only defined here when the including
   script has not already defined it. */
// GMLP_TERM is TRUE when $argv is non-empty; gmlp_out() and
// gmlp_error_handler() use it to choose STDERR/ANSI output over HTML output.
defined('GMLP_TERM') || define('GMLP_TERM',!empty($argv));
// GMLP_DIR is the directory containing this file.
defined('GMLP_DIR') || define('GMLP_DIR',__DIR__);
// DEF_DIR is the 'defs/' directory below GMLP_DIR.
defined('DEF_DIR') || define('DEF_DIR',GMLP_DIR.'/defs/');
/* for debugging with debug() - the CLI can do this */
//define('DEBUG_LOG',1);
//define('DEBUG_TRUNC',1000);
//define('DEBUG_ERR',1);
// Unless debug() already exists: load debug.php when DEBUG_LOG is defined and
// truthy, otherwise declare no-op debug()/debuglog() so calls cost nothing.
if (!function_exists('debug')) {
if (defined('DEBUG_LOG') && DEBUG_LOG) {
include GMLP_DIR.'/debug.php';
} else {
function debug(){};
function debuglog(){};
}
}
/* OPTIONS - fallback option values; used by gmlp_def_def(1) to fill in
   options that are not set and by gmlp_option() when an option is not set */
const OPTIONS = [
'entities' => 0,
'greedy' => 0,
'newlines' => 0,
'prepend' => '',
'append' => '',
'funclist' => array(),
'linesfunc' => 'gmlp_lines',
'blocksfunc' => 'gmlp_blocks',
];
/* BLOCKS - the keys every block definition is given by gmlp_def_block()
   (missing ones are set to '') */
const BLOCKS = [
'begin','end','first','last','pre','post','replace','convert','newline',
];
/* ERRMAX - gmlp_error_handler() terminates the script when its error count
   reaches this value */
const ERRMAX = 100;
// report everything and route errors through gmlp_error_handler()
error_reporting(-1);
set_error_handler('gmlp_error_handler');
// defining EXCEPTION_NO beforehand keeps the default exception handling
if (!defined('EXCEPTION_NO')) set_exception_handler('gmlp_exception_handler');
/**
 * gmlp_file - convert a file according to the $gmlp_translate data.
 *
 * Front-end to gmlp_convert(). By default the file is read as an array of
 * lines (line endings removed), so gmlp_convert() can skip its own explode()
 * step. With $pre set, the file is read as one string instead and takes the
 * string path through gmlp_convert().
 *
 * @param string $file path of the file to convert
 * @param mixed  $pre  truthy: read the file as a single string
 * @return mixed whatever gmlp_convert() returns
 */
function gmlp_file($file, $pre = FALSE) {
    $data = $pre
        ? file_get_contents($file)
        : file($file, FILE_IGNORE_NEW_LINES);

    return gmlp_convert($data);
}
/* gmlp_convert - convert string according to $gmlp_translate data */
// $data is either the whole input as a string or an array of lines.
// Returns the converted text as a string.
//
// Order of work:
//  1. string input only: gmlp_filter(), the 'pre-convert' hook, removal of
//     'STRIP' (str_replace for a string, preg_replace otherwise), then a
//     split on 'NEWLINE' into lines
//  2. gmlp_filter() on the lines array, then the 'convert' hook
//  3. the per-line loop below
//  4. the 'post-convert' hook on the assembled text
// A 'pre-convert' or 'convert' hook that returns TRUE (loosely compared)
// short-circuits straight to step 4 with the data as the hook left it.
function gmlp_convert($data) {
global $gmlp_translate;
extract($gmlp_translate['EOL']); // $BR, $P1, $P2, $NL
$newlines = $gmlp_translate['OPTIONS']['newlines'];
// $data can be either a string (typically) or an array (see gmlp_file()).
if (is_string($data)) {
gmlp_filter($data);
if (gmlp_hooks('pre-convert',$data) == TRUE) {
$body = $data;
goto post;
}
if ($s = $gmlp_translate['STRIP']) {
if (is_string($s))
$data = str_replace($s,'',$data);
else
$data = preg_replace($s,'',$data);
}
$data = explode($gmlp_translate['NEWLINE'],$data);
}
gmlp_filter($data);
if (gmlp_hooks('convert',$data) == TRUE) {
$body = implode($NL,$data);
goto post;
}
$P = $P1; // the paragraph state flag
$body = ''; // return value
/*
The gotos eliminate duplicate code. I always figured to redesign to make
them go away, but... Guess how many gotos are in the Perl source code?
1400. Python? 2500. Ruby? 1400. Bash? 190. PHP has only 50. Everybody
uses goto. (And I have not yet been attacked by a dinosaur.)
*/
// Lines are consumed with array_shift() because the block and greedy-line
// helpers called below also remove lines from $data.
while (($_ = array_shift($data)) !== NULL) {
if ($_ === '') {
// blank line: emits $NL only when the 'newlines' option is set
newline: if ($newlines)
$body .= $NL;
continue;
}
// a 'skip' match bypasses lines/blocks and all line conversions but
// still gets the paragraph/break prefix at the skip label
if (gmlp_skip($_)) // does 'skip'
goto skip;
$t = gmlp_lines_blocks($_,$data);// does 'lines' & 'blocks'
if ($t === NULL)
continue; // discard line
// TRUE: the line (or block) is already converted; it is appended without
// the paragraph/break prefix and the paragraph state is reset (skip2)
if ($t === TRUE)
goto skip2; // skip further processing
// not used yet
// if ($t === '')
// break; // stop processing
gmlp_get_line($_,$data); // greedy lines?
gmlp_convert_line($_); // does all the others
if ($_ === NULL)
continue;
// a line converted to nothing is treated like a blank input line
if ($_ === '')
goto newline;
gmlp_hooks('inlines',$_);
skip:
$_ = $P.$_; // prepends begin or break
$P = $BR; // set to break
// look to next line
if (isset($data[0]) && $data[0] === '') {
$_ .= $P2; // end paragraph
skip2: $P = $P1; // reset to begin paragraph
}
$body .= $_.$NL;
}
post:
gmlp_hooks('post-convert',$body);
return $body;
}
/**
 * gmlp_filter - filter out data before or during conversion.
 *
 * String input (the whole buffer):
 *   - 'datafilters' (array with 'pattern' and 'replace') is applied with
 *     preg_replace(); when it is set 'datafilter' is not consulted
 *   - otherwise 'datafilter' is applied: a plain pattern deletes what it
 *     matches; an array supplies pattern => replacement (the last pair is
 *     the one used)
 * Array input (the lines):
 *   - 'linefilter' keeps only the lines matching the regex; a '^' placed
 *     before the regex inverts that, removing the matching lines. Removed
 *     lines are unset(), so the remaining keys are not renumbered.
 * Nothing happens when the relevant filter is empty or undefined.
 *
 * preg_error() is called with its die flag on a regex failure, to prevent
 * hundreds of (possible) PHP errors if a regex has an error.
 *
 * The filter definitions are read by value: the '^' prefix is stripped from
 * a local copy only, so the negation still applies on every later call.
 * preg_replace() reports failure with NULL (not FALSE), which is what is
 * tested here.
 *
 * @param string|array $data buffer or lines, modified in place
 * @return void
 */
function gmlp_filter(&$data) {
    global $gmlp_translate;

    # these REs will be tested at start up at some point; or along with -testdef

    // input data
    if (is_string($data)) {
        if (!empty($gmlp_translate['datafilters'])) {
            // replace by regex=>replace on buffer
            $filter = $gmlp_translate['datafilters'];
            $p = $filter['pattern'];
            $data = preg_replace($p, $filter['replace'], $data);
            if ($data === NULL)
                preg_error("datafilters $p error", TRUE);
        }
        elseif (!empty($gmlp_translate['datafilter'])) {
            $filter = $gmlp_translate['datafilter'];
            $pattern = $filter;
            $replace = '';
            if (is_array($filter)) {
                // the last pattern => replacement pair wins
                foreach ($filter as $re => $repl) {
                    $pattern = $re;
                    $replace = $repl;
                }
            }
            $data = preg_replace($pattern, $replace, $data);
            if ($data === NULL)
                preg_error("datafilter $pattern error", TRUE);
        }
        return;
    }

    // lines array
    if (empty($gmlp_translate['linefilter']))
        return;

    $filter = $gmlp_translate['linefilter'];    // a copy, never the definition
    $not = FALSE;
    if ($filter[0] == '^') {
        $not = TRUE;
        $filter = substr($filter, 1);
    }
    foreach ($data as $i => $line) {
        $r = preg_match($filter, $line);
        if ($r === FALSE)
            preg_error("linefilter $filter error", TRUE);
        if ($not)
            $r = !$r;
        if (!$r)
            unset($data[$i]);                   // remove line
    }
}
/**
 * gmlp_get_line - if "greedy" can append more lines.
 *
 * When the 'greedy' option is set, the lines that follow (up to, but not
 * including, the next blank line) are removed from $data and appended to
 * $_. Each line is passed through gmlp_line() first and the pieces are
 * kept separated by exactly one space; trailing whitespace is trimmed from
 * the result.
 *
 * "Blank" means the empty string, the same test gmlp_convert() uses. A
 * truthiness test would wrongly end the paragraph at a line that contains
 * just "0".
 *
 * @param string $_    current line, modified in place
 * @param array  $data remaining input lines, consumed as they are appended
 * @return void
 */
function gmlp_get_line(&$_, &$data) {
    global $gmlp_translate;

    $greedy = $gmlp_translate['OPTIONS']['greedy'];
    if (!$greedy || $_ === '')
        return;

    // has to be "tricky" to be sure lines are separated by spaces
    if (substr($_, -1, 1) != ' ')
        $_ .= ' ';

    if (isset($data[0]) && $data[0] !== '') {
        gmlp_line($_);
        while (isset($data[0]) && $data[0] !== '') {
            $l = array_shift($data);
            gmlp_line($l);
            if (substr($l, -1, 1) != ' ')
                $l .= ' ';
            $_ .= $l;
        }
    }
    $_ = rtrim($_);
}
/**
 * gmlp_skip - skip over lines that match a RE - DEPRECATED
 *
 * Tries each regex of the 'skip' list against the line, in order. On the
 * first match the line is replaced by the first subpattern (when there is
 * one) and TRUE is returned.
 *
 * @param string $_ line to test, possibly replaced by its first subpattern
 * @return bool TRUE when a 'skip' regex matched, FALSE otherwise
 */
function gmlp_skip(&$_) {
    global $gmlp_translate;

    $patterns =& $gmlp_translate['skip'];
    foreach ($patterns as $pattern) {
        if (!preg_match($pattern, $_, $found))
            continue;
        if (isset($found[1]))
            $_ = $found[1];
        return TRUE;
    }
    return FALSE;
}
/**
 * gmlp_lines_blocks - run the 'lines' handler, then the 'blocks' handler.
 *
 * The handler names come from the 'linesfunc' and 'blocksfunc' options
 * (gmlp_lines() and gmlp_blocks() by default); this is the undocumented
 * feature for supplying one's own lines/blocks function. The blocks handler
 * only runs when the lines handler returned neither NULL nor TRUE.
 *
 * @param string $_    current line, may be modified by the handlers
 * @param array  $data remaining input lines, may be consumed by the handlers
 * @return bool|null NULL or TRUE exactly as returned by a handler,
 *                   FALSE for every other outcome
 */
function gmlp_lines_blocks(&$_, &$data) {
    global $gmlp_translate;

    $lines_handler  = $gmlp_translate['OPTIONS']['linesfunc'];   // gmlp_lines()
    $blocks_handler = $gmlp_translate['OPTIONS']['blocksfunc'];  // gmlp_blocks()

    $outcome = $lines_handler($_);
    if ($outcome !== NULL && $outcome !== TRUE)
        $outcome = $blocks_handler($_, $data);

    return ($outcome === NULL || $outcome === TRUE) ? $outcome : FALSE;
}
/**
 * gmlp_convert_line - do all the line conversions in 'funclist' order.
 *
 * Every entry of the 'funclist' option names a converter; the function
 * called is 'gmlp_' followed by that name (gmlp_inlines, gmlp_entities,
 * gmlp_chars). Each converter receives the line by reference. An empty
 * 'funclist' leaves the line untouched.
 *
 * @param string $_ line to convert, modified in place
 * @return void
 */
function gmlp_convert_line(&$_) {
    global $gmlp_translate;

    $funclist =& $gmlp_translate['OPTIONS']['funclist'];
    if ($funclist) {
        foreach ($funclist as $name) {
            $converter = 'gmlp_'.$name;
            $converter($_);
        }
    }
}
/* gmlp_lines - REs to convert lines - skip other conversions */
// Tests the line against each RE => replacement of the 'lines' definitions,
// in order; only the first RE that matches is acted on.
//
// $_ is the line, modified in place.
//
// Returns:
//   TRUE  - the 'lines' hook returned a truthy value, or the line matched
//           and was converted (the caller skips further processing)
//   NULL  - the matching entry's replacement is NULL (discard the line)
//   FALSE - nothing matched, or the matching entry's replacement is FALSE
//   other - a callable replacement returned a non-string; that value is
//           returned as is
// An RE that preg_match() rejects is reported through gmlp_error() with its
// die flag set.
function gmlp_lines(&$_) {
global $gmlp_translate;
$lines =& $gmlp_translate['lines'];
if (gmlp_hooks('lines',$_)) // if hook occurred
return TRUE; // continue
foreach ($lines as $re => $repl) {
if ($n=preg_match($re,$_,$res)) {
// $repl special cases: NULL discard line; FALSE stop processing 'lines' and
// continue processing the line (go on to 'blocks' and 'inlines', etc); '' use
// line as is or $1 if defined; otherwise, if line matched return TRUE to skip
// further processing of line (and on to the next line)
if ($repl === NULL)
return NULL;
if ($repl === FALSE)
return FALSE;
if ($repl === '') {
if (isset($res[1]))
$_ = $res[1];
return TRUE;
}
// previously, closures only expected the 2nd argument, now both can use it
// or not; now, if function returns non-string return that value - means that
// the function can modify line reference and return FALSE
if (is_object($repl) || function_exists($repl)) {
$r = $repl($_,$res);
if (!is_string($r))
return $r;
$_ = $r;
return TRUE;
}
/*
Next thing not always wanted, like if this is wanted:
'/^(;.*)/' => '<p class="cm">$1</p>',
Result is probably incorrect, so this needs to be done:
'/^(;.*)/' => function($s,$r){return "<p class=\"cm\">$s</p>";},
Or:
'/^(;.*)/' => 'highlight_str_func',
The latter for when there are multiple 'lines' similar to the one with
the closure.
*/
# That needs to be fixed; the !isset() sort of gets around it.
# But NOT if there is a subpattern that SHOULD not be used:
# '/^([ ]?\*[ ]?){3,}/' => '<hr>',
# I can't (yet) see a way to fix that kind of regex, so a function is required.
# See the markdown definition file. (I think I have to learn PCRE assertions.)
// if subpattern the line is converted
// (only when there is exactly one subpattern; the conversion happens on the
// whole line before the replacement below is applied)
if (isset($res[1]) && !isset($res[2]))
gmlp_convert_line($_);
// if no subpatterns just substitute
if (!isset($res[1]))
$_ = $repl;
else
$_ = preg_replace($re,$repl,$_);
return TRUE;
}
// preg_match() returns FALSE only when the RE itself failed
if ($n === FALSE) {
// no sense continuing as nothing will be right
gmlp_error("\ninvalid 'lines' RE: '$re'=>'$repl'\n",1);
}
}
return FALSE;
}
/**
 * gmlp_inlines - string replacements on all matches.
 *
 * Applies every RE => replacement of the 'inlines' definitions to the line,
 * in order (see GMLP_DEF.PHP data 'inlines' for details on how these work).
 * The replacement decides what happens:
 *
 *   - closure, with the INLINES_CLOSURE_RES option set: called once as
 *     closure($line, $matches) with the preg_match_all() result; its return
 *     value becomes the line (see the closure in the markdown definition
 *     file)
 *   - closure (option not set) or name of an existing function: called once
 *     per match with the first subpattern; each matched text is replaced by
 *     the return value using str_replace()
 *   - anything else: used as a preg_replace() replacement
 *
 * A RE that preg_match() rejects is reported through preg_error().
 *
 * @param string $_ line to convert, modified in place
 * @return void
 */
function gmlp_inlines(&$_) {
    global $gmlp_translate;

    $inlines =& $gmlp_translate['inlines'];
    foreach ($inlines as $pattern => $replacement) {
        $closure_gets_matches = isset($gmlp_translate['OPTIONS']['INLINES_CLOSURE_RES']);
        $is_closure = is_object($replacement);

        // closure handed the whole line plus all matches
        if ($closure_gets_matches && $is_closure) {
            if (preg_match_all($pattern, $_, $matches))
                $_ = $replacement($_, $matches);
            continue;
        }

        // callable handed the first subpattern of each match
        if ($is_closure || function_exists($replacement)) {
            if (preg_match_all($pattern, $_, $matches)) {
                foreach ($matches[0] as $i => $found)
                    $_ = str_replace($found, $replacement($matches[1][$i]), $_);
            }
            continue;
        }

        // plain regex substitution
        $matched = preg_match($pattern, $_);
        if ($matched)
            $_ = preg_replace($pattern, $replacement, $_);
        if ($matched === FALSE)
            preg_error("$pattern");
    }
}
/**
 * gmlp_entities - string replacements on each line.
 *
 * Does nothing unless the 'entities' option is set. The 'entities'
 * definition has two accepted shapes:
 *   - a list whose element 0 is the search value(s) and element 1 the
 *     replacement value(s), handed to str_replace() in one call
 *   - a map of search string => replacement, applied one pair at a time
 *
 * @param string $_ line to convert, modified in place
 * @return void
 */
function gmlp_entities(&$_) {
    global $gmlp_translate;

    $entities =& $gmlp_translate['entities'];
    if (!$gmlp_translate['OPTIONS']['entities'])
        return;

    if (isset($entities[0])) {
        $_ = str_replace($entities[0], $entities[1], $_);
        return;
    }

    foreach ($entities as $search => $substitute) {
        $_ = str_replace($search, $substitute, $_);
    }
}
/**
 * gmlp_chars - basic bold, italic, etc. - to be DEPRECATED
 *
 * Though at the time it was written and thought to be "oh so clever", this
 * function is embarrassingly lame. It works, and has worked, but then
 * markdown came along and PCRE assertions were finally understood...
 *
 * For every marker => tag of the 'chars' definitions a regex is built as
 * WORDBEG . marker . '(.*)' . marker . WORDEND and text enclosed by the
 * marker is wrapped in <tag>...</tag>.
 *
 * The marker is escaped with preg_quote(), using the delimiter WORDBEG
 * opens with, so markers containing any PCRE metacharacter (such as '(',
 * ')' or '$') produce a valid pattern.
 *
 * A pattern that still fails is reported through preg_error() with its die
 * flag set.
 *
 * @param string $_ line to convert, modified in place
 * @return string the converted line (same value as $_)
 */
function gmlp_chars(&$_) {
    global $gmlp_translate;

    /*
    These lookups happen for each line of text, which might look like a
    performance hit. Not really: function, static variables, etc. were
    tried and timings show that PHP seems optimised in the use of global.
    */
    $chars =& $gmlp_translate['chars'];
    $beg =& $gmlp_translate['WORDBEG'];
    $end =& $gmlp_translate['WORDEND'];

    // the regex delimiter is whatever WORDBEG starts with ('/' by default)
    $delimiter = (is_string($beg) && $beg !== '') ? $beg[0] : '/';

    foreach ($chars as $char => $tag) {
        $char = preg_quote((string)$char, $delimiter);
        $repl = $beg.$char.'(.*)'.$char.$end;
        $tags = '$1<'.$tag.'>$2</'.$tag.'>$3';
        $_ = preg_replace($repl, $tags, $_);
        # preg_replace() returns NULL on error, not FALSE like the others
        if ($_ === NULL)
            preg_error("'chars' re error: '$repl'", TRUE);
    }
    return $_;
}
/**
 * gmlp_line - REs applied to each line of block conversions.
 *
 * Runs every RE => replacement of the 'line' definitions over the line, in
 * order, with preg_replace(). (It's currently an obscure thing, in an
 * obscure place: gmlp_blocks() and gmlp_get_line() call it.)
 *
 * @param string $_ line to convert, modified in place
 * @return void
 */
function gmlp_line(&$_) {
    global $gmlp_translate;

    $rules =& $gmlp_translate['line'];
    foreach ($rules as $pattern => $replacement) {
        if (!preg_match($pattern, $_))
            continue;
        $_ = preg_replace($pattern, $replacement, $_);
    }
}
/**
 * gmlp_blocks - convert lines between start and end REs.
 *
 * Called with the start tag line and with the input array starting one
 * line past it (both as references; the array will be modified if a block
 * tag is found, which is why array_shift is used rather than foreach).
 *
 * The block definition is looked up with gmlp_lookup_block() and its
 * members are extract()ed into local variables:
 *   function - when set, called as function($data, $s, $block) and its
 *              return value is returned; nothing else below applies
 *   first    - truthy: the start line is itself part of the block
 *   pre      - text the result starts with
 *   end      - RE that ends the block
 *   last     - truthy: the end line is part of the block; exactly 0:
 *              the final newline of the result is removed
 *   convert  - truthy: lines go through gmlp_convert_line();
 *              otherwise they go through gmlp_line()
 *   newline  - falsy: lines that convert to nothing are dropped;
 *              2: the result is trim()med
 *   replace  - callable applied to each line, or text appended to it
 *   continue - when set, the line the loop stopped on is put back
 *   post     - NULL: discard the block; callable: applied to the result;
 *              otherwise text appended to the result
 *
 * 'convert' is tested for truthiness. Comparing it with 0 gives a different
 * answer for the default '' on PHP 8 than on PHP 7, which would silently
 * change which converter the block's lines go through.
 *
 * @param string $s    start line; replaced by the converted block
 * @param array  $data remaining input lines; the block's lines are consumed
 * @return mixed FALSE when $s starts no block, NULL when the block is to be
 *               discarded, TRUE when $s holds the converted block, or the
 *               return value of the block's own 'function'
 */
function gmlp_blocks(&$s, &$data) {
    if (!($block = gmlp_lookup_block($s)))
        return FALSE;

    extract($block);

    if (isset($block['function'])) {        // if function, that function
        $func = $block['function'];         // will handle the processing
        $r = $func($data,$s, $block);       // not this function
        return $r;
        // function definition: name(&$data, &$s, $block)
    }

    if ($first)
        array_unshift($data,$s);

    $s = $pre;
    $b = FALSE;                             // TRUE once the end line was seen
    while (($_ = array_shift($data)) !== NULL) {
        if (preg_match($end,$_)) {
            if (!$last)
                break;
            $b = TRUE;                      // include this line, then stop
        }
        if (!$convert)
            gmlp_line($_);
        else
            gmlp_convert_line($_);
        if ($_ == '' && !$newline)
            continue;
        if (is_object($replace) || function_exists($replace))
            $_ = $replace($_);
        else
        if ($replace)
            $_ = $_.$replace;
        $s .= $_."\n";
        if ($b) break;
    }

    if ($newline == 2)
        $s = trim($s);
    if (isset($block['continue']))
        array_unshift($data,$_);
    if ($post === NULL)
        return NULL;
    if ($last === 0)                        // eat last newline (dont like)
        $s = substr_replace($s,'',-1);
    if (is_object($post) || function_exists($post))
        $s = $post($s);                     // 'post' is either a
    else                                    // function or string to append
        $s .= $post;                        // ... might want to do more
    return TRUE;
}
/**
 * gmlp_read_block - to be used by definition file to read a block.
 *
 * Removes lines from $data up to and including the first one matching
 * $re['end'] and returns the lines before it, either as an array or, with
 * $string set, as one string in which every line is followed by "\n".
 *
 * The result is always initialised, so a block without lines yields '' or
 * an empty array. The 'first' and 'continue' members of $re are optional.
 * Nothing is put back onto $data when the input ran out before the end RE
 * matched.
 *
 * @param array  $data   remaining input lines, consumed while reading
 * @param string $s      the block's start line
 * @param array  $re     block definition: 'end' (required RE), 'first'
 *                       (truthy: $s is part of the result), 'continue'
 *                       (truthy: the end line is put back onto $data)
 * @param mixed  $string truthy: return a string instead of an array
 * @return string|array the block's lines
 */
function gmlp_read_block(&$data, $s, $re, $string = FALSE) {
    $block = $string ? '' : array();

    if (!empty($re['first'])) {
        if ($string)
            $block = "$s\n";
        else
            $block[] = $s;
    }

    $end = $re['end'];
    while (($_ = array_shift($data)) !== NULL) {
        if (preg_match($end, $_))
            break;
        if ($string)
            $block .= "$_\n";
        else
            $block[] = $_;
    }

    // $_ is NULL here when $data ran out; there is no line to put back then
    if (!empty($re['continue']) && $_ !== NULL)
        array_unshift($data, $_);

    return $block;
}
/**
 * gmlp_hooks - function callback.
 *
 * Looks up $name in the 'HOOKS' definitions and, when the entry is a
 * closure or the name of an existing function, calls it with $data.
 *
 * Hook names used by this file:
 *   'pre-convert'  $data input as string             - gmlp_convert()
 *   'convert'      $data input as array              - gmlp_convert()
 *   'inlines'      $data is line for each line       - gmlp_convert()
 *   'lines'        $data is line for each line       - gmlp_lines()
 *   'post-convert' $data is converted text as string - gmlp_convert()
 *
 * @param string $name hook name
 * @param mixed  $data passed to the hook by reference
 * @return mixed the hook's return value; FALSE when no usable hook is set
 */
function gmlp_hooks($name, &$data) {
    global $gmlp_translate;

    $hooks =& $gmlp_translate['HOOKS'];
    if (!isset($hooks[$name]))
        return FALSE;

    $callback = $hooks[$name];
    if (!is_object($callback) && !function_exists($callback))
        return FALSE;

    return $callback($data);
}
/**
 * gmlp_open - load definition file / load functions file / initialize data.
 *
 * Steps, in this order:
 *   1. reset $gmlp_translate to the built-in defaults (gmlp_def_def())
 *   2. include the functions file, then the definition file, when given;
 *      both run inside this function's scope
 *   3. when the DEF_TEXT constant is defined, replace the 'EOL' strings by
 *      empty ones (only "\n" is kept as line end)
 *   4. when the DEF_NEWLINES constant is defined, switch the 'newlines'
 *      option on
 *   5. fill in options the definition file left unset (gmlp_def_def(1)) and
 *      complete the block definitions (gmlp_block_defaults())
 *
 * @param string|null $def_file   definition file to include
 * @param string|null $funcs_file functions file to include
 * @return bool always TRUE
 */
function gmlp_open($def_file = NULL, $funcs_file = NULL) {
    global $gmlp_translate;

    gmlp_def_def();

    if ($funcs_file) {
        include $funcs_file;    // let fail for not found
    }
    if ($def_file) {
        include $def_file;
    }

    if (defined('DEF_TEXT')) {
        $gmlp_translate['EOL'] = [
            'BR' => '',
            'P1' => '',
            'P2' => '',
            'NL' => "\n",
        ];
    }
    if (defined('DEF_NEWLINES')) {
        $gmlp_translate['OPTIONS']['newlines'] = 1;
    }

    gmlp_def_def(1);
    gmlp_block_defaults($gmlp_translate['blocks']);

    return TRUE;
}
/**
 * gmlp_test - tests all lines and inlines regular expressions.
 *
 * Prints a short report: first a "not defined" note for each of the two
 * sections that is empty, then, per non-empty section, the number of
 * entries and the number of REs preg_match() rejected. Each rejected RE is
 * also reported through preg_error() with its position in the section.
 *
 * @param string $s subject text the REs are run against
 * @return void
 */
function gmlp_test($s = '') {
    global $gmlp_translate;

    // definitions, "not defined" note, section heading
    $sections = array(
        array($gmlp_translate['lines'], "\nlines: not defined", "\nlines:\n"),
        array($gmlp_translate['inlines'], "\ninlines: not defined", "\ninlines:\n"),
    );

    foreach ($sections as $section) {
        if (!$section[0])
            print $section[1];
    }

    foreach ($sections as $section) {
        if (!$section[0])
            continue;
        print $section[2];
        $entries = 0;
        $errors = 0;
        foreach ($section[0] as $re => $repl) {
            $entries++;
            if (preg_match($re, $s) === FALSE) {
                preg_error("$entries: $re");
                $errors++;
            }
        }
        print "$entries entries, $errors errors";
    }

    print "\n";
}
/**
 * preg_error - inform of RE error.
 *
 * Appends the preg_last_error() code in parentheses to the message when
 * that code is non-zero, then hands the message to gmlp_error().
 *
 * @param string $s   message
 * @param mixed  $die passed on to gmlp_error()
 * @return void
 */
function preg_error($s, $die = FALSE) {
    $code = preg_last_error();
    $message = $code ? $s." ($code)" : $s;
    gmlp_error($message, $die);
}
/**
 * gmlp_error - output an error message and optionally stop.
 *
 * The message, followed by a newline, is written with gmlp_out().
 *
 * @param string $s   message
 * @param mixed  $die truthy: terminate the script after the message
 * @return void
 */
function gmlp_error($s, $die = FALSE) {
    gmlp_out("$s\n");
    if (!$die)
        return;
    exit;
}
/**
 * gmlp_option - get or set an option.
 *
 *   gmlp_option()             returns the whole 'OPTIONS' array
 *   gmlp_option($var)         returns the option's value; an option that is
 *                             not set falls back to the OPTIONS constant,
 *                             and to NULL when it is not listed there either
 *   gmlp_option($var, $val)   sets the option and returns $val
 *
 * Any $val other than NULL sets the option, so options can be switched off
 * with 0, '' or FALSE.
 *
 * @param string|null $var option name
 * @param mixed       $val new value; NULL (the default) means "get"
 * @return mixed
 */
function gmlp_option($var = NULL, $val = NULL) {
    global $gmlp_translate;

    $options =& $gmlp_translate['OPTIONS'];
    if ($var == NULL)
        return $options;
    if ($val !== NULL)
        return $options[$var] = $val;
    if (isset($options[$var]))
        return $options[$var];
    return array_key_exists($var, OPTIONS) ? OPTIONS[$var] : NULL;
}
/**
 * gmlp_add - add data to the translate array.
 *
 * This is a complex function because it needs to add data to the translate
 * array in various ways. For example:
 *   gmlp_add('EOL','BR','<br/>');   // set EOL member
 *   gmlp_add('EOL',$eol_array);     // set EOL array
 * See CLI.md for more examples.
 *
 * Upper-case $var (meta data such as OPTIONS, HOOKS, EOL):
 *   ($var)                  returns the whole section
 *   ($var, array)           replaces the section, returns the array
 *   ($var, $key)            returns the member, '' when it is not set
 *   ($var, $key, $value)    sets the member (after conv_type()), returns ''
 *   ($var, $key, 'UNSET')   removes the member, returns its old value
 * $var 'include':           includes the file $val when it exists
 * ($name, $def, 'block'):   adds or replaces a block definition
 * Other $var with a truthy $val:
 *   section not set yet     the section becomes $val
 *   $data === TRUE          $val is put in front of the existing entries
 *   other truthy $data      sets member $val of the section to $data
 *   $data === NULL          $val is added behind the existing entries
 * Anything else:            returns the section, array() when undefined
 *
 * 'UNSET' is compared strictly: a loose comparison is also true for the
 * value TRUE (and for 0 before PHP 8), which would remove the member
 * instead of setting it.
 *
 * @param string $var  section name
 * @param mixed  $val  member name, or the data to add
 * @param mixed  $data value or mode, see above
 * @return mixed
 */
function gmlp_add($var, $val = NULL, $data = NULL) {
    global $gmlp_translate;

    // meta data (OPTIONS, HOOKS) are uppercase (assumes arrays exist)
    if (ctype_upper($var)) {
        if ($val === NULL)
            return $gmlp_translate[$var];               // return all
        if ($data !== NULL) {
            if ($data === 'UNSET') {
                $t = isset($gmlp_translate[$var][$val])
                    ? $gmlp_translate[$var][$val]
                    : NULL;
                unset($gmlp_translate[$var][$val]);
                return $t;
            }
            conv_type($data);
            $gmlp_translate[$var][$val] = $data;        // set one
        }
        else {
            if (is_array($val))                         // set array
                return $gmlp_translate[$var] = $val;
            if (isset($gmlp_translate[$var][$val]))
                return $gmlp_translate[$var][$val];
        }
        return '';
    }

    // special case
    if ($var == 'include') {
        if (is_file($val))
            include $val;
        return;
    }

    // adding a block array? (add or replace)
    if ($val && $data === 'block') {
        gmlp_block_defaults($val);
        $gmlp_translate['blocks'][$var] = $val;
        return;
    }

    // adding to a non block array/value?
    if ($val && ($data || $data === NULL)) {
        conv_type($val);
        if (!isset($gmlp_translate[$var]))
            $gmlp_translate[$var] = $val;
        else
        if ($data === TRUE)
            $gmlp_translate[$var] = $val + $gmlp_translate[$var];
        elseif ($data)
            $gmlp_translate[$var][$val] = $data;
        else
            $gmlp_translate[$var] += $val;
        return;
    }

    // return member (undefined is array())
    return (isset($gmlp_translate[$var])) ? $gmlp_translate[$var] : array();
}
/**
 * conv_type - convert textual value tokens to the values they stand for.
 *
 * The strings '""' and "''" become the empty string and the string 'NULL'
 * becomes NULL; every other value is left alone. The comparisons are
 * strict: compared loosely, TRUE equals any non-empty string (and, before
 * PHP 8, 0 equals any non-numeric string), which would turn those values
 * into ''.
 *
 * @param mixed $data value to convert, modified in place
 * @return void
 */
function conv_type(&$data) {
    if ($data === '""' || $data === "''")
        $data = '';
    elseif ($data === 'NULL')
        $data = NULL;
}
/**
 * gmlp_lookup_block - index into data's 'blocks' section and return one if
 * match.
 *
 * The line is tested against the 'begin' RE of every block definition, in
 * definition order; the first definition that matches is returned.
 *
 * @param string $_ line that may start a block
 * @return array|false the matching block definition, FALSE when none matches
 */
function gmlp_lookup_block($_) {
    global $gmlp_translate;

    $blocks =& $gmlp_translate['blocks'];
    foreach ($blocks as $definition) {
        if (preg_match($definition['begin'], $_)) {
            return $definition;
        }
    }
    return FALSE;
}
/**
 * gmlp_block_defaults - set (missing) defaults for a block definition.
 *
 * Accepts either a single block definition or an array of them keyed by
 * block name. It is taken to be a single definition when its current
 * element is a string. In the keyed form every definition also gets its
 * key stored as its 'name' member. An empty argument is left as it is.
 *
 * @param array $blocks definition(s), completed in place
 * @return void
 */
function gmlp_block_defaults(&$blocks) {
    if (!$blocks)
        return;

    // a single definition holds strings; a collection holds arrays
    if (is_string(current($blocks))) {
        gmlp_def_block($blocks);
        return;
    }

    foreach (array_keys($blocks) as $name) {
        $blocks[$name]['name'] = $name;
        gmlp_def_block($blocks[$name]);
    }
}
/**
 * gmlp_def_block - default missing block array elements.
 *
 * Every key listed in the BLOCKS constant that the definition does not
 * have yet is added with the value ''. Keys that exist keep their value,
 * even when that value is NULL.
 *
 * @param array $block block definition, completed in place
 * @return void
 */
function gmlp_def_block(&$block) {
    foreach (BLOCKS as $key) {
        if (array_key_exists($key, $block))
            continue;
        $block[$key] = '';
    }
}
/**
 * gmlp_def_def - default the translate definitions array.
 *
 * Without argument every built-in section of $gmlp_translate is (re)set to
 * its default value; sections not listed below are left alone.
 *
 * Special case: with $options equal to 1 only the 'OPTIONS' section is
 * touched, and there only the options that are not set yet are filled in
 * from the OPTIONS constant.
 *
 * @param mixed $options 1 to only complete the options
 * @return void
 */
function gmlp_def_def($options = 0) {
    global $gmlp_translate;

    // special case
    if ($options == 1) {
        foreach (OPTIONS as $name => $fallback) {
            if (isset($gmlp_translate['OPTIONS'][$name]))
                continue;
            $gmlp_translate['OPTIONS'][$name] = $fallback;
        }
        return;
    }

    $defaults = array(
        'STRIP'    => "\r",
        'NEWLINE'  => "\n",
        'EOL'      => array(
            'BR' => '<br>',
            'P1' => '<p>',
            'P2' => '</p>',
            'NL' => "\n",
        ),
        'OPTIONS'  => array(
            'entities' => 0,
            'greedy' => 0,
            'newlines' => 0,
            'funclist' => array('inlines','entities','chars'),
            'linesfunc' => 'gmlp_lines',
            'blocksfunc' => 'gmlp_blocks',
        ),
        'HOOKS'    => array(),
        'skip'     => array(),
        'lines'    => array(),
        'line'     => array(),
        'inlines'  => array(),
        'entities' => array(),
        'WORDBEG'  => '/(^|\s*)',
        'WORDEND'  => '(\s*|$)/U',
        'chars'    => array(),
        'blocks'   => array(),
    );
    foreach ($defaults as $section => $value) {
        $gmlp_translate[$section] = $value;
    }
}
/**
 * gmlp - read access to the translate array.
 *
 * @param string|null $var section name; omitted (or loosely equal to NULL)
 *                         to get the whole array
 * @return mixed the whole $gmlp_translate array, the named section, or
 *               array() when that section is not set
 */
function gmlp($var = NULL) {
    global $gmlp_translate;

    if ($var == NULL) {
        return $gmlp_translate;
    }
    if (isset($gmlp_translate[$var])) {
        return $gmlp_translate[$var];
    }
    return array();
}
/**
 * gmlp_error_handler - PHP error handler installed by this file.
 *
 * Formats the error as "message (file, line)" with the file reduced to its
 * base name, labels it "notice" for E_NOTICE and "warning" for everything
 * else, and writes it with gmlp_out() without HTML escaping: ANSI coloured
 * when GMLP_TERM is set, as a coloured HTML paragraph otherwise. With the
 * html_errors ini setting on, bracketed text is removed from the message.
 *
 * The script is terminated when the number of errors handled reaches
 * ERRMAX.
 *
 * @param int    $errno   error level
 * @param string $errstr  error message
 * @param string $errfile file the error occurred in
 * @param int    $errline line the error occurred on
 * @return bool always TRUE
 */
function gmlp_error_handler($errno, $errstr, $errfile, $errline) {
    static $errcnt = 0;

    if (++$errcnt == ERRMAX)
        exit("maximum number of errors reached\n");

    $errfile = basename($errfile);
    $error = "$errstr ($errfile, $errline)";
    if (ini_get('html_errors'))
        $error = preg_replace('/\[.*\]/','',$error);

    $is_notice = ($errno == E_NOTICE);
    if (GMLP_TERM) {
        $error = $is_notice
            ? "\e[33m\e[1mnotice:\e[0m\e[33m $error\e[0m"
            : "\e[35m\e[1mwarning:\e[0m\e[35m $error\e[0m";
    }
    else {
        $error = $is_notice
            ? "<p style='color:blue'><b>notice:</b> $error</p>"
            : "<p style='color:orange'><b>warning:</b> $error</p>";
    }

    gmlp_out("$error\n",0);
    return TRUE;
}
/**
 * gmlp_exception_handler - exception handler installed by this file.
 *
 * With the EXCEPTION_MIN constant defined, a one-line summary is written:
 *   Error: <message> in <file>:<line> (<func>:<line>, <func>:<line>,...)
 * listing the functions of the stack trace. Otherwise the exception's full
 * string form is written. In both cases the path of this file's directory
 * is removed from file names, and the text goes out through gmlp_out().
 *
 * Stack frames do not always carry a 'line' (frames called from internal
 * functions have none) and the trace can be empty; missing values are
 * written as empty text instead of raising warnings while reporting.
 *
 * @param Throwable $e the uncaught exception or error
 * @return void
 */
function gmlp_exception_handler($e) {
    if (defined('EXCEPTION_MIN')) {
        $f = str_replace(__DIR__.'/','',$e->getFile());
        $l = $e->getLine();
        $m = "Error: {$e->getMessage()} in $f:$l ";

        $frames = array();
        foreach ($e->getTrace() as $t) {
            $func = isset($t['function']) ? $t['function'] : '';
            $line = isset($t['line']) ? $t['line'] : '';
            $frames[] = "$func:$line";
        }

        // the first frame is followed by ", ", all others by ","
        $first = array_shift($frames);
        $m .= '('.($first === NULL ? ':' : $first).', ';
        foreach ($frames as $frame)
            $m .= "$frame,";
        $m .= ')';
    }
    else {
        $m = $e->__toString();
        $m = str_replace(__DIR__.'/','',$m);
    }
    gmlp_out("\n$m\n");
}
/**
 * gmlp_out - write a message to the user.
 *
 * With GMLP_TERM set the text is written to STDERR unchanged. Otherwise it
 * is printed: HTML-escaped with newlines turned into <br /> by default, or
 * as is when $he is falsy.
 *
 * @param string $s  text to write
 * @param mixed  $he truthy: escape the text for HTML (ignored with GMLP_TERM)
 * @return void
 */
function gmlp_out($s, $he = 1) {
    if (GMLP_TERM) {
        fwrite(STDERR,$s);
        return;
    }
    print($he ? nl2br(htmlentities($s)) : $s);
}
// END