-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmarkdownSplit.php
More file actions
145 lines (120 loc) · 4.08 KB
/
markdownSplit.php
File metadata and controls
145 lines (120 loc) · 4.08 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
<?php
namespace diversen;
/**
* Class that splits markdown by headers
* Regex found in Michelf markdown class
* See: https://github.com/michelf/php-markdown
*/
class markdownSplit {

    /**
     * Regex matching a setext header: a line of text underlined by a run of
     * "=" or "-". $1 = header text, $2 = the underline characters.
     * The header may be terminated by newlines or by the end of the text.
     * @var string
     */
    public $setextRegex = '{ ^(.+?)[ ]*\n(=+|-+)[ ]*(?:\n+|\z) }mx';

    /**
     * Regex matching an atx header ("# Title", "## Title ##", ...).
     * The header may be terminated by newlines or by the end of the text,
     * so a header on the very last line is recognised even without a
     * trailing newline.
     * @var string
     */
    public $atxRegex = '{
            ^(\#{1,6})  # $1 = string of #\'s
            [ ]*
            (.+?)       # $2 = Header text
            [ ]*
            \#*         # optional closing #\'s (not counted)
            (?:\n+|\z)  # trailing newlines, or end of the text
        }xm';

    /**
     * Rewrite setext headers ("Title" underlined with "=" or "-") as atx
     * headers ("# Title" / "## Title").
     * @param string $text markdown text
     * @return string $text markdown text with setext headers converted
     */
    public function normalize($text) {
        return preg_replace_callback(
            $this->setextRegex,
            array($this, '_doHeaders_callback_setext'),
            $text
        );
    }

    /**
     * preg_replace_callback handler that turns one setext match into an atx header.
     * @param array $matches [0] = full match, [1] = header text, [2] = underline
     * @return string atx header followed by a blank line, or the untouched
     *                match when it is really an empty list item
     */
    protected function _doHeaders_callback_setext($matches) {
        // A lone "-" underline below a line that is itself "-" or starts with "- "
        // is an empty list item, not a header: leave it as it was.
        if ($matches[2] === '-' && preg_match('{^-(?: |$)}', $matches[1])) {
            return $matches[0];
        }

        // "=" underline => level 1, "-" underline => level 2.
        $level = $matches[2][0] === '=' ? '#' : '##';

        return $level . ' ' . $matches[1] . "\n\n";
    }

    /**
     * Split markdown string into an array by headers.
     *
     * Element 0 always describes the text that precedes the first header
     * (empty header, level 0); every following element describes one header
     * and the text up to the next header.
     *
     * Note: headers are located purely with $atxRegex, so a line starting
     * with "#" inside a code block is treated as a header as well.
     *
     * @param string $text
     * @param boolean $setext Use and transform setext headers to atx headers
     * @return array $ret array of sub-arrays containing ['header', 'header_md', 'body', 'level']
     */
    public function splitMarkdown($text, $setext = false) {
        if ($setext) {
            $text = $this->normalize($text);
        }

        $headers = [];
        $found = preg_match_all($this->atxRegex, $text, $headers);
        $sections = preg_split($this->atxRegex, $text);

        if ($found === false || $sections === false) {
            // PCRE failure: fall back to treating everything as header-less text
            // instead of silently returning an empty result.
            return [[
                'header' => '',
                'header_md' => '',
                'level' => 0,
                'body' => (string) $text,
            ]];
        }

        // Text before any headers. preg_split() always yields this first chunk
        // (an empty string when the text starts with a header).
        $ret = [];
        $ret[0] = [
            'header' => '',
            'header_md' => '',
            'level' => 0,
            'body' => $sections[0],
        ];

        // The n-th header match is followed by the (n + 1)-th split chunk.
        foreach ($headers[0] as $idx => $header_md) {
            $ret[] = [
                'header' => $headers[2][$idx],
                'header_md' => $header_md,
                'level' => strlen($headers[1][$idx]),
                'body' => $sections[$idx + 1],
            ];
        }

        return $ret;
    }

    /**
     * Split a markdown text into an array. You can specify the header level to split at.
     *
     * Headers whose level is at or above the split level in the hierarchy
     * (level number <= $split_level) each get their own array. Deeper headers
     * (level number > $split_level) are not split off: their markdown and body
     * are appended to the body of the preceding array. Deeper headers that
     * appear before any splitting header are appended to element 0.
     *
     * @param string $text
     * @param boolean $setext Use and transform setext headers to atx headers
     * @param int $split_level 1-6
     * @return array $ret array of sub-arrays containing ['header', 'header_md', 'body', 'level']
     */
    public function splitMarkdownAtLevel($text, $setext = false, $split_level = 1) {
        $parts = $this->splitMarkdown($text, $setext);

        // Element 0 (text before the first header) is always kept as is.
        $ret = [array_shift($parts)];
        $current = 0;

        foreach ($parts as $part) {
            if ($part['level'] > $split_level) {
                // Deeper than the split level: fold back into the current entry.
                $ret[$current]['body'] .= $part['header_md'] . $part['body'];
                continue;
            }

            $current++;
            $ret[$current] = $part;
        }

        return $ret;
    }
}