Skip to content

Commit 171bc83

Browse files
committed
HTML parser fix
- URLs followed by a space were broken
1 parent b7e75b9 commit 171bc83

File tree

1 file changed

+12
-4
lines changed

1 file changed

+12
-4
lines changed

src/bitmessageqt/safehtmlparser.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,22 @@ class SafeHTMLParser(HTMLParser):
1919
'small', 'sound', 'source', 'spacer', 'span', 'strike', 'strong',
2020
'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'time', 'tfoot',
2121
'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'video']
22-
replaces = [["&", "&amp;"], ["\"", "&quot;"], ["<", "&lt;"], [">", "&gt;"], ["\n", "<br/>"], ["\t", "&nbsp;&nbsp;&nbsp;&nbsp;"], [" ", "&nbsp; "], [" ", "&nbsp; "], ["<br/> ", "<br/>&nbsp;"]]
22+
replaces_pre = [["&", "&amp;"], ["\"", "&quot;"], ["<", "&lt;"], [">", "&gt;"]]
23+
replaces_post = [["\n", "<br/>"], ["\t", "&nbsp;&nbsp;&nbsp;&nbsp;"], [" ", "&nbsp; "], [" ", "&nbsp; "], ["<br/> ", "<br/>&nbsp;"]]
2324
src_schemes = [ "data" ]
2425
uriregex1 = re.compile(r'(?i)\b((?:(https?|ftp|bitcoin):(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?]))')
2526
uriregex2 = re.compile(r'<a href="([^"]+)&amp;')
2627
emailregex = re.compile(r'\b([A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,})\b')
2728

2829
@staticmethod
29-
def multi_replace(text):
30-
for a in SafeHTMLParser.replaces:
30+
def replace_pre(text):
31+
for a in SafeHTMLParser.replaces_pre:
32+
text = text.replace(a[0], a[1])
33+
return text
34+
35+
@staticmethod
36+
def replace_post(text):
37+
for a in SafeHTMLParser.replaces_post:
3138
text = text.replace(a[0], a[1])
3239
if len(text) > 1 and text[0] == " ":
3340
text = "&nbsp;" + text[1:]
@@ -95,12 +102,13 @@ def feed(self, data):
95102
except UnicodeDecodeError:
96103
data = unicode(data, 'utf-8', errors='replace')
97104
HTMLParser.feed(self, data)
98-
tmp = SafeHTMLParser.multi_replace(data)
105+
tmp = SafeHTMLParser.replace_pre(data)
99106
tmp = SafeHTMLParser.uriregex1.sub(
100107
r'<a href="\1">\1</a>',
101108
tmp)
102109
tmp = SafeHTMLParser.uriregex2.sub(r'<a href="\1&', tmp)
103110
tmp = SafeHTMLParser.emailregex.sub(r'<a href="mailto:\1">\1</a>', tmp)
111+
tmp = SafeHTMLParser.replace_post(tmp)
104112
self.raw += tmp
105113

106114
def is_html(self, text = None, allow_picture = False):

0 commit comments

Comments
 (0)