@@ -19,15 +19,22 @@ class SafeHTMLParser(HTMLParser):
1919 'small' , 'sound' , 'source' , 'spacer' , 'span' , 'strike' , 'strong' ,
2020 'sub' , 'sup' , 'table' , 'tbody' , 'td' , 'textarea' , 'time' , 'tfoot' ,
2121 'th' , 'thead' , 'tr' , 'tt' , 'u' , 'ul' , 'var' , 'video' ]
22- replaces = [["&" , "&" ], ["\" " , """ ], ["<" , "<" ], [">" , ">" ], ["\n " , "<br/>" ], ["\t " , " " ], [" " , " " ], [" " , " " ], ["<br/> " , "<br/> " ]]
22+ replaces_pre = [["&" , "&" ], ["\" " , """ ], ["<" , "<" ], [">" , ">" ]]
23+ replaces_post = [["\n " , "<br/>" ], ["\t " , " " ], [" " , " " ], [" " , " " ], ["<br/> " , "<br/> " ]]
2324 src_schemes = [ "data" ]
2425 uriregex1 = re .compile (r'(?i)\b((?:(https?|ftp|bitcoin):(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s`!()\[\]{};:\'".,<>?]))' )
2526 uriregex2 = re .compile (r'<a href="([^"]+)&' )
2627 emailregex = re .compile (r'\b([A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,})\b' )
2728
2829 @staticmethod
29- def multi_replace (text ):
30- for a in SafeHTMLParser .replaces :
def replace_pre(text):
    """Run the first-stage substitutions over ``text`` and return the result.

    Each entry of ``SafeHTMLParser.replaces_pre`` is a two-item
    ``[old, new]`` pair.  The pairs are applied one after another with
    ``str.replace``, so a later pair sees the output of the earlier ones.

    ``feed`` calls this before it runs the URI and e-mail regex
    substitutions.
    """
    # NOTE(review): presumably these pairs HTML-escape special characters;
    # confirm against the real table in the source file.
    for old, new in SafeHTMLParser.replaces_pre:
        text = text.replace(old, new)
    return text
34+
35+ @staticmethod
36+ def replace_post (text ):
37+ for a in SafeHTMLParser .replaces_post :
3138 text = text .replace (a [0 ], a [1 ])
3239 if len (text ) > 1 and text [0 ] == " " :
3340 text = " " + text [1 :]
@@ -95,12 +102,13 @@ def feed(self, data):
95102 except UnicodeDecodeError :
96103 data = unicode (data , 'utf-8' , errors = 'replace' )
97104 HTMLParser .feed (self , data )
98- tmp = SafeHTMLParser .multi_replace (data )
105+ tmp = SafeHTMLParser .replace_pre (data )
99106 tmp = SafeHTMLParser .uriregex1 .sub (
100107 r'<a href="\1">\1</a>' ,
101108 tmp )
102109 tmp = SafeHTMLParser .uriregex2 .sub (r'<a href="\1&' , tmp )
103110 tmp = SafeHTMLParser .emailregex .sub (r'<a href="mailto:\1">\1</a>' , tmp )
111+ tmp = SafeHTMLParser .replace_post (tmp )
104112 self .raw += tmp
105113
106114 def is_html (self , text = None , allow_picture = False ):
0 commit comments