-
Notifications
You must be signed in to change notification settings - Fork 2.8k
fix: Fix the . dot in the regex not matching \n
#5179
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: v2
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -132,15 +132,14 @@ def reset_application_node_dict(application_node_dict, runtime_node_id, node_dat | |
| application_node = application_node_dict[key] | ||
| if application_node.get('runtime_node_id') == runtime_node_id: | ||
| content: str = application_node.get('content') | ||
| match = re.search('<form_rander>.*?</form_rander>', content) | ||
| match = re.search(r'<form_rander>.*?<\/form_rander>', content, flags=re.DOTALL) | ||
| if match: | ||
| form_setting_str = match.group().replace('<form_rander>', '').replace('</form_rander>', '') | ||
| form_setting = json.loads(form_setting_str) | ||
| form_setting['is_submit'] = True | ||
| form_setting['form_data'] = node_data | ||
| value = f'<form_rander>{json.dumps(form_setting)}</form_rander>' | ||
| res = re.sub('<form_rander>.*?</form_rander>', | ||
| '${value}', content) | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 修复2 |
||
| res = re.sub(r'<form_rander>.*?<\/form_rander>', '${value}', content, flags=re.DOTALL) | ||
| application_node['content'] = res.replace('${value}', value) | ||
| except Exception as e: | ||
| maxkb_logger.warning(f'reset_application_node_dict error: {e}', exc_info=True) | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -115,18 +115,18 @@ def markdown_to_plain_text(md: str) -> str: | |
| # 移除行内代码 `code` | ||
| text = re.sub(r'`(.*?)`', r'\1', text) | ||
| # 移除代码块 ```code``` | ||
| text = re.sub(r'```[\s\S]*?```', '', text) | ||
| text = re.sub(r'```.*?```', '', text, flags=re.DOTALL) | ||
| # 移除多余的换行符 | ||
| text = re.sub(r'\n{2,}', '\n', text) | ||
| # 使用正则表达式去除所有 HTML 标签 | ||
| text = re.sub(r'<[^>]+>', '', text) | ||
| # 先移除特定媒体标签(优先级高于通用HTML标签移除) | ||
| text = re.sub(r'<(?:audio|video)(?:\s+[^>]*)?>[\s\S]*?(?:</(?:audio|video)>)?', '', text, flags=re.IGNORECASE) | ||
| text = re.sub(r'<(?:audio|video)(?:\s+[^>]*)?>.*?(?:</(?:audio|video)>)?', '', text, flags=re.DOTALL | re.IGNORECASE) | ||
| text = re.sub(r'<img[^>]*>', '', text) # 匹配图片标签 | ||
| # 去除多余的空白字符(包括换行符、制表符等) | ||
| text = re.sub(r'\s+', ' ', text) | ||
| # 去除表单渲染 | ||
| re.sub(r'<form_rander>[\s\S]*?<\/form_rander>', '', text) | ||
| text = re.sub(r'<form_rander>.*?<\/form_rander>', '', text, flags=re.DOTALL) | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 修复4:遗漏了 `text =` 赋值 |
||
| # 去除首尾空格 | ||
| text = text.strip() | ||
| return text | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -102,8 +102,7 @@ def save_problem(knowledge_id, document_id, paragraph_id, problem): | |
| # print(f"paragraph_id: {paragraph_id}") | ||
| # print(f"problem: {problem}") | ||
| problem = re.sub(r"^\d+\.\s*", "", problem) | ||
| pattern = r"<question>(.*?)</question>" | ||
| match = re.search(pattern, problem) | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 修复3 |
||
| match = re.search(r"<question>(.*?)<\/question>", problem, flags=re.DOTALL) | ||
| problem = match.group(1) if match else None | ||
| if problem is None or len(problem) == 0: | ||
| return | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
修复1