From c167c6568905f39d3ffbd6e66f963e88af756a33 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Berson?= Date: Tue, 12 Nov 2013 12:45:09 +0100 Subject: [PATCH] Add some symbols to punctuation in strip_punctuation --- src/preprocessing.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/preprocessing.jl b/src/preprocessing.jl index aef656f9..437c162c 100644 --- a/src/preprocessing.jl +++ b/src/preprocessing.jl @@ -362,7 +362,7 @@ function _build_regex_patterns{T <: String}(lang, flags::Uint32, patterns::Set{T if (flags & strip_non_letters) > 0 push!(patterns, "[^a-zA-Z\\s]") else - ((flags & strip_punctuation) > 0) && push!(patterns, "[,;:.!?()]+") + ((flags & strip_punctuation) > 0) && push!(patterns, "[\"',;:.!?()]+") ((flags & strip_numbers) > 0) && push!(patterns, "\\d+") end if (flags & strip_articles) > 0