Sanitize full-text search input and short-circuit blank search terms.

getFulltextValue() in the SQL and Postgres adapters now replaces every
character that is not a Unicode letter, number, underscore or whitespace
with a space before the value is bound. When the sanitized value is an
empty string, getSQLCondition() in the MariaDB and Postgres adapters emits
a constant condition ('0 = 1' for search, '1 = 1' for not-search) instead
of binding it.

The new blank check in Postgres::getFulltextValue() compares strictly
against '' because empty('0') is true in PHP, which would turn a search
for the term "0" into a constant condition.

NOTE(review): SQL::getFulltextValue() still guards with empty($value). The
body of that guard is outside this patch, so it is left untouched here;
confirm whether it needs the same strict comparison.

diff --git a/src/Database/Adapter/MariaDB.php b/src/Database/Adapter/MariaDB.php
index 1bd8797c9..a6214e2da 100644
--- a/src/Database/Adapter/MariaDB.php
+++ b/src/Database/Adapter/MariaDB.php
@@ -1593,12 +1593,20 @@ protected function getSQLCondition(Query $query, array &$binds): string
                 return empty($conditions) ? '' : ' '. $method .' (' . implode(' AND ', $conditions) . ')';
 
             case Query::TYPE_SEARCH:
-                $binds[":{$placeholder}_0"] = $this->getFulltextValue($query->getValue());
+                $fulltextValue = $this->getFulltextValue($query->getValue());
+                if ($fulltextValue === '') {
+                    return '0 = 1';
+                }
+                $binds[":{$placeholder}_0"] = $fulltextValue;
 
                 return "MATCH({$alias}.{$attribute}) AGAINST (:{$placeholder}_0 IN BOOLEAN MODE)";
 
             case Query::TYPE_NOT_SEARCH:
-                $binds[":{$placeholder}_0"] = $this->getFulltextValue($query->getValue());
+                $fulltextValue = $this->getFulltextValue($query->getValue());
+                if ($fulltextValue === '') {
+                    return '1 = 1';
+                }
+                $binds[":{$placeholder}_0"] = $fulltextValue;
 
                 return "NOT (MATCH({$alias}.{$attribute}) AGAINST (:{$placeholder}_0 IN BOOLEAN MODE))";
 
diff --git a/src/Database/Adapter/Postgres.php b/src/Database/Adapter/Postgres.php
index 2af11aea3..84e09f61b 100644
--- a/src/Database/Adapter/Postgres.php
+++ b/src/Database/Adapter/Postgres.php
@@ -1796,11 +1796,19 @@ protected function getSQLCondition(Query $query, array &$binds): string
                 return empty($conditions) ? '' : ' ' . $method . ' (' . implode(' AND ', $conditions) . ')';
 
             case Query::TYPE_SEARCH:
-                $binds[":{$placeholder}_0"] = $this->getFulltextValue($query->getValue());
+                $fulltextValue = $this->getFulltextValue($query->getValue());
+                if ($fulltextValue === '') {
+                    return '0 = 1';
+                }
+                $binds[":{$placeholder}_0"] = $fulltextValue;
                 return "to_tsvector(regexp_replace({$attribute}, '[^\w]+',' ','g')) @@ websearch_to_tsquery(:{$placeholder}_0)";
 
             case Query::TYPE_NOT_SEARCH:
-                $binds[":{$placeholder}_0"] = $this->getFulltextValue($query->getValue());
+                $fulltextValue = $this->getFulltextValue($query->getValue());
+                if ($fulltextValue === '') {
+                    return '1 = 1';
+                }
+                $binds[":{$placeholder}_0"] = $fulltextValue;
                 return "NOT (to_tsvector(regexp_replace({$attribute}, '[^\w]+',' ','g')) @@ websearch_to_tsquery(:{$placeholder}_0))";
 
             case Query::TYPE_VECTOR_DOT:
@@ -1912,10 +1920,17 @@ protected function getVectorDistanceOrder(Query $query, array &$binds, string $a
     protected function getFulltextValue(string $value): string
     {
         $exact = str_ends_with($value, '"') && str_starts_with($value, '"');
-        $value = str_replace(['@', '+', '-', '*', '.', "'", '"'], ' ', $value);
-        $value = preg_replace('/\s+/', ' ', $value); // Remove multiple whitespaces
+
+        /** Keep only unicode letters, numbers, underscores, and whitespace. */
+        $value = preg_replace('/[^\p{L}\p{N}_\s]/u', ' ', $value) ?? '';
+        $value = preg_replace('/\s+/', ' ', $value) ?? '';
         $value = trim($value);
 
+        // Compare strictly: empty('0') is true and would discard the search term "0".
+        if ($value === '') {
+            return '';
+        }
+
         if (!$exact) {
             $value = str_replace(' ', ' or ', $value);
         }
diff --git a/src/Database/Adapter/SQL.php b/src/Database/Adapter/SQL.php
index fb949dfa4..04db9a002 100644
--- a/src/Database/Adapter/SQL.php
+++ b/src/Database/Adapter/SQL.php
@@ -1752,10 +1752,9 @@ protected function getFulltextValue(string $value): string
     {
         $exact = str_ends_with($value, '"') && str_starts_with($value, '"');
 
-        /** Replace reserved chars with space. */
-        $specialChars = '@,+,-,*,),(,<,>,~,"';
-        $value = str_replace(explode(',', $specialChars), ' ', $value);
-        $value = preg_replace('/\s+/', ' ', $value); // Remove multiple whitespaces
+        /** Keep only unicode letters, numbers, underscores, and whitespace. */
+        $value = preg_replace('/[^\p{L}\p{N}_\s]/u', ' ', $value) ?? '';
+        $value = preg_replace('/\s+/', ' ', $value) ?? '';
         $value = trim($value);
 
         if (empty($value)) {
diff --git a/tests/e2e/Adapter/Scopes/DocumentTests.php b/tests/e2e/Adapter/Scopes/DocumentTests.php
index e79e9ccec..82b9dc0e4 100644
--- a/tests/e2e/Adapter/Scopes/DocumentTests.php
+++ b/tests/e2e/Adapter/Scopes/DocumentTests.php
@@ -2179,6 +2179,95 @@ public function testFindFulltextSpecialChars(): void
         $this->assertEquals(1, count($documents));
     }
 
+    /**
+     * Regression: accented characters and non-operator special chars
+     * previously caused SQLSTATE[42000] syntax error in FTS BOOLEAN MODE.
+     *
+     * @see https://appwrite.sentry.io/issues/5628237003
+     */
+    public function testFindFulltextAccentedAndSpecialChars(): void
+    {
+        /** @var Database $database */
+        $database = $this->getDatabase();
+
+        if (!$database->getAdapter()->getSupportForFulltextIndex()) {
+            $this->expectNotToPerformAssertions();
+            return;
+        }
+
+        $collection = 'full_text_unicode';
+        $database->createCollection($collection, permissions: [
+            Permission::create(Role::any()),
+            Permission::update(Role::users())
+        ]);
+
+        $this->assertTrue($database->createAttribute($collection, 'nombre', Database::VAR_STRING, 128, true));
+        $this->assertTrue($database->createIndex($collection, 'nombre-ft', Database::INDEX_FULLTEXT, ['nombre']));
+
+        $database->createDocument($collection, new Document([
+            '$permissions' => [Permission::read(Role::any())],
+            'nombre' => 'Luis García'
+        ]));
+
+        $database->createDocument($collection, new Document([
+            '$permissions' => [Permission::read(Role::any())],
+            'nombre' => 'Álvaro Yair Cuéllar'
+        ]));
+
+        $database->createDocument($collection, new Document([
+            '$permissions' => [Permission::read(Role::any())],
+            'nombre' => 'Fernando naïve über'
+        ]));
+
+        /**
+         * Accented characters must not cause FTS parser errors
+         */
+        $documents = $database->find($collection, [
+            Query::search('nombre', 'García'),
+        ]);
+        $this->assertGreaterThanOrEqual(1, count($documents));
+
+        $documents = $database->find($collection, [
+            Query::search('nombre', 'Álvaro'),
+        ]);
+        $this->assertGreaterThanOrEqual(1, count($documents));
+
+        $documents = $database->find($collection, [
+            Query::search('nombre', 'Cuéllar'),
+        ]);
+        $this->assertGreaterThanOrEqual(1, count($documents));
+
+        /**
+         * Non-operator special chars (! . #) were not stripped by old code,
+         * producing values like "!!!...###*" that crash MySQL's FTS parser.
+         */
+        $documents = $database->find($collection, [
+            Query::search('nombre', '!!!...###'),
+        ]);
+        $this->assertEquals(0, count($documents));
+
+        $documents = $database->find($collection, [
+            Query::search('nombre', '$$$%%%^^^'),
+        ]);
+        $this->assertEquals(0, count($documents));
+
+        /**
+         * FTS operator-only input also must not error
+         */
+        $documents = $database->find($collection, [
+            Query::search('nombre', '+-*@<>~'),
+        ]);
+        $this->assertEquals(0, count($documents));
+
+        /**
+         * Mixed special chars + accented word should still find results
+         */
+        $documents = $database->find($collection, [
+            Query::search('nombre', '@García!'),
+        ]);
+        $this->assertGreaterThanOrEqual(1, count($documents));
+    }
+
     public function testFindMultipleConditions(): void
     {
         /** @var Database $database */