1+ /*
2+ * Copyright 2024-present MongoDB, Inc.
3+ *
4+ * Licensed under the Apache License, Version 2.0 (the "License");
5+ * you may not use this file except in compliance with the License.
6+ * You may obtain a copy of the License at
7+ *
8+ * http://www.apache.org/licenses/LICENSE-2.0
9+ *
10+ * Unless required by applicable law or agreed to in writing, software
11+ * distributed under the License is distributed on an "AS IS" BASIS,
12+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+ * See the License for the specific language governing permissions and
14+ * limitations under the License.
15+ */
16+
17+ #ifndef MONGOCRYPT_STR_ENCODE_STRING_SETS_PRIVATE_H
18+ #define MONGOCRYPT_STR_ENCODE_STRING_SETS_PRIVATE_H
19+
20+ #include "mongocrypt-buffer-private.h"
21+ #include "mongocrypt.h"
22+
23+ // Represents a valid unicode string with the bad character 0xFF appended to the end. This is our base string which
24+ // we build substring trees on. Stores all the valid code points in the string, plus one code point for 0xFF.
25+ // Exposed for testing.
26+ typedef struct {
27+ _mongocrypt_buffer_t buf ;
28+ uint32_t * codepoint_offsets ;
29+ uint32_t codepoint_len ;
30+ } mc_utf8_string_with_bad_char_t ;
31+
32+ // Initialize by copying buffer into data and adding the bad character.
33+ mc_utf8_string_with_bad_char_t * mc_utf8_string_with_bad_char_from_buffer (const char * buf , uint32_t len );
34+
35+ void mc_utf8_string_with_bad_char_destroy (mc_utf8_string_with_bad_char_t * utf8 );
36+
// A set of affixes that all refer to one shared base string. No attempt is made to prevent duplicates.
// Only the type name is declared here; the struct layout is not part of this header.
typedef struct _mc_affix_set_t mc_affix_set_t;
39+
40+ // Initialize affix set from base string and number of entries (this must be known as a prior).
41+ mc_affix_set_t * mc_affix_set_new (const mc_utf8_string_with_bad_char_t * base_string , uint32_t n_indices );
42+
43+ void mc_affix_set_destroy (mc_affix_set_t * set );
44+
45+ // Insert affix into set. base_start/end_idx are codepoint indices. base_end_idx is exclusive. Returns true if
46+ // inserted, false otherwise.
47+ bool mc_affix_set_insert (mc_affix_set_t * set , uint32_t base_start_idx , uint32_t base_end_idx );
48+
49+ // Insert the base string count times into the set. Treated as a special case, since this is the only affix that
50+ // will appear multiple times. Returns true if inserted, false otherwise.
51+ bool mc_affix_set_insert_base_string (mc_affix_set_t * set , uint32_t count );
52+
53+ // Iterator on affix set.
54+ typedef struct {
55+ mc_affix_set_t * set ;
56+ uint32_t cur_idx ;
57+ } mc_affix_set_iter_t ;
58+
59+ // Point the iterator to the first affix of the given set.
60+ void mc_affix_set_iter_init (mc_affix_set_iter_t * it , mc_affix_set_t * set );
61+
62+ // Get the next affix, its length in bytes, and its count. Returns false if the set does not have a next element, true
63+ // otherwise.
64+ bool mc_affix_set_iter_next (mc_affix_set_iter_t * it , const char * * str , uint32_t * byte_len , uint32_t * count );
65+
// A set of substrings that all refer to one shared base string. Duplicates are prevented.
// Only the type name is declared here; the struct layout is not part of this header.
typedef struct _mc_substring_set_t mc_substring_set_t;
68+
69+ mc_substring_set_t * mc_substring_set_new (const mc_utf8_string_with_bad_char_t * base_string );
70+
71+ void mc_substring_set_destroy (mc_substring_set_t * set );
72+
73+ // Insert the base string count times into the set. Treated as a special case, since this is the only substring that
74+ // will appear multiple times. Always inserts successfully.
75+ void mc_substring_set_increment_fake_string (mc_substring_set_t * set , uint32_t count );
76+
77+ // Insert substring into set. base_start/end_idx are codepoint indices. base_end_idx is exclusive. Returns true if
78+ // inserted, false otherwise.
79+ bool mc_substring_set_insert (mc_substring_set_t * set , uint32_t base_start_idx , uint32_t base_end_idx );
80+
81+ // Iterator on substring set.
82+ typedef struct {
83+ mc_substring_set_t * set ;
84+ void * cur_node ;
85+ uint32_t cur_idx ;
86+ } mc_substring_set_iter_t ;
87+
88+ // Point the iterator to the first substring of the given set.
89+ void mc_substring_set_iter_init (mc_substring_set_iter_t * it , mc_substring_set_t * set );
90+
91+ // Get the next substring, its length in bytes, and its count. Returns false if the set does not have a next element,
92+ // true otherwise.
93+ bool mc_substring_set_iter_next (mc_substring_set_iter_t * it , const char * * str , uint32_t * byte_len , uint32_t * count );
94+
95+ #endif
0 commit comments