forked from ClickHouse/ClickHouse
-
Notifications
You must be signed in to change notification settings - Fork 18
Expand file tree
/
Copy pathStorageObjectStorageStableTaskDistributor.cpp
More file actions
205 lines (168 loc) · 5.62 KB
/
Copy pathStorageObjectStorageStableTaskDistributor.cpp
File metadata and controls
205 lines (168 loc) · 5.62 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
#include <Storages/ObjectStorage/StorageObjectStorageStableTaskDistributor.h>
#include <Common/SipHash.h>
#include <consistent_hashing.h>
#include <optional>
namespace DB
{
namespace ErrorCodes
{
extern const int LOGICAL_ERROR;
}
/// Creates a distributor that hands out the objects produced by `iterator_` to replicas.
///
/// `iterator_` is stored and later polled with `next(0)` in getMatchingFileFromIterator.
/// `ids_of_nodes_` holds one identifier per replica; getReplicaForFile hashes each identifier
/// together with a file identifier to choose the replica for that file.
/// `send_over_whole_archive_` makes getFileIdentifier use getPathOrPathToArchiveIfArchive()
/// for objects that report isArchive().
StorageObjectStorageStableTaskDistributor::StorageObjectStorageStableTaskDistributor(
std::shared_ptr<IObjectIterator> iterator_,
std::vector<std::string> && ids_of_nodes_,
bool send_over_whole_archive_)
: iterator(std::move(iterator_))
, send_over_whole_archive(send_over_whole_archive_)
/// One (initially empty) pre-queue per replica.
/// NOTE(review): this reads ids_of_nodes_.size() while the next initializer moves from the same
/// parameter. Members are initialized in declaration order, so this presumably relies on
/// connection_to_files being declared before ids_of_nodes in the header - confirm.
, connection_to_files(ids_of_nodes_.size())
, ids_of_nodes(std::move(ids_of_nodes_))
, iterator_exhausted(false)
{
}
/// Returns the next object the given replica should process, or an empty pointer when
/// none of the sources below yields anything.
/// The sources are consulted in a fixed order and the first non-empty result wins.
ObjectInfoPtr StorageObjectStorageStableTaskDistributor::getNextTask(size_t number_of_current_replica)
{
    LOG_TRACE(log, "Received request from replica {} looking for a file", number_of_current_replica);

    /// First: files already pulled from the iterator and queued for this replica.
    ObjectInfoPtr task = getPreQueuedFile(number_of_current_replica);

    /// Second: keep reading the iterator until a file maps to this replica.
    if (!task)
        task = getMatchingFileFromIterator(number_of_current_replica);

    /// Last: hand out any file that is still recorded as unprocessed.
    if (!task)
        task = getAnyUnprocessedFile(number_of_current_replica);

    return task;
}
size_t StorageObjectStorageStableTaskDistributor::getReplicaForFile(const String & file_path)
{
size_t nodes_count = ids_of_nodes.size();
/// Trivial case
if (nodes_count < 2)
return 0;
/// Rendezvous hashing
size_t best_id = 0;
UInt64 best_weight = sipHash64(ids_of_nodes[0] + file_path);
for (size_t id = 1; id < nodes_count; ++id)
{
UInt64 weight = sipHash64(ids_of_nodes[id] + file_path);
if (weight > best_weight)
{
best_weight = weight;
best_id = id;
}
}
return best_id;
}
/// Pops files from the pre-queue of the given replica until one is found that is still
/// present in `unprocessed_files`; that file is removed from `unprocessed_files` and returned.
/// Queue entries that are no longer in `unprocessed_files` are dropped.
/// Returns an empty pointer when the queue runs out.
/// Throws LOGICAL_ERROR if the replica number is not a valid index into `connection_to_files`.
/// The whole function runs under `mutex`.
ObjectInfoPtr StorageObjectStorageStableTaskDistributor::getPreQueuedFile(size_t number_of_current_replica)
{
    std::lock_guard guard(mutex);

    if (number_of_current_replica >= connection_to_files.size())
    {
        throw Exception(
            ErrorCodes::LOGICAL_ERROR,
            "Replica number {} is out of range. Expected range: [0, {})",
            number_of_current_replica,
            connection_to_files.size());
    }

    auto & queue = connection_to_files[number_of_current_replica];

    while (!queue.empty())
    {
        auto candidate = queue.back();
        queue.pop_back();

        auto candidate_id = getFileIdentifier(candidate);
        auto found = unprocessed_files.find(candidate_id);

        if (found != unprocessed_files.end())
        {
            unprocessed_files.erase(found);

            LOG_TRACE(
                log,
                "Assigning pre-queued file {} to replica {}",
                candidate_id,
                number_of_current_replica);

            return candidate;
        }

        /// Not in unprocessed_files any more: skip it and look at the next queued entry.
    }

    return {};
}
/// Reads objects from the iterator until one is mapped (by getReplicaForFile) to the
/// requesting replica, and returns that object.
/// Every object read along the way that maps to a different replica is recorded in
/// `unprocessed_files` and appended to that replica's pre-queue.
/// Returns an empty pointer if the iterator was already marked exhausted or becomes
/// exhausted during this call.
/// `mutex` is taken separately for each step (exhaustion check, iterator read,
/// replica lookup, queueing); it is not held across the identifier computation or logging.
ObjectInfoPtr StorageObjectStorageStableTaskDistributor::getMatchingFileFromIterator(size_t number_of_current_replica)
{
    /// Quick exit: a previous call has already drained the iterator.
    {
        std::lock_guard guard(mutex);
        if (iterator_exhausted)
            return {};
    }

    for (;;)
    {
        ObjectInfoPtr candidate;
        {
            std::lock_guard guard(mutex);
            candidate = iterator->next(0);

            if (!candidate)
            {
                LOG_TEST(log, "Iterator is exhausted");
                iterator_exhausted = true;
                return {};
            }
        }

        String candidate_id = getFileIdentifier(candidate, true);

        size_t owner_replica;
        {
            std::lock_guard guard(mutex);
            owner_replica = getReplicaForFile(candidate_id);
        }

        if (owner_replica != number_of_current_replica)
        {
            LOG_TEST(
                log,
                "Found file {} for replica {} (number of current replica: {})",
                candidate_id,
                owner_replica,
                number_of_current_replica);

            /// Park the file for the replica it maps to, then keep reading.
            {
                std::lock_guard guard(mutex);
                unprocessed_files.emplace(candidate_id, candidate);
                connection_to_files[owner_replica].push_back(candidate);
            }
            continue;
        }

        LOG_TRACE(
            log, "Found file {} for replica {}",
            candidate_id, number_of_current_replica);

        return candidate;
    }
}
/// Removes one entry (the one at `begin()`) from `unprocessed_files` and returns its object,
/// regardless of which replica it was mapped to.
/// Returns an empty pointer when `unprocessed_files` is empty.
/// The matching entry in the per-replica pre-queue is not touched here.
/// Runs under `mutex`.
ObjectInfoPtr StorageObjectStorageStableTaskDistributor::getAnyUnprocessedFile(size_t number_of_current_replica)
{
    std::lock_guard guard(mutex);

    if (unprocessed_files.empty())
        return {};

    auto first = unprocessed_files.begin();
    auto picked = first->second;
    unprocessed_files.erase(first);

    auto picked_id = getFileIdentifier(picked);
    LOG_TRACE(
        log,
        "Iterator exhausted. Assigning unprocessed file {} to replica {}",
        picked_id,
        number_of_current_replica);

    return picked;
}
/// Returns the string used to identify `file_object` inside the distributor.
///
/// Normally this is `getIdentifier()`. When `send_over_whole_archive` is set and the object
/// reports `isArchive()`, `getPathOrPathToArchiveIfArchive()` is used instead.
/// `write_to_log` only controls whether that archive case emits a test-level log message.
String StorageObjectStorageStableTaskDistributor::getFileIdentifier(ObjectInfoPtr file_object, bool write_to_log) const
{
    /// Common case: not an archive that has to be sent as a whole.
    if (!send_over_whole_archive || !file_object->isArchive())
        return file_object->getIdentifier();

    auto archive_path = file_object->getPathOrPathToArchiveIfArchive();

    if (write_to_log)
    {
        LOG_TEST(log, "Will send over the whole archive {} to replicas. "
            "This will be suboptimal, consider turning on "
            "cluster_function_process_archive_on_multiple_nodes setting", archive_path);
    }

    return archive_path;
}
}