16#ifndef TLX_SORT_STRINGS_SAMPLE_SORT_TOOLS_HEADER
17#define TLX_SORT_STRINGS_SAMPLE_SORT_TOOLS_HEADER
39template <
typename Type>
41std::string
to_binary(Type v,
const size_t width = (8 *
sizeof(Type))) {
42 std::string str(width,
' ');
43 for (
size_t i = 0; i < width; i++) {
44 str[width - i - 1] = (v & 1) ?
'1' :
'0';
51template <
size_t TreeBits>
53 static const bool debug =
false;
67 unsigned int bkt = ((
id << (hi + 1)) & bitmask) | (1 << hi);
82 unsigned int bkt = ((
id >> lo) & bitmask) | (1 << (
treebits - lo));
122template <
typename key_type,
size_t num_splitters>
131 key_type tree[num_splitters + 1],
132 unsigned char splitter_lcp[num_splitters + 1],
133 const key_type* samples,
size_t samplesize)
136 key_type sentinel = 0;
137 recurse(samples, samples + samplesize, 1, sentinel);
139 assert(
splitter_ == splitter + num_splitters);
140 assert(
lcp_iter_ == splitter_lcp + num_splitters);
142 splitter_lcp[0] &= 0x80;
144 splitter_lcp[num_splitters] = 0;
147 ptrdiff_t
snum(
const key_type* s)
const {
148 return static_cast<ptrdiff_t
>(s -
samples_);
151 key_type
recurse(
const key_type* lo,
const key_type* hi,
152 unsigned int treeidx, key_type& rec_prevkey) {
154 <<
"rec_buildtree(" <<
snum(lo) <<
"," <<
snum(hi)
155 <<
", treeidx=" << treeidx <<
")";
158 const key_type* mid = lo +
static_cast<ptrdiff_t
>(hi - lo) / 2;
161 <<
"tree[" << treeidx <<
"] = samples[" <<
snum(mid) <<
"] = "
164 key_type mykey =
tree_[treeidx] = *mid;
166 const key_type* midlo = mid;
167 while (lo < midlo && *(midlo - 1) == mykey) midlo--;
169 const key_type* midhi = mid;
170 while (midhi + 1 < hi && *midhi == mykey) midhi++;
172 if (midhi - midlo > 1)
175 const key_type* midlo = mid, * midhi = mid + 1;
177 if (2 * treeidx < num_splitters)
179 key_type prevkey =
recurse(lo, midlo, 2 * treeidx + 0, rec_prevkey);
181 key_type xorSplit = prevkey ^ mykey;
187 <<
" - " <<
clz(xorSplit)
188 <<
" bits = " <<
clz(xorSplit) / 8
194 (
clz(xorSplit) / 8) |
196 ((mykey & 0xFF) ? 0 : 0x80);
198 return recurse(midhi, hi, 2 * treeidx + 1, mykey);
202 key_type xorSplit = rec_prevkey ^ mykey;
208 <<
" - " <<
clz(xorSplit)
209 <<
" bits = " <<
clz(xorSplit) / 8
215 (
clz(xorSplit) / 8) |
217 ((mykey & 0xFF) ? 0 : 0x80);
232template <
typename key_type,
size_t num_splitters>
240 unsigned char splitter_lcp[num_splitters + 1],
241 const key_type* samples,
size_t samplesize)
245 key_type sentinel = 0;
246 recurse(samples, samples + samplesize, 1, sentinel);
248 assert(
lcp_iter_ == splitter_lcp + num_splitters);
250 splitter_lcp[0] &= 0x80;
252 splitter_lcp[num_splitters] = 0;
255 ptrdiff_t
snum(
const key_type* s)
const {
256 return static_cast<ptrdiff_t
>(s -
samples_);
259 key_type
recurse(
const key_type* lo,
const key_type* hi,
260 unsigned int treeidx, key_type& rec_prevkey) {
262 <<
"rec_buildtree(" <<
snum(lo) <<
"," <<
snum(hi)
263 <<
", treeidx=" << treeidx <<
")";
266 const key_type* mid = lo +
static_cast<ptrdiff_t
>(hi - lo) / 2;
269 <<
"tree[" << treeidx <<
"] = samples[" <<
snum(mid) <<
"] = "
272 key_type mykey =
tree_[treeidx] = *mid;
274 const key_type* midlo = mid;
275 while (lo < midlo && *(midlo - 1) == mykey) midlo--;
277 const key_type* midhi = mid;
278 while (midhi + 1 < hi && *midhi == mykey) midhi++;
280 if (midhi - midlo > 1)
283 const key_type* midlo = mid, * midhi = mid + 1;
285 if (2 * treeidx < num_splitters)
287 key_type prevkey =
recurse(lo, midlo, 2 * treeidx + 0, rec_prevkey);
289 key_type xorSplit = prevkey ^ mykey;
295 <<
" - " <<
clz(xorSplit)
296 <<
" bits = " <<
clz(xorSplit) / 8
300 (
clz(xorSplit) / 8) |
302 ((mykey & 0xFF) ? 0 : 0x80);
304 return recurse(midhi, hi, 2 * treeidx + 1, mykey);
308 key_type xorSplit = rec_prevkey ^ mykey;
314 <<
" - " <<
clz(xorSplit)
315 <<
" bits = " <<
clz(xorSplit) / 8
319 (
clz(xorSplit) / 8) |
321 ((mykey & 0xFF) ? 0 : 0x80);
336template <
typename key_type,
size_t TreeBits,
size_t Rollout = 4>
344 void build(key_type* samples,
size_t samplesize,
345 unsigned char* splitter_lcp) {
348 samples, samplesize);
369#define TLX_CLASSIFY_TREE_STEP \
370 for (size_t u = 0; u < Rollout; ++u) { \
371 i[u] = 2 * i[u] + (key[u] <= splitter_tree_[i[u]] ? 0 : 1); \
373 TLX_ATTRIBUTE_FALLTHROUGH;
378 const key_type key[Rollout], std::uint16_t obkt[Rollout])
const {
381 unsigned int i[Rollout];
382 std::fill(i, i + Rollout, 1u);
422 for (
size_t u = 0; u < Rollout; ++u)
425 for (
size_t u = 0; u < Rollout; ++u) {
430 for (
size_t u = 0; u < Rollout; ++u) {
437#undef TLX_CLASSIFY_TREE_STEP
440 template <
typename StringSet>
443 const StringSet& strset,
444 typename StringSet::Iterator begin,
typename StringSet::Iterator end,
445 std::uint16_t* bktout,
size_t depth)
const {
448 if (begin + Rollout <= end)
450 key_type key[Rollout];
451 for (
size_t u = 0; u < Rollout; ++u)
456 begin += Rollout, bktout += Rollout;
478template <
typename key_type,
size_t TreeBits>
486 void build(key_type* samples,
size_t samplesize,
unsigned char* splitter_lcp) {
491#define TLX_CLASSIFY_TREE_STEP \
492 if (TLX_UNLIKELY(key == splitter_tree_[i])) { \
494 2 * PerfectTreeCalculations<treebits>::level_to_preorder(i) - 1; \
496 i = 2 * i + (key < splitter_tree_[i] ? 0 : 1); \
497 TLX_ATTRIBUTE_FALLTHROUGH;
547#undef TLX_CLASSIFY_TREE_STEP
550 template <
typename StringSet>
552 const StringSet& strset,
553 typename StringSet::Iterator begin,
typename StringSet::Iterator end,
554 std::uint16_t* bktout,
size_t depth)
const {
576template <
typename key_type,
size_t TreeBits,
unsigned Rollout = 4>
584 void build(key_type* samples,
size_t samplesize,
585 unsigned char* splitter_lcp) {
614#define TLX_CLASSIFY_TREE_STEP \
615 for (size_t u = 0; u < Rollout; ++u) { \
616 i[u] = 2 * i[u] + (key[u] <= splitter_tree_[i[u]] ? 0 : 1); \
618 TLX_ATTRIBUTE_FALLTHROUGH;
623 const key_type key[Rollout], std::uint16_t obkt[Rollout])
const {
626 unsigned int i[Rollout];
627 std::fill(i + 0, i + Rollout, 1);
668 for (
unsigned u = 0; u < Rollout; ++u)
671 for (
unsigned u = 0; u < Rollout; ++u) {
676 for (
unsigned u = 0; u < Rollout; ++u) {
683#undef TLX_CLASSIFY_TREE_STEP
686 template <
typename StringSet>
689 const StringSet& strset,
690 typename StringSet::Iterator begin,
typename StringSet::Iterator end,
691 std::uint16_t* bktout,
size_t depth)
const {
694 if (begin + Rollout <= end)
696 key_type key[Rollout];
697 for (
size_t u = 0; u < Rollout; ++u)
702 begin += Rollout, bktout += Rollout;
Sample Sort Classification Tree Unrolled with Equal Comparisons.
key_type splitter_tree_[num_splitters+1]
void build(key_type *samples, size_t samplesize, unsigned char *splitter_lcp)
build tree and splitter array from sample
static const size_t treebits
key_type get_splitter(unsigned int i) const
return a splitter
void classify(const StringSet &strset, typename StringSet::Iterator begin, typename StringSet::Iterator end, std::uint16_t *bktout, size_t depth) const
classify all strings in area by walking tree and saving bucket id
unsigned int find_bkt(const key_type &key) const
binary search on splitter array for bucket number
static const size_t num_splitters
Sample Sort Classification Tree Unrolled, Interleaved, and with Perfect Tree Index Calculations.
key_type splitter_tree_[num_splitters+1]
void find_bkt_unroll(const key_type key[Rollout], std::uint16_t obkt[Rollout]) const
search in splitter tree for bucket number, unrolled for Rollout keys at once.
void build(key_type *samples, size_t samplesize, unsigned char *splitter_lcp)
build tree and splitter array from sample
static const size_t treebits
key_type get_splitter(unsigned int i) const
return a splitter
void classify(const StringSet &strset, typename StringSet::Iterator begin, typename StringSet::Iterator end, std::uint16_t *bktout, size_t depth) const
classify all strings in area by walking tree and saving bucket id
unsigned int find_bkt(const key_type &key) const
binary search on splitter array for bucket number
static const size_t num_splitters
Sample Sort Classification Tree Unrolled and Interleaved.
key_type splitter_tree_[num_splitters+1]
void find_bkt_unroll(const key_type key[Rollout], std::uint16_t obkt[Rollout]) const
search in splitter tree for bucket number, unrolled for Rollout keys at once.
void build(key_type *samples, size_t samplesize, unsigned char *splitter_lcp)
build tree and splitter array from sample
static const size_t treebits
key_type splitter_[num_splitters]
key_type get_splitter(unsigned int i) const
return a splitter
void classify(const StringSet &strset, typename StringSet::Iterator begin, typename StringSet::Iterator end, std::uint16_t *bktout, size_t depth) const
classify all strings in area by walking tree and saving bucket id
unsigned int find_bkt(const key_type &key) const
binary search on splitter array for bucket number
static const size_t num_splitters
Recursive TreeBuilder for full-descent and unrolled variants, constructs only a level-order binary tr...
ptrdiff_t snum(const key_type *s) const
SSTreeBuilderLevelOrder(key_type tree[num_splitters], unsigned char splitter_lcp[num_splitters+1], const key_type *samples, size_t samplesize)
build tree, sizes: splitter_tree[num_splitters + 1] and
key_type recurse(const key_type *lo, const key_type *hi, unsigned int treeidx, key_type &rec_prevkey)
unsigned char * lcp_iter_
const key_type * samples_
static const bool debug_splitter
Recursive TreeBuilder for full-descent and unrolled variants, constructs a both a pre-order and level...
ptrdiff_t snum(const key_type *s) const
key_type recurse(const key_type *lo, const key_type *hi, unsigned int treeidx, key_type &rec_prevkey)
unsigned char * lcp_iter_
SSTreeBuilderPreAndLevelOrder(key_type splitter[num_splitters], key_type tree[num_splitters+1], unsigned char splitter_lcp[num_splitters+1], const key_type *samples, size_t samplesize)
const key_type * samples_
static const bool debug_splitter
#define tlx_die_unequal(X, Y)
Check that X == Y or die miserably, but output the values of X and Y for better debugging.
std::string hexdump_type(const Type &t)
Dump a (binary) item as a sequence of uppercase hexadecimal pairs.
#define TLX_LOGC(cond)
Explicitly specify the condition for logging.
#define TLX_LOG
Default logging method: output if the local debug variable is true.
#define TLX_LOG0
Override default output: never or always output log.
static void perfect_tree_calculations_self_verify()
enable_if< sizeof(Type)==4, std::uint32_t >::type get_key(const StringSet &strset, const typename StringSet::String &s, size_t depth)
static std::string to_binary(Type v, const size_t width=(8 *sizeof(Type)))
represent binary digits of large integer datatypes
Class to transform in-order to level-order indexes in a perfect binary tree.
static const size_t num_nodes
static unsigned int level_to_preorder(unsigned int id)
static const size_t treebits
static unsigned int pre_to_levelorder(unsigned int id)
static void self_verify()