| /* BwtSort.c -- BWT block sorting |
| 2008-08-17 |
| Igor Pavlov |
| Public domain */ |
| |
| #include "BwtSort.h" |
| #include "Sort.h" |
| |
| /* #define BLOCK_SORT_USE_HEAP_SORT */ |
| |
| #define NO_INLINE MY_FAST_CALL |
| |
| /* Don't change it !!! */ |
| #define kNumHashBytes 2 |
| #define kNumHashValues (1 << (kNumHashBytes * 8)) |
| |
| /* kNumRefBitsMax must be < (kNumHashBytes * 8) = 16 */ |
| #define kNumRefBitsMax 12 |
| |
| #define BS_TEMP_SIZE kNumHashValues |
| |
| #ifdef BLOCK_SORT_EXTERNAL_FLAGS |
| |
| /* 32 Flags in UInt32 word */ |
| #define kNumFlagsBits 5 |
| #define kNumFlagsInWord (1 << kNumFlagsBits) |
| #define kFlagsMask (kNumFlagsInWord - 1) |
| #define kAllFlags 0xFFFFFFFF |
| |
| #else |
| |
| #define kNumBitsMax 20 |
| #define kIndexMask ((1 << kNumBitsMax) - 1) |
| #define kNumExtraBits (32 - kNumBitsMax) |
| #define kNumExtra0Bits (kNumExtraBits - 2) |
| #define kNumExtra0Mask ((1 << kNumExtra0Bits) - 1) |
| |
| #define SetFinishedGroupSize(p, size) \ |
| { *(p) |= ((((size) - 1) & kNumExtra0Mask) << kNumBitsMax); \ |
| if ((size) > (1 << kNumExtra0Bits)) { \ |
| *(p) |= 0x40000000; *((p) + 1) |= ((((size) - 1)>> kNumExtra0Bits) << kNumBitsMax); } } \ |
| |
| static void SetGroupSize(UInt32 *p, UInt32 size) |
| { |
| if (--size == 0) |
| return; |
| *p |= 0x80000000 | ((size & kNumExtra0Mask) << kNumBitsMax); |
| if (size >= (1 << kNumExtra0Bits)) |
| { |
| *p |= 0x40000000; |
| p[1] |= ((size >> kNumExtra0Bits) << kNumBitsMax); |
| } |
| } |
| |
| #endif |
| |
| /* |
| SortGroup - is recursive Range-Sort function with HeapSort optimization for small blocks |
| "range" is not real range. It's only for optimization. |
| returns: 1 - if there are groups, 0 - no more groups |
| */ |
| |
| UInt32 NO_INLINE SortGroup(UInt32 BlockSize, UInt32 NumSortedBytes, UInt32 groupOffset, UInt32 groupSize, int NumRefBits, UInt32 *Indices |
| #ifndef BLOCK_SORT_USE_HEAP_SORT |
| , UInt32 left, UInt32 range |
| #endif |
| ) |
| { |
| UInt32 *ind2 = Indices + groupOffset; |
| UInt32 *Groups; |
| if (groupSize <= 1) |
| { |
| /* |
| #ifndef BLOCK_SORT_EXTERNAL_FLAGS |
| SetFinishedGroupSize(ind2, 1); |
| #endif |
| */ |
| return 0; |
| } |
| Groups = Indices + BlockSize + BS_TEMP_SIZE; |
| if (groupSize <= ((UInt32)1 << NumRefBits) |
| #ifndef BLOCK_SORT_USE_HEAP_SORT |
| && groupSize <= range |
| #endif |
| ) |
| { |
| UInt32 *temp = Indices + BlockSize; |
| UInt32 j; |
| UInt32 mask, thereAreGroups, group, cg; |
| { |
| UInt32 gPrev; |
| UInt32 gRes = 0; |
| { |
| UInt32 sp = ind2[0] + NumSortedBytes; |
| if (sp >= BlockSize) sp -= BlockSize; |
| gPrev = Groups[sp]; |
| temp[0] = (gPrev << NumRefBits); |
| } |
| |
| for (j = 1; j < groupSize; j++) |
| { |
| UInt32 sp = ind2[j] + NumSortedBytes; |
| UInt32 g; |
| if (sp >= BlockSize) sp -= BlockSize; |
| g = Groups[sp]; |
| temp[j] = (g << NumRefBits) | j; |
| gRes |= (gPrev ^ g); |
| } |
| if (gRes == 0) |
| { |
| #ifndef BLOCK_SORT_EXTERNAL_FLAGS |
| SetGroupSize(ind2, groupSize); |
| #endif |
| return 1; |
| } |
| } |
| |
| HeapSort(temp, groupSize); |
| mask = ((1 << NumRefBits) - 1); |
| thereAreGroups = 0; |
| |
| group = groupOffset; |
| cg = (temp[0] >> NumRefBits); |
| temp[0] = ind2[temp[0] & mask]; |
| |
| { |
| #ifdef BLOCK_SORT_EXTERNAL_FLAGS |
| UInt32 *Flags = Groups + BlockSize; |
| #else |
| UInt32 prevGroupStart = 0; |
| #endif |
| |
| for (j = 1; j < groupSize; j++) |
| { |
| UInt32 val = temp[j]; |
| UInt32 cgCur = (val >> NumRefBits); |
| |
| if (cgCur != cg) |
| { |
| cg = cgCur; |
| group = groupOffset + j; |
| |
| #ifdef BLOCK_SORT_EXTERNAL_FLAGS |
| { |
| UInt32 t = group - 1; |
| Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); |
| } |
| #else |
| SetGroupSize(temp + prevGroupStart, j - prevGroupStart); |
| prevGroupStart = j; |
| #endif |
| } |
| else |
| thereAreGroups = 1; |
| { |
| UInt32 ind = ind2[val & mask]; |
| temp[j] = ind; |
| Groups[ind] = group; |
| } |
| } |
| |
| #ifndef BLOCK_SORT_EXTERNAL_FLAGS |
| SetGroupSize(temp + prevGroupStart, j - prevGroupStart); |
| #endif |
| } |
| |
| for (j = 0; j < groupSize; j++) |
| ind2[j] = temp[j]; |
| return thereAreGroups; |
| } |
| |
| /* Check that all strings are in one group (cannot sort) */ |
| { |
| UInt32 group, j; |
| UInt32 sp = ind2[0] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize; |
| group = Groups[sp]; |
| for (j = 1; j < groupSize; j++) |
| { |
| sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize; |
| if (Groups[sp] != group) |
| break; |
| } |
| if (j == groupSize) |
| { |
| #ifndef BLOCK_SORT_EXTERNAL_FLAGS |
| SetGroupSize(ind2, groupSize); |
| #endif |
| return 1; |
| } |
| } |
| |
| #ifndef BLOCK_SORT_USE_HEAP_SORT |
| { |
| /* ---------- Range Sort ---------- */ |
| UInt32 i; |
| UInt32 mid; |
| for (;;) |
| { |
| UInt32 j; |
| if (range <= 1) |
| { |
| #ifndef BLOCK_SORT_EXTERNAL_FLAGS |
| SetGroupSize(ind2, groupSize); |
| #endif |
| return 1; |
| } |
| mid = left + ((range + 1) >> 1); |
| j = groupSize; |
| i = 0; |
| do |
| { |
| UInt32 sp = ind2[i] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize; |
| if (Groups[sp] >= mid) |
| { |
| for (j--; j > i; j--) |
| { |
| sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize; |
| if (Groups[sp] < mid) |
| { |
| UInt32 temp = ind2[i]; ind2[i] = ind2[j]; ind2[j] = temp; |
| break; |
| } |
| } |
| if (i >= j) |
| break; |
| } |
| } |
| while (++i < j); |
| if (i == 0) |
| { |
| range = range - (mid - left); |
| left = mid; |
| } |
| else if (i == groupSize) |
| range = (mid - left); |
| else |
| break; |
| } |
| |
| #ifdef BLOCK_SORT_EXTERNAL_FLAGS |
| { |
| UInt32 t = (groupOffset + i - 1); |
| UInt32 *Flags = Groups + BlockSize; |
| Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); |
| } |
| #endif |
| |
| { |
| UInt32 j; |
| for (j = i; j < groupSize; j++) |
| Groups[ind2[j]] = groupOffset + i; |
| } |
| |
| { |
| UInt32 res = SortGroup(BlockSize, NumSortedBytes, groupOffset, i, NumRefBits, Indices, left, mid - left); |
| return res | SortGroup(BlockSize, NumSortedBytes, groupOffset + i, groupSize - i, NumRefBits, Indices, mid, range - (mid - left)); |
| } |
| |
| } |
| |
| #else |
| |
| /* ---------- Heap Sort ---------- */ |
| |
| { |
| UInt32 j; |
| for (j = 0; j < groupSize; j++) |
| { |
| UInt32 sp = ind2[j] + NumSortedBytes; if (sp >= BlockSize) sp -= BlockSize; |
| ind2[j] = sp; |
| } |
| |
| HeapSortRef(ind2, Groups, groupSize); |
| |
| /* Write Flags */ |
| { |
| UInt32 sp = ind2[0]; |
| UInt32 group = Groups[sp]; |
| |
| #ifdef BLOCK_SORT_EXTERNAL_FLAGS |
| UInt32 *Flags = Groups + BlockSize; |
| #else |
| UInt32 prevGroupStart = 0; |
| #endif |
| |
| for (j = 1; j < groupSize; j++) |
| { |
| sp = ind2[j]; |
| if (Groups[sp] != group) |
| { |
| group = Groups[sp]; |
| #ifdef BLOCK_SORT_EXTERNAL_FLAGS |
| { |
| UInt32 t = groupOffset + j - 1; |
| Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); |
| } |
| #else |
| SetGroupSize(ind2 + prevGroupStart, j - prevGroupStart); |
| prevGroupStart = j; |
| #endif |
| } |
| } |
| |
| #ifndef BLOCK_SORT_EXTERNAL_FLAGS |
| SetGroupSize(ind2 + prevGroupStart, j - prevGroupStart); |
| #endif |
| } |
| { |
| /* Write new Groups values and Check that there are groups */ |
| UInt32 thereAreGroups = 0; |
| for (j = 0; j < groupSize; j++) |
| { |
| UInt32 group = groupOffset + j; |
| #ifndef BLOCK_SORT_EXTERNAL_FLAGS |
| UInt32 subGroupSize = ((ind2[j] & ~0xC0000000) >> kNumBitsMax); |
| if ((ind2[j] & 0x40000000) != 0) |
| subGroupSize += ((ind2[j + 1] >> kNumBitsMax) << kNumExtra0Bits); |
| subGroupSize++; |
| for (;;) |
| { |
| UInt32 original = ind2[j]; |
| UInt32 sp = original & kIndexMask; |
| if (sp < NumSortedBytes) sp += BlockSize; sp -= NumSortedBytes; |
| ind2[j] = sp | (original & ~kIndexMask); |
| Groups[sp] = group; |
| if (--subGroupSize == 0) |
| break; |
| j++; |
| thereAreGroups = 1; |
| } |
| #else |
| UInt32 *Flags = Groups + BlockSize; |
| for (;;) |
| { |
| UInt32 sp = ind2[j]; if (sp < NumSortedBytes) sp += BlockSize; sp -= NumSortedBytes; |
| ind2[j] = sp; |
| Groups[sp] = group; |
| if ((Flags[(groupOffset + j) >> kNumFlagsBits] & (1 << ((groupOffset + j) & kFlagsMask))) == 0) |
| break; |
| j++; |
| thereAreGroups = 1; |
| } |
| #endif |
| } |
| return thereAreGroups; |
| } |
| } |
| #endif |
| } |
| |
| /* conditions: blockSize > 0 */ |
| UInt32 BlockSort(UInt32 *Indices, const Byte *data, UInt32 blockSize) |
| { |
| UInt32 *counters = Indices + blockSize; |
| UInt32 i; |
| UInt32 *Groups; |
| #ifdef BLOCK_SORT_EXTERNAL_FLAGS |
| UInt32 *Flags; |
| #endif |
| |
| /* Radix-Sort for 2 bytes */ |
| for (i = 0; i < kNumHashValues; i++) |
| counters[i] = 0; |
| for (i = 0; i < blockSize - 1; i++) |
| counters[((UInt32)data[i] << 8) | data[i + 1]]++; |
| counters[((UInt32)data[i] << 8) | data[0]]++; |
| |
| Groups = counters + BS_TEMP_SIZE; |
| #ifdef BLOCK_SORT_EXTERNAL_FLAGS |
| Flags = Groups + blockSize; |
| { |
| UInt32 numWords = (blockSize + kFlagsMask) >> kNumFlagsBits; |
| for (i = 0; i < numWords; i++) |
| Flags[i] = kAllFlags; |
| } |
| #endif |
| |
| { |
| UInt32 sum = 0; |
| for (i = 0; i < kNumHashValues; i++) |
| { |
| UInt32 groupSize = counters[i]; |
| if (groupSize > 0) |
| { |
| #ifdef BLOCK_SORT_EXTERNAL_FLAGS |
| UInt32 t = sum + groupSize - 1; |
| Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); |
| #endif |
| sum += groupSize; |
| } |
| counters[i] = sum - groupSize; |
| } |
| |
| for (i = 0; i < blockSize - 1; i++) |
| Groups[i] = counters[((UInt32)data[i] << 8) | data[i + 1]]; |
| Groups[i] = counters[((UInt32)data[i] << 8) | data[0]]; |
| |
| for (i = 0; i < blockSize - 1; i++) |
| Indices[counters[((UInt32)data[i] << 8) | data[i + 1]]++] = i; |
| Indices[counters[((UInt32)data[i] << 8) | data[0]]++] = i; |
| |
| #ifndef BLOCK_SORT_EXTERNAL_FLAGS |
| { |
| UInt32 prev = 0; |
| for (i = 0; i < kNumHashValues; i++) |
| { |
| UInt32 prevGroupSize = counters[i] - prev; |
| if (prevGroupSize == 0) |
| continue; |
| SetGroupSize(Indices + prev, prevGroupSize); |
| prev = counters[i]; |
| } |
| } |
| #endif |
| } |
| |
| { |
| int NumRefBits; |
| UInt32 NumSortedBytes; |
| for (NumRefBits = 0; ((blockSize - 1) >> NumRefBits) != 0; NumRefBits++); |
| NumRefBits = 32 - NumRefBits; |
| if (NumRefBits > kNumRefBitsMax) |
| NumRefBits = kNumRefBitsMax; |
| |
| for (NumSortedBytes = kNumHashBytes; ; NumSortedBytes <<= 1) |
| { |
| #ifndef BLOCK_SORT_EXTERNAL_FLAGS |
| UInt32 finishedGroupSize = 0; |
| #endif |
| UInt32 newLimit = 0; |
| for (i = 0; i < blockSize;) |
| { |
| UInt32 groupSize; |
| #ifdef BLOCK_SORT_EXTERNAL_FLAGS |
| |
| if ((Flags[i >> kNumFlagsBits] & (1 << (i & kFlagsMask))) == 0) |
| { |
| i++; |
| continue; |
| } |
| for (groupSize = 1; |
| (Flags[(i + groupSize) >> kNumFlagsBits] & (1 << ((i + groupSize) & kFlagsMask))) != 0; |
| groupSize++); |
| |
| groupSize++; |
| |
| #else |
| |
| groupSize = ((Indices[i] & ~0xC0000000) >> kNumBitsMax); |
| { |
| Bool finishedGroup = ((Indices[i] & 0x80000000) == 0); |
| if ((Indices[i] & 0x40000000) != 0) |
| { |
| groupSize += ((Indices[i + 1] >> kNumBitsMax) << kNumExtra0Bits); |
| Indices[i + 1] &= kIndexMask; |
| } |
| Indices[i] &= kIndexMask; |
| groupSize++; |
| if (finishedGroup || groupSize == 1) |
| { |
| Indices[i - finishedGroupSize] &= kIndexMask; |
| if (finishedGroupSize > 1) |
| Indices[i - finishedGroupSize + 1] &= kIndexMask; |
| { |
| UInt32 newGroupSize = groupSize + finishedGroupSize; |
| SetFinishedGroupSize(Indices + i - finishedGroupSize, newGroupSize); |
| finishedGroupSize = newGroupSize; |
| } |
| i += groupSize; |
| continue; |
| } |
| finishedGroupSize = 0; |
| } |
| |
| #endif |
| |
| if (NumSortedBytes >= blockSize) |
| { |
| UInt32 j; |
| for (j = 0; j < groupSize; j++) |
| { |
| UInt32 t = (i + j); |
| /* Flags[t >> kNumFlagsBits] &= ~(1 << (t & kFlagsMask)); */ |
| Groups[Indices[t]] = t; |
| } |
| } |
| else |
| if (SortGroup(blockSize, NumSortedBytes, i, groupSize, NumRefBits, Indices |
| #ifndef BLOCK_SORT_USE_HEAP_SORT |
| , 0, blockSize |
| #endif |
| ) != 0) |
| newLimit = i + groupSize; |
| i += groupSize; |
| } |
| if (newLimit == 0) |
| break; |
| } |
| } |
| #ifndef BLOCK_SORT_EXTERNAL_FLAGS |
| for (i = 0; i < blockSize;) |
| { |
| UInt32 groupSize = ((Indices[i] & ~0xC0000000) >> kNumBitsMax); |
| if ((Indices[i] & 0x40000000) != 0) |
| { |
| groupSize += ((Indices[i + 1] >> kNumBitsMax) << kNumExtra0Bits); |
| Indices[i + 1] &= kIndexMask; |
| } |
| Indices[i] &= kIndexMask; |
| groupSize++; |
| i += groupSize; |
| } |
| #endif |
| return Groups[0]; |
| } |
| |