--- /dev/null
+
+/*-------------------------------------------------------------*/
+/*--- Block sorting machinery ---*/
+/*--- blocksort.c ---*/
+/*-------------------------------------------------------------*/
+
+/* ------------------------------------------------------------------
+ This file is part of bzip2/libbzip2, a program and library for
+ lossless, block-sorting data compression.
+
+ bzip2/libbzip2 version 1.0.6 of 6 September 2010
+ Copyright (C) 1996-2010 Julian Seward <jseward@bzip.org>
+
+ Please read the WARNING, DISCLAIMER and PATENTS sections in the
+ README file.
+
+ This program is released under the terms of the license contained
+ in the file LICENSE.
+ ------------------------------------------------------------------ */
+
+typedef char Char;
+typedef unsigned char Bool;
+typedef unsigned char UChar;
+typedef int Int32;
+typedef unsigned int UInt32;
+typedef short Int16;
+typedef unsigned short UInt16;
+
+#define True ((Bool)1)
+#define False ((Bool)0)
+
+#define BZ_M_IDLE 1
+#define BZ_M_RUNNING 2
+#define BZ_M_FLUSHING 3
+#define BZ_M_FINISHING 4
+
+#define BZ_S_OUTPUT 1
+#define BZ_S_INPUT 2
+
+#define BZ_N_RADIX 2
+#define BZ_N_QSORT 12
+#define BZ_N_SHELL 18
+#define BZ_N_OVERSHOOT (BZ_N_RADIX + BZ_N_QSORT + BZ_N_SHELL + 2)
+
+/*---------------------------------------------*/
+/*--- Fallback O(N log(N)^2) sorting ---*/
+/*--- algorithm, for repetitive blocks ---*/
+/*---------------------------------------------*/
+
+/*---------------------------------------------*/
+void fallbackSimpleSort ( UInt32* fmap,
+ UInt32* eclass,
+ Int32 lo,
+ Int32 hi )
+{
+ Int32 i, j, tmp;
+ UInt32 ec_tmp;
+
+ if (lo == hi) return;
+
+ if (hi - lo > 3) {
+ for ( i = hi-4; i >= lo; i-- ) {
+ tmp = fmap[i];
+ ec_tmp = eclass[tmp];
+ for ( j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4 )
+ fmap[j-4] = fmap[j];
+ fmap[j-4] = tmp;
+ }
+ }
+
+ for ( i = hi-1; i >= lo; i-- ) {
+ tmp = fmap[i];
+ ec_tmp = eclass[tmp];
+ for ( j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++ )
+ fmap[j-1] = fmap[j];
+ fmap[j-1] = tmp;
+ }
+}
+
+
+/*---------------------------------------------*/
+#define fswap(zz1, zz2) \
+ { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
+
+#define fvswap(zzp1, zzp2, zzn) \
+{ \
+ Int32 yyp1 = (zzp1); \
+ Int32 yyp2 = (zzp2); \
+ Int32 yyn = (zzn); \
+ while (yyn > 0) { \
+ fswap(fmap[yyp1], fmap[yyp2]); \
+ yyp1++; yyp2++; yyn--; \
+ } \
+}
+
+
+#define fmin(a,b) ((a) < (b)) ? (a) : (b)
+
+#define fpush(lz,hz) { stackLo[sp] = lz; \
+ stackHi[sp] = hz; \
+ sp++; }
+
+#define fpop(lz,hz) { sp--; \
+ lz = stackLo[sp]; \
+ hz = stackHi[sp]; }
+
+#define FALLBACK_QSORT_SMALL_THRESH 10
+#define FALLBACK_QSORT_STACK_SIZE 100
+
+
+void fallbackQSort3 ( UInt32* fmap,
+ UInt32* eclass,
+ Int32 loSt,
+ Int32 hiSt )
+{
+ Int32 unLo, unHi, ltLo, gtHi, n, m;
+ Int32 sp, lo, hi;
+ UInt32 med, r, r3;
+ Int32 stackLo[FALLBACK_QSORT_STACK_SIZE];
+ Int32 stackHi[FALLBACK_QSORT_STACK_SIZE];
+
+ r = 0;
+
+ sp = 0;
+ fpush ( loSt, hiSt );
+
+ while (sp > 0) {
+
+ fpop ( lo, hi );
+ if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) {
+ fallbackSimpleSort ( fmap, eclass, lo, hi );
+ continue;
+ }
+
+ /* Random partitioning. Median of 3 sometimes fails to
+ avoid bad cases. Median of 9 seems to help but
+ looks rather expensive. This too seems to work but
+ is cheaper. Guidance for the magic constants
+ 7621 and 32768 is taken from Sedgewick's algorithms
+ book, chapter 35.
+ */
+ r = ((r * 7621) + 1) % 32768;
+ r3 = r % 3;
+ if (r3 == 0) med = eclass[fmap[lo]]; else
+ if (r3 == 1) med = eclass[fmap[(lo+hi)>>1]]; else
+ med = eclass[fmap[hi]];
+
+ unLo = ltLo = lo;
+ unHi = gtHi = hi;
+
+ while (1) {
+ while (1) {
+ if (unLo > unHi) break;
+ n = (Int32)eclass[fmap[unLo]] - (Int32)med;
+ if (n == 0) {
+ fswap(fmap[unLo], fmap[ltLo]);
+ ltLo++; unLo++;
+ continue;
+ };
+ if (n > 0) break;
+ unLo++;
+ }
+ while (1) {
+ if (unLo > unHi) break;
+ n = (Int32)eclass[fmap[unHi]] - (Int32)med;
+ if (n == 0) {
+ fswap(fmap[unHi], fmap[gtHi]);
+ gtHi--; unHi--;
+ continue;
+ };
+ if (n < 0) break;
+ unHi--;
+ }
+ if (unLo > unHi) break;
+ fswap(fmap[unLo], fmap[unHi]); unLo++; unHi--;
+ }
+
+ if (gtHi < ltLo) continue;
+
+ n = fmin(ltLo-lo, unLo-ltLo); fvswap(lo, unLo-n, n);
+ m = fmin(hi-gtHi, gtHi-unHi); fvswap(unLo, hi-m+1, m);
+
+ n = lo + unLo - ltLo - 1;
+ m = hi - (gtHi - unHi) + 1;
+
+ if (n - lo > hi - m) {
+ fpush ( lo, n );
+ fpush ( m, hi );
+ } else {
+ fpush ( m, hi );
+ fpush ( lo, n );
+ }
+ }
+}
+
+#undef fmin
+#undef fpush
+#undef fpop
+#undef fswap
+#undef fvswap
+#undef FALLBACK_QSORT_SMALL_THRESH
+#undef FALLBACK_QSORT_STACK_SIZE
+
+
+/*---------------------------------------------*/
+/* Pre:
+ nblock > 0
+ eclass exists for [0 .. nblock-1]
+ ((UChar*)eclass) [0 .. nblock-1] holds block
+ ptr exists for [0 .. nblock-1]
+
+ Post:
+ ((UChar*)eclass) [0 .. nblock-1] holds block
+ All other areas of eclass destroyed
+ fmap [0 .. nblock-1] holds sorted order
+ bhtab [ 0 .. 2+(nblock/32) ] destroyed
+*/
+
+#define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31))
+#define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31))
+#define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31)))
+#define WORD_BH(zz) bhtab[(zz) >> 5]
+#define UNALIGNED_BH(zz) ((zz) & 0x01f)
+
+void fallbackSort ( UInt32* fmap,
+ UInt32* eclass,
+ UInt32* bhtab,
+ Int32 nblock,
+ Int32 verb )
+{
+ Int32 ftab[257];
+ Int32 ftabCopy[256];
+ Int32 H, i, j, k, l, r, cc, cc1;
+ Int32 nNotDone;
+ Int32 nBhtab;
+ UChar* eclass8 = (UChar*)eclass;
+
+ /*--
+ Initial 1-char radix sort to generate
+ initial fmap and initial BH bits.
+ --*/
+ for (i = 0; i < 257; i++) ftab[i] = 0;
+ for (i = 0; i < nblock; i++) ftab[eclass8[i]]++;
+ for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i];
+ for (i = 1; i < 257; i++) ftab[i] += ftab[i-1];
+
+ for (i = 0; i < nblock; i++) {
+ j = eclass8[i];
+ k = ftab[j] - 1;
+ ftab[j] = k;
+ fmap[k] = i;
+ }
+
+ nBhtab = 2 + (nblock / 32);
+ for (i = 0; i < nBhtab; i++) bhtab[i] = 0;
+ for (i = 0; i < 256; i++) SET_BH(ftab[i]);
+
+ /*--
+ Inductively refine the buckets. Kind-of an
+ "exponential radix sort" (!), inspired by the
+ Manber-Myers suffix array construction algorithm.
+ --*/
+
+ /*-- set sentinel bits for block-end detection --*/
+ for (i = 0; i < 32; i++) {
+ SET_BH(nblock + 2*i);
+ CLEAR_BH(nblock + 2*i + 1);
+ }
+
+ /*-- the log(N) loop --*/
+ H = 1;
+ while (1) {
+
+
+ j = 0;
+ for (i = 0; i < nblock; i++) {
+ if (ISSET_BH(i)) j = i;
+ k = fmap[i] - H; if (k < 0) k += nblock;
+ eclass[k] = j;
+ }
+
+ nNotDone = 0;
+ r = -1;
+ while (1) {
+
+ /*-- find the next non-singleton bucket --*/
+ k = r + 1;
+ while (ISSET_BH(k) && UNALIGNED_BH(k)) k++;
+ if (ISSET_BH(k)) {
+ while (WORD_BH(k) == 0xffffffff) k += 32;
+ while (ISSET_BH(k)) k++;
+ }
+ l = k - 1;
+ if (l >= nblock) break;
+ while (!ISSET_BH(k) && UNALIGNED_BH(k)) k++;
+ if (!ISSET_BH(k)) {
+ while (WORD_BH(k) == 0x00000000) k += 32;
+ while (!ISSET_BH(k)) k++;
+ }
+ r = k - 1;
+ if (r >= nblock) break;
+
+ /*-- now [l, r] bracket current bucket --*/
+ if (r > l) {
+ nNotDone += (r - l + 1);
+ fallbackQSort3 ( fmap, eclass, l, r );
+
+ /*-- scan bucket and generate header bits-- */
+ cc = -1;
+ for (i = l; i <= r; i++) {
+ cc1 = eclass[fmap[i]];
+ if (cc != cc1) { SET_BH(i); cc = cc1; };
+ }
+ }
+ }
+
+ H *= 2;
+ if (H > nblock || nNotDone == 0) break;
+ }
+
+ /*--
+ Reconstruct the original block in
+ eclass8 [0 .. nblock-1], since the
+ previous phase destroyed it.
+ --*/
+ j = 0;
+ for (i = 0; i < nblock; i++) {
+ while (ftabCopy[j] == 0) j++;
+ ftabCopy[j]--;
+ eclass8[fmap[i]] = (UChar)j;
+ }
+}
+
+#undef SET_BH
+#undef CLEAR_BH
+#undef ISSET_BH
+#undef WORD_BH
+#undef UNALIGNED_BH
+
+
+/*---------------------------------------------*/
+/*--- The main, O(N^2 log(N)) sorting ---*/
+/*--- algorithm. Faster for "normal" ---*/
+/*--- non-repetitive blocks. ---*/
+/*---------------------------------------------*/
+
+/*---------------------------------------------*/
+Bool mainGtU ( UInt32 i1,
+ UInt32 i2,
+ UChar* block,
+ UInt16* quadrant,
+ UInt32 nblock,
+ Int32* budget )
+{
+ Int32 k;
+ UChar c1, c2;
+ UInt16 s1, s2;
+
+ /* 1 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 2 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 3 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 4 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 5 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 6 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 7 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 8 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 9 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 10 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 11 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+ /* 12 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ i1++; i2++;
+
+ k = nblock + 8;
+
+ do {
+ /* 1 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ s1 = quadrant[i1]; s2 = quadrant[i2];
+ if (s1 != s2) return (s1 > s2);
+ i1++; i2++;
+ /* 2 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ s1 = quadrant[i1]; s2 = quadrant[i2];
+ if (s1 != s2) return (s1 > s2);
+ i1++; i2++;
+ /* 3 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ s1 = quadrant[i1]; s2 = quadrant[i2];
+ if (s1 != s2) return (s1 > s2);
+ i1++; i2++;
+ /* 4 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ s1 = quadrant[i1]; s2 = quadrant[i2];
+ if (s1 != s2) return (s1 > s2);
+ i1++; i2++;
+ /* 5 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ s1 = quadrant[i1]; s2 = quadrant[i2];
+ if (s1 != s2) return (s1 > s2);
+ i1++; i2++;
+ /* 6 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ s1 = quadrant[i1]; s2 = quadrant[i2];
+ if (s1 != s2) return (s1 > s2);
+ i1++; i2++;
+ /* 7 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ s1 = quadrant[i1]; s2 = quadrant[i2];
+ if (s1 != s2) return (s1 > s2);
+ i1++; i2++;
+ /* 8 */
+ c1 = block[i1]; c2 = block[i2];
+ if (c1 != c2) return (c1 > c2);
+ s1 = quadrant[i1]; s2 = quadrant[i2];
+ if (s1 != s2) return (s1 > s2);
+ i1++; i2++;
+
+ if (i1 >= nblock) i1 -= nblock;
+ if (i2 >= nblock) i2 -= nblock;
+
+ k -= 8;
+ (*budget)--;
+ }
+ while (k >= 0);
+
+ return False;
+}
+
+
+/*---------------------------------------------*/
+/*--
+ Knuth's increments seem to work better
+ than Incerpi-Sedgewick here. Possibly
+ because the number of elems to sort is
+ usually small, typically <= 20.
+--*/
+Int32 incs[14] = { 1, 4, 13, 40, 121, 364, 1093, 3280,
+ 9841, 29524, 88573, 265720,
+ 797161, 2391484 };
+
+void mainSimpleSort ( UInt32* ptr,
+ UChar* block,
+ UInt16* quadrant,
+ Int32 nblock,
+ Int32 lo,
+ Int32 hi,
+ Int32 d,
+ Int32* budget )
+{
+ Int32 i, j, h, bigN, hp;
+ UInt32 v;
+
+ bigN = hi - lo + 1;
+ if (bigN < 2) return;
+
+ hp = 0;
+ while (incs[hp] < bigN) hp++;
+ hp--;
+
+ for (; hp >= 0; hp--) {
+ h = incs[hp];
+
+ i = lo + h;
+ while (True) {
+
+ /*-- copy 1 --*/
+ if (i > hi) break;
+ v = ptr[i];
+ j = i;
+ while ( mainGtU (
+ ptr[j-h]+d, v+d, block, quadrant, nblock, budget
+ ) ) {
+ ptr[j] = ptr[j-h];
+ j = j - h;
+ if (j <= (lo + h - 1)) break;
+ }
+ ptr[j] = v;
+ i++;
+
+ /*-- copy 2 --*/
+ if (i > hi) break;
+ v = ptr[i];
+ j = i;
+ while ( mainGtU (
+ ptr[j-h]+d, v+d, block, quadrant, nblock, budget
+ ) ) {
+ ptr[j] = ptr[j-h];
+ j = j - h;
+ if (j <= (lo + h - 1)) break;
+ }
+ ptr[j] = v;
+ i++;
+
+ /*-- copy 3 --*/
+ if (i > hi) break;
+ v = ptr[i];
+ j = i;
+ while ( mainGtU (
+ ptr[j-h]+d, v+d, block, quadrant, nblock, budget
+ ) ) {
+ ptr[j] = ptr[j-h];
+ j = j - h;
+ if (j <= (lo + h - 1)) break;
+ }
+ ptr[j] = v;
+ i++;
+
+ if (*budget < 0) return;
+ }
+ }
+}
+
+
+/*---------------------------------------------*/
+/*--
+ The following is an implementation of
+ an elegant 3-way quicksort for strings,
+ described in a paper "Fast Algorithms for
+ Sorting and Searching Strings", by Robert
+ Sedgewick and Jon L. Bentley.
+--*/
+
+#define mswap(zz1, zz2) \
+ { Int32 zztmp = zz1; zz1 = zz2; zz2 = zztmp; }
+
+#define mvswap(zzp1, zzp2, zzn) \
+{ \
+ Int32 yyp1 = (zzp1); \
+ Int32 yyp2 = (zzp2); \
+ Int32 yyn = (zzn); \
+ while (yyn > 0) { \
+ mswap(ptr[yyp1], ptr[yyp2]); \
+ yyp1++; yyp2++; yyn--; \
+ } \
+}
+
+UChar mmed3 ( UChar a, UChar b, UChar c )
+{
+ UChar t;
+ if (a > b) { t = a; a = b; b = t; };
+ if (b > c) {
+ b = c;
+ if (a > b) b = a;
+ }
+ return b;
+}
+
+#define mmin(a,b) ((a) < (b)) ? (a) : (b)
+
+#define mpush(lz,hz,dz) { stackLo[sp] = lz; \
+ stackHi[sp] = hz; \
+ stackD [sp] = dz; \
+ sp++; }
+
+#define mpop(lz,hz,dz) { sp--; \
+ lz = stackLo[sp]; \
+ hz = stackHi[sp]; \
+ dz = stackD [sp]; }
+
+
+#define mnextsize(az) (nextHi[az]-nextLo[az])
+
+#define mnextswap(az,bz) \
+ { Int32 tz; \
+ tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \
+ tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \
+ tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; }
+
+
+#define MAIN_QSORT_SMALL_THRESH 20
+#define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT)
+#define MAIN_QSORT_STACK_SIZE 100
+
+void mainQSort3 ( UInt32* ptr,
+ UChar* block,
+ UInt16* quadrant,
+ Int32 nblock,
+ Int32 loSt,
+ Int32 hiSt,
+ Int32 dSt,
+ Int32* budget )
+{
+ Int32 unLo, unHi, ltLo, gtHi, n, m, med;
+ Int32 sp, lo, hi, d;
+
+ Int32 stackLo[MAIN_QSORT_STACK_SIZE];
+ Int32 stackHi[MAIN_QSORT_STACK_SIZE];
+ Int32 stackD [MAIN_QSORT_STACK_SIZE];
+
+ Int32 nextLo[3];
+ Int32 nextHi[3];
+ Int32 nextD [3];
+
+ sp = 0;
+ mpush ( loSt, hiSt, dSt );
+
+ while (sp > 0) {
+
+ mpop ( lo, hi, d );
+ if (hi - lo < MAIN_QSORT_SMALL_THRESH ||
+ d > MAIN_QSORT_DEPTH_THRESH) {
+ mainSimpleSort ( ptr, block, quadrant, nblock, lo, hi, d, budget );
+ if (*budget < 0) return;
+ continue;
+ }
+
+ med = (Int32)
+ mmed3 ( block[ptr[ lo ]+d],
+ block[ptr[ hi ]+d],
+ block[ptr[ (lo+hi)>>1 ]+d] );
+
+ unLo = ltLo = lo;
+ unHi = gtHi = hi;
+
+ while (True) {
+ while (True) {
+ if (unLo > unHi) break;
+ n = ((Int32)block[ptr[unLo]+d]) - med;
+ if (n == 0) {
+ mswap(ptr[unLo], ptr[ltLo]);
+ ltLo++; unLo++; continue;
+ };
+ if (n > 0) break;
+ unLo++;
+ }
+ while (True) {
+ if (unLo > unHi) break;
+ n = ((Int32)block[ptr[unHi]+d]) - med;
+ if (n == 0) {
+ mswap(ptr[unHi], ptr[gtHi]);
+ gtHi--; unHi--; continue;
+ };
+ if (n < 0) break;
+ unHi--;
+ }
+ if (unLo > unHi) break;
+ mswap(ptr[unLo], ptr[unHi]); unLo++; unHi--;
+ }
+
+ if (gtHi < ltLo) {
+ mpush(lo, hi, d+1 );
+ continue;
+ }
+
+ n = mmin(ltLo-lo, unLo-ltLo); mvswap(lo, unLo-n, n);
+ m = mmin(hi-gtHi, gtHi-unHi); mvswap(unLo, hi-m+1, m);
+
+ n = lo + unLo - ltLo - 1;
+ m = hi - (gtHi - unHi) + 1;
+
+ nextLo[0] = lo; nextHi[0] = n; nextD[0] = d;
+ nextLo[1] = m; nextHi[1] = hi; nextD[1] = d;
+ nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1;
+
+ if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
+ if (mnextsize(1) < mnextsize(2)) mnextswap(1,2);
+ if (mnextsize(0) < mnextsize(1)) mnextswap(0,1);
+
+
+ mpush (nextLo[0], nextHi[0], nextD[0]);
+ mpush (nextLo[1], nextHi[1], nextD[1]);
+ mpush (nextLo[2], nextHi[2], nextD[2]);
+ }
+}