Code:
/* Inverse Burrows-Wheeler transform. */
saint_t
inverse_bw_transform(const sauchar_t *T, sauchar_t *U, saidx_t *A,
saidx_t n, saidx_t idx) {
saidx_t C[ALPHABET_SIZE];
sauchar_t D[ALPHABET_SIZE];
saidx_t *B;
saidx_t i, p;
saint_t c, d;
/* Check arguments. */
if((T == NULL) || (U == NULL) || (n < 0) || (idx < 0) ||
(n < idx) || ((0 < n) && (idx == 0))) {
return -1;
}
if(n <= 1) { return 0; }
if((B = A) == NULL) {
/* Allocate n*sizeof(saidx_t) bytes of memory. */
if((B = (saidx_t *)malloc((size_t)n * sizeof(saidx_t))) == NULL) { return -2; }
}
/* Inverse BW transform. */
for(c = 0; c < ALPHABET_SIZE; ++c) { C[c] = 0; }
for(i = 0; i < n; ++i) { ++C[T[i]]; }
for(c = 0, d = 0, i = 0; c < ALPHABET_SIZE; ++c) {
p = C[c];
if(0 < p) {
C[c] = i;
D[d++] = (sauchar_t)c;
i += p;
}
}
for(i = 0; i < idx; ++i) { B[C[T[i]]++] = i; }
for( ; i < n; ++i) { B[C[T[i]]++] = i + 1; }
for(c = 0; c < d; ++c) { C[c] = C[D[c]]; }
for(i = 0, p = idx; i < n; ++i) {
U[i] = D[binarysearch_lower(C, d, p)];
p = B[p - 1];
}
if(A == NULL) {
/* Deallocate memory. */
free(B);
}
return 0;
}
So, it's not about adding EOF symbol, it's kinda alphabet reordering. It not affects compression in this case, but BWT output is completely different - symbols just remapped - and, talking about fair BWT or SA construction, it's a cheating! Since we transform string with remapped symbols! Some time ago I talked with the author of Archon and Dark - kvark (Dmitry Malyshev) and he pointed me that such things are cheating indeed... It's about times with fair SA constructors battle... I hope such great BWT author knows how EOF with suffix array based BWT organized.