Page 10 of 10 FirstFirst ... 8910
Results 271 to 275 of 275

Thread: Brotli

  1. #271
    Member SolidComp's Avatar
    Join Date
    Jun 2015
    Location
    USA
    Posts
    347
    Thanks
    129
    Thanked 53 Times in 37 Posts
    Quote Originally Posted by JamesB View Post
    Zlib gets it to 96 bytes with Z_RLE, 97 with Z_HUFFMAN_ONLY. It just sucks once you add a gzip wrapper around it too.
    I'm not familiar with those settings. Is this with the ZLIB format, or raw deflate or something?

    Do you have a Windows binary for zlib I could use? I've never been able to turn it into an executable.

  2. #272
    Member SolidComp's Avatar
    Join Date
    Jun 2015
    Location
    USA
    Posts
    347
    Thanks
    129
    Thanked 53 Times in 37 Posts
    Quote Originally Posted by MegaByte View Post
    If you're always limited to ASCII, a simple packing eliminating the high bit of every byte would get you to 94 bytes. If you're not always limited to ASCII, but other characters are rare, you could use a packed version of UTF-7 encoding (or better yet, a 7-bit UTF-8-style prefix code without self-synchronization, and maybe RLE thrown in) .
    Interesting. I want to fully support UTF-8, accented characters and so forth, but a lot of the data will be ASCII. Would any codec wring out the extra 0 bit in ASCII code points, or would you have to pack separately? Huffman would wring it out right?

  3. #273
    Member SolidComp's Avatar
    Join Date
    Jun 2015
    Location
    USA
    Posts
    347
    Thanks
    129
    Thanked 53 Times in 37 Posts
    Quote Originally Posted by Jyrki Alakuijala View Post
    Brotli can define a 7 bit encoding with very very few bits. Just current encoders don't attempt a simple entropy coding without LZ77.
    By the way, brotli -4 and brotli -11 are only off by one byte for this data.

  4. #274
    Member SolidComp's Avatar
    Join Date
    Jun 2015
    Location
    USA
    Posts
    347
    Thanks
    129
    Thanked 53 Times in 37 Posts
    I iterated the Positional Data Syntax, now called Compact Simple Data (CSD), and made the data more realistic. No more 55555 for the postal code, no more 00000 in the account number. As far as the CSD changes, mandatory fixed-length fields are now appended together in Line 1 without line breaks (including total payload size) – since they're fixed-length and we know the order, line breaks are a waste. They're followed by mandatory variable-length fields, then optional fields, then any custom/new fields, which are self-describing (both name and data type – the whole format is type safe).

    This sample is 140 bytes. Brotli 1.0.4 -11 knocks it down to 109 bytes. Brotli -4 is 110 bytes:

    01401P60273358200617AH36DFYELYY
    4479102563298665
    1223
    569
    27.85
    TOM JONES
    123 MAIN STREET

    NEW YORK
    NY
    US
    89113
    :CUSTOM1:TEXT:EXAMPLEDATA512

  5. #275
    Member
    Join Date
    Dec 2011
    Location
    Cambridge, UK
    Posts
    503
    Thanks
    181
    Thanked 177 Times in 120 Posts
    Quote Originally Posted by SolidComp View Post
    I'm not familiar with those settings. Is this with the ZLIB format, or raw deflate or something?

    Do you have a Windows binary for zlib I could use? I've never been able to turn it into an executable.

    Z_RLE etc. are compression strategies. A strategy basically tells the library how much of the LZ bit to do: hardly any (Z_RLE), none at all (Z_HUFFMAN_ONLY), or only long LZ matches, by upping the minimum match length, on likely binary data (Z_FILTERED). They're basically ways of getting faster performance (and sometimes even smaller files) out of the library by supplying hints, so you don't need to rely on exhaustive search to get the optimal compression; not that zlib can do that exhaustive search anyway.

    As for a binary, no, I don't have one for Windows. I just had a noddy bit of code to load a file into memory and call the zlib API.

    Edit: code is

    Code:
    #include <errno.h>
    #include <limits.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <unistd.h>

    #include <zlib.h>
    
    /* Size in bytes of each read() request and of the initial buffer.
     * Parenthesised so the macro expands safely inside any expression. */
    #define BS (1024*1024)

    /*
     * Read everything available on standard input (fd 0) into one heap buffer.
     *
     * The buffer starts at BS bytes and doubles whenever fewer than BS bytes
     * of free space remain, so each read() always has room for a full BS-byte
     * request.
     *
     * lenp:    receives the number of bytes read.
     * returns: the malloc'd buffer (never NULL); the caller owns it and must
     *          free() it.
     *
     * A read() error is reported with perror() and whatever was read before
     * the error is still returned.  If the buffer cannot be grown, a message
     * is printed and the process exits with status 1.  The total byte count
     * is logged to stderr as "dcurr=<n>".
     */
    static unsigned char *load(uint64_t *lenp) {
        unsigned char *data = NULL;
        uint64_t dsize = 0;
        uint64_t dcurr = 0;
        ssize_t len;

        do {
            if (dsize - dcurr < BS) {
                uint64_t nsize = dsize ? dsize * 2 : BS;
                unsigned char *tmp = NULL;

                /* Keep the old pointer until realloc() is known to have
                 * succeeded, and refuse sizes size_t cannot represent. */
                if (nsize <= SIZE_MAX)
                    tmp = realloc(data, (size_t)nsize);
                if (tmp == NULL) {
                    fprintf(stderr, "load: cannot allocate %llu bytes\n",
                            (unsigned long long)nsize);
                    free(data);
                    exit(1);
                }
                data = tmp;
                dsize = nsize;
            }

            len = read(0, data + dcurr, BS);
            if (len > 0)
                dcurr += (uint64_t)len;
            /* An interrupted read is not a failure: just try again. */
        } while (len > 0 || (len == -1 && errno == EINTR));

        if (len == -1) {
            perror("read");
        }

        fprintf(stderr, "dcurr=%llu\n", (unsigned long long)dcurr);

        *lenp = dcurr;
        return data;
    }
    
    
    int main(int argc, char **argv) {
        unsigned char *in, *out;
        uint64_t in_len, out_len;
        z_stream zstr;
        int decomp = 0;
        int err;
        int strat = Z_DEFAULT_STRATEGY, level = -1;
        int win = -15;
        int mem = 8;
    
        /*
        extern int z_verbose;
        z_verbose = 3;
        */
        in = load(&in_len);
        out = malloc(out_len = 65536);
    
        if (argc > 1 && strcmp(argv[1], "-d") == 0) {
            decomp = 1;
    
            if (argc > 2)
                win = atoi(argv[2]);
        } else {
            if (argc > 1 && argv[1][0] == '-') {
                printf("Usage: zlib_test [-d] | [strategy [level]] < in > out.gz\n");
                exit(0);
            }
    
            if (argc > 1) {
                if (0 == strcmp(argv[1], "Z_DEFAULT_STRATEGY"))
                    strat = Z_DEFAULT_STRATEGY;
                else if (0 == strcmp(argv[1], "Z_RLE"))
                    strat = Z_RLE;
                else if (0 == strcmp(argv[1], "Z_FILTERED"))
                    strat = Z_FILTERED;
                else if (0 == strcmp(argv[1], "Z_HUFFMAN_ONLY"))
                    strat = Z_HUFFMAN_ONLY;
                else {
                    fprintf(stderr, "Unknown strategy %s\n", argv[1]);
                    exit(1);
                }
    
                if (argc > 2)
                    level = atoi(argv[2]);
    
                if (argc > 3)
                    win = atoi(argv[3]);
    
                if (argc > 4)
                    mem = atoi(argv[4]);
            }
    
            fprintf(stderr, "Strat=%d level=%d\n", strat, level);
        }
    
        zstr.zalloc = (alloc_func)0;
        zstr.zfree = (free_func)0;
        zstr.opaque = (voidpf)0;
    
        if (decomp) {
            if (inflateInit2(&zstr, win) != Z_OK) {
            //if (inflateInit2(&zstr, 15+16) != Z_OK) {
                fprintf(stderr, "zlib errror in inflateInit()\n");
                return 1;
            }
    
            zstr.next_in = in;
            zstr.avail_in = in_len;
    
            do {
                zstr.next_out = out;
                zstr.avail_out = out_len;
    
                err = inflate(&zstr, Z_FINISH);
                if (err == Z_STREAM_END || err == Z_OK || err == Z_BUF_ERROR) {
                    write(1, out, zstr.total_out);
                    zstr.total_out = 0;
                } else {
                    fprintf(stderr, "zlib errror in inflate(): %d/%s\n",
                            err, zstr.msg);
                    return 1;
                }
            } while (err != Z_FINISH && err != Z_STREAM_END);
    
            inflateEnd(&zstr);
    
        } else {
            if (deflateInit2(&zstr, level, Z_DEFLATED, win, mem, strat) != Z_OK) {
                fprintf(stderr, "zlib errror in deflateInit2()\n");
                return 1;
            }
    
            zstr.next_in = in;
            zstr.avail_in = in_len;
    
            do {
                zstr.next_out = out;
                zstr.avail_out = out_len;
    
                err = deflate(&zstr, Z_FINISH);
                if (err == Z_STREAM_END || err == Z_OK || err == Z_BUF_ERROR) {
                    write(1, out, zstr.total_out);
                    zstr.total_out = 0;
                } else {
                    fprintf(stderr, "zlib errror in deflate(): %d/%s\n",
                            err, zstr.msg);
                    return 1;
                }
            } while (err != Z_FINISH && err != Z_STREAM_END);
    
            deflateEnd(&zstr);
        }
    
        return 0;
    }

Page 10 of 10 FirstFirst ... 8910

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •