+#define ZLIB_BUFFER_SIZE 16384
+
+/*
+ * Push one chunk of input through the deflate stream and write every byte of
+ * compressed output that becomes available to fi.
+ *
+ * zs:   deflate stream. The code assumes next_out/avail_out describe an empty
+ *       output buffer of ZLIB_BUFFER_SIZE bytes on entry; they are restored
+ *       to that state before returning.
+ * line: input bytes.
+ * len:  number of input bytes; values <= 0 mean "nothing to compress".
+ * fi:   destination handed to png_write_bytes().
+ *
+ * Returns the number of compressed bytes written by this call. 0 is returned
+ * when nothing was emitted and also when deflate() failed (in which case a
+ * message is printed to stderr).
+ */
+static long compress_line(z_stream*zs, Bytef*line, int len, FILE*fi)
+{
+    long written = 0;
+
+    /* Without input, deflate(Z_NO_FLUSH) can report Z_BUF_ERROR, which is
+       not a real failure; a negative length would wrap into a huge
+       avail_in. Both cases are simply "no work". */
+    if(len <= 0)
+        return 0;
+
+    zs->next_in = line;
+    zs->avail_in = len;
+
+    do {
+        int ret = deflate(zs, Z_NO_FLUSH);
+        if(ret != Z_OK) {
+            fprintf(stderr, "error in deflate(): %s\n", zs->msg?zs->msg:"unknown");
+            return 0;
+        }
+        if(zs->avail_out != ZLIB_BUFFER_SIZE) {
+            int produced = ZLIB_BUFFER_SIZE - zs->avail_out;
+            written += produced;
+            /* next_out sits just past the fresh output: rewind it, flush the
+               bytes and hand the whole buffer back to zlib. */
+            zs->next_out -= produced;
+            png_write_bytes(fi, zs->next_out, produced);
+            zs->avail_out = ZLIB_BUFFER_SIZE;
+        }
+    } while(zs->avail_in);
+
+    return written;
+}
+
+/*
+ * Trial compression: report how many bytes the stream would emit if `line`
+ * were compressed next and the stream were then finished. The work is done
+ * on a deflateCopy() of zs_orig and the produced bytes are discarded.
+ *
+ * NOTE(review): the copy's next_out is never redirected, so the scratch
+ * output presumably lands in the same buffer zs_orig points at - confirm
+ * that buffer holds no unflushed data when this is called.
+ *
+ * zs_orig: stream to copy from.
+ * line:    candidate input bytes.
+ * linelen: number of input bytes.
+ *
+ * Returns the number of output bytes produced (truncated to int), or 0 if
+ * copying, compressing or tearing down the copy failed.
+ */
+static int test_line(z_stream*zs_orig, Bytef*line, int linelen)
+{
+    z_stream zs;
+    long size = 0;
+    int mode = Z_SYNC_FLUSH;
+    int ret = deflateCopy(&zs, zs_orig);
+    if(ret != Z_OK) {
+        fprintf(stderr, "Couldn't copy stream\n");
+        return 0;
+    }
+
+    zs.next_in = line;
+    zs.avail_in = linelen;
+
+    for(;;) {
+        ret = deflate(&zs, mode);
+        if(ret != Z_OK && ret != Z_STREAM_END) {
+            fprintf(stderr, "error in deflate(): %s (mode %s, %u bytes remaining)\n", zs.msg?zs.msg:"unknown",
+                    mode==Z_SYNC_FLUSH?"Z_SYNC_FLUSH":"Z_FINISH", zs.avail_in);
+            /* release the state allocated by deflateCopy() */
+            deflateEnd(&zs);
+            return 0;
+        }
+        if(zs.avail_out != ZLIB_BUFFER_SIZE) {
+            int produced = ZLIB_BUFFER_SIZE - zs.avail_out;
+            size += produced;
+            /* discard the output: rewind and reuse the whole buffer */
+            zs.next_out -= produced;
+            zs.avail_out = ZLIB_BUFFER_SIZE;
+        }
+        if(ret == Z_STREAM_END)
+            break;
+        /* once all input is consumed, switch to draining the stream */
+        if(!zs.avail_in)
+            mode = Z_FINISH;
+    }
+
+    ret = deflateEnd(&zs);
+    if(ret != Z_OK) {
+        fprintf(stderr, "error in deflateEnd(): %s\n", zs.msg?zs.msg:"unknown");
+        return 0;
+    }
+    return (int)size;
+}
+
+/*
+ * Drain the deflate stream with Z_FINISH, write the remaining compressed
+ * bytes to fi and release the stream with deflateEnd().
+ *
+ * zs: deflate stream; the code assumes next_out/avail_out describe an empty
+ *     output buffer of ZLIB_BUFFER_SIZE bytes on entry.
+ * fi: destination handed to png_write_bytes().
+ *
+ * Returns the number of bytes written, or 0 on failure (a message is printed
+ * to stderr). The stream is ended on every path, including failures, so the
+ * caller must not keep using it afterwards.
+ */
+static int finishzlib(z_stream*zs, FILE*fi)
+{
+    int written = 0;
+    int ret;
+
+    for(;;) {
+        ret = deflate(zs, Z_FINISH);
+        if(ret != Z_OK && ret != Z_STREAM_END) {
+            fprintf(stderr, "error in deflate(finish): %s\n", zs->msg?zs->msg:"unknown");
+            /* do not leave the stream's internal state allocated */
+            deflateEnd(zs);
+            return 0;
+        }
+
+        if(zs->avail_out != ZLIB_BUFFER_SIZE) {
+            int produced = ZLIB_BUFFER_SIZE - zs->avail_out;
+            written += produced;
+            /* rewind to the start of the fresh output, flush it and make
+               the whole buffer available again */
+            zs->next_out -= produced;
+            png_write_bytes(fi, zs->next_out, produced);
+            zs->avail_out = ZLIB_BUFFER_SIZE;
+        }
+        if(ret == Z_STREAM_END)
+            break;
+    }
+
+    ret = deflateEnd(zs);
+    if(ret != Z_OK) {
+        fprintf(stderr, "error in deflateEnd(): %s\n", zs->msg?zs->msg:"unknown");
+        return 0;
+    }
+    return written;
+}
+
+/*
+ * Hash a pixel: the COL is read as one 32 bit word, folded with itself
+ * shifted right by 17 bits, and the result is shifted right by 8 and
+ * incremented. Callers mask the value down to their table size.
+ */
+static inline u32 color_hash(COL*col)
+{
+    const u32 pixel = *(u32*)col;
+    const u32 folded = pixel ^ (pixel >> 17);
+    /* Same value as folded ^ (((folded >> 8) + 1) ^ folded): the two XORs
+       with `folded` cancel each other out. */
+    return (folded >> 8) + 1;
+}
+
+/*
+ * Count the distinct colors of an image and optionally extract them as a
+ * palette.
+ *
+ * img:       width*height pixels.
+ * palette:   if non-NULL, receives the distinct colors (room for 256 entries
+ *            is required), ordered by ascending occurrence count.
+ * has_alpha: set to 1 when a pixel with a != 255 is seen; never cleared here.
+ *
+ * Occurrences are counted per run: a pixel equal to its immediate
+ * predecessor is skipped and does not bump the counter again.
+ *
+ * Returns the number of distinct colors if there are at most 256. If there
+ * are more (or the work tables cannot be allocated), *has_alpha is set to 1,
+ * the palette is left unfilled and width*height is returned.
+ */
+static int png_get_number_of_palette_entries(COL*img, unsigned width, unsigned height, COL*palette, char*has_alpha)
+{
+    int len = width*height;
+    int t;
+    int palsize = 0;
+    int size[256];            /* number of colors stored in each hash bucket */
+    int palette_overflow = 0;
+    u32 lastcol32;
+
+    assert(sizeof(COL)==sizeof(u32));
+    assert(width && height);
+
+    memset(size, 0, sizeof(size));
+
+    /* 256 hash buckets with 256 slots each; a bucket can never overflow
+       because the total number of stored colors is capped at 256. */
+    u32*pal = (u32*)malloc(65536*sizeof(u32));
+    int*count = (int*)malloc(65536*sizeof(int));
+    if(!pal || !count) {
+        free(pal);
+        free(count);
+        *has_alpha=1;
+        return width*height;
+    }
+
+    lastcol32 = (*(u32*)&img[0])^0xffffffff; // don't match
+
+    for(t=0;t<len;t++) {
+        u32 col32 = *(u32*)&img[t];
+        if(col32 == lastcol32)
+            continue;
+
+        if(img[t].a!=255)
+            *has_alpha=1;
+
+        u32 hash = color_hash(&img[t])&255;
+        u32*cpal = &pal[hash*256];
+        int*ccount = &count[hash*256];
+        int csize = size[hash];
+        int i;
+
+        for(i=0;i<csize;i++) {
+            if(col32 == cpal[i])
+                break;
+        }
+        if(i<csize) {
+            ccount[i]++;
+        } else {
+            if(palsize==256) {
+                palette_overflow = 1;
+                break;
+            }
+            /* new color: start its counter in this bucket's own counter
+               array, at the same slot the color is stored in */
+            ccount[csize] = 1;
+            cpal[csize] = col32;
+            size[hash] = csize+1;
+            palsize++;
+        }
+        lastcol32 = col32;
+    }
+
+    if(palette_overflow) {
+        free(pal);
+        free(count);
+        *has_alpha=1;
+        return width*height;
+    }
+
+    if(palette) {
+        int occurrences[256];
+        int i = 0;
+        int j;
+
+        /* flatten the buckets into the output palette */
+        for(t=0;t<256;t++) {
+            int s;
+            int csize = size[t];
+            u32*cpal = &pal[t*256];
+            int*ccount = &count[t*256];
+            for(s=0;s<csize;s++) {
+                occurrences[i] = ccount[s];
+                palette[i++] = *(COL*)(&cpal[s]);
+            }
+        }
+        assert(i==palsize);
+
+        /* exchange sort, least frequent color first */
+        for(i=0;i<palsize-1;i++) {
+            for(j=i+1;j<palsize;j++) {
+                if(occurrences[j] < occurrences[i]) {
+                    int o = occurrences[i];
+                    COL c = palette[i];
+                    occurrences[i] = occurrences[j];
+                    palette[i] = palette[j];
+                    occurrences[j] = o;
+                    palette[j] = c;
+                }
+            }
+        }
+    }
+
+    free(pal);
+    free(count);
+    return palsize;
+}
+
+/*
+ * Translate pixels into palette indices.
+ *
+ * src:          `size` pixels.
+ * dest:         receives one index byte per pixel.
+ * palette:      `palette_size` colors.
+ *
+ * The palette is loaded into an open-addressing hash table (linear probing,
+ * 1024 slots). Slots hold "palette index + 1" so that 0 can unambiguously
+ * mark an empty slot. A pixel whose color is not in the palette maps to
+ * index 0 instead of probing forever.
+ */
+static void png_map_to_palette(COL*src, unsigned char*dest, int size, COL*palette, int palette_size)
+{
+    enum { HASH_SLOTS = 1024, HASH_MASK = HASH_SLOTS-1 };
+    int slots[HASH_SLOTS];
+    int t;
+
+    memset(slots, 0, sizeof(slots));
+
+    /* Always keep at least one slot empty so every probe sequence ends. */
+    int usable = palette_size < HASH_SLOTS ? palette_size : HASH_SLOTS-1;
+
+    for(t=0;t<usable;t++) {
+        u32 hash = color_hash(&palette[t])&HASH_MASK;
+        while(slots[hash])
+            hash = (hash+1)&HASH_MASK;
+        slots[hash] = t+1;
+    }
+
+    for(t=0;t<size;t++) {
+        u32 hash = color_hash(&src[t])&HASH_MASK;
+        int index = 0; /* fallback for colors missing from the palette */
+        while(slots[hash]) {
+            int candidate = slots[hash]-1;
+            if(!memcmp(&palette[candidate], &src[t], sizeof(COL))) {
+                index = candidate;
+                break;
+            }
+            hash = (hash+1)&HASH_MASK;
+        }
+        dest[t] = index;
+    }
+}
+
+/*
+ * Apply one filter to a row of 8 bit samples.
+ *
+ * filtermode: 0 = copy, 1 = x difference, 2 = y difference,
+ *             3 = x+y (average) difference, 4 = paeth difference.
+ *             Any other value writes nothing.
+ * dest:       receives `width` filtered bytes.
+ * src:        current row. For modes 2-4 the previous row is read from the
+ *             `width` bytes directly in front of src, so the caller must
+ *             pass a row that has a predecessor in the same buffer.
+ *
+ * The previous row is reached through pointer arithmetic (src - width)
+ * rather than through an unsigned index expression, which would wrap to a
+ * large positive offset on targets where pointers are wider than unsigned.
+ *
+ * Returns filtermode.
+ */
+static int png_apply_specific_filter_8(int filtermode, unsigned char*dest, unsigned char*src, unsigned width)
+{
+    const unsigned char*up;
+    unsigned x;
+
+    switch(filtermode) {
+    case 0:
+        for(x=0;x<width;x++)
+            dest[x] = src[x];
+        break;
+    case 1:
+        /* x difference filter; the first sample has no left neighbour */
+        dest[0] = src[0];
+        for(x=1;x<width;x++)
+            dest[x] = src[x] - src[x-1];
+        break;
+    case 2:
+        /* y difference filter */
+        up = src - width;
+        for(x=0;x<width;x++)
+            dest[x] = src[x] - up[x];
+        break;
+    case 3:
+        /* x+y difference filter; left neighbour of the first sample is 0 */
+        up = src - width;
+        dest[0] = src[0] - up[0]/2;
+        for(x=1;x<width;x++)
+            dest[x] = src[x] - (src[x-1] + up[x])/2;
+        break;
+    case 4:
+        /* paeth difference filter */
+        up = src - width;
+        dest[0] = src[0] - PaethPredictor(0, up[0], 0);
+        for(x=1;x<width;x++)
+            dest[x] = src[x] - PaethPredictor(src[x-1], up[x], up[x-1]);
+        break;
+    default:
+        break;
+    }
+    return filtermode;
+}
+
+/*
+ * Apply one filter to a row of 4-byte pixels, reordering the bytes of every
+ * pixel on the way: output bytes 0..3 come from source bytes 1, 2, 3, 0
+ * (source byte 0 is the alpha byte and is emitted last).
+ *
+ * filtermode: 0 = copy, 1 = x difference, 2 = y difference,
+ *             3 = x+y (average) difference, 4 = paeth difference.
+ *             Any other value writes nothing.
+ * dest:       receives width*4 filtered bytes.
+ * src:        current row. For modes 2-4 the previous row is read from the
+ *             width*4 bytes directly in front of src.
+ *
+ * The previous row is reached through pointer arithmetic (src - width*4)
+ * rather than through an unsigned index expression, which would wrap to a
+ * large positive offset on targets where pointers are wider than unsigned.
+ *
+ * Returns filtermode.
+ */
+static int png_apply_specific_filter_32(int filtermode, unsigned char*dest, unsigned char*src, unsigned width)
+{
+    /* output byte c of a pixel is taken from source byte order[c] */
+    static const int order[4] = {1, 2, 3, 0};
+    const unsigned stride = width*4;
+    const unsigned char*cur = src;
+    const unsigned char*up;
+    unsigned char*out = dest;
+    unsigned x;
+    int c;
+
+    switch(filtermode) {
+    case 0:
+        for(x=0;x<width;x++, cur+=4, out+=4) {
+            for(c=0;c<4;c++)
+                out[c] = cur[order[c]];
+        }
+        break;
+    case 1:
+        /* x difference filter; the first pixel has no left neighbour */
+        for(c=0;c<4;c++)
+            out[c] = cur[order[c]];
+        for(x=1;x<width;x++) {
+            cur+=4;
+            out+=4;
+            for(c=0;c<4;c++)
+                out[c] = cur[order[c]] - cur[order[c]-4];
+        }
+        break;
+    case 2:
+        /* y difference filter */
+        up = src - stride;
+        for(x=0;x<width;x++, cur+=4, up+=4, out+=4) {
+            for(c=0;c<4;c++)
+                out[c] = cur[order[c]] - up[order[c]];
+        }
+        break;
+    case 3:
+        /* x+y difference filter; left neighbour of the first pixel is 0 */
+        up = src - stride;
+        for(c=0;c<4;c++)
+            out[c] = cur[order[c]] - up[order[c]]/2;
+        for(x=1;x<width;x++) {
+            cur+=4;
+            up+=4;
+            out+=4;
+            for(c=0;c<4;c++)
+                out[c] = cur[order[c]] - (cur[order[c]-4] + up[order[c]])/2;
+        }
+        break;
+    case 4:
+        /* paeth difference filter */
+        up = src - stride;
+        for(c=0;c<4;c++)
+            out[c] = cur[order[c]] - PaethPredictor(0, up[order[c]], 0);
+        for(x=1;x<width;x++) {
+            cur+=4;
+            up+=4;
+            out+=4;
+            for(c=0;c<4;c++)
+                out[c] = cur[order[c]] - PaethPredictor(cur[order[c]-4], up[order[c]], up[order[c]-4]);
+        }
+        break;
+    default:
+        break;
+    }
+    return filtermode;
+}
+
+/* num_bits_table[v] is the number of set bits in the byte value v.
+   The pointer is NULL until make_num_bits_table() has run. */
+static int*num_bits_table = 0;
+
+/*
+ * Build num_bits_table on first use; later calls return immediately.
+ * The table lives in static storage, so building it cannot fail and there
+ * is nothing to free.
+ */
+static void make_num_bits_table(void)
+{
+    static int table[256];
+    int t;
+
+    if(num_bits_table)
+        return;
+
+    for(t=0;t<256;t++) {
+        int bits = 0;
+        int v;
+        /* v &= v-1 clears the lowest set bit, so the loop runs once per set bit */
+        for(v=t; v; v&=v-1)
+            bits++;
+        table[t] = bits;
+    }
+    /* publish only after the table is completely filled */
+    num_bits_table = table;
+}
+
+/*
+ * Pick the filter (0 = none, 1 = x difference, 2 = y difference,
+ * 3 = x+y average, 4 = paeth) that is expected to compress best for one row.
+ *
+ * As an approximation of zlib compressibility, the function counts for each
+ * filter how many different (previous byte, current byte) pairs occur in the
+ * filtered row; the filter with the fewest distinct pairs wins, the lowest
+ * filter number winning ties. The filtered bytes are computed on the fly
+ * from src in its in-memory byte order.
+ *
+ * src:   current row; for y > 0 the previous row must lie
+ *        width*(bpp/8) bytes in front of it.
+ * width: pixels per row.
+ * bpp:   bits per pixel.
+ * y:     row number; in row 0 only filters 0 and 1 are considered because
+ *        there is no previous row.
+ *
+ * Returns the chosen filter number; 0 if the work buffer cannot be allocated.
+ */
+static int png_find_best_filter(unsigned char*src, unsigned width, int bpp, int y)
+{
+    enum { NUM_FILTERS = 5, PAIR_BYTES = 65536/8 }; /* one bit per byte pair */
+
+    make_num_bits_table();
+
+    int num_filters = y>0?5:2; //don't apply y-direction filter in first line
+
+    int bytes_per_pixel = bpp>>3;
+    int w = width*bytes_per_pixel;
+    int back_x = bytes_per_pixel;
+    int back_y = y?width*bytes_per_pixel:0;
+
+    /* one zeroed bitmap per filter, allocated in a single block */
+    unsigned char*pairs = calloc(NUM_FILTERS, PAIR_BYTES);
+    if(!pairs)
+        return 0;
+
+    /* filtered value of the last byte of the first pixel, per filter; the
+       left neighbour does not exist there and counts as 0 */
+    unsigned char old[NUM_FILTERS];
+    int l = bytes_per_pixel - 1;
+    old[0] = src[l];
+    old[1] = src[l];
+    old[2] = src[l] - src[l-back_y];
+    old[3] = src[l] - src[l-back_y]/2;
+    old[4] = src[l] - PaethPredictor(0, src[l-back_y], 0);
+
+    int different_pairs[NUM_FILTERS] = {0,0,0,0,0};
+
+    int x;
+    for(x=bytes_per_pixel;x<w;x++) {
+        unsigned char dest[NUM_FILTERS];
+        dest[0] = src[x];
+        dest[1] = src[x] - src[x-back_x];
+        dest[2] = src[x] - src[x-back_y];
+        dest[3] = src[x] - (src[x-back_x] + src[x-back_y])/2;
+        dest[4] = src[x] - PaethPredictor(src[x-back_x], src[x-back_y], src[x-back_x-back_y]);
+
+        int i;
+        for(i=0;i<num_filters;i++) {
+            unsigned char*seen = pairs + i*PAIR_BYTES;
+            int v = dest[i]<<8|old[i];
+            int p = v>>3;
+            int b = 1<<(v&7);
+            /* the parentheses matter: `!seen[p]&b` would negate first */
+            if(!(seen[p]&b)) {
+                seen[p] |= b;
+                different_pairs[i]++;
+            }
+        }
+        memcpy(old, dest, sizeof(old));
+    }
+
+    int f;
+    int best_nr = 0;
+    int best_energy = INT_MAX;
+    for(f=0;f<num_filters;f++) {
+        int energy = different_pairs[f];
+        if(energy<best_energy) {
+            best_nr = f;
+            best_energy = energy;
+        }
+    }
+
+    free(pairs);
+    return best_nr;
+}
+
+
+/*
+ * Filter one row: let png_find_best_filter() choose the filter, then apply
+ * it with the 8 bit routine when bpp is 8 and with the 32 bit routine
+ * otherwise.
+ *
+ * Returns the number of the filter that was applied.
+ */
+static int png_apply_filter(unsigned char*dest, unsigned char*src, unsigned width, int y, int bpp)
+{
+    const int chosen = png_find_best_filter(src, width, bpp, y);
+
+    if(bpp!=8) {
+        png_apply_specific_filter_32(chosen, dest, src, width);
+        return chosen;
+    }
+    png_apply_specific_filter_8(chosen, dest, src, width);
+    return chosen;
+}
+
+/* Choose and apply the best filter for row y of an 8 bit-per-pixel image;
+   returns the filter number used. */
+int png_apply_filter_8(unsigned char*dest, unsigned char*src, unsigned width, int y)
+{
+    const int bits_per_pixel = 8;
+    return png_apply_filter(dest, src, width, y, bits_per_pixel);
+}
+/* Choose and apply the best filter for row y of a 32 bit-per-pixel image;
+   returns the filter number used. */
+int png_apply_filter_32(unsigned char*dest, unsigned char*src, unsigned width, int y)
+{
+    const int bits_per_pixel = 32;
+    return png_apply_filter(dest, src, width, y, bits_per_pixel);
+}
+
+EXPORT void png_write_palette_based(const char*filename, unsigned char*data, unsigned width, unsigned height, int numcolors)