Browse Source

Merge branch 'release/v0.0.1'

Jérôme BUISINE 8 months ago
parent
commit
b1f09b4719
13 changed files with 9014 additions and 119 deletions
  1. 4 2
      CMakeLists.txt
  2. 9 1
      README.md
  3. 3 0
      lodepng/CMakeLists.txt
  4. 6389 0
      lodepng/lodepng.cpp
  5. 1941 0
      lodepng/lodepng.h
  6. 9 0
      main/CMakeLists.txt
  7. 41 0
      main/rawls_convert.cpp
  8. 0 0
      main/rawls_h_flip.cpp
  9. 146 0
      main/rawls_merge.cpp
  10. 5 2
      rawls/CMakeLists.txt
  11. 391 0
      rawls/rawls.cpp
  12. 76 0
      rawls/rawls.h
  13. 0 114
      rawls/rawls_reader.cpp

+ 4 - 2
CMakeLists.txt

@@ -1,7 +1,9 @@
 # CMakeLists files in this project can
 # refer to the root source directory of the project as ${rawls-reader_SOURCE_DIR} and
 # to the root binary directory of the project as ${rawls-reader_BINARY_DIR}.
-cmake_minimum_required (VERSION 2.8.11)
+cmake_minimum_required (VERSION 3.16.2)
 project(rawls-reader)
 
-add_subdirectory(rawls)
+add_subdirectory(lodepng)
+add_subdirectory(rawls)
+add_subdirectory(main)

+ 9 - 1
README.md

@@ -25,7 +25,15 @@ cd build && cmake ..
 make 
 ```
 
+### Examples:
+
 Convert the `.rawls` image format into `.ppm` 
 ```
-./rawls/rawls_reader --image ../path/to/image.rawls_20 --outfile image.ppm
+./main/rawls_reader --image ../path/to/image.rawls --outfile image.ppm
+```
+
+
+Merge `.rawls` images samples
+```
+./main/rawls_merge --folder ../path/to/images --samples 100 --random 0 --outfile image.png
 ```

+ 3 - 0
lodepng/CMakeLists.txt

@@ -0,0 +1,3 @@
+add_library(lodepng lodepng.cpp)
+
+target_include_directories (lodepng PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})

+ 6389 - 0
lodepng/lodepng.cpp

@@ -0,0 +1,6389 @@
+/*
+LodePNG version 20191219
+
+Copyright (c) 2005-2019 Lode Vandevenne
+
+This software is provided 'as-is', without any express or implied
+warranty. In no event will the authors be held liable for any damages
+arising from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose,
+including commercial applications, and to alter it and redistribute it
+freely, subject to the following restrictions:
+
+    1. The origin of this software must not be misrepresented; you must not
+    claim that you wrote the original software. If you use this software
+    in a product, an acknowledgment in the product documentation would be
+    appreciated but is not required.
+
+    2. Altered source versions must be plainly marked as such, and must not be
+    misrepresented as being the original software.
+
+    3. This notice may not be removed or altered from any source
+    distribution.
+*/
+
+/*
+The manual and changelog are in the header file "lodepng.h"
+Rename this file to lodepng.cpp to use it for C++, or to lodepng.c to use it for C.
+*/
+
+#include "lodepng.h"
+
+#ifdef LODEPNG_COMPILE_DISK
+#include <limits.h> /* LONG_MAX */
+#include <stdio.h> /* file handling */
+#endif /* LODEPNG_COMPILE_DISK */
+
+#ifdef LODEPNG_COMPILE_ALLOCATORS
+#include <stdlib.h> /* allocations */
+#endif /* LODEPNG_COMPILE_ALLOCATORS */
+
+#if defined(_MSC_VER) && (_MSC_VER >= 1310) /*Visual Studio: A few warning types are not desired here.*/
+#pragma warning( disable : 4244 ) /*implicit conversions: not warned by gcc -Wall -Wextra and requires too much casts*/
+#pragma warning( disable : 4996 ) /*VS does not like fopen, but fopen_s is not standard C so unusable here*/
+#endif /*_MSC_VER */
+
+const char* LODEPNG_VERSION_STRING = "20191219";
+
+/*
+This source file is built up in the following large parts. The code sections
+with the "LODEPNG_COMPILE_" #defines divide this up further in an intermixed way.
+-Tools for C and common code for PNG and Zlib
+-C Code for Zlib (huffman, deflate, ...)
+-C Code for PNG (file format chunks, adam7, PNG filters, color conversions, ...)
+-The C++ wrapper around all of the above
+*/
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* ////////////////////////////////////////////////////////////////////////// */
+/* // Tools for C, and common code for PNG and Zlib.                       // */
+/* ////////////////////////////////////////////////////////////////////////// */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/*The malloc, realloc and free functions defined here with "lodepng_" in front
+of the name, so that you can easily change them to others related to your
+platform if needed. Everything else in the code calls these. Pass
+-DLODEPNG_NO_COMPILE_ALLOCATORS to the compiler, or comment out
+#define LODEPNG_COMPILE_ALLOCATORS in the header, to disable the ones here and
+define them in your own project's source files without needing to change
+lodepng source code. Don't forget to remove "static" if you copypaste them
+from here.*/
+
+#ifdef LODEPNG_COMPILE_ALLOCATORS
+static void* lodepng_malloc(size_t size) {
+#ifdef LODEPNG_MAX_ALLOC
+  if(size > LODEPNG_MAX_ALLOC) return 0;
+#endif
+  return malloc(size);
+}
+
+static void* lodepng_realloc(void* ptr, size_t new_size) {
+#ifdef LODEPNG_MAX_ALLOC
+  if(new_size > LODEPNG_MAX_ALLOC) return 0;
+#endif
+  return realloc(ptr, new_size);
+}
+
+static void lodepng_free(void* ptr) {
+  free(ptr);
+}
+#else /*LODEPNG_COMPILE_ALLOCATORS*/
+/* TODO: support giving additional void* payload to the custom allocators */
+void* lodepng_malloc(size_t size);
+void* lodepng_realloc(void* ptr, size_t new_size);
+void lodepng_free(void* ptr);
+#endif /*LODEPNG_COMPILE_ALLOCATORS*/
+
+/* convince the compiler to inline a function, for use when this measurably improves performance */
+/* inline is not available in C90, but use it when supported by the compiler */
+#if (defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L)) || (defined(__cplusplus) && (__cplusplus >= 199711L))
+#define LODEPNG_INLINE inline
+#else
+#define LODEPNG_INLINE /* not available */
+#endif
+
+/* restrict is not available in C90, but use it when supported by the compiler */
+#if (defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1))) ||\
+    (defined(_MSC_VER) && (_MSC_VER >= 1400)) || \
+    (defined(__WATCOMC__) && (__WATCOMC__ >= 1250) && !defined(__cplusplus))
+#define LODEPNG_RESTRICT __restrict
+#else
+#define LODEPNG_RESTRICT /* not available */
+#endif
+
+/* Replacements for C library functions memcpy and strlen, to support those platforms
+where a full C library is not available. The compiler can recognize them and compile
+to something as fast. */
+
+static void lodepng_memcpy(void* LODEPNG_RESTRICT dst,
+                           const void* LODEPNG_RESTRICT src, size_t size) {
+  size_t i;
+  for(i = 0; i < size; i++) ((char*)dst)[i] = ((const char*)src)[i];
+}
+
+/* does not check memory out of bounds, do not use on untrusted data */
+static size_t lodepng_strlen(const char* a) {
+  const char* orig = a;
+  /* avoid warning about unused function in case of disabled COMPILE... macros */
+  (void)lodepng_strlen;
+  while(*a) a++;
+  return (size_t)(a - orig);
+}
+
+#define LODEPNG_MAX(a, b) (((a) > (b)) ? (a) : (b))
+#define LODEPNG_MIN(a, b) (((a) < (b)) ? (a) : (b))
+#define LODEPNG_ABS(x) ((x) < 0 ? -(x) : (x))
+
+
+#ifdef LODEPNG_COMPILE_DECODER
+/* Safely check if multiplying two integers will overflow (no undefined
+behavior, compiler removing the code, etc...) and output result. */
+static int lodepng_mulofl(size_t a, size_t b, size_t* result) {
+  *result = a * b; /* Unsigned multiplication is well defined and safe in C90 */
+  return (a != 0 && *result / a != b);
+}
+
+/* Safely check if adding two integers will overflow (no undefined
+behavior, compiler removing the code, etc...) and output result. */
+static int lodepng_addofl(size_t a, size_t b, size_t* result) {
+  *result = a + b; /* Unsigned addition is well defined and safe in C90 */
+  return *result < a;
+}
+
+#ifdef LODEPNG_COMPILE_ZLIB
+/* Safely check if a + b > c, even if overflow could happen. */
+static int lodepng_gtofl(size_t a, size_t b, size_t c) {
+  size_t d;
+  if(lodepng_addofl(a, b, &d)) return 1;
+  return d > c;
+}
+#endif /*LODEPNG_COMPILE_ZLIB*/
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+
+/*
+Often in case of an error a value is assigned to a variable and then it breaks
+out of a loop (to go to the cleanup phase of a function). This macro does that.
+It makes the error handling code shorter and more readable.
+
+Example: if(!uivector_resizev(&frequencies_ll, 286, 0)) ERROR_BREAK(83);
+*/
+#define CERROR_BREAK(errorvar, code){\
+  errorvar = code;\
+  break;\
+}
+
+/*version of CERROR_BREAK that assumes the common case where the error variable is named "error"*/
+#define ERROR_BREAK(code) CERROR_BREAK(error, code)
+
+/*Set error var to the error code, and return it.*/
+#define CERROR_RETURN_ERROR(errorvar, code){\
+  errorvar = code;\
+  return code;\
+}
+
+/*Try the code, if it returns error, also return the error.*/
+#define CERROR_TRY_RETURN(call){\
+  unsigned error = call;\
+  if(error) return error;\
+}
+
+/*Set error var to the error code, and return from the void function.*/
+#define CERROR_RETURN(errorvar, code){\
+  errorvar = code;\
+  return;\
+}
+
+/*
+About uivector, ucvector and string:
+-All of them wrap dynamic arrays or text strings in a similar way.
+-LodePNG was originally written in C++. The vectors replace the std::vectors that were used in the C++ version.
+-The string tools are made to avoid problems with compilers that declare things like strncat as deprecated.
+-They're not used in the interface, only internally in this file as static functions.
+-As with many other structs in this file, the init and cleanup functions serve as ctor and dtor.
+*/
+
+#ifdef LODEPNG_COMPILE_ZLIB
+#ifdef LODEPNG_COMPILE_ENCODER
+/*dynamic vector of unsigned ints*/
+typedef struct uivector {
+  unsigned* data;
+  size_t size; /*size in number of unsigned longs*/
+  size_t allocsize; /*allocated size in bytes*/
+} uivector;
+
+static void uivector_cleanup(void* p) {
+  ((uivector*)p)->size = ((uivector*)p)->allocsize = 0;
+  lodepng_free(((uivector*)p)->data);
+  ((uivector*)p)->data = NULL;
+}
+
+/*returns 1 if success, 0 if failure ==> nothing done*/
+static unsigned uivector_reserve(uivector* p, size_t allocsize) {
+  if(allocsize > p->allocsize) {
+    size_t newsize = (allocsize > p->allocsize * 2u) ? allocsize : ((allocsize * 3u) >> 1u);
+    void* data = lodepng_realloc(p->data, newsize);
+    if(data) {
+      p->allocsize = newsize;
+      p->data = (unsigned*)data;
+    }
+    else return 0; /*error: not enough memory*/
+  }
+  return 1;
+}
+
+/*returns 1 if success, 0 if failure ==> nothing done*/
+static unsigned uivector_resize(uivector* p, size_t size) {
+  if(!uivector_reserve(p, size * sizeof(unsigned))) return 0;
+  p->size = size;
+  return 1; /*success*/
+}
+
+/*resize and give all new elements the value*/
+static unsigned uivector_resizev(uivector* p, size_t size, unsigned value) {
+  size_t oldsize = p->size, i;
+  if(!uivector_resize(p, size)) return 0;
+  for(i = oldsize; i < size; ++i) p->data[i] = value;
+  return 1;
+}
+
+static void uivector_init(uivector* p) {
+  p->data = NULL;
+  p->size = p->allocsize = 0;
+}
+
+/*returns 1 if success, 0 if failure ==> nothing done*/
+static unsigned uivector_push_back(uivector* p, unsigned c) {
+  if(!uivector_resize(p, p->size + 1)) return 0;
+  p->data[p->size - 1] = c;
+  return 1;
+}
+#endif /*LODEPNG_COMPILE_ENCODER*/
+#endif /*LODEPNG_COMPILE_ZLIB*/
+
+/* /////////////////////////////////////////////////////////////////////////// */
+
+/*dynamic vector of unsigned chars*/
+typedef struct ucvector {
+  unsigned char* data;
+  size_t size; /*used size*/
+  size_t allocsize; /*allocated size*/
+} ucvector;
+
+/*returns 1 if success, 0 if failure ==> nothing done*/
+static unsigned ucvector_reserve(ucvector* p, size_t allocsize) {
+  if(allocsize > p->allocsize) {
+    size_t newsize = (allocsize > p->allocsize * 2u) ? allocsize : ((allocsize * 3u) >> 1u);
+    void* data = lodepng_realloc(p->data, newsize);
+    if(data) {
+      p->allocsize = newsize;
+      p->data = (unsigned char*)data;
+    }
+    else return 0; /*error: not enough memory*/
+  }
+  return 1;
+}
+
+/*returns 1 if success, 0 if failure ==> nothing done*/
+static unsigned ucvector_resize(ucvector* p, size_t size) {
+  if(!ucvector_reserve(p, size * sizeof(unsigned char))) return 0;
+  p->size = size;
+  return 1; /*success*/
+}
+
+#ifdef LODEPNG_COMPILE_PNG
+
+static void ucvector_cleanup(void* p) {
+  ((ucvector*)p)->size = ((ucvector*)p)->allocsize = 0;
+  lodepng_free(((ucvector*)p)->data);
+  ((ucvector*)p)->data = NULL;
+}
+
+static void ucvector_init(ucvector* p) {
+  p->data = NULL;
+  p->size = p->allocsize = 0;
+}
+#endif /*LODEPNG_COMPILE_PNG*/
+
+#ifdef LODEPNG_COMPILE_ZLIB
+/*you can both convert from vector to buffer&size and vice versa. If you use
+init_buffer to take over a buffer and size, it is not needed to use cleanup*/
+static void ucvector_init_buffer(ucvector* p, unsigned char* buffer, size_t size) {
+  p->data = buffer;
+  p->allocsize = p->size = size;
+}
+#endif /*LODEPNG_COMPILE_ZLIB*/
+
+#if (defined(LODEPNG_COMPILE_PNG) && defined(LODEPNG_COMPILE_ANCILLARY_CHUNKS)) || defined(LODEPNG_COMPILE_ENCODER)
+/*returns 1 if success, 0 if failure ==> nothing done*/
+static unsigned ucvector_push_back(ucvector* p, unsigned char c) {
+  if(!ucvector_resize(p, p->size + 1)) return 0;
+  p->data[p->size - 1] = c;
+  return 1;
+}
+#endif /*defined(LODEPNG_COMPILE_PNG) || defined(LODEPNG_COMPILE_ENCODER)*/
+
+
+/* ////////////////////////////////////////////////////////////////////////// */
+
+#ifdef LODEPNG_COMPILE_PNG
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+
+/*free string pointer and set it to NULL*/
+static void string_cleanup(char** out) {
+  lodepng_free(*out);
+  *out = NULL;
+}
+
+/* dynamically allocates a new string with a copy of the null terminated input text */
+static char* alloc_string(const char* in) {
+  size_t insize = lodepng_strlen(in);
+  char* out = (char*)lodepng_malloc(insize + 1);
+  if(out) {
+    size_t i;
+    for(i = 0; i != insize; ++i) {
+      out[i] = in[i];
+    }
+    out[i] = 0;
+  }
+  return out;
+}
+#endif /*LODEPNG_COMPILE_ANCILLARY_CHUNKS*/
+#endif /*LODEPNG_COMPILE_PNG*/
+
+/* ////////////////////////////////////////////////////////////////////////// */
+
+#if defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_PNG)
+static unsigned lodepng_read32bitInt(const unsigned char* buffer) {
+  return (((unsigned)buffer[0] << 24u) | ((unsigned)buffer[1] << 16u) |
+         ((unsigned)buffer[2] << 8u) | (unsigned)buffer[3]);
+}
+#endif /*defined(LODEPNG_COMPILE_DECODER) || defined(LODEPNG_COMPILE_PNG)*/
+
+#if defined(LODEPNG_COMPILE_PNG) || defined(LODEPNG_COMPILE_ENCODER)
+/*buffer must have at least 4 allocated bytes available*/
+static void lodepng_set32bitInt(unsigned char* buffer, unsigned value) {
+  buffer[0] = (unsigned char)((value >> 24) & 0xff);
+  buffer[1] = (unsigned char)((value >> 16) & 0xff);
+  buffer[2] = (unsigned char)((value >>  8) & 0xff);
+  buffer[3] = (unsigned char)((value      ) & 0xff);
+}
+#endif /*defined(LODEPNG_COMPILE_PNG) || defined(LODEPNG_COMPILE_ENCODER)*/
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / File IO                                                                / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+#ifdef LODEPNG_COMPILE_DISK
+
+/* returns negative value on error. This should be pure C compatible, so no fstat. */
+static long lodepng_filesize(const char* filename) {
+  FILE* file;
+  long size;
+  file = fopen(filename, "rb");
+  if(!file) return -1;
+
+  if(fseek(file, 0, SEEK_END) != 0) {
+    fclose(file);
+    return -1;
+  }
+
+  size = ftell(file);
+  /* It may give LONG_MAX as directory size, this is invalid for us. */
+  if(size == LONG_MAX) size = -1;
+
+  fclose(file);
+  return size;
+}
+
+/* load file into buffer that already has the correct allocated size. Returns error code.*/
+static unsigned lodepng_buffer_file(unsigned char* out, size_t size, const char* filename) {
+  FILE* file;
+  size_t readsize;
+  file = fopen(filename, "rb");
+  if(!file) return 78;
+
+  readsize = fread(out, 1, size, file);
+  fclose(file);
+
+  if (readsize != size) return 78;
+  return 0;
+}
+
+unsigned lodepng_load_file(unsigned char** out, size_t* outsize, const char* filename) {
+  long size = lodepng_filesize(filename);
+  if (size < 0) return 78;
+  *outsize = (size_t)size;
+
+  *out = (unsigned char*)lodepng_malloc((size_t)size);
+  if(!(*out) && size > 0) return 83; /*the above malloc failed*/
+
+  return lodepng_buffer_file(*out, (size_t)size, filename);
+}
+
+/*write given buffer to the file, overwriting the file, it doesn't append to it.*/
+unsigned lodepng_save_file(const unsigned char* buffer, size_t buffersize, const char* filename) {
+  FILE* file;
+  file = fopen(filename, "wb" );
+  if(!file) return 79;
+  fwrite(buffer, 1, buffersize, file);
+  fclose(file);
+  return 0;
+}
+
+#endif /*LODEPNG_COMPILE_DISK*/
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* ////////////////////////////////////////////////////////////////////////// */
+/* // End of common code and tools. Begin of Zlib related code.            // */
+/* ////////////////////////////////////////////////////////////////////////// */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+#ifdef LODEPNG_COMPILE_ZLIB
+#ifdef LODEPNG_COMPILE_ENCODER
+
+typedef struct {
+  ucvector* data;
+  size_t bp;
+} LodePNGBitWriter;
+
+void LodePNGBitWriter_init(LodePNGBitWriter* writer, ucvector* data) {
+  writer->data = data;
+  writer->bp = 0;
+}
+
+/*TODO: this ignores potential out of memory errors*/
+#define WRITEBIT(/*size_t**/ writer, /*unsigned char*/ bit){\
+  /* append new byte */\
+  if(((writer->bp) & 7u) == 0) ucvector_push_back(writer->data, (unsigned char)0);\
+  (writer->data->data[writer->data->size - 1]) |= (bit << ((writer->bp) & 7u));\
+  ++writer->bp;\
+}
+
+/* LSB of value is written first, and LSB of bytes is used first */
+static void writeBits(LodePNGBitWriter* writer, unsigned value, size_t nbits) {
+  if(nbits == 1) { /* compiler should statically compile this case if nbits == 1 */
+    WRITEBIT(writer, value);
+  } else {
+    /* TODO: increase output size nly once here rather than in each WRITEBIT */
+    size_t i;
+    for(i = 0; i != nbits; ++i) {
+      WRITEBIT(writer, (unsigned char)((value >> i) & 1));
+    }
+  }
+}
+
+/* This one is to use for adding huffman symbol, the value bits are written MSB first */
+static void writeBitsReversed(LodePNGBitWriter* writer, unsigned value, size_t nbits) {
+  size_t i;
+  for(i = 0; i != nbits; ++i) {
+    /* TODO: increase output size only once here rather than in each WRITEBIT */
+    WRITEBIT(writer, (unsigned char)((value >> (nbits - 1u - i)) & 1u));
+  }
+}
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+#ifdef LODEPNG_COMPILE_DECODER
+
+typedef struct {
+  const unsigned char* data;
+  size_t size; /*size of data in bytes*/
+  size_t bitsize; /*size of data in bits, end of valid bp values, should be 8*size*/
+  size_t bp;
+  unsigned buffer; /*buffer for reading bits. NOTE: 'unsigned' must support at least 32 bits*/
+} LodePNGBitReader;
+
+/* data size argument is in bytes. Returns error if size too large causing overflow */
+static unsigned LodePNGBitReader_init(LodePNGBitReader* reader, const unsigned char* data, size_t size) {
+  size_t temp;
+  reader->data = data;
+  reader->size = size;
+  /* size in bits, return error if overflow (if size_t is 32 bit this supports up to 500MB)  */
+  if(lodepng_mulofl(size, 8u, &reader->bitsize)) return 105;
+  /*ensure incremented bp can be compared to bitsize without overflow even when it would be incremented 32 too much and
+  trying to ensure 32 more bits*/
+  if(lodepng_addofl(reader->bitsize, 64u, &temp)) return 105;
+  reader->bp = 0;
+  reader->buffer = 0;
+  return 0; /*ok*/
+}
+
+/*
+ensureBits functions:
+Ensures the reader can at least read nbits bits in one or more readBits calls,
+safely even if not enough bits are available.
+Returns 1 if there are enough bits available, 0 if not.
+*/
+
+/*See ensureBits documentation above. This one ensures exactly 1 bit */
+/*static unsigned ensureBits1(LodePNGBitReader* reader) {
+  if(reader->bp >= reader->bitsize) return 0;
+  reader->buffer = (unsigned)reader->data[reader->bp >> 3u] >> (reader->bp & 7u);
+  return 1;
+}*/
+
+/*See ensureBits documentation above. This one ensures up to 9 bits */
+static unsigned ensureBits9(LodePNGBitReader* reader, size_t nbits) {
+  size_t start = reader->bp >> 3u;
+  size_t size = reader->size;
+  if(start + 1u < size) {
+    reader->buffer = (unsigned)reader->data[start + 0] | ((unsigned)reader->data[start + 1] << 8u);
+    reader->buffer >>= (reader->bp & 7u);
+    return 1;
+  } else {
+    reader->buffer = 0;
+    if(start + 0u < size) reader->buffer |= reader->data[start + 0];
+    reader->buffer >>= (reader->bp & 7u);
+    return reader->bp + nbits <= reader->bitsize;
+  }
+}
+
+/*See ensureBits documentation above. This one ensures up to 17 bits */
+static unsigned ensureBits17(LodePNGBitReader* reader, size_t nbits) {
+  size_t start = reader->bp >> 3u;
+  size_t size = reader->size;
+  if(start + 2u < size) {
+    reader->buffer = (unsigned)reader->data[start + 0] | ((unsigned)reader->data[start + 1] << 8u) |
+                     ((unsigned)reader->data[start + 2] << 16u);
+    reader->buffer >>= (reader->bp & 7u);
+    return 1;
+  } else {
+    reader->buffer = 0;
+    if(start + 0u < size) reader->buffer |= reader->data[start + 0];
+    if(start + 1u < size) reader->buffer |= ((unsigned)reader->data[start + 1] << 8u);
+    reader->buffer >>= (reader->bp & 7u);
+    return reader->bp + nbits <= reader->bitsize;
+  }
+}
+
+/*See ensureBits documentation above. This one ensures up to 25 bits */
+static LODEPNG_INLINE unsigned ensureBits25(LodePNGBitReader* reader, size_t nbits) {
+  size_t start = reader->bp >> 3u;
+  size_t size = reader->size;
+  if(start + 3u < size) {
+    reader->buffer = (unsigned)reader->data[start + 0] | ((unsigned)reader->data[start + 1] << 8u) |
+                     ((unsigned)reader->data[start + 2] << 16u) | ((unsigned)reader->data[start + 3] << 24u);
+    reader->buffer >>= (reader->bp & 7u);
+    return 1;
+  } else {
+    reader->buffer = 0;
+    if(start + 0u < size) reader->buffer |= reader->data[start + 0];
+    if(start + 1u < size) reader->buffer |= ((unsigned)reader->data[start + 1] << 8u);
+    if(start + 2u < size) reader->buffer |= ((unsigned)reader->data[start + 2] << 16u);
+    reader->buffer >>= (reader->bp & 7u);
+    return reader->bp + nbits <= reader->bitsize;
+  }
+}
+
+/*See ensureBits documentation above. This one ensures up to 32 bits */
+static LODEPNG_INLINE unsigned ensureBits32(LodePNGBitReader* reader, size_t nbits) {
+  size_t start = reader->bp >> 3u;
+  size_t size = reader->size;
+  if(start + 4u < size) {
+    reader->buffer = (unsigned)reader->data[start + 0] | ((unsigned)reader->data[start + 1] << 8u) |
+                     ((unsigned)reader->data[start + 2] << 16u) | ((unsigned)reader->data[start + 3] << 24u);
+    reader->buffer >>= (reader->bp & 7u);
+    reader->buffer |= (((unsigned)reader->data[start + 4] << 24u) << (8u - (reader->bp & 7u)));
+    return 1;
+  } else {
+    reader->buffer = 0;
+    if(start + 0u < size) reader->buffer |= reader->data[start + 0];
+    if(start + 1u < size) reader->buffer |= ((unsigned)reader->data[start + 1] << 8u);
+    if(start + 2u < size) reader->buffer |= ((unsigned)reader->data[start + 2] << 16u);
+    if(start + 3u < size) reader->buffer |= ((unsigned)reader->data[start + 3] << 24u);
+    reader->buffer >>= (reader->bp & 7u);
+    return reader->bp + nbits <= reader->bitsize;
+  }
+}
+
+/* Get bits without advancing the bit pointer. Must have enough bits available with ensureBits. Max nbits is 31. */
+static unsigned peekBits(LodePNGBitReader* reader, size_t nbits) {
+  /* The shift allows nbits to be only up to 31. */
+  return reader->buffer & ((1u << nbits) - 1u);
+}
+
+/* Must have enough bits available with ensureBits */
+static void advanceBits(LodePNGBitReader* reader, size_t nbits) {
+  reader->buffer >>= nbits;
+  reader->bp += nbits;
+}
+
+/* Must have enough bits available with ensureBits */
+static unsigned readBits(LodePNGBitReader* reader, size_t nbits) {
+  unsigned result = peekBits(reader, nbits);
+  advanceBits(reader, nbits);
+  return result;
+}
+
+/* Public for testing only. steps and result must have numsteps values. */
+unsigned lode_png_test_bitreader(const unsigned char* data, size_t size,
+                                 size_t numsteps, const size_t* steps, unsigned* result) {
+  size_t i;
+  LodePNGBitReader reader;
+  LodePNGBitReader_init(&reader, data, size);
+  for(i = 0; i < numsteps; i++) {
+    size_t step = steps[i];
+    unsigned ok;
+    if(step > 25) ok = ensureBits32(&reader, step);
+    else if(step > 17) ok = ensureBits25(&reader, step);
+    else if(step > 9) ok = ensureBits17(&reader, step);
+    else ok = ensureBits9(&reader, step);
+    if(!ok) return 0;
+    result[i] = readBits(&reader, step);
+  }
+  return 1;
+}
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+static unsigned reverseBits(unsigned bits, unsigned num) {
+  /*TODO: implement faster lookup table based version when needed*/
+  unsigned i, result = 0;
+  for(i = 0; i < num; i++) result |= ((bits >> (num - i - 1u)) & 1u) << i;
+  return result;
+}
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / Deflate - Huffman                                                      / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+#define FIRST_LENGTH_CODE_INDEX 257
+#define LAST_LENGTH_CODE_INDEX 285
+/*256 literals, the end code, some length codes, and 2 unused codes*/
+#define NUM_DEFLATE_CODE_SYMBOLS 288
+/*the distance codes have their own symbols, 30 used, 2 unused*/
+#define NUM_DISTANCE_SYMBOLS 32
+/*the code length codes. 0-15: code lengths, 16: copy previous 3-6 times, 17: 3-10 zeros, 18: 11-138 zeros*/
+#define NUM_CODE_LENGTH_CODES 19
+
+/*the base lengths represented by codes 257-285*/
+static const unsigned LENGTHBASE[29]
+  = {3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59,
+     67, 83, 99, 115, 131, 163, 195, 227, 258};
+
+/*the extra bits used by codes 257-285 (added to base length)*/
+static const unsigned LENGTHEXTRA[29]
+  = {0, 0, 0, 0, 0, 0, 0,  0,  1,  1,  1,  1,  2,  2,  2,  2,  3,  3,  3,  3,
+      4,  4,  4,   4,   5,   5,   5,   5,   0};
+
+/*the base backwards distances (the bits of distance codes appear after length codes and use their own huffman tree)*/
+static const unsigned DISTANCEBASE[30]
+  = {1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513,
+     769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577};
+
+/*the extra bits of backwards distances (added to base)*/
+static const unsigned DISTANCEEXTRA[30]
+  = {0, 0, 0, 0, 1, 1, 2,  2,  3,  3,  4,  4,  5,  5,   6,   6,   7,   7,   8,
+       8,    9,    9,   10,   10,   11,   11,   12,    12,    13,    13};
+
+/*the order in which "code length alphabet code lengths" are stored, out of this
+the huffman tree of the dynamic huffman tree lengths is generated*/
+static const unsigned CLCL_ORDER[NUM_CODE_LENGTH_CODES]
+  = {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
+
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/*
+Huffman tree struct, containing multiple representations of the tree
+*/
+typedef struct HuffmanTree {
+  unsigned* codes; /*the huffman codes (bit patterns representing the symbols)*/
+  unsigned* lengths; /*the lengths of the huffman codes*/
+  unsigned maxbitlen; /*maximum number of bits a single code can get*/
+  unsigned numcodes; /*number of symbols in the alphabet = number of codes*/
+  /* for reading only */
+  unsigned char* table_len; /*length of symbol from lookup table, or max length if secondary lookup needed*/
+  unsigned short* table_value; /*value of symbol from lookup table, or pointer to secondary table if needed*/
+} HuffmanTree;
+
+static void HuffmanTree_init(HuffmanTree* tree) {
+  tree->codes = 0;
+  tree->lengths = 0;
+  tree->table_len = 0;
+  tree->table_value = 0;
+}
+
+static void HuffmanTree_cleanup(HuffmanTree* tree) {
+  lodepng_free(tree->codes);
+  lodepng_free(tree->lengths);
+  lodepng_free(tree->table_len);
+  lodepng_free(tree->table_value);
+}
+
+/* amount of bits for first huffman table lookup (aka root bits), see HuffmanTree_makeTable and huffmanDecodeSymbol.*/
+/* values 8u and 9u work the fastest */
+#define FIRSTBITS 9u
+
+/* a symbol value too big to represent any valid symbol, to indicate reading disallowed huffman bits combination,
+which is possible in case of only 0 or 1 present symbols. */
+#define INVALIDSYMBOL 65535u
+
+/* make table for huffman decoding */
+static unsigned HuffmanTree_makeTable(HuffmanTree* tree) {
+  static const unsigned headsize = 1u << FIRSTBITS; /*size of the first table*/
+  static const unsigned mask = (1u << FIRSTBITS) /*headsize*/ - 1u;
+  size_t i, numpresent, pointer, size; /*total table size*/
+  unsigned* maxlens = (unsigned*)lodepng_malloc(headsize * sizeof(unsigned));
+  if(!maxlens) return 83; /*alloc fail*/
+
+  /* compute maxlens: max total bit length of symbols sharing prefix in the first table*/
+  for(i = 0; i < headsize; ++i) maxlens[i] = 0;
+  for(i = 0; i < tree->numcodes; i++) {
+    unsigned symbol = tree->codes[i];
+    unsigned l = tree->lengths[i];
+    unsigned index;
+    if(l <= FIRSTBITS) continue; /*symbols that fit in first table don't increase secondary table size*/
+    /*get the FIRSTBITS MSBs, the MSBs of the symbol are encoded first. See later comment about the reversing*/
+    index = reverseBits(symbol >> (l - FIRSTBITS), FIRSTBITS);
+    maxlens[index] = LODEPNG_MAX(maxlens[index], l);
+  }
+  /* compute total table size: size of first table plus all secondary tables for symbols longer than FIRSTBITS */
+  size = headsize;
+  for(i = 0; i < headsize; ++i) {
+    unsigned l = maxlens[i];
+    if(l > FIRSTBITS) size += (1u << (l - FIRSTBITS));
+  }
+  tree->table_len = (unsigned char*)lodepng_malloc(size * sizeof(*tree->table_len));
+  tree->table_value = (unsigned short*)lodepng_malloc(size * sizeof(*tree->table_value));
+  if(!tree->table_len || !tree->table_value) {
+    lodepng_free(maxlens);
+    /* freeing tree->table values is done at a higher scope */
+    return 83; /*alloc fail*/
+  }
+  /*initialize with an invalid length to indicate unused entries*/
+  for(i = 0; i < size; ++i) tree->table_len[i] = 16;
+
+  /*fill in the first table for long symbols: max prefix size and pointer to secondary tables*/
+  pointer = headsize;
+  for(i = 0; i < headsize; ++i) {
+    unsigned l = maxlens[i];
+    if(l <= FIRSTBITS) continue;
+    tree->table_len[i] = l;
+    tree->table_value[i] = pointer;
+    pointer += (1u << (l - FIRSTBITS));
+  }
+  lodepng_free(maxlens);
+
+  /*fill in the first table for short symbols, or secondary table for long symbols*/
+  numpresent = 0;
+  for(i = 0; i < tree->numcodes; ++i) {
+    unsigned l = tree->lengths[i];
+    unsigned symbol = tree->codes[i]; /*the huffman bit pattern. i itself is the value.*/
+    /*reverse bits, because the huffman bits are given in MSB first order but the bit reader reads LSB first*/
+    unsigned reverse = reverseBits(symbol, l);
+    if(l == 0) continue;
+    numpresent++;
+
+    if(l <= FIRSTBITS) {
+      /*short symbol, fully in first table, replicated num times if l < FIRSTBITS*/
+      unsigned num = 1u << (FIRSTBITS - l);
+      unsigned j;
+      for(j = 0; j < num; ++j) {
+        /*bit reader will read the l bits of symbol first, the remaining FIRSTBITS - l bits go to the MSB's*/
+        unsigned index = reverse | (j << l);
+        if(tree->table_len[index] != 16) return 55; /*invalid tree: long symbol shares prefix with short symbol*/
+        tree->table_len[index] = l;
+        tree->table_value[index] = i;
+      }
+    } else {
+      /*long symbol, shares prefix with other long symbols in first lookup table, needs second lookup*/
+      /*the FIRSTBITS MSBs of the symbol are the first table index*/
+      unsigned index = reverse & mask;
+      unsigned maxlen = tree->table_len[index];
+      /*log2 of secondary table length, should be >= l - FIRSTBITS*/
+      unsigned tablelen = maxlen - FIRSTBITS;
+      unsigned start = tree->table_value[index]; /*starting index in secondary table*/
+      unsigned num = 1u << (tablelen - (l - FIRSTBITS)); /*amount of entries of this symbol in secondary table*/
+      unsigned j;
+      if(maxlen < l) return 55; /*invalid tree: long symbol shares prefix with short symbol*/
+      for(j = 0; j < num; ++j) {
+        unsigned reverse2 = reverse >> FIRSTBITS; /* l - FIRSTBITS bits */
+        unsigned index2 = start + (reverse2 | (j << (l - FIRSTBITS)));
+        tree->table_len[index2] = l;
+        tree->table_value[index2] = i;
+      }
+    }
+  }
+
+  if(numpresent < 2) {
+    /* In case of exactly 1 symbol, in theory the huffman symbol needs 0 bits,
+    but deflate uses 1 bit instead. In case of 0 symbols, no symbols can
+    appear at all, but such huffman tree could still exist (e.g. if distance
+    codes are never used). In both cases, not all symbols of the table will be
+    filled in. Fill them in with an invalid symbol value so returning them from
+    huffmanDecodeSymbol will cause error. */
+    for(i = 0; i < size; ++i) {
+      if(tree->table_len[i] == 16) {
+        /* As length, use a value smaller than FIRSTBITS for the head table,
+        and a value larger than FIRSTBITS for the secondary table, to ensure
+        valid behavior for advanceBits when reading this symbol. */
+        tree->table_len[i] = (i < headsize) ? 1 : (FIRSTBITS + 1);
+        tree->table_value[i] = INVALIDSYMBOL;
+      }
+    }
+  } else {
+    /* A good huffman tree has N * 2 - 1 nodes, of which N - 1 are internal nodes.
+    If that is not the case (due to too long length codes), the table will not
+    have been fully used, and this is an error (not all bit combinations can be
+    decoded): an oversubscribed huffman tree, indicated by error 55. */
+    for(i = 0; i < size; ++i) {
+      if(tree->table_len[i] == 16) return 55;
+    }
+  }
+
+  return 0;
+}
+
+/*
+Second step for the ...makeFromLengths and ...makeFromFrequencies functions.
+numcodes, lengths and maxbitlen must already be filled in correctly. return
+value is error.
+*/
+static unsigned HuffmanTree_makeFromLengths2(HuffmanTree* tree) {
+  unsigned* blcount;
+  unsigned* nextcode;
+  unsigned error = 0;
+  unsigned bits, n;
+
+  tree->codes = (unsigned*)lodepng_malloc(tree->numcodes * sizeof(unsigned));
+  blcount = (unsigned*)lodepng_malloc((tree->maxbitlen + 1) * sizeof(unsigned));
+  nextcode = (unsigned*)lodepng_malloc((tree->maxbitlen + 1) * sizeof(unsigned));
+  if(!tree->codes || !blcount || !nextcode) error = 83; /*alloc fail*/
+
+  if(!error) {
+    for(n = 0; n != tree->maxbitlen + 1; n++) blcount[n] = nextcode[n] = 0;
+    /*step 1: count number of instances of each code length*/
+    for(bits = 0; bits != tree->numcodes; ++bits) ++blcount[tree->lengths[bits]];
+    /*step 2: generate the nextcode values*/
+    for(bits = 1; bits <= tree->maxbitlen; ++bits) {
+      nextcode[bits] = (nextcode[bits - 1] + blcount[bits - 1]) << 1u;
+    }
+    /*step 3: generate all the codes*/
+    for(n = 0; n != tree->numcodes; ++n) {
+      if(tree->lengths[n] != 0) {
+        tree->codes[n] = nextcode[tree->lengths[n]]++;
+        /*remove superfluous bits from the code*/
+        tree->codes[n] &= ((1u << tree->lengths[n]) - 1u);
+      }
+    }
+  }
+
+  lodepng_free(blcount);
+  lodepng_free(nextcode);
+
+  if(!error) error = HuffmanTree_makeTable(tree);
+  return error;
+}
+
+/*
+given the code lengths (as stored in the PNG file), generate the tree as defined
+by Deflate. maxbitlen is the maximum bits that a code in the tree can have.
+return value is error.
+*/
+static unsigned HuffmanTree_makeFromLengths(HuffmanTree* tree, const unsigned* bitlen,
+                                            size_t numcodes, unsigned maxbitlen) {
+  unsigned i;
+  tree->lengths = (unsigned*)lodepng_malloc(numcodes * sizeof(unsigned));
+  if(!tree->lengths) return 83; /*alloc fail*/
+  for(i = 0; i != numcodes; ++i) tree->lengths[i] = bitlen[i];
+  tree->numcodes = (unsigned)numcodes; /*number of symbols*/
+  tree->maxbitlen = maxbitlen;
+  return HuffmanTree_makeFromLengths2(tree);
+}
+
+#ifdef LODEPNG_COMPILE_ENCODER
+
+/*BPM: Boundary Package Merge, see "A Fast and Space-Economical Algorithm for Length-Limited Coding",
+Jyrki Katajainen, Alistair Moffat, Andrew Turpin, 1995.*/
+
+/*chain node for boundary package merge*/
+typedef struct BPMNode {
+  int weight; /*the sum of all weights in this chain*/
+  unsigned index; /*index of this leaf node (called "count" in the paper)*/
+  struct BPMNode* tail; /*the next nodes in this chain (null if last)*/
+  int in_use;
+} BPMNode;
+
+/*lists of chains*/
+typedef struct BPMLists {
+  /*memory pool*/
+  unsigned memsize;
+  BPMNode* memory;
+  unsigned numfree;
+  unsigned nextfree;
+  BPMNode** freelist;
+  /*two heads of lookahead chains per list*/
+  unsigned listsize;
+  BPMNode** chains0;
+  BPMNode** chains1;
+} BPMLists;
+
+/*creates a new chain node with the given parameters, from the memory in the lists */
+static BPMNode* bpmnode_create(BPMLists* lists, int weight, unsigned index, BPMNode* tail) {
+  unsigned i;
+  BPMNode* result;
+
+  /*memory full, so garbage collect*/
+  if(lists->nextfree >= lists->numfree) {
+    /*mark only those that are in use*/
+    for(i = 0; i != lists->memsize; ++i) lists->memory[i].in_use = 0;
+    for(i = 0; i != lists->listsize; ++i) {
+      BPMNode* node;
+      for(node = lists->chains0[i]; node != 0; node = node->tail) node->in_use = 1;
+      for(node = lists->chains1[i]; node != 0; node = node->tail) node->in_use = 1;
+    }
+    /*collect those that are free*/
+    lists->numfree = 0;
+    for(i = 0; i != lists->memsize; ++i) {
+      if(!lists->memory[i].in_use) lists->freelist[lists->numfree++] = &lists->memory[i];
+    }
+    lists->nextfree = 0;
+  }
+
+  result = lists->freelist[lists->nextfree++];
+  result->weight = weight;
+  result->index = index;
+  result->tail = tail;
+  return result;
+}
+
+/*sort the leaves with stable mergesort*/
+static void bpmnode_sort(BPMNode* leaves, size_t num) {
+  BPMNode* mem = (BPMNode*)lodepng_malloc(sizeof(*leaves) * num);
+  size_t width, counter = 0;
+  for(width = 1; width < num; width *= 2) {
+    BPMNode* a = (counter & 1) ? mem : leaves;
+    BPMNode* b = (counter & 1) ? leaves : mem;
+    size_t p;
+    for(p = 0; p < num; p += 2 * width) {
+      size_t q = (p + width > num) ? num : (p + width);
+      size_t r = (p + 2 * width > num) ? num : (p + 2 * width);
+      size_t i = p, j = q, k;
+      for(k = p; k < r; k++) {
+        if(i < q && (j >= r || a[i].weight <= a[j].weight)) b[k] = a[i++];
+        else b[k] = a[j++];
+      }
+    }
+    counter++;
+  }
+  if(counter & 1) lodepng_memcpy(leaves, mem, sizeof(*leaves) * num);
+  lodepng_free(mem);
+}
+
+/*Boundary Package Merge step, numpresent is the amount of leaves, and c is the current chain.*/
+static void boundaryPM(BPMLists* lists, BPMNode* leaves, size_t numpresent, int c, int num) {
+  unsigned lastindex = lists->chains1[c]->index;
+
+  if(c == 0) {
+    if(lastindex >= numpresent) return;
+    lists->chains0[c] = lists->chains1[c];
+    lists->chains1[c] = bpmnode_create(lists, leaves[lastindex].weight, lastindex + 1, 0);
+  } else {
+    /*sum of the weights of the head nodes of the previous lookahead chains.*/
+    int sum = lists->chains0[c - 1]->weight + lists->chains1[c - 1]->weight;
+    lists->chains0[c] = lists->chains1[c];
+    if(lastindex < numpresent && sum > leaves[lastindex].weight) {
+      lists->chains1[c] = bpmnode_create(lists, leaves[lastindex].weight, lastindex + 1, lists->chains1[c]->tail);
+      return;
+    }
+    lists->chains1[c] = bpmnode_create(lists, sum, lastindex, lists->chains1[c - 1]);
+    /*in the end we are only interested in the chain of the last list, so no
+    need to recurse if we're at the last one (this gives measurable speedup)*/
+    if(num + 1 < (int)(2 * numpresent - 2)) {
+      boundaryPM(lists, leaves, numpresent, c - 1, num);
+      boundaryPM(lists, leaves, numpresent, c - 1, num);
+    }
+  }
+}
+
+unsigned lodepng_huffman_code_lengths(unsigned* lengths, const unsigned* frequencies,
+                                      size_t numcodes, unsigned maxbitlen) {
+  unsigned error = 0;
+  unsigned i;
+  size_t numpresent = 0; /*number of symbols with non-zero frequency*/
+  BPMNode* leaves; /*the symbols, only those with > 0 frequency*/
+
+  if(numcodes == 0) return 80; /*error: a tree of 0 symbols is not supposed to be made*/
+  if((1u << maxbitlen) < (unsigned)numcodes) return 80; /*error: represent all symbols*/
+
+  leaves = (BPMNode*)lodepng_malloc(numcodes * sizeof(*leaves));
+  if(!leaves) return 83; /*alloc fail*/
+
+  for(i = 0; i != numcodes; ++i) {
+    if(frequencies[i] > 0) {
+      leaves[numpresent].weight = (int)frequencies[i];
+      leaves[numpresent].index = i;
+      ++numpresent;
+    }
+  }
+
+  for(i = 0; i != numcodes; ++i) lengths[i] = 0;
+
+  /*ensure at least two present symbols. There should be at least one symbol
+  according to RFC 1951 section 3.2.7. Some decoders incorrectly require two. To
+  make these work as well ensure there are at least two symbols. The
+  Package-Merge code below also doesn't work correctly if there's only one
+  symbol, it'd give it the theoretical 0 bits but in practice zlib wants 1 bit*/
+  if(numpresent == 0) {
+    lengths[0] = lengths[1] = 1; /*note that for RFC 1951 section 3.2.7, only lengths[0] = 1 is needed*/
+  } else if(numpresent == 1) {
+    lengths[leaves[0].index] = 1;
+    lengths[leaves[0].index == 0 ? 1 : 0] = 1;
+  } else {
+    BPMLists lists;
+    BPMNode* node;
+
+    bpmnode_sort(leaves, numpresent);
+
+    lists.listsize = maxbitlen;
+    lists.memsize = 2 * maxbitlen * (maxbitlen + 1);
+    lists.nextfree = 0;
+    lists.numfree = lists.memsize;
+    lists.memory = (BPMNode*)lodepng_malloc(lists.memsize * sizeof(*lists.memory));
+    lists.freelist = (BPMNode**)lodepng_malloc(lists.memsize * sizeof(BPMNode*));
+    lists.chains0 = (BPMNode**)lodepng_malloc(lists.listsize * sizeof(BPMNode*));
+    lists.chains1 = (BPMNode**)lodepng_malloc(lists.listsize * sizeof(BPMNode*));
+    if(!lists.memory || !lists.freelist || !lists.chains0 || !lists.chains1) error = 83; /*alloc fail*/
+
+    if(!error) {
+      for(i = 0; i != lists.memsize; ++i) lists.freelist[i] = &lists.memory[i];
+
+      bpmnode_create(&lists, leaves[0].weight, 1, 0);
+      bpmnode_create(&lists, leaves[1].weight, 2, 0);
+
+      for(i = 0; i != lists.listsize; ++i) {
+        lists.chains0[i] = &lists.memory[0];
+        lists.chains1[i] = &lists.memory[1];
+      }
+
+      /*each boundaryPM call adds one chain to the last list, and we need 2 * numpresent - 2 chains.*/
+      for(i = 2; i != 2 * numpresent - 2; ++i) boundaryPM(&lists, leaves, numpresent, (int)maxbitlen - 1, (int)i);
+
+      for(node = lists.chains1[maxbitlen - 1]; node; node = node->tail) {
+        for(i = 0; i != node->index; ++i) ++lengths[leaves[i].index];
+      }
+    }
+
+    lodepng_free(lists.memory);
+    lodepng_free(lists.freelist);
+    lodepng_free(lists.chains0);
+    lodepng_free(lists.chains1);
+  }
+
+  lodepng_free(leaves);
+  return error;
+}
+
+/*Create the Huffman tree given the symbol frequencies*/
+static unsigned HuffmanTree_makeFromFrequencies(HuffmanTree* tree, const unsigned* frequencies,
+                                                size_t mincodes, size_t numcodes, unsigned maxbitlen) {
+  size_t i;
+  unsigned error = 0;
+  while(!frequencies[numcodes - 1] && numcodes > mincodes) --numcodes; /*trim zeroes*/
+  tree->maxbitlen = maxbitlen;
+  tree->numcodes = (unsigned)numcodes; /*number of symbols*/
+  tree->lengths = (unsigned*)lodepng_realloc(tree->lengths, numcodes * sizeof(unsigned));
+  if(!tree->lengths) return 83; /*alloc fail*/
+  /*initialize all lengths to 0*/
+  for(i = 0; i < numcodes; i++) tree->lengths[i] = 0;
+
+  error = lodepng_huffman_code_lengths(tree->lengths, frequencies, numcodes, maxbitlen);
+  if(!error) error = HuffmanTree_makeFromLengths2(tree);
+  return error;
+}
+
+static unsigned HuffmanTree_getCode(const HuffmanTree* tree, unsigned index) {
+  return tree->codes[index];
+}
+
+static unsigned HuffmanTree_getLength(const HuffmanTree* tree, unsigned index) {
+  return tree->lengths[index];
+}
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+/*get the literal and length code tree of a deflated block with fixed tree, as per the deflate specification*/
+static unsigned generateFixedLitLenTree(HuffmanTree* tree) {
+  unsigned i, error = 0;
+  unsigned* bitlen = (unsigned*)lodepng_malloc(NUM_DEFLATE_CODE_SYMBOLS * sizeof(unsigned));
+  if(!bitlen) return 83; /*alloc fail*/
+
+  /*288 possible codes: 0-255=literals, 256=endcode, 257-285=lengthcodes, 286-287=unused*/
+  for(i =   0; i <= 143; ++i) bitlen[i] = 8;
+  for(i = 144; i <= 255; ++i) bitlen[i] = 9;
+  for(i = 256; i <= 279; ++i) bitlen[i] = 7;
+  for(i = 280; i <= 287; ++i) bitlen[i] = 8;
+
+  error = HuffmanTree_makeFromLengths(tree, bitlen, NUM_DEFLATE_CODE_SYMBOLS, 15);
+
+  lodepng_free(bitlen);
+  return error;
+}
+
+/*get the distance code tree of a deflated block with fixed tree, as specified in the deflate specification*/
+static unsigned generateFixedDistanceTree(HuffmanTree* tree) {
+  unsigned i, error = 0;
+  unsigned* bitlen = (unsigned*)lodepng_malloc(NUM_DISTANCE_SYMBOLS * sizeof(unsigned));
+  if(!bitlen) return 83; /*alloc fail*/
+
+  /*there are 32 distance codes, but 30-31 are unused*/
+  for(i = 0; i != NUM_DISTANCE_SYMBOLS; ++i) bitlen[i] = 5;
+  error = HuffmanTree_makeFromLengths(tree, bitlen, NUM_DISTANCE_SYMBOLS, 15);
+
+  lodepng_free(bitlen);
+  return error;
+}
+
+#ifdef LODEPNG_COMPILE_DECODER
+
+/*
+returns the code. The bit reader must already have been ensured at least 15 bits
+*/
+static unsigned huffmanDecodeSymbol(LodePNGBitReader* reader, const HuffmanTree* codetree) {
+  unsigned short code = peekBits(reader, FIRSTBITS);
+  unsigned short l = codetree->table_len[code];
+  unsigned short value = codetree->table_value[code];
+  if(l <= FIRSTBITS) {
+    advanceBits(reader, l);
+    return value;
+  } else {
+    unsigned index2;
+    advanceBits(reader, FIRSTBITS);
+    index2 = value + peekBits(reader, l - FIRSTBITS);
+    advanceBits(reader, codetree->table_len[index2] - FIRSTBITS);
+    return codetree->table_value[index2];
+  }
+}
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+#ifdef LODEPNG_COMPILE_DECODER
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / Inflator (Decompressor)                                                / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/*get the tree of a deflated block with fixed tree, as specified in the deflate specification*/
+static void getTreeInflateFixed(HuffmanTree* tree_ll, HuffmanTree* tree_d) {
+  /*TODO: check for out of memory errors*/
+  generateFixedLitLenTree(tree_ll);
+  generateFixedDistanceTree(tree_d);
+}
+
+/*get the tree of a deflated block with dynamic tree, the tree itself is also Huffman compressed with a known tree*/
+static unsigned getTreeInflateDynamic(HuffmanTree* tree_ll, HuffmanTree* tree_d,
+                                      LodePNGBitReader* reader) {
+  /*make sure that length values that aren't filled in will be 0, or a wrong tree will be generated*/
+  unsigned error = 0;
+  unsigned n, HLIT, HDIST, HCLEN, i;
+
+  /*see comments in deflateDynamic for explanation of the context and these variables, it is analogous*/
+  unsigned* bitlen_ll = 0; /*lit,len code lengths*/
+  unsigned* bitlen_d = 0; /*dist code lengths*/
+  /*code length code lengths ("clcl"), the bit lengths of the huffman tree used to compress bitlen_ll and bitlen_d*/
+  unsigned* bitlen_cl = 0;
+  HuffmanTree tree_cl; /*the code tree for code length codes (the huffman tree for compressed huffman trees)*/
+
+  if(!ensureBits17(reader, 14)) return 49; /*error: the bit pointer is or will go past the memory*/
+
+  /*number of literal/length codes + 257. Unlike the spec, the value 257 is added to it here already*/
+  HLIT =  readBits(reader, 5) + 257;
+  /*number of distance codes. Unlike the spec, the value 1 is added to it here already*/
+  HDIST = readBits(reader, 5) + 1;
+  /*number of code length codes. Unlike the spec, the value 4 is added to it here already*/
+  HCLEN = readBits(reader, 4) + 4;
+
+  bitlen_cl = (unsigned*)lodepng_malloc(NUM_CODE_LENGTH_CODES * sizeof(unsigned));
+  if(!bitlen_cl) return 83 /*alloc fail*/;
+
+  HuffmanTree_init(&tree_cl);
+
+  while(!error) {
+    /*read the code length codes out of 3 * (amount of code length codes) bits*/
+    if(lodepng_gtofl(reader->bp, HCLEN * 3, reader->bitsize)) {
+      ERROR_BREAK(50); /*error: the bit pointer is or will go past the memory*/
+    }
+    for(i = 0; i != HCLEN; ++i) {
+      ensureBits9(reader, 3); /*out of bounds already checked above */
+      bitlen_cl[CLCL_ORDER[i]] = readBits(reader, 3);
+    }
+    for(i = HCLEN; i != NUM_CODE_LENGTH_CODES; ++i) {
+      bitlen_cl[CLCL_ORDER[i]] = 0;
+    }
+
+    error = HuffmanTree_makeFromLengths(&tree_cl, bitlen_cl, NUM_CODE_LENGTH_CODES, 7);
+    if(error) break;
+
+    /*now we can use this tree to read the lengths for the tree that this function will return*/
+    bitlen_ll = (unsigned*)lodepng_malloc(NUM_DEFLATE_CODE_SYMBOLS * sizeof(unsigned));
+    bitlen_d = (unsigned*)lodepng_malloc(NUM_DISTANCE_SYMBOLS * sizeof(unsigned));
+    if(!bitlen_ll || !bitlen_d) ERROR_BREAK(83 /*alloc fail*/);
+    for(i = 0; i != NUM_DEFLATE_CODE_SYMBOLS; ++i) bitlen_ll[i] = 0;
+    for(i = 0; i != NUM_DISTANCE_SYMBOLS; ++i) bitlen_d[i] = 0;
+
+    /*i is the current symbol we're reading in the part that contains the code lengths of lit/len and dist codes*/
+    i = 0;
+    while(i < HLIT + HDIST) {
+      unsigned code;
+      ensureBits25(reader, 22); /* up to 15 bits for huffman code, up to 7 extra bits below*/
+      code = huffmanDecodeSymbol(reader, &tree_cl);
+      if(code <= 15) /*a length code*/ {
+        if(i < HLIT) bitlen_ll[i] = code;
+        else bitlen_d[i - HLIT] = code;
+        ++i;
+      } else if(code == 16) /*repeat previous*/ {
+        unsigned replength = 3; /*read in the 2 bits that indicate repeat length (3-6)*/
+        unsigned value; /*set value to the previous code*/
+
+        if(i == 0) ERROR_BREAK(54); /*can't repeat previous if i is 0*/
+
+        replength += readBits(reader, 2);
+
+        if(i < HLIT + 1) value = bitlen_ll[i - 1];
+        else value = bitlen_d[i - HLIT - 1];
+        /*repeat this value in the next lengths*/
+        for(n = 0; n < replength; ++n) {
+          if(i >= HLIT + HDIST) ERROR_BREAK(13); /*error: i is larger than the amount of codes*/
+          if(i < HLIT) bitlen_ll[i] = value;
+          else bitlen_d[i - HLIT] = value;
+          ++i;
+        }
+      } else if(code == 17) /*repeat "0" 3-10 times*/ {
+        unsigned replength = 3; /*read in the bits that indicate repeat length*/
+        replength += readBits(reader, 3);
+
+        /*repeat this value in the next lengths*/
+        for(n = 0; n < replength; ++n) {
+          if(i >= HLIT + HDIST) ERROR_BREAK(14); /*error: i is larger than the amount of codes*/
+
+          if(i < HLIT) bitlen_ll[i] = 0;
+          else bitlen_d[i - HLIT] = 0;
+          ++i;
+        }
+      } else if(code == 18) /*repeat "0" 11-138 times*/ {
+        unsigned replength = 11; /*read in the bits that indicate repeat length*/
+        replength += readBits(reader, 7);
+
+        /*repeat this value in the next lengths*/
+        for(n = 0; n < replength; ++n) {
+          if(i >= HLIT + HDIST) ERROR_BREAK(15); /*error: i is larger than the amount of codes*/
+
+          if(i < HLIT) bitlen_ll[i] = 0;
+          else bitlen_d[i - HLIT] = 0;
+          ++i;
+        }
+      } else /*if(code == INVALIDSYMBOL)*/ {
+        ERROR_BREAK(16); /*error: tried to read disallowed huffman symbol*/
+      }
+      /*check if any of the ensureBits above went out of bounds*/
+      if(reader->bp > reader->bitsize) {
+        /*return error code 10 or 11 depending on the situation that happened in huffmanDecodeSymbol
+        (10=no endcode, 11=wrong jump outside of tree)*/
+        /* TODO: revise error codes 10,11,50: the above comment is no longer valid */
+        ERROR_BREAK(50); /*error, bit pointer jumps past memory*/
+      }
+    }
+    if(error) break;
+
+    if(bitlen_ll[256] == 0) ERROR_BREAK(64); /*the length of the end code 256 must be larger than 0*/
+
+    /*now we've finally got HLIT and HDIST, so generate the code trees, and the function is done*/
+    error = HuffmanTree_makeFromLengths(tree_ll, bitlen_ll, NUM_DEFLATE_CODE_SYMBOLS, 15);
+    if(error) break;
+    error = HuffmanTree_makeFromLengths(tree_d, bitlen_d, NUM_DISTANCE_SYMBOLS, 15);
+
+    break; /*end of error-while*/
+  }
+
+  lodepng_free(bitlen_cl);
+  lodepng_free(bitlen_ll);
+  lodepng_free(bitlen_d);
+  HuffmanTree_cleanup(&tree_cl);
+
+  return error;
+}
+
+/*inflate a block with dynamic of fixed Huffman tree. btype must be 1 or 2.*/
+static unsigned inflateHuffmanBlock(ucvector* out, size_t* pos, LodePNGBitReader* reader,
+                                    unsigned btype) {
+  unsigned error = 0;
+  HuffmanTree tree_ll; /*the huffman tree for literal and length codes*/
+  HuffmanTree tree_d; /*the huffman tree for distance codes*/
+
+  HuffmanTree_init(&tree_ll);
+  HuffmanTree_init(&tree_d);
+
+  if(btype == 1) getTreeInflateFixed(&tree_ll, &tree_d);
+  else /*if(btype == 2)*/ error = getTreeInflateDynamic(&tree_ll, &tree_d, reader);
+
+  while(!error) /*decode all symbols until end reached, breaks at end code*/ {
+    /*code_ll is literal, length or end code*/
+    unsigned code_ll;
+    ensureBits25(reader, 20); /* up to 15 for the huffman symbol, up to 5 for the length extra bits */
+    code_ll = huffmanDecodeSymbol(reader, &tree_ll);
+    if(code_ll <= 255) /*literal symbol*/ {
+      /*ucvector_push_back would do the same, but for some reason the two lines below run 10% faster*/
+      if(!ucvector_resize(out, (*pos) + 1)) ERROR_BREAK(83 /*alloc fail*/);
+      out->data[*pos] = (unsigned char)code_ll;
+      ++(*pos);
+    } else if(code_ll >= FIRST_LENGTH_CODE_INDEX && code_ll <= LAST_LENGTH_CODE_INDEX) /*length code*/ {
+      unsigned code_d, distance;
+      unsigned numextrabits_l, numextrabits_d; /*extra bits for length and distance*/
+      size_t start, backward, length;
+
+      /*part 1: get length base*/
+      length = LENGTHBASE[code_ll - FIRST_LENGTH_CODE_INDEX];
+
+      /*part 2: get extra bits and add the value of that to length*/
+      numextrabits_l = LENGTHEXTRA[code_ll - FIRST_LENGTH_CODE_INDEX];
+      if(numextrabits_l != 0) {
+        /* bits already ensured above */
+        length += readBits(reader, numextrabits_l);
+      }
+
+      /*part 3: get distance code*/
+      ensureBits32(reader, 28); /* up to 15 for the huffman symbol, up to 13 for the extra bits */
+      code_d = huffmanDecodeSymbol(reader, &tree_d);
+      if(code_d > 29) {
+        if(code_d <= 31) {
+          ERROR_BREAK(18); /*error: invalid distance code (30-31 are never used)*/
+        } else /* if(code_d == INVALIDSYMBOL) */{
+          ERROR_BREAK(16); /*error: tried to read disallowed huffman symbol*/
+        }
+      }
+      distance = DISTANCEBASE[code_d];
+
+      /*part 4: get extra bits from distance*/
+      numextrabits_d = DISTANCEEXTRA[code_d];
+      if(numextrabits_d != 0) {
+        /* bits already ensured above */
+        distance += readBits(reader, numextrabits_d);
+      }
+
+      /*part 5: fill in all the out[n] values based on the length and dist*/
+      start = (*pos);
+      if(distance > start) ERROR_BREAK(52); /*too long backward distance*/
+      backward = start - distance;
+
+      if(!ucvector_resize(out, (*pos) + length)) ERROR_BREAK(83 /*alloc fail*/);
+      if (distance < length) {
+        size_t forward;
+        lodepng_memcpy(out->data + *pos, out->data + backward, distance);
+        *pos += distance;
+        for(forward = distance; forward < length; ++forward) {
+          out->data[(*pos)++] = out->data[backward++];
+        }
+      } else {
+        lodepng_memcpy(out->data + *pos, out->data + backward, length);
+        *pos += length;
+      }
+    } else if(code_ll == 256) {
+      break; /*end code, break the loop*/
+    } else /*if(code_ll == INVALIDSYMBOL)*/ {
+      ERROR_BREAK(16); /*error: tried to read disallowed huffman symbol*/
+    }
+    /*check if any of the ensureBits above went out of bounds*/
+    if(reader->bp > reader->bitsize) {
+      /*return error code 10 or 11 depending on the situation that happened in huffmanDecodeSymbol
+      (10=no endcode, 11=wrong jump outside of tree)*/
+      /* TODO: revise error codes 10,11,50: the above comment is no longer valid */
+      ERROR_BREAK(51); /*error, bit pointer jumps past memory*/
+    }
+  }
+
+  HuffmanTree_cleanup(&tree_ll);
+  HuffmanTree_cleanup(&tree_d);
+
+  return error;
+}
+
+static unsigned inflateNoCompression(ucvector* out, size_t* pos,
+                                     LodePNGBitReader* reader, const LodePNGDecompressSettings* settings) {
+  size_t bytepos;
+  size_t size = reader->size;
+  unsigned LEN, NLEN, error = 0;
+
+  /*go to first boundary of byte*/
+  bytepos = (reader->bp + 7u) >> 3u;
+
+  /*read LEN (2 bytes) and NLEN (2 bytes)*/
+  if(bytepos + 4 >= size) return 52; /*error, bit pointer will jump past memory*/
+  LEN = (unsigned)reader->data[bytepos] + ((unsigned)reader->data[bytepos + 1] << 8u); bytepos += 2;
+  NLEN = (unsigned)reader->data[bytepos] + ((unsigned)reader->data[bytepos + 1] << 8u); bytepos += 2;
+
+  /*check if 16-bit NLEN is really the one's complement of LEN*/
+  if(!settings->ignore_nlen && LEN + NLEN != 65535) {
+    return 21; /*error: NLEN is not one's complement of LEN*/
+  }
+
+  if(!ucvector_resize(out, (*pos) + LEN)) return 83; /*alloc fail*/
+
+  /*read the literal data: LEN bytes are now stored in the out buffer*/
+  if(bytepos + LEN > size) return 23; /*error: reading outside of in buffer*/
+
+  lodepng_memcpy(out->data + *pos, reader->data + bytepos, LEN);
+  *pos += LEN;
+  bytepos += LEN;
+
+  reader->bp = bytepos << 3u;
+
+  return error;
+}
+
+static unsigned lodepng_inflatev(ucvector* out,
+                                 const unsigned char* in, size_t insize,
+                                 const LodePNGDecompressSettings* settings) {
+  unsigned BFINAL = 0;
+  size_t pos = 0; /*byte position in the out buffer*/
+  LodePNGBitReader reader;
+  unsigned error = LodePNGBitReader_init(&reader, in, insize);
+
+  if(error) return error;
+
+  while(!BFINAL) {
+    unsigned BTYPE;
+    if(!ensureBits9(&reader, 3)) return 52; /*error, bit pointer will jump past memory*/
+    BFINAL = readBits(&reader, 1);
+    BTYPE = readBits(&reader, 2);
+
+    if(BTYPE == 3) return 20; /*error: invalid BTYPE*/
+    else if(BTYPE == 0) error = inflateNoCompression(out, &pos, &reader, settings); /*no compression*/
+    else error = inflateHuffmanBlock(out, &pos, &reader, BTYPE); /*compression, BTYPE 01 or 10*/
+
+    if(error) return error;
+  }
+
+  return error;
+}
+
+unsigned lodepng_inflate(unsigned char** out, size_t* outsize,
+                         const unsigned char* in, size_t insize,
+                         const LodePNGDecompressSettings* settings) {
+  unsigned error;
+  ucvector v;
+  ucvector_init_buffer(&v, *out, *outsize);
+  error = lodepng_inflatev(&v, in, insize, settings);
+  *out = v.data;
+  *outsize = v.size;
+  return error;
+}
+
+static unsigned inflate(unsigned char** out, size_t* outsize,
+                        const unsigned char* in, size_t insize,
+                        const LodePNGDecompressSettings* settings) {
+  if(settings->custom_inflate) {
+    return settings->custom_inflate(out, outsize, in, insize, settings);
+  } else {
+    return lodepng_inflate(out, outsize, in, insize, settings);
+  }
+}
+
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+#ifdef LODEPNG_COMPILE_ENCODER
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / Deflator (Compressor)                                                  / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+static const size_t MAX_SUPPORTED_DEFLATE_LENGTH = 258;
+
+/*search the index in the array, that has the largest value smaller than or equal to the given value,
+given array must be sorted (if no value is smaller, it returns the size of the given array)*/
+static size_t searchCodeIndex(const unsigned* array, size_t array_size, size_t value) {
+  /*binary search (only small gain over linear). TODO: use CPU log2 instruction for getting symbols instead*/
+  size_t left = 1;
+  size_t right = array_size - 1;
+
+  while(left <= right) {
+    size_t mid = (left + right) >> 1;
+    if (array[mid] >= value) right = mid - 1;
+    else left = mid + 1;
+  }
+  if(left >= array_size || array[left] > value) left--;
+  return left;
+}
+
+static void addLengthDistance(uivector* values, size_t length, size_t distance) {
+  /*values in encoded vector are those used by deflate:
+  0-255: literal bytes
+  256: end
+  257-285: length/distance pair (length code, followed by extra length bits, distance code, extra distance bits)
+  286-287: invalid*/
+
+  unsigned length_code = (unsigned)searchCodeIndex(LENGTHBASE, 29, length);
+  unsigned extra_length = (unsigned)(length - LENGTHBASE[length_code]);
+  unsigned dist_code = (unsigned)searchCodeIndex(DISTANCEBASE, 30, distance);
+  unsigned extra_distance = (unsigned)(distance - DISTANCEBASE[dist_code]);
+
+  uivector_push_back(values, length_code + FIRST_LENGTH_CODE_INDEX);
+  uivector_push_back(values, extra_length);
+  uivector_push_back(values, dist_code);
+  uivector_push_back(values, extra_distance);
+}
+
+/*3 bytes of data get encoded into two bytes. The hash cannot use more than 3
+bytes as input because 3 is the minimum match length for deflate*/
+static const unsigned HASH_NUM_VALUES = 65536;
+static const unsigned HASH_BIT_MASK = 65535; /*HASH_NUM_VALUES - 1, but C90 does not like that as initializer*/
+
+typedef struct Hash {
+  int* head; /*hash value to head circular pos - can be outdated if went around window*/
+  /*circular pos to prev circular pos*/
+  unsigned short* chain;
+  int* val; /*circular pos to hash value*/
+
+  /*TODO: do this not only for zeros but for any repeated byte. However for PNG
+  it's always going to be the zeros that dominate, so not important for PNG*/
+  int* headz; /*similar to head, but for chainz*/
+  unsigned short* chainz; /*those with same amount of zeros*/
+  unsigned short* zeros; /*length of zeros streak, used as a second hash chain*/
+} Hash;
+
+static unsigned hash_init(Hash* hash, unsigned windowsize) {
+  unsigned i;
+  hash->head = (int*)lodepng_malloc(sizeof(int) * HASH_NUM_VALUES);
+  hash->val = (int*)lodepng_malloc(sizeof(int) * windowsize);
+  hash->chain = (unsigned short*)lodepng_malloc(sizeof(unsigned short) * windowsize);
+
+  hash->zeros = (unsigned short*)lodepng_malloc(sizeof(unsigned short) * windowsize);
+  hash->headz = (int*)lodepng_malloc(sizeof(int) * (MAX_SUPPORTED_DEFLATE_LENGTH + 1));
+  hash->chainz = (unsigned short*)lodepng_malloc(sizeof(unsigned short) * windowsize);
+
+  if(!hash->head || !hash->chain || !hash->val  || !hash->headz|| !hash->chainz || !hash->zeros) {
+    return 83; /*alloc fail*/
+  }
+
+  /*initialize hash table*/
+  for(i = 0; i != HASH_NUM_VALUES; ++i) hash->head[i] = -1;
+  for(i = 0; i != windowsize; ++i) hash->val[i] = -1;
+  for(i = 0; i != windowsize; ++i) hash->chain[i] = i; /*same value as index indicates uninitialized*/
+
+  for(i = 0; i <= MAX_SUPPORTED_DEFLATE_LENGTH; ++i) hash->headz[i] = -1;
+  for(i = 0; i != windowsize; ++i) hash->chainz[i] = i; /*same value as index indicates uninitialized*/
+
+  return 0;
+}
+
+static void hash_cleanup(Hash* hash) {
+  lodepng_free(hash->head);
+  lodepng_free(hash->val);
+  lodepng_free(hash->chain);
+
+  lodepng_free(hash->zeros);
+  lodepng_free(hash->headz);
+  lodepng_free(hash->chainz);
+}
+
+
+
+static unsigned getHash(const unsigned char* data, size_t size, size_t pos) {
+  unsigned result = 0;
+  if(pos + 2 < size) {
+    /*A simple shift and xor hash is used. Since the data of PNGs is dominated
+    by zeroes due to the filters, a better hash does not have a significant
+    effect on speed in traversing the chain, and causes more time spend on
+    calculating the hash.*/
+    result ^= ((unsigned)data[pos + 0] << 0u);
+    result ^= ((unsigned)data[pos + 1] << 4u);
+    result ^= ((unsigned)data[pos + 2] << 8u);
+  } else {
+    size_t amount, i;
+    if(pos >= size) return 0;
+    amount = size - pos;
+    for(i = 0; i != amount; ++i) result ^= ((unsigned)data[pos + i] << (i * 8u));
+  }
+  return result & HASH_BIT_MASK;
+}
+
+static unsigned countZeros(const unsigned char* data, size_t size, size_t pos) {
+  const unsigned char* start = data + pos;
+  const unsigned char* end = start + MAX_SUPPORTED_DEFLATE_LENGTH;
+  if(end > data + size) end = data + size;
+  data = start;
+  while(data != end && *data == 0) ++data;
+  /*subtracting two addresses returned as 32-bit number (max value is MAX_SUPPORTED_DEFLATE_LENGTH)*/
+  return (unsigned)(data - start);
+}
+
+/*wpos = pos & (windowsize - 1)*/
+static void updateHashChain(Hash* hash, size_t wpos, unsigned hashval, unsigned short numzeros) {
+  hash->val[wpos] = (int)hashval;
+  if(hash->head[hashval] != -1) hash->chain[wpos] = hash->head[hashval];
+  hash->head[hashval] = (int)wpos;
+
+  hash->zeros[wpos] = numzeros;
+  if(hash->headz[numzeros] != -1) hash->chainz[wpos] = hash->headz[numzeros];
+  hash->headz[numzeros] = (int)wpos;
+}
+
+/*
+LZ77-encode the data. Return value is error code. The input are raw bytes, the output
+is in the form of unsigned integers with codes representing for example literal bytes, or
+length/distance pairs.
+It uses a hash table technique to let it encode faster. When doing LZ77 encoding, a
+sliding window (of windowsize) is used, and all past bytes in that window can be used as
+the "dictionary". A brute force search through all possible distances would be slow, and
+this hash technique is one out of several ways to speed this up.
+*/
+static unsigned encodeLZ77(uivector* out, Hash* hash,
+                           const unsigned char* in, size_t inpos, size_t insize, unsigned windowsize,
+                           unsigned minmatch, unsigned nicematch, unsigned lazymatching) {
+  size_t pos;
+  unsigned i, error = 0;
+  /*for large window lengths, assume the user wants no compression loss. Otherwise, max hash chain length speedup.*/
+  unsigned maxchainlength = windowsize >= 8192 ? windowsize : windowsize / 8u;
+  unsigned maxlazymatch = windowsize >= 8192 ? MAX_SUPPORTED_DEFLATE_LENGTH : 64;
+
+  unsigned usezeros = 1; /*not sure if setting it to false for windowsize < 8192 is better or worse*/
+  unsigned numzeros = 0;
+
+  unsigned offset; /*the offset represents the distance in LZ77 terminology*/
+  unsigned length;
+  unsigned lazy = 0;
+  unsigned lazylength = 0, lazyoffset = 0;
+  unsigned hashval;
+  unsigned current_offset, current_length;
+  unsigned prev_offset;
+  const unsigned char *lastptr, *foreptr, *backptr;
+  unsigned hashpos;
+
+  if(windowsize == 0 || windowsize > 32768) return 60; /*error: windowsize smaller/larger than allowed*/
+  if((windowsize & (windowsize - 1)) != 0) return 90; /*error: must be power of two*/
+
+  if(nicematch > MAX_SUPPORTED_DEFLATE_LENGTH) nicematch = MAX_SUPPORTED_DEFLATE_LENGTH;
+
+  for(pos = inpos; pos < insize; ++pos) {
+    size_t wpos = pos & (windowsize - 1); /*position for in 'circular' hash buffers*/
+    unsigned chainlength = 0;
+
+    hashval = getHash(in, insize, pos);
+
+    if(usezeros && hashval == 0) {
+      if(numzeros == 0) numzeros = countZeros(in, insize, pos);
+      else if(pos + numzeros > insize || in[pos + numzeros - 1] != 0) --numzeros;
+    } else {
+      numzeros = 0;
+    }
+
+    updateHashChain(hash, wpos, hashval, numzeros);
+
+    /*the length and offset found for the current position*/
+    length = 0;
+    offset = 0;
+
+    hashpos = hash->chain[wpos];
+
+    lastptr = &in[insize < pos + MAX_SUPPORTED_DEFLATE_LENGTH ? insize : pos + MAX_SUPPORTED_DEFLATE_LENGTH];
+
+    /*search for the longest string*/
+    prev_offset = 0;
+    for(;;) {
+      if(chainlength++ >= maxchainlength) break;
+      current_offset = (unsigned)(hashpos <= wpos ? wpos - hashpos : wpos - hashpos + windowsize);
+
+      if(current_offset < prev_offset) break; /*stop when went completely around the circular buffer*/
+      prev_offset = current_offset;
+      if(current_offset > 0) {
+        /*test the next characters*/
+        foreptr = &in[pos];
+        backptr = &in[pos - current_offset];
+
+        /*common case in PNGs is lots of zeros. Quickly skip over them as a speedup*/
+        if(numzeros >= 3) {
+          unsigned skip = hash->zeros[hashpos];
+          if(skip > numzeros) skip = numzeros;
+          backptr += skip;
+          foreptr += skip;
+        }
+
+        while(foreptr != lastptr && *backptr == *foreptr) /*maximum supported length by deflate is max length*/ {
+          ++backptr;
+          ++foreptr;
+        }
+        current_length = (unsigned)(foreptr - &in[pos]);
+
+        if(current_length > length) {
+          length = current_length; /*the longest length*/
+          offset = current_offset; /*the offset that is related to this longest length*/
+          /*jump out once a length of max length is found (speed gain). This also jumps
+          out if length is MAX_SUPPORTED_DEFLATE_LENGTH*/
+          if(current_length >= nicematch) break;
+        }
+      }
+
+      if(hashpos == hash->chain[hashpos]) break;
+
+      if(numzeros >= 3 && length > numzeros) {
+        hashpos = hash->chainz[hashpos];
+        if(hash->zeros[hashpos] != numzeros) break;
+      } else {
+        hashpos = hash->chain[hashpos];
+        /*outdated hash value, happens if particular value was not encountered in whole last window*/
+        if(hash->val[hashpos] != (int)hashval) break;
+      }
+    }
+
+    if(lazymatching) {
+      if(!lazy && length >= 3 && length <= maxlazymatch && length < MAX_SUPPORTED_DEFLATE_LENGTH) {
+        lazy = 1;
+        lazylength = length;
+        lazyoffset = offset;
+        continue; /*try the next byte*/
+      }
+      if(lazy) {
+        lazy = 0;
+        if(pos == 0) ERROR_BREAK(81);
+        if(length > lazylength + 1) {
+          /*push the previous character as literal*/
+          if(!uivector_push_back(out, in[pos - 1])) ERROR_BREAK(83 /*alloc fail*/);
+        } else {
+          length = lazylength;
+          offset = lazyoffset;
+          hash->head[hashval] = -1; /*the same hashchain update will be done, this ensures no wrong alteration*/
+          hash->headz[numzeros] = -1; /*idem*/
+          --pos;
+        }
+      }
+    }
+    if(length >= 3 && offset > windowsize) ERROR_BREAK(86 /*too big (or overflown negative) offset*/);
+
+    /*encode it as length/distance pair or literal value*/
+    if(length < 3) /*only lengths of 3 or higher are supported as length/distance pair*/ {
+      if(!uivector_push_back(out, in[pos])) ERROR_BREAK(83 /*alloc fail*/);
+    } else if(length < minmatch || (length == 3 && offset > 4096)) {
+      /*compensate for the fact that longer offsets have more extra bits, a
+      length of only 3 may be not worth it then*/
+      if(!uivector_push_back(out, in[pos])) ERROR_BREAK(83 /*alloc fail*/);
+    } else {
+      addLengthDistance(out, length, offset);
+      for(i = 1; i < length; ++i) {
+        ++pos;
+        wpos = pos & (windowsize - 1);
+        hashval = getHash(in, insize, pos);
+        if(usezeros && hashval == 0) {
+          if(numzeros == 0) numzeros = countZeros(in, insize, pos);
+          else if(pos + numzeros > insize || in[pos + numzeros - 1] != 0) --numzeros;
+        } else {
+          numzeros = 0;
+        }
+        updateHashChain(hash, wpos, hashval, numzeros);
+      }
+    }
+  } /*end of the loop through each character of input*/
+
+  return error;
+}
+
+/* /////////////////////////////////////////////////////////////////////////// */
+
+static unsigned deflateNoCompression(ucvector* out, const unsigned char* data, size_t datasize) {
+  /*non compressed deflate block data: 1 bit BFINAL,2 bits BTYPE,(5 bits): it jumps to start of next byte,
+  2 bytes LEN, 2 bytes NLEN, LEN bytes literal DATA*/
+
+  size_t i, j, numdeflateblocks = (datasize + 65534u) / 65535u;
+  unsigned datapos = 0;
+  for(i = 0; i != numdeflateblocks; ++i) {
+    unsigned BFINAL, BTYPE, LEN, NLEN;
+    unsigned char firstbyte;
+
+    BFINAL = (i == numdeflateblocks - 1);
+    BTYPE = 0;
+
+    firstbyte = (unsigned char)(BFINAL + ((BTYPE & 1u) << 1u) + ((BTYPE & 2u) << 1u));
+    ucvector_push_back(out, firstbyte);
+
+    LEN = 65535;
+    if(datasize - datapos < 65535u) LEN = (unsigned)datasize - datapos;
+    NLEN = 65535 - LEN;
+
+    ucvector_push_back(out, (unsigned char)(LEN & 255));
+    ucvector_push_back(out, (unsigned char)(LEN >> 8u));
+    ucvector_push_back(out, (unsigned char)(NLEN & 255));
+    ucvector_push_back(out, (unsigned char)(NLEN >> 8u));
+
+    /*Decompressed data*/
+    for(j = 0; j < 65535 && datapos < datasize; ++j) {
+      ucvector_push_back(out, data[datapos++]);
+    }
+  }
+
+  return 0;
+}
+
+/*
+write the lz77-encoded data, which has lit, len and dist codes, to compressed stream using huffman trees.
+tree_ll: the tree for lit and len codes.
+tree_d: the tree for distance codes.
+*/
+static void writeLZ77data(LodePNGBitWriter* writer, const uivector* lz77_encoded,
+                          const HuffmanTree* tree_ll, const HuffmanTree* tree_d) {
+  size_t i = 0;
+  for(i = 0; i != lz77_encoded->size; ++i) {
+    unsigned val = lz77_encoded->data[i];
+    writeBitsReversed(writer, HuffmanTree_getCode(tree_ll, val), HuffmanTree_getLength(tree_ll, val));
+    if(val > 256) /*for a length code, 3 more things have to be added*/ {
+      unsigned length_index = val - FIRST_LENGTH_CODE_INDEX;
+      unsigned n_length_extra_bits = LENGTHEXTRA[length_index];
+      unsigned length_extra_bits = lz77_encoded->data[++i];
+
+      unsigned distance_code = lz77_encoded->data[++i];
+
+      unsigned distance_index = distance_code;
+      unsigned n_distance_extra_bits = DISTANCEEXTRA[distance_index];
+      unsigned distance_extra_bits = lz77_encoded->data[++i];
+
+      writeBits(writer, length_extra_bits, n_length_extra_bits);
+      writeBitsReversed(writer, HuffmanTree_getCode(tree_d, distance_code),
+                       HuffmanTree_getLength(tree_d, distance_code));
+      writeBits(writer, distance_extra_bits, n_distance_extra_bits);
+    }
+  }
+}
+
+/*Deflate for a block of type "dynamic", that is, with freely, optimally, created huffman trees*/
+static unsigned deflateDynamic(LodePNGBitWriter* writer, Hash* hash,
+                               const unsigned char* data, size_t datapos, size_t dataend,
+                               const LodePNGCompressSettings* settings, unsigned final) {
+  unsigned error = 0;
+
+  /*
+  A block is compressed as follows: The PNG data is lz77 encoded, resulting in
+  literal bytes and length/distance pairs. This is then huffman compressed with
+  two huffman trees. One huffman tree is used for the lit and len values ("ll"),
+  another huffman tree is used for the dist values ("d"). These two trees are
+  stored using their code lengths, and to compress even more these code lengths
+  are also run-length encoded and huffman compressed. This gives a huffman tree
+  of code lengths "cl". The code lengths used to describe this third tree are
+  the code length code lengths ("clcl").
+  */
+
+  /*The lz77 encoded data, represented with integers since there will also be length and distance codes in it*/
+  uivector lz77_encoded;
+  HuffmanTree tree_ll; /*tree for lit,len values*/
+  HuffmanTree tree_d; /*tree for distance codes*/
+  HuffmanTree tree_cl; /*tree for encoding the code lengths representing tree_ll and tree_d*/
+  uivector frequencies_ll; /*frequency of lit,len codes*/
+  uivector frequencies_d; /*frequency of dist codes*/
+  uivector frequencies_cl; /*frequency of code length codes*/
+  uivector bitlen_lld; /*lit,len,dist code lengths (int bits), literally (without repeat codes).*/
+  uivector bitlen_lld_e; /*bitlen_lld encoded with repeat codes (this is a rudimentary run length compression)*/
+  /*bitlen_cl is the code length code lengths ("clcl"). The bit lengths of codes to represent tree_cl
+  (these are written as is in the file, it would be crazy to compress these using yet another huffman
+  tree that needs to be represented by yet another set of code lengths)*/
+  uivector bitlen_cl;
+  size_t datasize = dataend - datapos;
+
+  /*
+  Due to the huffman compression of huffman tree representations ("two levels"), there are some analogies:
+  bitlen_lld is to tree_cl what data is to tree_ll and tree_d.
+  bitlen_lld_e is to bitlen_lld what lz77_encoded is to data.
+  bitlen_cl is to bitlen_lld_e what bitlen_lld is to lz77_encoded.
+  */
+
+  unsigned BFINAL = final;
+  size_t numcodes_ll, numcodes_d, i;
+  unsigned HLIT, HDIST, HCLEN;
+
+  uivector_init(&lz77_encoded);
+  HuffmanTree_init(&tree_ll);
+  HuffmanTree_init(&tree_d);
+  HuffmanTree_init(&tree_cl);
+  uivector_init(&frequencies_ll);
+  uivector_init(&frequencies_d);
+  uivector_init(&frequencies_cl);
+  uivector_init(&bitlen_lld);
+  uivector_init(&bitlen_lld_e);
+  uivector_init(&bitlen_cl);
+
+  /*This while loop never loops due to a break at the end, it is here to
+  allow breaking out of it to the cleanup phase on error conditions.*/
+  while(!error) {
+    if(settings->use_lz77) {
+      error = encodeLZ77(&lz77_encoded, hash, data, datapos, dataend, settings->windowsize,
+                         settings->minmatch, settings->nicematch, settings->lazymatching);
+      if(error) break;
+    } else {
+      if(!uivector_resize(&lz77_encoded, datasize)) ERROR_BREAK(83 /*alloc fail*/);
+      for(i = datapos; i < dataend; ++i) lz77_encoded.data[i - datapos] = data[i]; /*no LZ77, but still will be Huffman compressed*/
+    }
+
+    if(!uivector_resizev(&frequencies_ll, 286, 0)) ERROR_BREAK(83 /*alloc fail*/);
+    if(!uivector_resizev(&frequencies_d, 30, 0)) ERROR_BREAK(83 /*alloc fail*/);
+
+    /*Count the frequencies of lit, len and dist codes*/
+    for(i = 0; i != lz77_encoded.size; ++i) {
+      unsigned symbol = lz77_encoded.data[i];
+      ++frequencies_ll.data[symbol];
+      if(symbol > 256) {
+        unsigned dist = lz77_encoded.data[i + 2];
+        ++frequencies_d.data[dist];
+        i += 3;
+      }
+    }
+    frequencies_ll.data[256] = 1; /*there will be exactly 1 end code, at the end of the block*/
+
+    /*Make both huffman trees, one for the lit and len codes, one for the dist codes*/
+    error = HuffmanTree_makeFromFrequencies(&tree_ll, frequencies_ll.data, 257, frequencies_ll.size, 15);
+    if(error) break;
+    /*2, not 1, is chosen for mincodes: some buggy PNG decoders require at least 2 symbols in the dist tree*/
+    error = HuffmanTree_makeFromFrequencies(&tree_d, frequencies_d.data, 2, frequencies_d.size, 15);
+    if(error) break;
+
+    numcodes_ll = tree_ll.numcodes; if(numcodes_ll > 286) numcodes_ll = 286;
+    numcodes_d = tree_d.numcodes; if(numcodes_d > 30) numcodes_d = 30;
+    /*store the code lengths of both generated trees in bitlen_lld*/
+    for(i = 0; i != numcodes_ll; ++i) uivector_push_back(&bitlen_lld, HuffmanTree_getLength(&tree_ll, (unsigned)i));
+    for(i = 0; i != numcodes_d; ++i) uivector_push_back(&bitlen_lld, HuffmanTree_getLength(&tree_d, (unsigned)i));
+
+    /*run-length compress bitlen_ldd into bitlen_lld_e by using repeat codes 16 (copy length 3-6 times),
+    17 (3-10 zeroes), 18 (11-138 zeroes)*/
+    for(i = 0; i != (unsigned)bitlen_lld.size; ++i) {
+      unsigned j = 0; /*amount of repetitions*/
+      while(i + j + 1 < (unsigned)bitlen_lld.size && bitlen_lld.data[i + j + 1] == bitlen_lld.data[i]) ++j;
+
+      if(bitlen_lld.data[i] == 0 && j >= 2) /*repeat code for zeroes*/ {
+        ++j; /*include the first zero*/
+        if(j <= 10) /*repeat code 17 supports max 10 zeroes*/ {
+          uivector_push_back(&bitlen_lld_e, 17);
+          uivector_push_back(&bitlen_lld_e, j - 3);
+        } else /*repeat code 18 supports max 138 zeroes*/ {
+          if(j > 138) j = 138;
+          uivector_push_back(&bitlen_lld_e, 18);
+          uivector_push_back(&bitlen_lld_e, j - 11);
+        }
+        i += (j - 1);
+      } else if(j >= 3) /*repeat code for value other than zero*/ {
+        size_t k;
+        unsigned num = j / 6u, rest = j % 6u;
+        uivector_push_back(&bitlen_lld_e, bitlen_lld.data[i]);
+        for(k = 0; k < num; ++k) {
+          uivector_push_back(&bitlen_lld_e, 16);
+          uivector_push_back(&bitlen_lld_e, 6 - 3);
+        }
+        if(rest >= 3) {
+          uivector_push_back(&bitlen_lld_e, 16);
+          uivector_push_back(&bitlen_lld_e, rest - 3);
+        }
+        else j -= rest;
+        i += j;
+      } else /*too short to benefit from repeat code*/ {
+        uivector_push_back(&bitlen_lld_e, bitlen_lld.data[i]);
+      }
+    }
+
+    /*generate tree_cl, the huffmantree of huffmantrees*/
+
+    if(!uivector_resizev(&frequencies_cl, NUM_CODE_LENGTH_CODES, 0)) ERROR_BREAK(83 /*alloc fail*/);
+    for(i = 0; i != bitlen_lld_e.size; ++i) {
+      ++frequencies_cl.data[bitlen_lld_e.data[i]];
+      /*after a repeat code come the bits that specify the number of repetitions,
+      those don't need to be in the frequencies_cl calculation*/
+      if(bitlen_lld_e.data[i] >= 16) ++i;
+    }
+
+    error = HuffmanTree_makeFromFrequencies(&tree_cl, frequencies_cl.data,
+                                            frequencies_cl.size, frequencies_cl.size, 7);
+    if(error) break;
+
+    if(!uivector_resize(&bitlen_cl, tree_cl.numcodes)) ERROR_BREAK(83 /*alloc fail*/);
+    for(i = 0; i != tree_cl.numcodes; ++i) {
+      /*lengths of code length tree is in the order as specified by deflate*/
+      bitlen_cl.data[i] = HuffmanTree_getLength(&tree_cl, CLCL_ORDER[i]);
+    }
+    while(bitlen_cl.data[bitlen_cl.size - 1] == 0 && bitlen_cl.size > 4) {
+      /*remove zeros at the end, but minimum size must be 4*/
+      if(!uivector_resize(&bitlen_cl, bitlen_cl.size - 1)) ERROR_BREAK(83 /*alloc fail*/);
+    }
+    if(error) break;
+
+    /*
+    Write everything into the output
+
+    After the BFINAL and BTYPE, the dynamic block consists out of the following:
+    - 5 bits HLIT, 5 bits HDIST, 4 bits HCLEN
+    - (HCLEN+4)*3 bits code lengths of code length alphabet
+    - HLIT + 257 code lengths of lit/length alphabet (encoded using the code length
+      alphabet, + possible repetition codes 16, 17, 18)
+    - HDIST + 1 code lengths of distance alphabet (encoded using the code length
+      alphabet, + possible repetition codes 16, 17, 18)
+    - compressed data
+    - 256 (end code)
+    */
+
+    /*Write block type*/
+    writeBits(writer, BFINAL, 1);
+    writeBits(writer, 0, 1); /*first bit of BTYPE "dynamic"*/
+    writeBits(writer, 1, 1); /*second bit of BTYPE "dynamic"*/
+
+    /*write the HLIT, HDIST and HCLEN values*/
+    HLIT = (unsigned)(numcodes_ll - 257);
+    HDIST = (unsigned)(numcodes_d - 1);
+    HCLEN = (unsigned)bitlen_cl.size - 4;
+    /*trim zeroes for HCLEN. HLIT and HDIST were already trimmed at tree creation*/
+    while(!bitlen_cl.data[HCLEN + 4 - 1] && HCLEN > 0) --HCLEN;
+    writeBits(writer, HLIT, 5);
+    writeBits(writer, HDIST, 5);
+    writeBits(writer, HCLEN, 4);
+
+    /*write the code lengths of the code length alphabet*/
+    for(i = 0; i != HCLEN + 4; ++i) writeBits(writer, bitlen_cl.data[i], 3);
+
+    /*write the lengths of the lit/len AND the dist alphabet*/
+    for(i = 0; i != bitlen_lld_e.size; ++i) {
+      writeBitsReversed(writer, HuffmanTree_getCode(&tree_cl, bitlen_lld_e.data[i]),
+                        HuffmanTree_getLength(&tree_cl, bitlen_lld_e.data[i]));
+      /*extra bits of repeat codes*/
+      if(bitlen_lld_e.data[i] == 16) writeBits(writer, bitlen_lld_e.data[++i], 2);
+      else if(bitlen_lld_e.data[i] == 17) writeBits(writer, bitlen_lld_e.data[++i], 3);
+      else if(bitlen_lld_e.data[i] == 18) writeBits(writer, bitlen_lld_e.data[++i], 7);
+    }
+
+    /*write the compressed data symbols*/
+    writeLZ77data(writer, &lz77_encoded, &tree_ll, &tree_d);
+    /*error: the length of the end code 256 must be larger than 0*/
+    if(HuffmanTree_getLength(&tree_ll, 256) == 0) ERROR_BREAK(64);
+
+    /*write the end code*/
+    writeBitsReversed(writer, HuffmanTree_getCode(&tree_ll, 256), HuffmanTree_getLength(&tree_ll, 256));
+
+    break; /*end of error-while*/
+  }
+
+  /*cleanup*/
+  uivector_cleanup(&lz77_encoded);
+  HuffmanTree_cleanup(&tree_ll);
+  HuffmanTree_cleanup(&tree_d);
+  HuffmanTree_cleanup(&tree_cl);
+  uivector_cleanup(&frequencies_ll);
+  uivector_cleanup(&frequencies_d);
+  uivector_cleanup(&frequencies_cl);
+  uivector_cleanup(&bitlen_lld_e);
+  uivector_cleanup(&bitlen_lld);
+  uivector_cleanup(&bitlen_cl);
+
+  return error;
+}
+
+static unsigned deflateFixed(LodePNGBitWriter* writer, Hash* hash,
+                             const unsigned char* data,
+                             size_t datapos, size_t dataend,
+                             const LodePNGCompressSettings* settings, unsigned final) {
+  HuffmanTree tree_ll; /*tree for literal values and length codes*/
+  HuffmanTree tree_d; /*tree for distance codes*/
+
+  unsigned BFINAL = final;
+  unsigned error = 0;
+  size_t i;
+
+  HuffmanTree_init(&tree_ll);
+  HuffmanTree_init(&tree_d);
+
+  generateFixedLitLenTree(&tree_ll);
+  generateFixedDistanceTree(&tree_d);
+
+  writeBits(writer, BFINAL, 1);
+  writeBits(writer, 1, 1); /*first bit of BTYPE*/
+  writeBits(writer, 0, 1); /*second bit of BTYPE*/
+
+  if(settings->use_lz77) /*LZ77 encoded*/ {
+    uivector lz77_encoded;
+    uivector_init(&lz77_encoded);
+    error = encodeLZ77(&lz77_encoded, hash, data, datapos, dataend, settings->windowsize,
+                       settings->minmatch, settings->nicematch, settings->lazymatching);
+    if(!error) writeLZ77data(writer, &lz77_encoded, &tree_ll, &tree_d);
+    uivector_cleanup(&lz77_encoded);
+  } else /*no LZ77, but still will be Huffman compressed*/ {
+    for(i = datapos; i < dataend; ++i) {
+      writeBitsReversed(writer, HuffmanTree_getCode(&tree_ll, data[i]), HuffmanTree_getLength(&tree_ll, data[i]));
+    }
+  }
+  /*add END code*/
+  if(!error) writeBitsReversed(writer, HuffmanTree_getCode(&tree_ll, 256), HuffmanTree_getLength(&tree_ll, 256));
+
+  /*cleanup*/
+  HuffmanTree_cleanup(&tree_ll);
+  HuffmanTree_cleanup(&tree_d);
+
+  return error;
+}
+
+static unsigned lodepng_deflatev(ucvector* out, const unsigned char* in, size_t insize,
+                                 const LodePNGCompressSettings* settings) {
+  unsigned error = 0;
+  size_t i, blocksize, numdeflateblocks;
+  Hash hash;
+  LodePNGBitWriter writer;
+
+  LodePNGBitWriter_init(&writer, out);
+
+  if(settings->btype > 2) return 61;
+  else if(settings->btype == 0) return deflateNoCompression(out, in, insize);
+  else if(settings->btype == 1) blocksize = insize;
+  else /*if(settings->btype == 2)*/ {
+    /*on PNGs, deflate blocks of 65-262k seem to give most dense encoding*/
+    blocksize = insize / 8u + 8;
+    if(blocksize < 65536) blocksize = 65536;
+    if(blocksize > 262144) blocksize = 262144;
+  }
+
+  numdeflateblocks = (insize + blocksize - 1) / blocksize;
+  if(numdeflateblocks == 0) numdeflateblocks = 1;
+
+  error = hash_init(&hash, settings->windowsize);
+  if(error) return error;
+
+  for(i = 0; i != numdeflateblocks && !error; ++i) {
+    unsigned final = (i == numdeflateblocks - 1);
+    size_t start = i * blocksize;
+    size_t end = start + blocksize;
+    if(end > insize) end = insize;
+
+    if(settings->btype == 1) error = deflateFixed(&writer, &hash, in, start, end, settings, final);
+    else if(settings->btype == 2) error = deflateDynamic(&writer, &hash, in, start, end, settings, final);
+  }
+
+  hash_cleanup(&hash);
+
+  return error;
+}
+
+unsigned lodepng_deflate(unsigned char** out, size_t* outsize,
+                         const unsigned char* in, size_t insize,
+                         const LodePNGCompressSettings* settings) {
+  unsigned error;
+  ucvector v;
+  ucvector_init_buffer(&v, *out, *outsize);
+  error = lodepng_deflatev(&v, in, insize, settings);
+  *out = v.data;
+  *outsize = v.size;
+  return error;
+}
+
+static unsigned deflate(unsigned char** out, size_t* outsize,
+                        const unsigned char* in, size_t insize,
+                        const LodePNGCompressSettings* settings) {
+  if(settings->custom_deflate) {
+    return settings->custom_deflate(out, outsize, in, insize, settings);
+  } else {
+    return lodepng_deflate(out, outsize, in, insize, settings);
+  }
+}
+
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / Adler32                                                                / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+static unsigned update_adler32(unsigned adler, const unsigned char* data, unsigned len) {
+  unsigned s1 = adler & 0xffffu;
+  unsigned s2 = (adler >> 16u) & 0xffffu;
+
+  while(len != 0u) {
+    unsigned i;
+    /*at least 5552 sums can be done before the sums overflow, saving a lot of module divisions*/
+    unsigned amount = len > 5552u ? 5552u : len;
+    len -= amount;
+    for(i = 0; i != amount; ++i) {
+      s1 += (*data++);
+      s2 += s1;
+    }
+    s1 %= 65521u;
+    s2 %= 65521u;
+  }
+
+  return (s2 << 16u) | s1;
+}
+
+/*Return the adler32 of the bytes data[0..len-1]*/
+static unsigned adler32(const unsigned char* data, unsigned len) {
+  return update_adler32(1u, data, len);
+}
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / Zlib                                                                   / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+#ifdef LODEPNG_COMPILE_DECODER
+
+unsigned lodepng_zlib_decompress(unsigned char** out, size_t* outsize, const unsigned char* in,
+                                 size_t insize, const LodePNGDecompressSettings* settings) {
+  unsigned error = 0;
+  unsigned CM, CINFO, FDICT;
+
+  if(insize < 2) return 53; /*error, size of zlib data too small*/
+  /*read information from zlib header*/
+  if((in[0] * 256 + in[1]) % 31 != 0) {
+    /*error: 256 * in[0] + in[1] must be a multiple of 31, the FCHECK value is supposed to be made that way*/
+    return 24;
+  }
+
+  CM = in[0] & 15;
+  CINFO = (in[0] >> 4) & 15;
+  /*FCHECK = in[1] & 31;*/ /*FCHECK is already tested above*/
+  FDICT = (in[1] >> 5) & 1;
+  /*FLEVEL = (in[1] >> 6) & 3;*/ /*FLEVEL is not used here*/
+
+  if(CM != 8 || CINFO > 7) {
+    /*error: only compression method 8: inflate with sliding window of 32k is supported by the PNG spec*/
+    return 25;
+  }
+  if(FDICT != 0) {
+    /*error: the specification of PNG says about the zlib stream:
+      "The additional flags shall not specify a preset dictionary."*/
+    return 26;
+  }
+
+  error = inflate(out, outsize, in + 2, insize - 2, settings);
+  if(error) return error;
+
+  if(!settings->ignore_adler32) {
+    unsigned ADLER32 = lodepng_read32bitInt(&in[insize - 4]);
+    unsigned checksum = adler32(*out, (unsigned)(*outsize));
+    if(checksum != ADLER32) return 58; /*error, adler checksum not correct, data must be corrupted*/
+  }
+
+  return 0; /*no error*/
+}
+
+static unsigned zlib_decompress(unsigned char** out, size_t* outsize, const unsigned char* in,
+                                size_t insize, const LodePNGDecompressSettings* settings) {
+  if(settings->custom_zlib) {
+    return settings->custom_zlib(out, outsize, in, insize, settings);
+  } else {
+    return lodepng_zlib_decompress(out, outsize, in, insize, settings);
+  }
+}
+
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+#ifdef LODEPNG_COMPILE_ENCODER
+
+unsigned lodepng_zlib_compress(unsigned char** out, size_t* outsize, const unsigned char* in,
+                               size_t insize, const LodePNGCompressSettings* settings) {
+  size_t i;
+  unsigned error;
+  unsigned char* deflatedata = 0;
+  size_t deflatesize = 0;
+
+  error = deflate(&deflatedata, &deflatesize, in, insize, settings);
+
+  *out = NULL;
+  *outsize = 0;
+  if(!error) {
+    *outsize = deflatesize + 6;
+    *out = (unsigned char*)lodepng_malloc(*outsize);
+    if(!*out) error = 83; /*alloc fail*/
+  }
+
+  if(!error) {
+    unsigned ADLER32 = adler32(in, (unsigned)insize);
+    /*zlib data: 1 byte CMF (CM+CINFO), 1 byte FLG, deflate data, 4 byte ADLER32 checksum of the Decompressed data*/
+    unsigned CMF = 120; /*0b01111000: CM 8, CINFO 7. With CINFO 7, any window size up to 32768 can be used.*/
+    unsigned FLEVEL = 0;
+    unsigned FDICT = 0;
+    unsigned CMFFLG = 256 * CMF + FDICT * 32 + FLEVEL * 64;
+    unsigned FCHECK = 31 - CMFFLG % 31;
+    CMFFLG += FCHECK;
+
+    (*out)[0] = (unsigned char)(CMFFLG >> 8);
+    (*out)[1] = (unsigned char)(CMFFLG & 255);
+    for(i = 0; i != deflatesize; ++i) (*out)[i + 2] = deflatedata[i];
+    lodepng_set32bitInt(&(*out)[*outsize - 4], ADLER32);
+  }
+
+  lodepng_free(deflatedata);
+  return error;
+}
+
+/* compress using the default or custom zlib function */
+static unsigned zlib_compress(unsigned char** out, size_t* outsize, const unsigned char* in,
+                              size_t insize, const LodePNGCompressSettings* settings) {
+  if(settings->custom_zlib) {
+    return settings->custom_zlib(out, outsize, in, insize, settings);
+  } else {
+    return lodepng_zlib_compress(out, outsize, in, insize, settings);
+  }
+}
+
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+#else /*no LODEPNG_COMPILE_ZLIB*/
+
+#ifdef LODEPNG_COMPILE_DECODER
+static unsigned zlib_decompress(unsigned char** out, size_t* outsize, const unsigned char* in,
+                                size_t insize, const LodePNGDecompressSettings* settings) {
+  if(!settings->custom_zlib) return 87; /*no custom zlib function provided */
+  return settings->custom_zlib(out, outsize, in, insize, settings);
+}
+#endif /*LODEPNG_COMPILE_DECODER*/
+#ifdef LODEPNG_COMPILE_ENCODER
+static unsigned zlib_compress(unsigned char** out, size_t* outsize, const unsigned char* in,
+                              size_t insize, const LodePNGCompressSettings* settings) {
+  if(!settings->custom_zlib) return 87; /*no custom zlib function provided */
+  return settings->custom_zlib(out, outsize, in, insize, settings);
+}
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+#endif /*LODEPNG_COMPILE_ZLIB*/
+
+/* ////////////////////////////////////////////////////////////////////////// */
+
+#ifdef LODEPNG_COMPILE_ENCODER
+
+/*this is a good tradeoff between speed and compression ratio*/
+#define DEFAULT_WINDOWSIZE 2048
+
+void lodepng_compress_settings_init(LodePNGCompressSettings* settings) {
+  /*compress with dynamic huffman tree (not in the mathematical sense, just not the predefined one)*/
+  settings->btype = 2;
+  settings->use_lz77 = 1;
+  settings->windowsize = DEFAULT_WINDOWSIZE;
+  settings->minmatch = 3;
+  settings->nicematch = 128;
+  settings->lazymatching = 1;
+
+  settings->custom_zlib = 0;
+  settings->custom_deflate = 0;
+  settings->custom_context = 0;
+}
+
+const LodePNGCompressSettings lodepng_default_compress_settings = {2, 1, DEFAULT_WINDOWSIZE, 3, 128, 1, 0, 0, 0};
+
+
+#endif /*LODEPNG_COMPILE_ENCODER*/
+
+#ifdef LODEPNG_COMPILE_DECODER
+
+void lodepng_decompress_settings_init(LodePNGDecompressSettings* settings) {
+  settings->ignore_adler32 = 0;
+  settings->ignore_nlen = 0;
+
+  settings->custom_zlib = 0;
+  settings->custom_inflate = 0;
+  settings->custom_context = 0;
+}
+
+const LodePNGDecompressSettings lodepng_default_decompress_settings = {0, 0, 0, 0, 0};
+
+#endif /*LODEPNG_COMPILE_DECODER*/
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* ////////////////////////////////////////////////////////////////////////// */
+/* // End of Zlib related code. Begin of PNG related code.                 // */
+/* ////////////////////////////////////////////////////////////////////////// */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+#ifdef LODEPNG_COMPILE_PNG
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / CRC32                                                                  / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+
+#ifndef LODEPNG_NO_COMPILE_CRC
+/* CRC polynomial: 0xedb88320 */
+static unsigned lodepng_crc32_table[256] = {
+           0u, 1996959894u, 3993919788u, 2567524794u,  124634137u, 1886057615u, 3915621685u, 2657392035u,
+   249268274u, 2044508324u, 3772115230u, 2547177864u,  162941995u, 2125561021u, 3887607047u, 2428444049u,
+   498536548u, 1789927666u, 4089016648u, 2227061214u,  450548861u, 1843258603u, 4107580753u, 2211677639u,
+   325883990u, 1684777152u, 4251122042u, 2321926636u,  335633487u, 1661365465u, 4195302755u, 2366115317u,
+   997073096u, 1281953886u, 3579855332u, 2724688242u, 1006888145u, 1258607687u, 3524101629u, 2768942443u,
+   901097722u, 1119000684u, 3686517206u, 2898065728u,  853044451u, 1172266101u, 3705015759u, 2882616665u,
+   651767980u, 1373503546u, 3369554304u, 3218104598u,  565507253u, 1454621731u, 3485111705u, 3099436303u,
+   671266974u, 1594198024u, 3322730930u, 2970347812u,  795835527u, 1483230225u, 3244367275u, 3060149565u,
+  1994146192u,   31158534u, 2563907772u, 4023717930u, 1907459465u,  112637215u, 2680153253u, 3904427059u,
+  2013776290u,  251722036u, 2517215374u, 3775830040u, 2137656763u,  141376813u, 2439277719u, 3865271297u,
+  1802195444u,  476864866u, 2238001368u, 4066508878u, 1812370925u,  453092731u, 2181625025u, 4111451223u,
+  1706088902u,  314042704u, 2344532202u, 4240017532u, 1658658271u,  366619977u, 2362670323u, 4224994405u,
+  1303535960u,  984961486u, 2747007092u, 3569037538u, 1256170817u, 1037604311u, 2765210733u, 3554079995u,
+  1131014506u,  879679996u, 2909243462u, 3663771856u, 1141124467u,  855842277u, 2852801631u, 3708648649u,
+  1342533948u,  654459306u, 3188396048u, 3373015174u, 1466479909u,  544179635u, 3110523913u, 3462522015u,
+  1591671054u,  702138776u, 2966460450u, 3352799412u, 1504918807u,  783551873u, 3082640443u, 3233442989u,
+  3988292384u, 2596254646u,   62317068u, 1957810842u, 3939845945u, 2647816111u,   81470997u, 1943803523u,
+  3814918930u, 2489596804u,  225274430u, 2053790376u, 3826175755u, 2466906013u,  167816743u, 2097651377u,
+  4027552580u, 2265490386u,  503444072u, 1762050814u, 4150417245u, 2154129355u,  426522225u, 1852507879u,
+  4275313526u, 2312317920u,  282753626u, 1742555852u, 4189708143u, 2394877945u,  397917763u, 1622183637u,
+  3604390888u, 2714866558u,  953729732u, 1340076626u, 3518719985u, 2797360999u, 1068828381u, 1219638859u,
+  3624741850u, 2936675148u,  906185462u, 1090812512u, 3747672003u, 2825379669u,  829329135u, 1181335161u,
+  3412177804u, 3160834842u,  628085408u, 1382605366u, 3423369109u, 3138078467u,  570562233u, 1426400815u,
+  3317316542u, 2998733608u,  733239954u, 1555261956u, 3268935591u, 3050360625u,  752459403u, 1541320221u,
+  2607071920u, 3965973030u, 1969922972u,   40735498u, 2617837225u, 3943577151u, 1913087877u,   83908371u,
+  2512341634u, 3803740692u, 2075208622u,  213261112u, 2463272603u, 3855990285u, 2094854071u,  198958881u,
+  2262029012u, 4057260610u, 1759359992u,  534414190u, 2176718541u, 4139329115u, 1873836001u,  414664567u,
+  2282248934u, 4279200368u, 1711684554u,  285281116u, 2405801727u, 4167216745u, 1634467795u,  376229701u,
+  2685067896u, 3608007406u, 1308918612u,  956543938u, 2808555105u, 3495958263u, 1231636301u, 1047427035u,
+  2932959818u, 3654703836u, 1088359270u,  936918000u, 2847714899u, 3736837829u, 1202900863u,  817233897u,
+  3183342108u, 3401237130u, 1404277552u,  615818150u, 3134207493u, 3453421203u, 1423857449u,  601450431u,
+  3009837614u, 3294710456u, 1567103746u,  711928724u, 3020668471u, 3272380065u, 1510334235u,  755167117u
+};
+
+/*Return the CRC of the bytes buf[0..len-1].*/
+unsigned lodepng_crc32(const unsigned char* data, size_t length) {
+  unsigned r = 0xffffffffu;
+  size_t i;
+  for(i = 0; i < length; ++i) {
+    r = lodepng_crc32_table[(r ^ data[i]) & 0xffu] ^ (r >> 8u);
+  }
+  return r ^ 0xffffffffu;
+}
+#else /* !LODEPNG_NO_COMPILE_CRC */
+unsigned lodepng_crc32(const unsigned char* data, size_t length);
+#endif /* !LODEPNG_NO_COMPILE_CRC */
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / Reading and writing PNG color channel bits                             / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/* The color channel bits of less-than-8-bit pixels are read with the MSB of bytes first,
+so LodePNGBitWriter and LodePNGBitReader can't be used for those. */
+
+static unsigned char readBitFromReversedStream(size_t* bitpointer, const unsigned char* bitstream) {
+  unsigned char result = (unsigned char)((bitstream[(*bitpointer) >> 3] >> (7 - ((*bitpointer) & 0x7))) & 1);
+  ++(*bitpointer);
+  return result;
+}
+
+/* TODO: make this faster */
+static unsigned readBitsFromReversedStream(size_t* bitpointer, const unsigned char* bitstream, size_t nbits) {
+  unsigned result = 0;
+  size_t i;
+  for(i = 0 ; i < nbits; ++i) {
+    result <<= 1u;
+    result |= (unsigned)readBitFromReversedStream(bitpointer, bitstream);
+  }
+  return result;
+}
+
+static void setBitOfReversedStream(size_t* bitpointer, unsigned char* bitstream, unsigned char bit) {
+  /*the current bit in bitstream may be 0 or 1 for this to work*/
+  if(bit == 0) bitstream[(*bitpointer) >> 3u] &=  (unsigned char)(~(1u << (7u - ((*bitpointer) & 7u))));
+  else         bitstream[(*bitpointer) >> 3u] |=  (1u << (7u - ((*bitpointer) & 7u)));
+  ++(*bitpointer);
+}
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / PNG chunks                                                             / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+unsigned lodepng_chunk_length(const unsigned char* chunk) {
+  return lodepng_read32bitInt(&chunk[0]);
+}
+
+void lodepng_chunk_type(char type[5], const unsigned char* chunk) {
+  unsigned i;
+  for(i = 0; i != 4; ++i) type[i] = (char)chunk[4 + i];
+  type[4] = 0; /*null termination char*/
+}
+
+unsigned char lodepng_chunk_type_equals(const unsigned char* chunk, const char* type) {
+  if(lodepng_strlen(type) != 4) return 0;
+  return (chunk[4] == type[0] && chunk[5] == type[1] && chunk[6] == type[2] && chunk[7] == type[3]);
+}
+
+unsigned char lodepng_chunk_ancillary(const unsigned char* chunk) {
+  return((chunk[4] & 32) != 0);
+}
+
+unsigned char lodepng_chunk_private(const unsigned char* chunk) {
+  return((chunk[6] & 32) != 0);
+}
+
+unsigned char lodepng_chunk_safetocopy(const unsigned char* chunk) {
+  return((chunk[7] & 32) != 0);
+}
+
+unsigned char* lodepng_chunk_data(unsigned char* chunk) {
+  return &chunk[8];
+}
+
+const unsigned char* lodepng_chunk_data_const(const unsigned char* chunk) {
+  return &chunk[8];
+}
+
+unsigned lodepng_chunk_check_crc(const unsigned char* chunk) {
+  unsigned length = lodepng_chunk_length(chunk);
+  unsigned CRC = lodepng_read32bitInt(&chunk[length + 8]);
+  /*the CRC is taken of the data and the 4 chunk type letters, not the length*/
+  unsigned checksum = lodepng_crc32(&chunk[4], length + 4);
+  if(CRC != checksum) return 1;
+  else return 0;
+}
+
+void lodepng_chunk_generate_crc(unsigned char* chunk) {
+  unsigned length = lodepng_chunk_length(chunk);
+  unsigned CRC = lodepng_crc32(&chunk[4], length + 4);
+  lodepng_set32bitInt(chunk + 8 + length, CRC);
+}
+
+unsigned char* lodepng_chunk_next(unsigned char* chunk) {
+  if(chunk[0] == 0x89 && chunk[1] == 0x50 && chunk[2] == 0x4e && chunk[3] == 0x47
+    && chunk[4] == 0x0d && chunk[5] == 0x0a && chunk[6] == 0x1a && chunk[7] == 0x0a) {
+    /* Is PNG magic header at start of PNG file. Jump to first actual chunk. */
+    return chunk + 8;
+  } else {
+    unsigned total_chunk_length = lodepng_chunk_length(chunk) + 12;
+    return chunk + total_chunk_length;
+  }
+}
+
+const unsigned char* lodepng_chunk_next_const(const unsigned char* chunk) {
+  if(chunk[0] == 0x89 && chunk[1] == 0x50 && chunk[2] == 0x4e && chunk[3] == 0x47
+    && chunk[4] == 0x0d && chunk[5] == 0x0a && chunk[6] == 0x1a && chunk[7] == 0x0a) {
+    /* Is PNG magic header at start of PNG file. Jump to first actual chunk. */
+    return chunk + 8;
+  } else {
+    unsigned total_chunk_length = lodepng_chunk_length(chunk) + 12;
+    return chunk + total_chunk_length;
+  }
+}
+
+unsigned char* lodepng_chunk_find(unsigned char* chunk, const unsigned char* end, const char type[5]) {
+  for(;;) {
+    if(chunk + 12 >= end) return 0;
+    if(lodepng_chunk_type_equals(chunk, type)) return chunk;
+    chunk = lodepng_chunk_next(chunk);
+  }
+}
+
+const unsigned char* lodepng_chunk_find_const(const unsigned char* chunk, const unsigned char* end, const char type[5]) {
+  for(;;) {
+    if(chunk + 12 >= end) return 0;
+    if(lodepng_chunk_type_equals(chunk, type)) return chunk;
+    chunk = lodepng_chunk_next_const(chunk);
+  }
+}
+
+unsigned lodepng_chunk_append(unsigned char** out, size_t* outlength, const unsigned char* chunk) {
+  unsigned i;
+  unsigned total_chunk_length = lodepng_chunk_length(chunk) + 12;
+  unsigned char *chunk_start, *new_buffer;
+  size_t new_length = (*outlength) + total_chunk_length;
+  if(new_length < total_chunk_length || new_length < (*outlength)) return 77; /*integer overflow happened*/
+
+  new_buffer = (unsigned char*)lodepng_realloc(*out, new_length);
+  if(!new_buffer) return 83; /*alloc fail*/
+  (*out) = new_buffer;
+  (*outlength) = new_length;
+  chunk_start = &(*out)[new_length - total_chunk_length];
+
+  for(i = 0; i != total_chunk_length; ++i) chunk_start[i] = chunk[i];
+
+  return 0;
+}
+
+unsigned lodepng_chunk_create(unsigned char** out, size_t* outlength, unsigned length,
+                              const char* type, const unsigned char* data) {
+  unsigned i;
+  unsigned char *chunk, *new_buffer;
+  size_t new_length = (*outlength) + length + 12;
+  if(new_length < length + 12 || new_length < (*outlength)) return 77; /*integer overflow happened*/
+  new_buffer = (unsigned char*)lodepng_realloc(*out, new_length);
+  if(!new_buffer) return 83; /*alloc fail*/
+  (*out) = new_buffer;
+  (*outlength) = new_length;
+  chunk = &(*out)[(*outlength) - length - 12];
+
+  /*1: length*/
+  lodepng_set32bitInt(chunk, (unsigned)length);
+
+  /*2: chunk name (4 letters)*/
+  chunk[4] = (unsigned char)type[0];
+  chunk[5] = (unsigned char)type[1];
+  chunk[6] = (unsigned char)type[2];
+  chunk[7] = (unsigned char)type[3];
+
+  /*3: the data*/
+  for(i = 0; i != length; ++i) chunk[8 + i] = data[i];
+
+  /*4: CRC (of the chunkname characters and the data)*/
+  lodepng_chunk_generate_crc(chunk);
+
+  return 0;
+}
+
+/* ////////////////////////////////////////////////////////////////////////// */
+/* / Color types, channels, bits                                            / */
+/* ////////////////////////////////////////////////////////////////////////// */
+
+/*checks if the colortype is valid and the bitdepth bd is allowed for this colortype.
+Return value is a LodePNG error code.*/
+static unsigned checkColorValidity(LodePNGColorType colortype, unsigned bd) {
+  switch(colortype) {
+    case LCT_GREY:       if(!(bd == 1 || bd == 2 || bd == 4 || bd == 8 || bd == 16)) return 37; break;
+    case LCT_RGB:        if(!(                                 bd == 8 || bd == 16)) return 37; break;
+    case LCT_PALETTE:    if(!(bd == 1 || bd == 2 || bd == 4 || bd == 8            )) return 37; break;
+    case LCT_GREY_ALPHA: if(!(                                 bd == 8 || bd == 16)) return 37; break;
+    case LCT_RGBA:       if(!(                                 bd == 8 || bd == 16)) return 37; break;
+    default: return 31; /* invalid color type */
+  }
+  return 0; /*allowed color type / bits combination*/
+}
+
+static unsigned getNumColorChannels(LodePNGColorType colortype) {
+  switch(colortype) {
+    case LCT_GREY: return 1;
+    case LCT_RGB: return 3;
+    case LCT_PALETTE: return 1;
+    case LCT_GREY_ALPHA: return 2;
+    case LCT_RGBA: return 4;
+    default: return 0; /*invalid color type*/
+  }
+}
+
+static unsigned lodepng_get_bpp_lct(LodePNGColorType colortype, unsigned bitdepth) {
+  /*bits per pixel is amount of channels * bits per channel*/
+  return getNumColorChannels(colortype) * bitdepth;
+}
+
+/* ////////////////////////////////////////////////////////////////////////// */
+
+void lodepng_color_mode_init(LodePNGColorMode* info) {
+  info->key_defined = 0;
+  info->key_r = info->key_g = info->key_b = 0;
+  info->colortype = LCT_RGBA;
+  info->bitdepth = 8;
+  info->palette = 0;
+  info->palettesize = 0;
+}
+
+void lodepng_color_mode_alloc_palette(LodePNGColorMode* info) {
+  size_t i;
+  /*room for 256 colors with 4 bytes each. Using realloc to avoid leak if it is being overwritten*/
+  info->palette = (unsigned char*)lodepng_realloc(info->palette, 1024);
+  if(!info->palette) return; /*alloc fail*/
+  for(i = 0; i != 256; ++i) {
+    /*Initialize all unused colors with black, the value used for invalid palette indices.
+    This is an error according to the PNG spec, but common PNG decoders make it black instead.
+    That makes color conversion slightly faster due to no error handling needed.*/
+    info->palette[i * 4 + 0] = 0;
+    info->palette[i * 4 + 1] = 0;
+    info->palette[i * 4 + 2] = 0;
+    info->palette[i * 4 + 3] = 255;
+  }
+}
+
+void lodepng_color_mode_cleanup(LodePNGColorMode* info) {
+  lodepng_palette_clear(info);
+}
+
+unsigned lodepng_color_mode_copy(LodePNGColorMode* dest, const LodePNGColorMode* source) {
+  size_t i;
+  lodepng_color_mode_cleanup(dest);
+  *dest = *source;
+  if(source->palette) {
+    dest->palette = (unsigned char*)lodepng_malloc(1024);
+    if(!dest->palette && source->palettesize) return 83; /*alloc fail*/
+    for(i = 0; i != source->palettesize * 4; ++i) dest->palette[i] = source->palette[i];
+  }
+  return 0;
+}
+
+LodePNGColorMode lodepng_color_mode_make(LodePNGColorType colortype, unsigned bitdepth) {
+  LodePNGColorMode result;
+  lodepng_color_mode_init(&result);
+  result.colortype = colortype;
+  result.bitdepth = bitdepth;
+  return result;
+}
+
+static int lodepng_color_mode_equal(const LodePNGColorMode* a, const LodePNGColorMode* b) {
+  size_t i;
+  if(a->colortype != b->colortype) return 0;
+  if(a->bitdepth != b->bitdepth) return 0;
+  if(a->key_defined != b->key_defined) return 0;
+  if(a->key_defined) {
+    if(a->key_r != b->key_r) return 0;
+    if(a->key_g != b->key_g) return 0;
+    if(a->key_b != b->key_b) return 0;
+  }
+  if(a->palettesize != b->palettesize) return 0;
+  for(i = 0; i != a->palettesize * 4; ++i) {
+    if(a->palette[i] != b->palette[i]) return 0;
+  }
+  return 1;
+}
+
+void lodepng_palette_clear(LodePNGColorMode* info) {
+  if(info->palette) lodepng_free(info->palette);
+  info->palette = 0;
+  info->palettesize = 0;
+}
+
+unsigned lodepng_palette_add(LodePNGColorMode* info,
+                             unsigned char r, unsigned char g, unsigned char b, unsigned char a) {
+  if(!info->palette) /*allocate palette if empty*/ {
+    lodepng_color_mode_alloc_palette(info);
+    if(!info->palette) return 83; /*alloc fail*/
+  }
+  if(info->palettesize >= 256) {
+    return 108; /*too many palette values*/
+  }
+  info->palette[4 * info->palettesize + 0] = r;
+  info->palette[4 * info->palettesize + 1] = g;
+  info->palette[4 * info->palettesize + 2] = b;
+  info->palette[4 * info->palettesize + 3] = a;
+  ++info->palettesize;
+  return 0;
+}
+
+/*calculate bits per pixel out of colortype and bitdepth*/
+unsigned lodepng_get_bpp(const LodePNGColorMode* info) {
+  return lodepng_get_bpp_lct(info->colortype, info->bitdepth);
+}
+
+unsigned lodepng_get_channels(const LodePNGColorMode* info) {
+  return getNumColorChannels(info->colortype);
+}
+
+unsigned lodepng_is_greyscale_type(const LodePNGColorMode* info) {
+  return info->colortype == LCT_GREY || info->colortype == LCT_GREY_ALPHA;
+}
+
+unsigned lodepng_is_alpha_type(const LodePNGColorMode* info) {
+  return (info->colortype & 4) != 0; /*4 or 6*/
+}
+
+unsigned lodepng_is_palette_type(const LodePNGColorMode* info) {
+  return info->colortype == LCT_PALETTE;
+}
+
+unsigned lodepng_has_palette_alpha(const LodePNGColorMode* info) {
+  size_t i;
+  for(i = 0; i != info->palettesize; ++i) {
+    if(info->palette[i * 4 + 3] < 255) return 1;
+  }
+  return 0;
+}
+
+unsigned lodepng_can_have_alpha(const LodePNGColorMode* info) {
+  return info->key_defined
+      || lodepng_is_alpha_type(info)
+      || lodepng_has_palette_alpha(info);
+}
+
+static size_t lodepng_get_raw_size_lct(unsigned w, unsigned h, LodePNGColorType colortype, unsigned bitdepth) {
+  size_t bpp = lodepng_get_bpp_lct(colortype, bitdepth);
+  size_t n = (size_t)w * (size_t)h;
+  return ((n / 8u) * bpp) + ((n & 7u) * bpp + 7u) / 8u;
+}
+
+size_t lodepng_get_raw_size(unsigned w, unsigned h, const LodePNGColorMode* color) {
+  return lodepng_get_raw_size_lct(w, h, color->colortype, color->bitdepth);
+}
+
+
+#ifdef LODEPNG_COMPILE_PNG
+#ifdef LODEPNG_COMPILE_DECODER
+
+/*in an idat chunk, each scanline is a multiple of 8 bits, unlike the lodepng output buffer,
+and in addition has one extra byte per line: the filter byte. So this gives a larger
+result than lodepng_get_raw_size. */
+static size_t lodepng_get_raw_size_idat(unsigned w, unsigned h, const LodePNGColorMode* color) {
+  size_t bpp = lodepng_get_bpp(color);
+  /* + 1 for the filter byte, and possibly plus padding bits per line */
+  size_t line = ((size_t)(w / 8u) * bpp) + 1u + ((w & 7u) * bpp + 7u) / 8u;
+  return (size_t)h * line;
+}
+
+/*Safely checks whether size_t overflow can be caused due to amount of pixels.
+This check is overcautious rather than precise. If this check indicates no overflow,
+you can safely compute in a size_t (but not an unsigned):
+-(size_t)w * (size_t)h * 8
+-amount of bytes in IDAT (including filter, padding and Adam7 bytes)
+-amount of bytes in raw color model
+Returns 1 if overflow possible, 0 if not.
+*/
+static int lodepng_pixel_overflow(unsigned w, unsigned h,
+                                  const LodePNGColorMode* pngcolor, const LodePNGColorMode* rawcolor) {
+  size_t bpp = LODEPNG_MAX(lodepng_get_bpp(pngcolor), lodepng_get_bpp(rawcolor));
+  size_t numpixels, total;
+  size_t line; /* bytes per line in worst case */
+
+  if(lodepng_mulofl((size_t)w, (size_t)h, &numpixels)) return 1;
+  if(lodepng_mulofl(numpixels, 8, &total)) return 1; /* bit pointer with 8-bit color, or 8 bytes per channel color */
+
+  /* Bytes per scanline with the expression "(w / 8u) * bpp) + ((w & 7u) * bpp + 7u) / 8u" */
+  if(lodepng_mulofl((size_t)(w / 8u), bpp, &line)) return 1;
+  if(lodepng_addofl(line, ((w & 7u) * bpp + 7u) / 8u, &line)) return 1;
+
+  if(lodepng_addofl(line, 5, &line)) return 1; /* 5 bytes overhead per line: 1 filterbyte, 4 for Adam7 worst case */
+  if(lodepng_mulofl(line, h, &total)) return 1; /* Total bytes in worst case */
+
+  return 0; /* no overflow */
+}
+#endif /*LODEPNG_COMPILE_DECODER*/
+#endif /*LODEPNG_COMPILE_PNG*/
+
+#ifdef LODEPNG_COMPILE_ANCILLARY_CHUNKS
+
+static void LodePNGUnknownChunks_init(LodePNGInfo* info) {
+  unsigned i;
+  for(i = 0; i != 3; ++i) info->unknown_chunks_data[i] = 0;
+  for(i = 0; i != 3; ++i) info->unknown_chunks_size[i] = 0;
+}
+
+static void LodePNGUnknownChunks_cleanup(LodePNGInfo* info) {
+  unsigned i;
+  for(i = 0; i != 3; ++i) lodepng_free(info->unknown_chunks_data[i]);
+}
+
+static unsigned LodePNGUnknownChunks_copy(LodePNGInfo* dest, const LodePNGInfo* src) {
+  unsigned i;
+
+  LodePNGUnknownChunks_cleanup(dest);
+
+  for(i = 0; i != 3; ++i) {
+    size_t j;
+    dest->unknown_chunks_size[i] = src->unknown_chunks_size[i];
+    dest->unknown_chunks_data[i] = (unsigned char*)lodepng_malloc(src->unknown_chunks_size[i]);
+    if(!dest->unknown_chunks_data[i] && dest->unknown_chunks_size[i]) return 83; /*alloc fail*/
+    for(j = 0; j < src->unknown_chunks_size[i]; ++j) {
+      dest->unknown_chunks_data[i][j] = src->unknown_chunks_data[i][j];
+    }
+  }
+
+  return 0;
+}
+
+/******************************************************************************/
+
+static void LodePNGText_init(LodePNGInfo* info) {
+  info->text_num = 0;
+  info->text_keys = NULL;
+  info->text_strings = NULL;
+}