From 4de4200ec2717e777bbf99ed82d1b4344f078ec2 Mon Sep 17 00:00:00 2001 From: Julian Andres Klode Date: Thu, 8 Mar 2018 09:33:39 +0100 Subject: apt-pkg: Add support for zstd zstd is a compression algorithm developed by facebook. At level 19, it is about 6% worse in size than xz -6, but decompression is multiple times faster, saving about 40% install time, especially with eatmydata on cloud instances. --- CMake/FindZstd.cmake | 25 ++++++ CMake/config.h.in | 3 + CMakeLists.txt | 6 ++ apt-pkg/CMakeLists.txt | 2 + apt-pkg/aptconfiguration.cc | 8 ++ apt-pkg/contrib/fileutl.cc | 195 +++++++++++++++++++++++++++++++++++++++++++ apt-pkg/contrib/fileutl.h | 16 +++- debian/control | 1 + doc/examples/configure-index | 1 + test/integration/framework | 2 + test/libapt/fileutl_test.cc | 4 +- 11 files changed, 259 insertions(+), 4 deletions(-) create mode 100644 CMake/FindZstd.cmake diff --git a/CMake/FindZstd.cmake b/CMake/FindZstd.cmake new file mode 100644 index 000000000..68118049b --- /dev/null +++ b/CMake/FindZstd.cmake @@ -0,0 +1,25 @@ +# - Try to find ZSTD +# Once done, this will define +# +# ZSTD_FOUND - system has ZSTD +# ZSTD_INCLUDE_DIRS - the ZSTD include directories +# ZSTD_LIBRARIES - the ZSTD library +find_package(PkgConfig) + +pkg_check_modules(ZSTD_PKGCONF libzstd) + +find_path(ZSTD_INCLUDE_DIRS + NAMES zstd.h + PATHS ${ZSTD_PKGCONF_INCLUDE_DIRS} +) + + +find_library(ZSTD_LIBRARIES + NAMES zstd + PATHS ${ZSTD_PKGCONF_LIBRARY_DIRS} +) + +include(FindPackageHandleStandardArgs) +find_package_handle_standard_args(ZSTD DEFAULT_MSG ZSTD_INCLUDE_DIRS ZSTD_LIBRARIES) + +mark_as_advanced(ZSTD_INCLUDE_DIRS ZSTD_LIBRARIES) diff --git a/CMake/config.h.in b/CMake/config.h.in index cfaa14ed1..bd0da8649 100644 --- a/CMake/config.h.in +++ b/CMake/config.h.in @@ -17,6 +17,9 @@ /* Define if we have the lz4 library for lz4 */ #cmakedefine HAVE_LZ4 +/* Define if we have the zstd library for zst */ +#cmakedefine HAVE_ZSTD + /* Define if we have the udev library */ #cmakedefine 
HAVE_UDEV diff --git a/CMakeLists.txt b/CMakeLists.txt index b723b0ead..a79ce99d4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -103,6 +103,12 @@ if (LZ4_FOUND) set(HAVE_LZ4 1) endif() +find_package(Zstd REQUIRED) +if (ZSTD_FOUND) + set(HAVE_ZSTD 1) +endif() + + find_package(Udev) if (UDEV_FOUND) set(HAVE_UDEV 1) diff --git a/apt-pkg/CMakeLists.txt b/apt-pkg/CMakeLists.txt index 2f5ad3200..fdf27f92d 100644 --- a/apt-pkg/CMakeLists.txt +++ b/apt-pkg/CMakeLists.txt @@ -44,6 +44,7 @@ target_include_directories(apt-pkg ${BZIP2_INCLUDE_DIR} ${LZMA_INCLUDE_DIRS} ${LZ4_INCLUDE_DIRS} + ${ZSTD_INCLUDE_DIRS} $<$:${UDEV_INCLUDE_DIRS}> ${ICONV_INCLUDE_DIRS} ) @@ -55,6 +56,7 @@ target_link_libraries(apt-pkg ${BZIP2_LIBRARIES} ${LZMA_LIBRARIES} ${LZ4_LIBRARIES} + ${ZSTD_LIBRARIES} $<$:${UDEV_LIBRARIES}> ${ICONV_LIBRARIES} ) diff --git a/apt-pkg/aptconfiguration.cc b/apt-pkg/aptconfiguration.cc index 0421ea949..61e53ec3a 100644 --- a/apt-pkg/aptconfiguration.cc +++ b/apt-pkg/aptconfiguration.cc @@ -39,6 +39,7 @@ static void setDefaultConfigurationForCompressors() { _config->CndSet("Dir::Bin::bzip2", "/bin/bzip2"); _config->CndSet("Dir::Bin::xz", "/usr/bin/xz"); _config->CndSet("Dir::Bin::lz4", "/usr/bin/lz4"); + _config->CndSet("Dir::Bin::zstd", "/usr/bin/zstd"); if (FileExists(_config->Find("Dir::Bin::xz")) == true) { _config->Set("Dir::Bin::lzma", _config->Find("Dir::Bin::xz")); _config->Set("APT::Compressor::lzma::Binary", "xz"); @@ -67,6 +68,7 @@ static void setDefaultConfigurationForCompressors() { _config->CndSet("Acquire::CompressionTypes::lzma","lzma"); _config->CndSet("Acquire::CompressionTypes::gz","gzip"); _config->CndSet("Acquire::CompressionTypes::lz4","lz4"); + _config->CndSet("Acquire::CompressionTypes::zst", "zstd"); } /*}}}*/ // getCompressionTypes - Return Vector of usable compressiontypes /*{{{*/ @@ -369,6 +371,12 @@ const Configuration::getCompressors(bool const Cached) { # define APT_ADD_COMPRESSOR(NAME, EXT, BINARY, ARG, DEARG, COST) \ { 
CompressorsDone.push_back(NAME); compressors.emplace_back(NAME, EXT, BINARY, ARG, DEARG, COST); } APT_ADD_COMPRESSOR(".", "", "", nullptr, nullptr, 0) + if (_config->Exists("Dir::Bin::zstd") == false || FileExists(_config->Find("Dir::Bin::zstd")) == true) + APT_ADD_COMPRESSOR("zstd", ".zst", "zstd", "-19", "-d", 60) +#ifdef HAVE_ZSTD + else + APT_ADD_COMPRESSOR("zstd", ".zst", "false", nullptr, nullptr, 60) +#endif if (_config->Exists("Dir::Bin::lz4") == false || FileExists(_config->Find("Dir::Bin::lz4")) == true) APT_ADD_COMPRESSOR("lz4",".lz4","lz4","-1","-d",50) #ifdef HAVE_LZ4 diff --git a/apt-pkg/contrib/fileutl.cc b/apt-pkg/contrib/fileutl.cc index f8f7a478c..fc14422d6 100644 --- a/apt-pkg/contrib/fileutl.cc +++ b/apt-pkg/contrib/fileutl.cc @@ -67,6 +67,9 @@ #ifdef HAVE_LZ4 #include #endif +#ifdef HAVE_ZSTD +#include +#endif #include #include @@ -1714,6 +1717,193 @@ public: InternalClose(""); } #endif +}; + /*}}}*/ + +class APT_HIDDEN ZstdFileFdPrivate : public FileFdPrivate /* zstd (de)compression through the ZSTD_*Stream calls; the underlying descriptor is read and written via `backend` */ +{ /*{{{*/ +#ifdef HAVE_ZSTD + ZSTD_DStream *dctx; + ZSTD_CStream *cctx; + size_t res; /* last status code; decoded with ZSTD_getErrorName() by InternalReadError()/InternalWriteError() */ + FileFd backend; + simple_buffer zstd_buffer; + // Count of bytes that the decompressor expects to read next, or buffer size. 
+ size_t next_to_load = APT_BUFFER_SIZE; + + public: + virtual bool InternalOpen(int const iFd, unsigned int const Mode) APT_OVERRIDE + { + if ((Mode & FileFd::ReadWrite) == FileFd::ReadWrite) + return _error->Error("zstd only supports write or read mode"); + + if ((Mode & FileFd::WriteOnly) == FileFd::WriteOnly) + { + cctx = ZSTD_createCStream(); + res = ZSTD_initCStream(cctx, 19); /* compression level 19 is hard-coded here */ + zstd_buffer.reset(APT_BUFFER_SIZE); + } + else + { + dctx = ZSTD_createDStream(); + res = ZSTD_initDStream(dctx); + zstd_buffer.reset(APT_BUFFER_SIZE); + } + + filefd->Flags |= FileFd::Compressed; + + if (ZSTD_isError(res)) + return false; /* NOTE(review): fails without reporting an error message; `res` still holds the zstd error code */ + + unsigned int flags = (Mode & (FileFd::WriteOnly | FileFd::ReadOnly)); + if (backend.OpenDescriptor(iFd, flags, FileFd::None, true) == false) + return false; + + return true; + } + virtual ssize_t InternalUnbufferedRead(void *const To, unsigned long long const Size) APT_OVERRIDE + { + /* Keep reading as long as the decompressor still wants more input */ + while (next_to_load) + { + // Fill compressed buffer; + if (zstd_buffer.empty()) + { + unsigned long long read; + /* Reset - if the zstd decompressor wants to read more, allocate more */ + zstd_buffer.reset(next_to_load); + if (backend.Read(zstd_buffer.getend(), zstd_buffer.free(), &read) == false) + return -1; + zstd_buffer.bufferend += read; + + /* Unexpected EOF: the backend is exhausted while the decompressor still wants input */ + if (read == 0) + { + res = -1; + return filefd->FileFdError("ZSTD: %s %s", + filefd->FileName.c_str(), + _("Unexpected end of file")), + -1; + } + } + // Drain compressed buffer as far as possible. 
+ ZSTD_inBuffer in = { + .src = zstd_buffer.get(), + .size = zstd_buffer.size(), + .pos = 0, + }; + ZSTD_outBuffer out = { + .dst = To, + .size = Size, + .pos = 0, + }; + + res = ZSTD_decompressStream(dctx, &out, &in); + if (ZSTD_isError(res)) + return -1; + + next_to_load = res; + zstd_buffer.bufferstart += in.pos; + + if (out.pos != 0) + return out.pos; + } + + return 0; + } + virtual bool InternalReadError() APT_OVERRIDE + { + char const *const errmsg = ZSTD_getErrorName(res); + + return filefd->FileFdError("ZSTD: %s %s (%zu: %s)", filefd->FileName.c_str(), _("Read error"), res, errmsg); + } + virtual ssize_t InternalWrite(void const *const From, unsigned long long const Size) APT_OVERRIDE + { + // Compress the caller's data into zstd_buffer, then pass the compressed bytes to the backend. + ZSTD_outBuffer out = { + .dst = zstd_buffer.buffer, + .size = zstd_buffer.buffersize_max, + .pos = 0, + }; + ZSTD_inBuffer in = { + .src = From, + .size = Size, + .pos = 0, + }; + + res = ZSTD_compressStream(cctx, &out, &in); + + if (ZSTD_isError(res) || backend.Write(zstd_buffer.buffer, out.pos) == false) + return -1; + + return in.pos; /* input position after ZSTD_compressStream(), reported as the amount written */ + } + + virtual bool InternalWriteError() APT_OVERRIDE + { + char const *const errmsg = ZSTD_getErrorName(res); + + return filefd->FileFdError("ZSTD: %s %s (%zu: %s)", filefd->FileName.c_str(), _("Write error"), res, errmsg); + } + virtual bool InternalStream() const APT_OVERRIDE { return true; } + + virtual bool InternalFlush() APT_OVERRIDE + { + return backend.Flush(); + } + + virtual bool InternalClose(std::string const &) APT_OVERRIDE + { + /* Reset variables */ + res = 0; + next_to_load = APT_BUFFER_SIZE; + + if (cctx != nullptr) + { + if (filefd->Failed() == false) + { + do /* call ZSTD_endStream() until it returns 0, writing each produced chunk to the backend */ + { + ZSTD_outBuffer out = { + .dst = zstd_buffer.buffer, + .size = zstd_buffer.buffersize_max, + .pos = 0, + }; + res = ZSTD_endStream(cctx, &out); + if (ZSTD_isError(res) || backend.Write(zstd_buffer.buffer, out.pos) == false) + return false; /* NOTE(review): leaves cctx allocated; the destructor calls InternalClose() again */ + } while (res > 0); + + if (!backend.Flush()) + return false; + } + 
if (!backend.Close()) + return false; /* NOTE(review): this early return also skips ZSTD_freeCStream() below */ + + res = ZSTD_freeCStream(cctx); + cctx = nullptr; + } + + if (dctx != nullptr) + { + res = ZSTD_freeDStream(dctx); + dctx = nullptr; + } + if (backend.IsOpen()) + { + backend.Close(); + filefd->iFd = -1; + } + + return ZSTD_isError(res) == false; + } + + explicit ZstdFileFdPrivate(FileFd *const filefd) : FileFdPrivate(filefd), dctx(nullptr), cctx(nullptr) {} + virtual ~ZstdFileFdPrivate() + { + InternalClose(""); + } +#endif }; /*}}}*/ class APT_HIDDEN LzmaFileFdPrivate: public FileFdPrivate { /*{{{*/ @@ -2212,6 +2402,7 @@ bool FileFd::Open(string FileName,unsigned int const Mode,CompressMode Compress, case Lzma: name = "lzma"; break; case Xz: name = "xz"; break; case Lz4: name = "lz4"; break; + case Zstd: name = "zstd"; break; case Auto: case Extension: // Unreachable @@ -2329,6 +2520,7 @@ bool FileFd::OpenDescriptor(int Fd, unsigned int const Mode, CompressMode Compre case Lzma: name = "lzma"; break; case Xz: name = "xz"; break; case Lz4: name = "lz4"; break; + case Zstd: name = "zstd"; break; case Auto: case Extension: if (AutoClose == true && Fd != -1) @@ -2393,6 +2585,9 @@ bool FileFd::OpenInternDescriptor(unsigned int const Mode, APT::Configuration::C #ifdef HAVE_LZ4 APT_COMPRESS_INIT("lz4", Lz4FileFdPrivate); #endif +#ifdef HAVE_ZSTD + APT_COMPRESS_INIT("zstd", ZstdFileFdPrivate); +#endif #undef APT_COMPRESS_INIT else if (compressor.Name == "." 
|| compressor.Binary.empty() == true) d = new DirectFileFdPrivate(this); diff --git a/apt-pkg/contrib/fileutl.h b/apt-pkg/contrib/fileutl.h index 699b8b802..6249b7c16 100644 --- a/apt-pkg/contrib/fileutl.h +++ b/apt-pkg/contrib/fileutl.h @@ -46,6 +46,7 @@ class FileFd friend class Bz2FileFdPrivate; friend class LzmaFileFdPrivate; friend class Lz4FileFdPrivate; + friend class ZstdFileFdPrivate; friend class DirectFileFdPrivate; friend class PipedFileFdPrivate; protected: @@ -76,8 +77,19 @@ class FileFd ReadOnlyGzip, WriteAtomic = ReadWrite | Create | Atomic }; - enum CompressMode { Auto = 'A', None = 'N', Extension = 'E', Gzip = 'G', Bzip2 = 'B', Lzma = 'L', Xz = 'X', Lz4='4' }; - + enum CompressMode + { + Auto = 'A', + None = 'N', + Extension = 'E', + Gzip = 'G', + Bzip2 = 'B', + Lzma = 'L', + Xz = 'X', + Lz4 = '4', + Zstd = 'Z' + }; + inline bool Read(void *To,unsigned long long Size,bool AllowEof) { unsigned long long Jnk; diff --git a/debian/control b/debian/control index a9063eb13..db1a6aebe 100644 --- a/debian/control +++ b/debian/control @@ -23,6 +23,7 @@ Build-Depends: cmake (>= 3.4), liblzma-dev, libseccomp-dev [amd64 arm64 armel armhf i386 mips mips64el mipsel ppc64el s390x hppa powerpc powerpcspe ppc64 x32], libudev-dev [linux-any], + libzstd-dev (>= 1.0), pkg-config, po4a (>= 0.34-2), xsltproc, diff --git a/doc/examples/configure-index b/doc/examples/configure-index index 3763aa900..4612f362e 100644 --- a/doc/examples/configure-index +++ b/doc/examples/configure-index @@ -416,6 +416,7 @@ Dir "" dpkg-source ""; dpkg-buildpackage "/usr/bin/dpkg-buildpackage"; lz4 ""; + zstd ""; gzip ""; xz ""; bzip2 ""; diff --git a/test/integration/framework b/test/integration/framework index cf0a02de3..739098320 100644 --- a/test/integration/framework +++ b/test/integration/framework @@ -624,6 +624,7 @@ configcompression() { '.') printf ".\t.\tcat\n";; 'gz') printf "gzip\tgz\t$CMD $1\n";; 'bz2') printf "bzip2\tbz2\t$CMD $1\n";; + 'zst') printf "zstd\tzst\t$CMD $1\n";; *) 
printf "$1\t$1\t$CMD $1\n";; esac shift @@ -652,6 +653,7 @@ forcecompressor() { case $COMPRESSOR in gzip) COMPRESS='gz';; bzip2) COMPRESS='bz2';; + zstd) COMPRESS='zst';; esac local CONFFILE="${TMPWORKINGDIRECTORY}/rootdir/etc/apt/apt.conf.d/00force-compressor" echo "Acquire::CompressionTypes::Order { \"${COMPRESS}\"; }; diff --git a/test/libapt/fileutl_test.cc b/test/libapt/fileutl_test.cc index a702c16ec..38536f77f 100644 --- a/test/libapt/fileutl_test.cc +++ b/test/libapt/fileutl_test.cc @@ -180,7 +180,7 @@ static void TestFileFd(mode_t const a_umask, mode_t const ExpectedFilePermission static void TestFileFd(unsigned int const filemode) { auto const compressors = APT::Configuration::getCompressors(); - EXPECT_EQ(7, compressors.size()); + EXPECT_EQ(8, compressors.size()); bool atLeastOneWasTested = false; for (auto const &c: compressors) { @@ -204,7 +204,7 @@ TEST(FileUtlTest, FileFD) _config->Set("APT::Compressor::rev::Binary", "rev"); _config->Set("APT::Compressor::rev::Cost", 10); auto const compressors = APT::Configuration::getCompressors(false); - EXPECT_EQ(7, compressors.size()); + EXPECT_EQ(8, compressors.size()); EXPECT_TRUE(std::any_of(compressors.begin(), compressors.end(), [](APT::Configuration::Compressor const &c) { return c.Name == "rev"; })); std::string const startdir = SafeGetCWD(); -- cgit v1.2.3