From c262653cc21b4302c3fc5f724d254f68bfca84b7 Mon Sep 17 00:00:00 2001 From: David Kalnischkies Date: Wed, 4 Nov 2009 23:44:15 +0100 Subject: add a debug test mode to the rred method for easier testing --- methods/rred.cc | 74 ++++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 58 insertions(+), 16 deletions(-) diff --git a/methods/rred.cc b/methods/rred.cc index 27d95bdde..3d4b37e83 100644 --- a/methods/rred.cc +++ b/methods/rred.cc @@ -37,6 +37,8 @@ class RredMethod : public pkgAcqMethod char *buffer, unsigned int bufsize, Hashes *hash); // apply a patch file int ed_file(FILE *ed_cmds, FILE *in_file, FILE *out_file, Hashes *hash); + +protected: // the methods main method virtual bool Fetch(FetchItem *Itm); @@ -186,17 +188,19 @@ int RredMethod::ed_file(FILE *ed_cmds, FILE *in_file, FILE *out_file, return ED_OK; } - bool RredMethod::Fetch(FetchItem *Itm) { - Debug = _config->FindB("Debug::pkgAcquire::RRed",false); + Debug = _config->FindB("Debug::pkgAcquire::RRed", false); URI Get = Itm->Uri; string Path = Get.Host + Get.Path; // To account for relative paths - // Path contains the filename to patch + FetchResult Res; Res.Filename = Itm->DestFile; - URIStart(Res); - // Res.Filename the destination filename + if (Itm->Uri.empty() == true) { + Path = Itm->DestFile; + Itm->DestFile.append(".result"); + } else + URIStart(Res); if (Debug == true) std::clog << "Patching " << Path << " with " << Path @@ -243,20 +247,58 @@ bool RredMethod::Fetch(FetchItem *Itm) return _error->Errno("stat",_("Failed to stat")); // return done - Res.LastModified = Buf.st_mtime; - Res.Size = Buf.st_size; - Res.TakeHashes(Hash); - URIDone(Res); + if (Itm->Uri.empty() == true) { + Res.LastModified = Buf.st_mtime; + Res.Size = Buf.st_size; + Res.TakeHashes(Hash); + URIDone(Res); + } return true; } -int main(int argc, char *argv[]) -{ - RredMethod Mth; +/** + * \brief Wrapper class for testing rred + */ +class TestRredMethod : public RredMethod { +public: + /** \brief Run rred in debug test mode + * + * This method can be used to run the rred method outside + * of the "normal" acquire environment for easier testing. + * + * \param base basename of all files involved in this rred test + */ + bool Run(char const *base) { + _config->CndSet("Debug::pkgAcquire::RRed", "true"); + FetchItem *test = new FetchItem; + test->DestFile = base; + return Fetch(test); + } +}; - Prog = strrchr(argv[0],'/'); - Prog++; - - return Mth.Run(); +/** + * \brief Starter for the rred method (or its test method) + * + * Used without parameters is the normal behavior for methods for + * the APT acquire system. While this works great for the acquire system + * it is very hard to test the method and therefore the method also + * accepts one parameter which will switch it directly to debug test mode: + * The test mode expects that if "Testfile" is given as parameter + * the file "Testfile" should be ed-style patched with "Testfile.ed" + * and will write the result to "Testfile.result". + */ +int main(int argc, char *argv[]) { + Prog = strrchr(argv[0],'/'); + Prog++; + + if (argc == 0) { + RredMethod Mth; + return Mth.Run(); + } else { + TestRredMethod Mth; + bool result = Mth.Run(argv[1]); + _error->DumpErrors(); + return result; + } } -- cgit v1.2.3 From c7139d8c8c04e69150ab975c0e2698d05ed6a298 Mon Sep 17 00:00:00 2001 From: David Kalnischkies Date: Thu, 5 Nov 2009 02:05:21 +0100 Subject: rewrite and refactor rred method to be able to handle even big (>30 MB) patches (Closes: #554349) and hardening the method itself by using more constants and a return value which can't be misinterpreted as linenumber --- methods/rred.cc | 358 +++++++++++++++++++++++++++++--------------------------- 1 file changed, 187 insertions(+), 171 deletions(-) diff --git a/methods/rred.cc b/methods/rred.cc index 3d4b37e83..2d4dd768b 100644 --- a/methods/rred.cc +++ b/methods/rred.cc @@ -1,3 +1,4 @@ +// Includes /*{{{*/ #include #include #include @@ -10,185 +11,205 @@ #include #include #include - -/* this method implements a patch functionality similar to "patch --ed" that is - * used by the "tiffany" incremental packages download stuff. it differs from - * "ed" insofar that it is way more restricted (and therefore secure). in the - * moment only the "c", "a" and "d" commands of ed are implemented (diff - * doesn't output any other). additionally the records must be reverse sorted - * by line number and may not overlap (diff *seems* to produce this kind of - * output). + /*}}}*/ +/** \brief RredMethod - ed-style incremential patch method {{{ + * + * This method implements a patch functionality similar to "patch --ed" that is + * used by the "tiffany" incremental packages download stuff. It differs from + * "ed" insofar that it is way more restricted (and therefore secure). + * The currently supported ed commands are "change", "add" and + * "delete" (diff doesn't output any other). + * Additionally the records must be reverse sorted by line number and + * may not overlap (diff *seems* to produce this kind of output). * */ +class RredMethod : public pkgAcqMethod { + bool Debug; + // the size of this doesn't really matter (except for performance) + const static int BUF_SIZE = 1024; + // the supported ed commands + enum Mode {MODE_CHANGED='c', MODE_DELETED='d', MODE_ADDED='a'}; + // return values + enum State {ED_OK=0, ED_ORDERING=1, ED_PARSER=2, ED_FAILURE=3}; -const char *Prog; + State applyFile(FILE *ed_cmds, FILE *in_file, FILE *out_file, + unsigned long &line, char *buffer, Hashes *hash) const; + void ignoreLineInFile(FILE *fin, char *buffer) const; + void copyLineFromFileToFile(FILE *fin, FILE *fout, + Hashes *hash, char *buffer) const; -class RredMethod : public pkgAcqMethod -{ - bool Debug; - // the size of this doesn't really matter (except for performance) - const static int BUF_SIZE = 1024; - // the ed commands - enum Mode {MODE_CHANGED, MODE_DELETED, MODE_ADDED}; - // return values - enum State {ED_OK, ED_ORDERING, ED_PARSER, ED_FAILURE}; - // this applies a single hunk, it uses a tail recursion to - // reverse the hunks in the file - int ed_rec(FILE *ed_cmds, FILE *in_file, FILE *out_file, int line, - char *buffer, unsigned int bufsize, Hashes *hash); - // apply a patch file - int ed_file(FILE *ed_cmds, FILE *in_file, FILE *out_file, Hashes *hash); + State patchFile(FILE *ed_cmds, FILE *in_file, FILE *out_file, Hashes *hash) const; protected: - // the methods main method - virtual bool Fetch(FetchItem *Itm); - - public: - - RredMethod() : pkgAcqMethod("1.1",SingleInstance | SendConfig) {}; + // the methods main method + virtual bool Fetch(FetchItem *Itm); + +public: + RredMethod() : pkgAcqMethod("1.1",SingleInstance | SendConfig) {}; }; + /*}}}*/ +/** \brief applyFile - in reverse order with a tail recursion {{{ + * + * As it is expected that the commands are in reversed order in the patch file + * we check in the first half if the command is valid, but doesn't execute it + * and move a step deeper. After reaching the end of the file we apply the + * patches in the correct order: last found command first. + * + * \param ed_cmds patch file to apply + * \param in_file base file we want to patch + * \param out_file file to write the patched result to + * \param line of command operation + * \param buffer internal used read/write buffer + * \param hash the created file for correctness + * \return the success State of the ed command executor + */ +RredMethod::State RredMethod::applyFile(FILE *ed_cmds, FILE *in_file, FILE *out_file, + unsigned long &line, char *buffer, Hashes *hash) const { + // get the current command and parse it + if (fgets(buffer, BUF_SIZE, ed_cmds) == NULL) { + if (Debug == true) + std::clog << "rred: encounter end of file - we can start patching now."; + line = 0; + return ED_OK; + } -int RredMethod::ed_rec(FILE *ed_cmds, FILE *in_file, FILE *out_file, int line, - char *buffer, unsigned int bufsize, Hashes *hash) { - int pos; - int startline; - int stopline; - int mode; - int written; - char *idx; + // parse in the effected linenumbers + char* idx; + errno=0; + unsigned long const startline = strtol(buffer, &idx, 10); + if (errno == ERANGE || errno == EINVAL) { + _error->Errno("rred", "startline is an invalid number"); + return ED_PARSER; + } + if (startline > line) { + _error->Error("rred: The start line (%lu) of the next command is higher than the last line (%lu). This is not allowed.", startline, line); + return ED_ORDERING; + } + unsigned long stopline; + if (*idx == ',') { + idx++; + errno=0; + stopline = strtol(idx, &idx, 10); + if (errno == ERANGE || errno == EINVAL) { + _error->Errno("rred", "stopline is an invalid number"); + return ED_PARSER; + } + } + else { + stopline = startline; + } + line = startline; - /* get the current command and parse it*/ - if (fgets(buffer, bufsize, ed_cmds) == NULL) { - return line; - } - startline = strtol(buffer, &idx, 10); - if (startline < line) { - return ED_ORDERING; - } - if (*idx == ',') { - idx++; - stopline = strtol(idx, &idx, 10); - } - else { - stopline = startline; - } - if (*idx == 'c') { - mode = MODE_CHANGED; - if (Debug == true) { - std::clog << "changing from line " << startline - << " to " << stopline << std::endl; - } - } - else if (*idx == 'a') { - mode = MODE_ADDED; - if (Debug == true) { - std::clog << "adding after line " << startline << std::endl; - } - } - else if (*idx == 'd') { - mode = MODE_DELETED; - if (Debug == true) { - std::clog << "deleting from line " << startline - << " to " << stopline << std::endl; - } - } - else { - return ED_PARSER; - } - /* get the current position */ - pos = ftell(ed_cmds); - /* if this is add or change then go to the next full stop */ - if ((mode == MODE_CHANGED) || (mode == MODE_ADDED)) { - do { - fgets(buffer, bufsize, ed_cmds); - while ((strlen(buffer) == (bufsize - 1)) - && (buffer[bufsize - 2] != '\n')) { - fgets(buffer, bufsize, ed_cmds); - buffer[0] = ' '; - } - } while (strncmp(buffer, ".", 1) != 0); - } - /* do the recursive call */ - line = ed_rec(ed_cmds, in_file, out_file, line, buffer, bufsize, - hash); - /* pass on errors */ - if (line < 0) { - return line; - } - /* apply our hunk */ - fseek(ed_cmds, pos, SEEK_SET); - /* first wind to the current position */ - if (mode != MODE_ADDED) { - startline -= 1; - } - while (line < startline) { - fgets(buffer, bufsize, in_file); - written = fwrite(buffer, 1, strlen(buffer), out_file); - hash->Add((unsigned char*)buffer, written); - while ((strlen(buffer) == (bufsize - 1)) - && (buffer[bufsize - 2] != '\n')) { - fgets(buffer, bufsize, in_file); - written = fwrite(buffer, 1, strlen(buffer), out_file); - hash->Add((unsigned char*)buffer, written); - } - line++; - } - /* include from ed script */ - if ((mode == MODE_ADDED) || (mode == MODE_CHANGED)) { - do { - fgets(buffer, bufsize, ed_cmds); - if (strncmp(buffer, ".", 1) != 0) { - written = fwrite(buffer, 1, strlen(buffer), out_file); - hash->Add((unsigned char*)buffer, written); - while ((strlen(buffer) == (bufsize - 1)) - && (buffer[bufsize - 2] != '\n')) { - fgets(buffer, bufsize, ed_cmds); - written = fwrite(buffer, 1, strlen(buffer), out_file); - hash->Add((unsigned char*)buffer, written); - } - } - else { - break; - } - } while (1); - } - /* ignore the corresponding number of lines from input */ - if ((mode == MODE_DELETED) || (mode == MODE_CHANGED)) { - while (line < stopline) { - fgets(buffer, bufsize, in_file); - while ((strlen(buffer) == (bufsize - 1)) - && (buffer[bufsize - 2] != '\n')) { - fgets(buffer, bufsize, in_file); - } - line++; - } - } - return line; -} + // which command to execute on this line(s)? + switch (*idx) { + case MODE_CHANGED: + if (Debug == true) + std::clog << "Change from line " << startline << " to " << stopline << std::endl; + break; + case MODE_ADDED: + if (Debug == true) + std::clog << "Insert after line " << startline << std::endl; + break; + case MODE_DELETED: + if (Debug == true) + std::clog << "Delete from line " << startline << " to " << stopline << std::endl; + break; + default: + _error->Error("rred: Unknown ed command '%c'. Abort.", *idx); + return ED_PARSER; + } + unsigned char mode = *idx; + + // save the current position + unsigned const long pos = ftell(ed_cmds); + + // if this is add or change then go to the next full stop + if (mode == MODE_CHANGED || mode == MODE_ADDED) { + do + ignoreLineInFile(ed_cmds, buffer); + while (strncmp(buffer, ".", 1) != 0); + } + + // do the recursive call - the last command is the one we need to execute at first + const State child = applyFile(ed_cmds, in_file, out_file, line, buffer, hash); + if (child != ED_OK) { + return child; + } + + // change and delete are working on "line" - add is done after "line" + if (mode != MODE_ADDED) + line++; + + // first wind to the current position and copy over all unchanged lines + while (line < startline) { + fgets(buffer, BUF_SIZE, in_file); + copyLineFromFileToFile(in_file, out_file, hash, buffer); + line++; + } + + if (mode != MODE_ADDED) + line--; -int RredMethod::ed_file(FILE *ed_cmds, FILE *in_file, FILE *out_file, - Hashes *hash) { + // include data from ed script + if (mode == MODE_CHANGED || mode == MODE_ADDED) { + fseek(ed_cmds, pos, SEEK_SET); + while(fgets(buffer, BUF_SIZE, ed_cmds) != NULL) { + if (strncmp(buffer, ".", 1) != 0) + copyLineFromFileToFile(ed_cmds, out_file, hash, buffer); + else + break; + } + } + + // ignore the corresponding number of lines from input + if (mode == MODE_CHANGED || mode == MODE_DELETED) { + while (line < stopline) { + ignoreLineInFile(in_file, buffer); + line++; + } + } + return ED_OK; +} + /*}}}*/ +void RredMethod::copyLineFromFileToFile(FILE *fin, FILE *fout, /*{{{*/ + Hashes *hash, char *buffer) const { + size_t written = fwrite(buffer, 1, strlen(buffer), fout); + hash->Add((unsigned char*)buffer, written); + while (strlen(buffer) == (BUF_SIZE - 1) && + buffer[BUF_SIZE - 2] != '\n') { + fgets(buffer, BUF_SIZE, fin); + written = fwrite(buffer, 1, strlen(buffer), fout); + hash->Add((unsigned char*)buffer, written); + } +} + /*}}}*/ +void RredMethod::ignoreLineInFile(FILE *fin, char *buffer) const { /*{{{*/ + fgets(buffer, BUF_SIZE, fin); + while (strlen(buffer) == (BUF_SIZE - 1) && + buffer[BUF_SIZE - 2] != '\n') { + fgets(buffer, BUF_SIZE, fin); + buffer[0] = ' '; + } +} + /*}}}*/ +RredMethod::State RredMethod::patchFile(FILE *ed_cmds, FILE *in_file, FILE *out_file, /*{{{*/ + Hashes *hash) const { char buffer[BUF_SIZE]; - int result; - int written; /* we do a tail recursion to read the commands in the right order */ - result = ed_rec(ed_cmds, in_file, out_file, 0, buffer, BUF_SIZE, - hash); + unsigned long line = -1; // assign highest possible value + State result = applyFile(ed_cmds, in_file, out_file, line, buffer, hash); /* read the rest from infile */ - if (result >= 0) { + if (result == ED_OK) { while (fgets(buffer, BUF_SIZE, in_file) != NULL) { - written = fwrite(buffer, 1, strlen(buffer), out_file); + size_t const written = fwrite(buffer, 1, strlen(buffer), out_file); hash->Add((unsigned char*)buffer, written); } } - else { - return ED_FAILURE; - } - return ED_OK; + return result; } - -bool RredMethod::Fetch(FetchItem *Itm) + /*}}}*/ +bool RredMethod::Fetch(FetchItem *Itm) /*{{{*/ { Debug = _config->FindB("Debug::pkgAcquire::RRed", false); URI Get = Itm->Uri; @@ -219,7 +240,7 @@ bool RredMethod::Fetch(FetchItem *Itm) FILE* fPatch = fdopen(Patch.Fd(), "r"); FILE* fTo = fdopen(To.Fd(), "w"); // now do the actual patching - if (ed_file(fPatch, fFrom, fTo, &Hash) != ED_OK) { + if (patchFile(fPatch, fFrom, fTo, &Hash) != ED_OK) { _error->Errno("rred", _("Could not patch file")); return false; } @@ -256,10 +277,8 @@ bool RredMethod::Fetch(FetchItem *Itm) return true; } - -/** - * \brief Wrapper class for testing rred - */ + /*}}}*/ +/** \brief Wrapper class for testing rred */ /*{{{*/ class TestRredMethod : public RredMethod { public: /** \brief Run rred in debug test mode @@ -276,9 +295,8 @@ public: return Fetch(test); } }; - -/** - * \brief Starter for the rred method (or its test method) + /*}}}*/ +/** \brief Starter for the rred method (or its test method) {{{ * * Used without parameters is the normal behavior for methods for * the APT acquire system. While this works great for the acquire system @@ -289,9 +307,6 @@ public: * and will write the result to "Testfile.result". */ int main(int argc, char *argv[]) { - Prog = strrchr(argv[0],'/'); - Prog++; - if (argc == 0) { RredMethod Mth; return Mth.Run(); @@ -302,3 +317,4 @@ int main(int argc, char *argv[]) { return result; } } + /*}}}*/ -- cgit v1.2.3 From 66093f029b4dd94e109c8cc7d9d4bfbc6f0f4e90 Mon Sep 17 00:00:00 2001 From: David Kalnischkies Date: Thu, 5 Nov 2009 11:53:01 +0100 Subject: copyLinesFromFileToFile instead of a single Line in the rred method by saving the length of the data we need to copy into the out file --- methods/rred.cc | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/methods/rred.cc b/methods/rred.cc index 2d4dd768b..9abb1b89c 100644 --- a/methods/rred.cc +++ b/methods/rred.cc @@ -34,7 +34,7 @@ class RredMethod : public pkgAcqMethod { State applyFile(FILE *ed_cmds, FILE *in_file, FILE *out_file, unsigned long &line, char *buffer, Hashes *hash) const; void ignoreLineInFile(FILE *fin, char *buffer) const; - void copyLineFromFileToFile(FILE *fin, FILE *fout, + void copyLinesFromFileToFile(FILE *fin, FILE *fout, unsigned int lines, Hashes *hash, char *buffer) const; State patchFile(FILE *ed_cmds, FILE *in_file, FILE *out_file, Hashes *hash) const; @@ -123,10 +123,14 @@ RredMethod::State RredMethod::applyFile(FILE *ed_cmds, FILE *in_file, FILE *out_ unsigned const long pos = ftell(ed_cmds); // if this is add or change then go to the next full stop + unsigned int data_length = 0; if (mode == MODE_CHANGED || mode == MODE_ADDED) { - do + do { ignoreLineInFile(ed_cmds, buffer); + data_length++; + } while (strncmp(buffer, ".", 1) != 0); + data_length--; // the dot should not be copied } // do the recursive call - the last command is the one we need to execute at first @@ -140,10 +144,9 @@ RredMethod::State RredMethod::applyFile(FILE *ed_cmds, FILE *in_file, FILE *out_ line++; // first wind to the current position and copy over all unchanged lines - while (line < startline) { - fgets(buffer, BUF_SIZE, in_file); - copyLineFromFileToFile(in_file, out_file, hash, buffer); - line++; + if (line < startline) { + copyLinesFromFileToFile(in_file, out_file, (startline - line), hash, buffer); + line = startline; } if (mode != MODE_ADDED) @@ -152,12 +155,7 @@ RredMethod::State RredMethod::applyFile(FILE *ed_cmds, FILE *in_file, FILE *out_ // include data from ed script if (mode == MODE_CHANGED || mode == MODE_ADDED) { fseek(ed_cmds, pos, SEEK_SET); - while(fgets(buffer, BUF_SIZE, ed_cmds) != NULL) { - if (strncmp(buffer, ".", 1) != 0) - copyLineFromFileToFile(ed_cmds, out_file, hash, buffer); - else - break; - } + copyLinesFromFileToFile(ed_cmds, out_file, data_length, hash, buffer); } // ignore the corresponding number of lines from input @@ -170,15 +168,15 @@ RredMethod::State RredMethod::applyFile(FILE *ed_cmds, FILE *in_file, FILE *out_ return ED_OK; } /*}}}*/ -void RredMethod::copyLineFromFileToFile(FILE *fin, FILE *fout, /*{{{*/ +void RredMethod::copyLinesFromFileToFile(FILE *fin, FILE *fout, unsigned int lines,/*{{{*/ Hashes *hash, char *buffer) const { - size_t written = fwrite(buffer, 1, strlen(buffer), fout); - hash->Add((unsigned char*)buffer, written); - while (strlen(buffer) == (BUF_SIZE - 1) && - buffer[BUF_SIZE - 2] != '\n') { - fgets(buffer, BUF_SIZE, fin); - written = fwrite(buffer, 1, strlen(buffer), fout); - hash->Add((unsigned char*)buffer, written); + while (0 < lines--) { + do { + fgets(buffer, BUF_SIZE, fin); + size_t const written = fwrite(buffer, 1, strlen(buffer), fout); + hash->Add((unsigned char*)buffer, written); + } while (strlen(buffer) == (BUF_SIZE - 1) && + buffer[BUF_SIZE - 2] != '\n'); } } /*}}}*/ -- cgit v1.2.3 From 1a37c619614a9260968f1703d7c6aae5a9a139f7 Mon Sep 17 00:00:00 2001 From: David Kalnischkies Date: Fri, 6 Nov 2009 09:43:31 +0100 Subject: Finally adope the patch from Morten Hustveit to be able to optional use mmaps and iovec to increase patch speed - but as this increase memory usage we can always fall back to the "old" method which doesn't relay on mmaps. --- methods/rred.cc | 261 ++++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 243 insertions(+), 18 deletions(-) diff --git a/methods/rred.cc b/methods/rred.cc index 9abb1b89c..7236efd03 100644 --- a/methods/rred.cc +++ b/methods/rred.cc @@ -1,11 +1,13 @@ // Includes /*{{{*/ #include +#include #include #include #include #include #include +#include #include #include #include @@ -29,7 +31,7 @@ class RredMethod : public pkgAcqMethod { // the supported ed commands enum Mode {MODE_CHANGED='c', MODE_DELETED='d', MODE_ADDED='a'}; // return values - enum State {ED_OK=0, ED_ORDERING=1, ED_PARSER=2, ED_FAILURE=3}; + enum State {ED_OK, ED_ORDERING, ED_PARSER, ED_FAILURE, MMAP_FAILED}; State applyFile(FILE *ed_cmds, FILE *in_file, FILE *out_file, unsigned long &line, char *buffer, Hashes *hash) const; @@ -37,7 +39,8 @@ class RredMethod : public pkgAcqMethod { void copyLinesFromFileToFile(FILE *fin, FILE *fout, unsigned int lines, Hashes *hash, char *buffer) const; - State patchFile(FILE *ed_cmds, FILE *in_file, FILE *out_file, Hashes *hash) const; + State patchFile(FileFd &Patch, FileFd &From, FileFd &out_file, Hashes *hash) const; + State patchMMap(FileFd &Patch, FileFd &From, FileFd &out_file, Hashes *hash) const; protected: // the methods main method @@ -67,7 +70,7 @@ RredMethod::State RredMethod::applyFile(FILE *ed_cmds, FILE *in_file, FILE *out_ // get the current command and parse it if (fgets(buffer, BUF_SIZE, ed_cmds) == NULL) { if (Debug == true) - std::clog << "rred: encounter end of file - we can start patching now."; + std::clog << "rred: encounter end of file - we can start patching now." << std::endl; line = 0; return ED_OK; } @@ -189,22 +192,240 @@ void RredMethod::ignoreLineInFile(FILE *fin, char *buffer) const { /*{{{*/ } } /*}}}*/ -RredMethod::State RredMethod::patchFile(FILE *ed_cmds, FILE *in_file, FILE *out_file, /*{{{*/ - Hashes *hash) const { +RredMethod::State RredMethod::patchFile(FileFd &Patch, FileFd &From, /*{{{*/ + FileFd &out_file, Hashes *hash) const { char buffer[BUF_SIZE]; - + FILE* fFrom = fdopen(From.Fd(), "r"); + FILE* fPatch = fdopen(Patch.Fd(), "r"); + FILE* fTo = fdopen(out_file.Fd(), "w"); + /* we do a tail recursion to read the commands in the right order */ unsigned long line = -1; // assign highest possible value - State result = applyFile(ed_cmds, in_file, out_file, line, buffer, hash); + State const result = applyFile(fPatch, fFrom, fTo, line, buffer, hash); /* read the rest from infile */ if (result == ED_OK) { - while (fgets(buffer, BUF_SIZE, in_file) != NULL) { - size_t const written = fwrite(buffer, 1, strlen(buffer), out_file); + while (fgets(buffer, BUF_SIZE, fFrom) != NULL) { + size_t const written = fwrite(buffer, 1, strlen(buffer), fTo); hash->Add((unsigned char*)buffer, written); } + fflush(fTo); } return result; +} + /*}}}*/ +struct EdCommand { /*{{{*/ + size_t data_start; + size_t data_end; + size_t data_lines; + size_t first_line; + size_t last_line; + char type; +}; +#define IOV_COUNT 1024 /* Don't really want IOV_MAX since it can be arbitrarily large */ + /*}}}*/ +RredMethod::State RredMethod::patchMMap(FileFd &Patch, FileFd &From, /*{{{*/ + FileFd &out_file, Hashes *hash) const { +#ifdef _POSIX_MAPPED_FILES + MMap ed_cmds(Patch, MMap::ReadOnly); + MMap in_file(From, MMap::ReadOnly); + FILE* fTo = fdopen(out_file.Fd(), "w"); + + if (ed_cmds.Size() == 0 || in_file.Size() == 0) + return MMAP_FAILED; + + EdCommand* commands = 0; + size_t command_count = 0; + size_t command_alloc = 0; + + const char* begin = (char*) ed_cmds.Data(); + const char* end = begin; + const char* ed_end = (char*) ed_cmds.Data() + ed_cmds.Size(); + + const char* input = (char*) in_file.Data(); + const char* input_end = (char*) in_file.Data() + in_file.Size(); + + size_t i; + + /* 1. Parse entire script. It is executed in reverse order, so we cather it + * in the `commands' buffer first + */ + + for(;;) { + EdCommand cmd; + cmd.data_start = 0; + cmd.data_end = 0; + + while(begin != ed_end && *begin == '\n') + ++begin; + while(end != ed_end && *end != '\n') + ++end; + if(end == ed_end && begin == end) + break; + + /* Determine command range */ + const char* tmp = begin; + + for(;;) { + /* atoll is safe despite lacking NUL-termination; we know there's an + * alphabetic character at end[-1] + */ + if(tmp == end) { + cmd.first_line = atol(begin); + cmd.last_line = cmd.first_line; + break; + } + if(*tmp == ',') { + cmd.first_line = atol(begin); + cmd.last_line = atol(tmp + 1); + break; + } + ++tmp; + } + + // which command to execute on this line(s)? + switch (end[-1]) { + case MODE_CHANGED: + if (Debug == true) + std::clog << "Change from line " << cmd.first_line << " to " << cmd.last_line << std::endl; + break; + case MODE_ADDED: + if (Debug == true) + std::clog << "Insert after line " << cmd.first_line << std::endl; + break; + case MODE_DELETED: + if (Debug == true) + std::clog << "Delete from line " << cmd.first_line << " to " << cmd.last_line << std::endl; + break; + default: + _error->Error("rred: Unknown ed command '%c'. Abort.", end[-1]); + free(commands); + return ED_PARSER; + } + cmd.type = end[-1]; + + /* Determine the size of the inserted text, so we don't have to scan this + * text again later. + */ + begin = end + 1; + end = begin; + cmd.data_lines = 0; + + if(cmd.type == MODE_ADDED || cmd.type == MODE_CHANGED) { + cmd.data_start = begin - (char*) ed_cmds.Data(); + while(end != ed_end) { + if(*end == '\n') { + if(end[-1] == '.' && end[-2] == '\n') + break; + ++cmd.data_lines; + } + ++end; + } + cmd.data_end = end - (char*) ed_cmds.Data() - 1; + begin = end + 1; + end = begin; + } + if(command_count == command_alloc) { + command_alloc = (command_alloc + 64) * 3 / 2; + commands = (EdCommand*) realloc(commands, command_alloc * sizeof(EdCommand)); + } + commands[command_count++] = cmd; + } + + struct iovec* iov = new struct iovec[IOV_COUNT]; + size_t iov_size = 0; + + size_t amount, remaining; + size_t line = 1; + EdCommand* cmd; + + /* 2. Execute script. We gather writes in a `struct iov' array, and flush + * using writev to minimize the number of system calls. Data is read + * directly from the memory mappings of the input file and the script. + */ + + for(i = command_count; i-- > 0; ) { + cmd = &commands[i]; + if(cmd->type == MODE_ADDED) + amount = cmd->first_line + 1; + else + amount = cmd->first_line; + + if(line < amount) { + begin = input; + while(line != amount) { + input = (const char*) memchr(input, '\n', input_end - input); + if(!input) + break; + ++line; + ++input; + } + + iov[iov_size].iov_base = (void*) begin; + iov[iov_size].iov_len = input - begin; + hash->Add((const unsigned char*) begin, input - begin); + + if(++iov_size == IOV_COUNT) { + writev(out_file.Fd(), iov, IOV_COUNT); + iov_size = 0; + } + } + + if(cmd->type == MODE_DELETED || cmd->type == MODE_CHANGED) { + remaining = (cmd->last_line - cmd->first_line) + 1; + line += remaining; + while(remaining) { + input = (const char*) memchr(input, '\n', input_end - input); + if(!input) + break; + --remaining; + ++input; + } + } + + if(cmd->type == MODE_CHANGED || cmd->type == MODE_ADDED) { + if(cmd->data_end != cmd->data_start) { + iov[iov_size].iov_base = (void*) ((char*)ed_cmds.Data() + cmd->data_start); + iov[iov_size].iov_len = cmd->data_end - cmd->data_start; + hash->Add((const unsigned char*) ((char*)ed_cmds.Data() + cmd->data_start), + iov[iov_size].iov_len); + + if(++iov_size == IOV_COUNT) { + writev(out_file.Fd(), iov, IOV_COUNT); + iov_size = 0; + } + } + } + } + + if(input != input_end) { + iov[iov_size].iov_base = (void*) input; + iov[iov_size].iov_len = input_end - input; + hash->Add((const unsigned char*) input, input_end - input); + ++iov_size; + } + + if(iov_size) { + writev(out_file.Fd(), iov, iov_size); + iov_size = 0; + } + + for(i = 0; i < iov_size; i += IOV_COUNT) { + if(iov_size - i < IOV_COUNT) + writev(out_file.Fd(), iov + i, iov_size - i); + else + writev(out_file.Fd(), iov + i, IOV_COUNT); + } + + delete [] iov; + free(commands); + + fflush(fTo); + + return ED_OK; +#else + return MMAP_FAILED; +#endif } /*}}}*/ bool RredMethod::Fetch(FetchItem *Itm) /*{{{*/ @@ -234,19 +455,23 @@ bool RredMethod::Fetch(FetchItem *Itm) /*{{{*/ return false; Hashes Hash; - FILE* fFrom = fdopen(From.Fd(), "r"); - FILE* fPatch = fdopen(Patch.Fd(), "r"); - FILE* fTo = fdopen(To.Fd(), "w"); // now do the actual patching - if (patchFile(fPatch, fFrom, fTo, &Hash) != ED_OK) { - _error->Errno("rred", _("Could not patch file")); - return false; + State const result = patchMMap(Patch, From, To, &Hash); + if (result == MMAP_FAILED) { + // retry with patchFile + lseek(Patch.Fd(), 0, SEEK_SET); + lseek(From.Fd(), 0, SEEK_SET); + To.Open(Itm->DestFile,FileFd::WriteEmpty); + if (_error->PendingError() == true) + return false; + if (patchFile(Patch, From, To, &Hash) != ED_OK) { + return _error->Errno("rred", _("Could not patch file %s"), Path.append(" (1)").c_str()); + } + } else if (result != ED_OK) { + return _error->Errno("rred", _("Could not patch file %s"), Path.append(" (2)").c_str()); } // write out the result - fflush(fFrom); - fflush(fPatch); - fflush(fTo); From.Close(); Patch.Close(); To.Close(); -- cgit v1.2.3 From d6c4a9764d052c9755ab934c97c7a84c48ebd618 Mon Sep 17 00:00:00 2001 From: David Kalnischkies Date: Mon, 9 Nov 2009 10:17:19 +0100 Subject: extent the mmap to be able to handle currently not implemented (but planed) growable mmaps --- apt-pkg/contrib/mmap.cc | 127 ++++++++++++++++++++++++++++++++---------------- apt-pkg/contrib/mmap.h | 10 ++-- 2 files changed, 92 insertions(+), 45 deletions(-) diff --git a/apt-pkg/contrib/mmap.cc b/apt-pkg/contrib/mmap.cc index 4d5fcf71e..f440f9489 100644 --- a/apt-pkg/contrib/mmap.cc +++ b/apt-pkg/contrib/mmap.cc @@ -140,8 +140,10 @@ bool MMap::Sync(unsigned long Start,unsigned long Stop) // DynamicMMap::DynamicMMap - Constructor /*{{{*/ // --------------------------------------------------------------------- /* */ -DynamicMMap::DynamicMMap(FileFd &F,unsigned long Flags,unsigned long WorkSpace) : - MMap(F,Flags | NoImmMap), Fd(&F), WorkSpace(WorkSpace) +DynamicMMap::DynamicMMap(FileFd &F,unsigned long Flags,unsigned long const &Workspace, + unsigned long const &Grow, unsigned long const &Limit) : + MMap(F,Flags | NoImmMap), Fd(&F), WorkSpace(Workspace), + GrowFactor(Grow), Limit(Limit) { if (_error->PendingError() == true) return; @@ -165,32 +167,48 @@ DynamicMMap::DynamicMMap(FileFd &F,unsigned long Flags,unsigned long WorkSpace) /* We try here to use mmap to reserve some space - this is much more cooler than the fallback solution to simply allocate a char array and could come in handy later than we are able to grow such an mmap */ -DynamicMMap::DynamicMMap(unsigned long Flags,unsigned long WorkSpace) : - MMap(Flags | NoImmMap | UnMapped), Fd(0), WorkSpace(WorkSpace) +DynamicMMap::DynamicMMap(unsigned long Flags,unsigned long const &WorkSpace, + unsigned long const &Grow, unsigned long const &Limit) : + MMap(Flags | NoImmMap | UnMapped), Fd(0), WorkSpace(WorkSpace), + GrowFactor(Grow), Limit(Limit) { - if (_error->PendingError() == true) - return; + if (_error->PendingError() == true) + return; + + // disable Moveable if we don't grow + if (Grow == 0) + Flags &= ~Moveable; + +#ifndef __linux__ + // kfreebsd doesn't have mremap, so we use the fallback + if ((Flags & Moveable) == Moveable) + Flags |= Fallback; +#endif #ifdef _POSIX_MAPPED_FILES - // Set the permissions. - int Prot = PROT_READ; - int Map = MAP_PRIVATE | MAP_ANONYMOUS; - if ((Flags & ReadOnly) != ReadOnly) - Prot |= PROT_WRITE; - if ((Flags & Public) == Public) - Map = MAP_SHARED | MAP_ANONYMOUS; + if ((Flags & Fallback) != Fallback) { + // Set the permissions. + int Prot = PROT_READ; + int Map = MAP_PRIVATE | MAP_ANONYMOUS; + if ((Flags & ReadOnly) != ReadOnly) + Prot |= PROT_WRITE; + if ((Flags & Public) == Public) + Map = MAP_SHARED | MAP_ANONYMOUS; - // use anonymous mmap() to get the memory - Base = (unsigned char*) mmap(0, WorkSpace, Prot, Map, -1, 0); + // use anonymous mmap() to get the memory + Base = (unsigned char*) mmap(0, WorkSpace, Prot, Map, -1, 0); - if(Base == MAP_FAILED) - _error->Errno("DynamicMMap",_("Couldn't make mmap of %lu bytes"),WorkSpace); -#else - // fallback to a static allocated space - Base = new unsigned char[WorkSpace]; - memset(Base,0,WorkSpace); + if(Base == MAP_FAILED) + _error->Errno("DynamicMMap",_("Couldn't make mmap of %lu bytes"),WorkSpace); + + iSize = 0; + return; + } #endif - iSize = 0; + // fallback to a static allocated space + Base = new unsigned char[WorkSpace]; + memset(Base,0,WorkSpace); + iSize = 0; } /*}}}*/ // DynamicMMap::~DynamicMMap - Destructor /*{{{*/ @@ -311,30 +329,55 @@ unsigned long DynamicMMap::WriteString(const char *String, /*}}}*/ // DynamicMMap::Grow - Grow the mmap /*{{{*/ // --------------------------------------------------------------------- -/* This method will try to grow the mmap we currently use. This doesn't - work most of the time because we can't move the mmap around in the - memory for now as this would require to adjust quite a lot of pointers - but why we should not at least try to grow it before we give up? */ -bool DynamicMMap::Grow() -{ -#if defined(_POSIX_MAPPED_FILES) && defined(__linux__) - unsigned long newSize = WorkSpace + 1024*1024; +/* This method is a wrapper around different methods to (try to) grow + a mmap (or our char[]-fallback). Encounterable environments: + 1. Moveable + !Fallback + linux -> mremap with MREMAP_MAYMOVE + 2. Moveable + !Fallback + !linux -> not possible (forbidden by constructor) + 3. Moveable + Fallback -> realloc + 4. !Moveable + !Fallback + linux -> mremap alone - which will fail in 99,9% + 5. !Moveable + !Fallback + !linux -> not possible (forbidden by constructor) + 6. !Moveable + Fallback -> not possible + [ While Moveable and Fallback stands for the equally named flags and + "linux" indicates a linux kernel instead of a freebsd kernel. ] + So what you can see here is, that a MMAP which want to be growable need + to be moveable to have a real chance but that this method will at least try + the nearly impossible 4 to grow it before it finally give up: Never say never. */ +bool DynamicMMap::Grow() { + if (Limit != 0 && WorkSpace >= Limit) + return _error->Error(_("The size of a MMap has already reached the defined limit of %lu bytes," + "abort the try to grow the MMap."), Limit); - if(Fd != 0) - { - Fd->Seek(newSize - 1); - char C = 0; - Fd->Write(&C,sizeof(C)); - } + unsigned long const newSize = WorkSpace + 1024*1024; - Base = mremap(Base, WorkSpace, newSize, 0); - if(Base == MAP_FAILED) - return false; + if(Fd != 0) { + Fd->Seek(newSize - 1); + char C = 0; + Fd->Write(&C,sizeof(C)); + } + if ((Flags & Fallback) != Fallback) { +#if defined(_POSIX_MAPPED_FILES) && defined(__linux__) + #ifdef MREMAP_MAYMOVE + if ((Flags & Moveable) == Moveable) + Base = mremap(Base, WorkSpace, newSize, MREMAP_MAYMOVE); + else + #endif + Base = mremap(Base, WorkSpace, newSize, 0); - WorkSpace = newSize; - return true; + if(Base == MAP_FAILED) + return false; #else - return false; + return false; #endif + } else { + if ((Flags & Moveable) != Moveable) + return false; + + Base = realloc(Base, newSize); + if (Base == NULL) + return false; + } + + WorkSpace = newSize; + return true; } /*}}}*/ diff --git a/apt-pkg/contrib/mmap.h b/apt-pkg/contrib/mmap.h index bde62217d..cd2b15ba2 100644 --- a/apt-pkg/contrib/mmap.h +++ b/apt-pkg/contrib/mmap.h @@ -50,7 +50,7 @@ class MMap public: enum OpenFlags {NoImmMap = (1<<0),Public = (1<<1),ReadOnly = (1<<2), - UnMapped = (1<<3)}; + UnMapped = (1<<3), Moveable = (1<<4), Fallback = (1 << 5)}; // Simple accessors inline operator void *() {return Base;}; @@ -82,6 +82,8 @@ class DynamicMMap : public MMap FileFd *Fd; unsigned long WorkSpace; + unsigned long const GrowFactor; + unsigned long const Limit; Pool *Pools; unsigned int PoolCount; @@ -96,8 +98,10 @@ class DynamicMMap : public MMap inline unsigned long WriteString(const string &S) {return WriteString(S.c_str(),S.length());}; void UsePools(Pool &P,unsigned int Count) {Pools = &P; PoolCount = Count;}; - DynamicMMap(FileFd &F,unsigned long Flags,unsigned long WorkSpace = 2*1024*1024); - DynamicMMap(unsigned long Flags,unsigned long WorkSpace = 2*1024*1024); + DynamicMMap(FileFd &F,unsigned long Flags,unsigned long const &WorkSpace = 2*1024*1024, + unsigned long const &Grow = 1024*1024, unsigned long const &Limit = 0); + DynamicMMap(unsigned long Flags,unsigned long const &WorkSpace = 2*1024*1024, + unsigned long const &Grow = 1024*1024, unsigned long const &Limit = 0); virtual ~DynamicMMap(); }; -- cgit v1.2.3 From cc0f9c8e1044335f167b6b87d71dc91dd2f3501b Mon Sep 17 00:00:00 2001 From: David Kalnischkies Date: Thu, 19 Nov 2009 23:55:33 +0100 Subject: fix argument check for the rred method --- methods/rred.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/methods/rred.cc b/methods/rred.cc index 7236efd03..c2d8eb5cf 100644 --- a/methods/rred.cc +++ b/methods/rred.cc @@ -530,7 +530,7 @@ public: * and will write the result to "Testfile.result". */ int main(int argc, char *argv[]) { - if (argc == 0) { + if (argc <= 1) { RredMethod Mth; return Mth.Run(); } else { -- cgit v1.2.3 From baef9c95ed1b03e13ab6f61fc2e166169d7710a8 Mon Sep 17 00:00:00 2001 From: David Kalnischkies Date: Wed, 25 Nov 2009 16:42:10 +0100 Subject: cleanup code a bit more and expand error messages --- methods/rred.cc | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/methods/rred.cc b/methods/rred.cc index c2d8eb5cf..262c78cab 100644 --- a/methods/rred.cc +++ b/methods/rred.cc @@ -229,7 +229,6 @@ RredMethod::State RredMethod::patchMMap(FileFd &Patch, FileFd &From, /*{{{*/ #ifdef _POSIX_MAPPED_FILES MMap ed_cmds(Patch, MMap::ReadOnly); MMap in_file(From, MMap::ReadOnly); - FILE* fTo = fdopen(out_file.Fd(), "w"); if (ed_cmds.Size() == 0 || in_file.Size() == 0) return MMAP_FAILED; @@ -420,8 +419,6 @@ RredMethod::State RredMethod::patchMMap(FileFd &Patch, FileFd &From, /*{{{*/ delete [] iov; free(commands); - fflush(fTo); - return ED_OK; #else return MMAP_FAILED; @@ -465,10 +462,14 @@ bool RredMethod::Fetch(FetchItem *Itm) /*{{{*/ if (_error->PendingError() == true) return false; if (patchFile(Patch, From, To, &Hash) != ED_OK) { - return _error->Errno("rred", _("Could not patch file %s"), Path.append(" (1)").c_str()); + return _error->WarningE("rred", _("Could not patch %s with mmap and with file operation usage - the patch seems to be corrupt."), Path.c_str()); + } else if (Debug == true) { + std::clog << "rred: finished file patching of " << Path << " after mmap failed." << std::endl; } } else if (result != ED_OK) { - return _error->Errno("rred", _("Could not patch file %s"), Path.append(" (2)").c_str()); + return _error->Errno("rred", _("Could not patch %s with mmap (but no mmap specific fail) - the patch seems to be corrupt."), Path.c_str()); + } else if (Debug == true) { + std::clog << "rred: finished mmap patching of " << Path << std::endl; } // write out the result @@ -491,12 +492,10 @@ bool RredMethod::Fetch(FetchItem *Itm) /*{{{*/ return _error->Errno("stat",_("Failed to stat")); // return done - if (Itm->Uri.empty() == true) { - Res.LastModified = Buf.st_mtime; - Res.Size = Buf.st_size; - Res.TakeHashes(Hash); - URIDone(Res); - } + Res.LastModified = Buf.st_mtime; + Res.Size = Buf.st_size; + Res.TakeHashes(Hash); + URIDone(Res); return true; } -- cgit v1.2.3