summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDavid Kalnischkies <kalnischkies@gmail.com>2009-11-06 09:43:31 +0100
committerDavid Kalnischkies <kalnischkies@gmail.com>2009-11-06 09:43:31 +0100
commit1a37c619614a9260968f1703d7c6aae5a9a139f7 (patch)
tree7bfe9019da2ea0444f58d1000d1c94d5f4d18174
parent66093f029b4dd94e109c8cc7d9d4bfbc6f0f4e90 (diff)
Finally adope the patch from Morten Hustveit <morten@debian.org> to be
able to optional use mmaps and iovec to increase patch speed - but as this increase memory usage we can always fall back to the "old" method which doesn't relay on mmaps.
-rw-r--r--methods/rred.cc261
1 files changed, 243 insertions, 18 deletions
diff --git a/methods/rred.cc b/methods/rred.cc
index 9abb1b89c..7236efd03 100644
--- a/methods/rred.cc
+++ b/methods/rred.cc
@@ -1,11 +1,13 @@
// Includes /*{{{*/
#include <apt-pkg/fileutl.h>
+#include <apt-pkg/mmap.h>
#include <apt-pkg/error.h>
#include <apt-pkg/acquire-method.h>
#include <apt-pkg/strutl.h>
#include <apt-pkg/hashes.h>
#include <sys/stat.h>
+#include <sys/uio.h>
#include <unistd.h>
#include <utime.h>
#include <stdio.h>
@@ -29,7 +31,7 @@ class RredMethod : public pkgAcqMethod {
// the supported ed commands
enum Mode {MODE_CHANGED='c', MODE_DELETED='d', MODE_ADDED='a'};
// return values
- enum State {ED_OK=0, ED_ORDERING=1, ED_PARSER=2, ED_FAILURE=3};
+ enum State {ED_OK, ED_ORDERING, ED_PARSER, ED_FAILURE, MMAP_FAILED};
State applyFile(FILE *ed_cmds, FILE *in_file, FILE *out_file,
unsigned long &line, char *buffer, Hashes *hash) const;
@@ -37,7 +39,8 @@ class RredMethod : public pkgAcqMethod {
void copyLinesFromFileToFile(FILE *fin, FILE *fout, unsigned int lines,
Hashes *hash, char *buffer) const;
- State patchFile(FILE *ed_cmds, FILE *in_file, FILE *out_file, Hashes *hash) const;
+ State patchFile(FileFd &Patch, FileFd &From, FileFd &out_file, Hashes *hash) const;
+ State patchMMap(FileFd &Patch, FileFd &From, FileFd &out_file, Hashes *hash) const;
protected:
// the methods main method
@@ -67,7 +70,7 @@ RredMethod::State RredMethod::applyFile(FILE *ed_cmds, FILE *in_file, FILE *out_
// get the current command and parse it
if (fgets(buffer, BUF_SIZE, ed_cmds) == NULL) {
if (Debug == true)
- std::clog << "rred: encounter end of file - we can start patching now.";
+ std::clog << "rred: encounter end of file - we can start patching now." << std::endl;
line = 0;
return ED_OK;
}
@@ -189,24 +192,242 @@ void RredMethod::ignoreLineInFile(FILE *fin, char *buffer) const { /*{{{*/
}
}
/*}}}*/
-RredMethod::State RredMethod::patchFile(FILE *ed_cmds, FILE *in_file, FILE *out_file, /*{{{*/
- Hashes *hash) const {
+RredMethod::State RredMethod::patchFile(FileFd &Patch, FileFd &From, /*{{{*/
+ FileFd &out_file, Hashes *hash) const {
char buffer[BUF_SIZE];
-
+ FILE* fFrom = fdopen(From.Fd(), "r");
+ FILE* fPatch = fdopen(Patch.Fd(), "r");
+ FILE* fTo = fdopen(out_file.Fd(), "w");
+
/* we do a tail recursion to read the commands in the right order */
unsigned long line = -1; // assign highest possible value
- State result = applyFile(ed_cmds, in_file, out_file, line, buffer, hash);
+ State const result = applyFile(fPatch, fFrom, fTo, line, buffer, hash);
/* read the rest from infile */
if (result == ED_OK) {
- while (fgets(buffer, BUF_SIZE, in_file) != NULL) {
- size_t const written = fwrite(buffer, 1, strlen(buffer), out_file);
+ while (fgets(buffer, BUF_SIZE, fFrom) != NULL) {
+ size_t const written = fwrite(buffer, 1, strlen(buffer), fTo);
hash->Add((unsigned char*)buffer, written);
}
+ fflush(fTo);
}
return result;
}
/*}}}*/
+struct EdCommand { /*{{{*/
+ size_t data_start;
+ size_t data_end;
+ size_t data_lines;
+ size_t first_line;
+ size_t last_line;
+ char type;
+};
+#define IOV_COUNT 1024 /* Don't really want IOV_MAX since it can be arbitrarily large */
+ /*}}}*/
+RredMethod::State RredMethod::patchMMap(FileFd &Patch, FileFd &From, /*{{{*/
+ FileFd &out_file, Hashes *hash) const {
+#ifdef _POSIX_MAPPED_FILES
+ MMap ed_cmds(Patch, MMap::ReadOnly);
+ MMap in_file(From, MMap::ReadOnly);
+ FILE* fTo = fdopen(out_file.Fd(), "w");
+
+ if (ed_cmds.Size() == 0 || in_file.Size() == 0)
+ return MMAP_FAILED;
+
+ EdCommand* commands = 0;
+ size_t command_count = 0;
+ size_t command_alloc = 0;
+
+ const char* begin = (char*) ed_cmds.Data();
+ const char* end = begin;
+ const char* ed_end = (char*) ed_cmds.Data() + ed_cmds.Size();
+
+ const char* input = (char*) in_file.Data();
+ const char* input_end = (char*) in_file.Data() + in_file.Size();
+
+ size_t i;
+
+ /* 1. Parse entire script. It is executed in reverse order, so we cather it
+ * in the `commands' buffer first
+ */
+
+ for(;;) {
+ EdCommand cmd;
+ cmd.data_start = 0;
+ cmd.data_end = 0;
+
+ while(begin != ed_end && *begin == '\n')
+ ++begin;
+ while(end != ed_end && *end != '\n')
+ ++end;
+ if(end == ed_end && begin == end)
+ break;
+
+ /* Determine command range */
+ const char* tmp = begin;
+
+ for(;;) {
+ /* atoll is safe despite lacking NUL-termination; we know there's an
+ * alphabetic character at end[-1]
+ */
+ if(tmp == end) {
+ cmd.first_line = atol(begin);
+ cmd.last_line = cmd.first_line;
+ break;
+ }
+ if(*tmp == ',') {
+ cmd.first_line = atol(begin);
+ cmd.last_line = atol(tmp + 1);
+ break;
+ }
+ ++tmp;
+ }
+
+ // which command to execute on this line(s)?
+ switch (end[-1]) {
+ case MODE_CHANGED:
+ if (Debug == true)
+ std::clog << "Change from line " << cmd.first_line << " to " << cmd.last_line << std::endl;
+ break;
+ case MODE_ADDED:
+ if (Debug == true)
+ std::clog << "Insert after line " << cmd.first_line << std::endl;
+ break;
+ case MODE_DELETED:
+ if (Debug == true)
+ std::clog << "Delete from line " << cmd.first_line << " to " << cmd.last_line << std::endl;
+ break;
+ default:
+ _error->Error("rred: Unknown ed command '%c'. Abort.", end[-1]);
+ free(commands);
+ return ED_PARSER;
+ }
+ cmd.type = end[-1];
+
+ /* Determine the size of the inserted text, so we don't have to scan this
+ * text again later.
+ */
+ begin = end + 1;
+ end = begin;
+ cmd.data_lines = 0;
+
+ if(cmd.type == MODE_ADDED || cmd.type == MODE_CHANGED) {
+ cmd.data_start = begin - (char*) ed_cmds.Data();
+ while(end != ed_end) {
+ if(*end == '\n') {
+ if(end[-1] == '.' && end[-2] == '\n')
+ break;
+ ++cmd.data_lines;
+ }
+ ++end;
+ }
+ cmd.data_end = end - (char*) ed_cmds.Data() - 1;
+ begin = end + 1;
+ end = begin;
+ }
+ if(command_count == command_alloc) {
+ command_alloc = (command_alloc + 64) * 3 / 2;
+ commands = (EdCommand*) realloc(commands, command_alloc * sizeof(EdCommand));
+ }
+ commands[command_count++] = cmd;
+ }
+
+ struct iovec* iov = new struct iovec[IOV_COUNT];
+ size_t iov_size = 0;
+
+ size_t amount, remaining;
+ size_t line = 1;
+ EdCommand* cmd;
+
+ /* 2. Execute script. We gather writes in a `struct iov' array, and flush
+ * using writev to minimize the number of system calls. Data is read
+ * directly from the memory mappings of the input file and the script.
+ */
+
+ for(i = command_count; i-- > 0; ) {
+ cmd = &commands[i];
+ if(cmd->type == MODE_ADDED)
+ amount = cmd->first_line + 1;
+ else
+ amount = cmd->first_line;
+
+ if(line < amount) {
+ begin = input;
+ while(line != amount) {
+ input = (const char*) memchr(input, '\n', input_end - input);
+ if(!input)
+ break;
+ ++line;
+ ++input;
+ }
+
+ iov[iov_size].iov_base = (void*) begin;
+ iov[iov_size].iov_len = input - begin;
+ hash->Add((const unsigned char*) begin, input - begin);
+
+ if(++iov_size == IOV_COUNT) {
+ writev(out_file.Fd(), iov, IOV_COUNT);
+ iov_size = 0;
+ }
+ }
+
+ if(cmd->type == MODE_DELETED || cmd->type == MODE_CHANGED) {
+ remaining = (cmd->last_line - cmd->first_line) + 1;
+ line += remaining;
+ while(remaining) {
+ input = (const char*) memchr(input, '\n', input_end - input);
+ if(!input)
+ break;
+ --remaining;
+ ++input;
+ }
+ }
+
+ if(cmd->type == MODE_CHANGED || cmd->type == MODE_ADDED) {
+ if(cmd->data_end != cmd->data_start) {
+ iov[iov_size].iov_base = (void*) ((char*)ed_cmds.Data() + cmd->data_start);
+ iov[iov_size].iov_len = cmd->data_end - cmd->data_start;
+ hash->Add((const unsigned char*) ((char*)ed_cmds.Data() + cmd->data_start),
+ iov[iov_size].iov_len);
+
+ if(++iov_size == IOV_COUNT) {
+ writev(out_file.Fd(), iov, IOV_COUNT);
+ iov_size = 0;
+ }
+ }
+ }
+ }
+
+ if(input != input_end) {
+ iov[iov_size].iov_base = (void*) input;
+ iov[iov_size].iov_len = input_end - input;
+ hash->Add((const unsigned char*) input, input_end - input);
+ ++iov_size;
+ }
+
+ if(iov_size) {
+ writev(out_file.Fd(), iov, iov_size);
+ iov_size = 0;
+ }
+
+ for(i = 0; i < iov_size; i += IOV_COUNT) {
+ if(iov_size - i < IOV_COUNT)
+ writev(out_file.Fd(), iov + i, iov_size - i);
+ else
+ writev(out_file.Fd(), iov + i, IOV_COUNT);
+ }
+
+ delete [] iov;
+ free(commands);
+
+ fflush(fTo);
+
+ return ED_OK;
+#else
+ return MMAP_FAILED;
+#endif
+}
+ /*}}}*/
bool RredMethod::Fetch(FetchItem *Itm) /*{{{*/
{
Debug = _config->FindB("Debug::pkgAcquire::RRed", false);
@@ -234,19 +455,23 @@ bool RredMethod::Fetch(FetchItem *Itm) /*{{{*/
return false;
Hashes Hash;
- FILE* fFrom = fdopen(From.Fd(), "r");
- FILE* fPatch = fdopen(Patch.Fd(), "r");
- FILE* fTo = fdopen(To.Fd(), "w");
// now do the actual patching
- if (patchFile(fPatch, fFrom, fTo, &Hash) != ED_OK) {
- _error->Errno("rred", _("Could not patch file"));
- return false;
+ State const result = patchMMap(Patch, From, To, &Hash);
+ if (result == MMAP_FAILED) {
+ // retry with patchFile
+ lseek(Patch.Fd(), 0, SEEK_SET);
+ lseek(From.Fd(), 0, SEEK_SET);
+ To.Open(Itm->DestFile,FileFd::WriteEmpty);
+ if (_error->PendingError() == true)
+ return false;
+ if (patchFile(Patch, From, To, &Hash) != ED_OK) {
+ return _error->Errno("rred", _("Could not patch file %s"), Path.append(" (1)").c_str());
+ }
+ } else if (result != ED_OK) {
+ return _error->Errno("rred", _("Could not patch file %s"), Path.append(" (2)").c_str());
}
// write out the result
- fflush(fFrom);
- fflush(fPatch);
- fflush(fTo);
From.Close();
Patch.Close();
To.Close();