summaryrefslogtreecommitdiff
path: root/apt-pkg/acquire-item.cc
diff options
context:
space:
mode:
authorDavid Kalnischkies <david@kalnischkies.de>2013-12-06 12:17:48 +0100
committerDavid Kalnischkies <david@kalnischkies.de>2013-12-13 11:59:49 +0100
commit47d2bc78adb49f3182f9a3d7a4baea363e772d64 (patch)
tree0536df9928330c35d98a8c2a41fb24405cae9ba1 /apt-pkg/acquire-item.cc
parent9d39208af5c8c72d3886c70d603921cf427056ee (diff)
implement POC client-side merging of pdiffs via apt-file
The idea of pdiffs is to avoid downloading the hole file by patching the existing index. This works very well, but becomes slow if a lot of patches needs to be applied to reconstruct an up-to-date index and in recent years more and more dinstall (or similar) runs are executed creating more and more pdiffs in the same amount of time, so pdiffs became less useful. The solution is simple: Reduce the amount of patches (which are very small) which need to be applied on top of the index we have available (which is usually pretty big). This can be done in two ways: Either merge the patches on the server-side so that the client has to download only one patch or the patches are all downloaded and merged on the client-side. The first needs a client who is doing one step at a time who can also skip patches if it needs (APT supports this for a long time now). The later is implemented by this commit, but depends on the server NOT merging the patches and the patches being in a strict order in which no patch is skipped. This is traditionally the case for dak, but other repository creators support merging – e.g. reprepro (which helpfully adds a flag indicating that the patches are merged). To support both or even mixes a client needs more information which isn't available for now. This POC uses the external diffindex-rred included in apt-file to do the heavy lifting of merging & applying all patches in one pass, hence to test this feature apt-file needs to be installed.
Diffstat (limited to 'apt-pkg/acquire-item.cc')
-rw-r--r--apt-pkg/acquire-item.cc151
1 files changed, 148 insertions, 3 deletions
diff --git a/apt-pkg/acquire-item.cc b/apt-pkg/acquire-item.cc
index 009531c2e..b5b9577ef 100644
--- a/apt-pkg/acquire-item.cc
+++ b/apt-pkg/acquire-item.cc
@@ -498,14 +498,42 @@ bool pkgAcqDiffIndex::ParseDiffIndex(string IndexDiffFile) /*{{{*/
}
// we have something, queue the next diff
- if(found)
+ if(found)
{
// queue the diffs
string::size_type const last_space = Description.rfind(" ");
if(last_space != string::npos)
Description.erase(last_space, Description.size()-last_space);
- new pkgAcqIndexDiffs(Owner, RealURI, Description, Desc.ShortDesc,
- ExpectedHash, ServerSha1, available_patches);
+
+ /* decide if we should download patches one by one or in one go:
+ The first is good if the server merges patches, but many don't so client
+ based merging can be attempt in which case the second is better.
+ "bad things" will happen if patches are merged on the server,
+ but client side merging is attempt as well */
+ bool pdiff_merge = _config->FindB("Acquire::PDiffs::Merge", true);
+ if (pdiff_merge == true)
+ {
+ // this perl script is provided by apt-file
+ pdiff_merge = FileExists(_config->FindFile("Dir::Bin::rred", "/usr/bin/diffindex-rred"));
+ if (pdiff_merge == true)
+ {
+ // reprepro adds this flag if it has merged patches on the server
+ std::string const precedence = Tags.FindS("X-Patch-Precedence");
+ pdiff_merge = (precedence != "merged");
+ }
+ }
+
+ if (pdiff_merge == false)
+ new pkgAcqIndexDiffs(Owner, RealURI, Description, Desc.ShortDesc,
+ ExpectedHash, ServerSha1, available_patches);
+ else
+ {
+ std::vector<pkgAcqIndexMergeDiffs*> *diffs = new std::vector<pkgAcqIndexMergeDiffs*>(available_patches.size());
+ for(size_t i = 0; i < available_patches.size(); ++i)
+ (*diffs)[i] = new pkgAcqIndexMergeDiffs(Owner, RealURI, Description, Desc.ShortDesc, ExpectedHash,
+ available_patches[i], diffs);
+ }
+
Complete = false;
Status = StatDone;
Dequeue();
@@ -754,6 +782,123 @@ void pkgAcqIndexDiffs::Done(string Message,unsigned long long Size,string Md5Has
}
}
/*}}}*/
+// AcqIndexMergeDiffs::AcqIndexMergeDiffs - Constructor /*{{{*/
+pkgAcqIndexMergeDiffs::pkgAcqIndexMergeDiffs(pkgAcquire *Owner,
+ string const &URI, string const &URIDesc,
+ string const &ShortDesc, HashString const &ExpectedHash,
+ DiffInfo const &patch,
+ std::vector<pkgAcqIndexMergeDiffs*> const * const allPatches)
+ : Item(Owner), RealURI(URI), ExpectedHash(ExpectedHash),
+ patch(patch),allPatches(allPatches), State(StateFetchDiff)
+{
+
+ DestFile = _config->FindDir("Dir::State::lists") + "partial/";
+ DestFile += URItoFileName(URI);
+
+ Debug = _config->FindB("Debug::pkgAcquire::Diffs",false);
+
+ Description = URIDesc;
+ Desc.Owner = this;
+ Desc.ShortDesc = ShortDesc;
+
+ Desc.URI = string(RealURI) + ".diff/" + patch.file + ".gz";
+ Desc.Description = Description + " " + patch.file + string(".pdiff");
+ DestFile = _config->FindDir("Dir::State::lists") + "partial/";
+ DestFile += URItoFileName(RealURI + ".diff/" + patch.file);
+
+ if(Debug)
+ std::clog << "pkgAcqIndexMergeDiffs: " << Desc.URI << std::endl;
+
+ QueueURI(Desc);
+}
+ /*}}}*/
+void pkgAcqIndexMergeDiffs::Failed(string Message,pkgAcquire::MethodConfig *Cnf)/*{{{*/
+{
+ if(Debug)
+ std::clog << "pkgAcqIndexMergeDiffs failed: " << Desc.URI << " with " << Message << std::endl;
+ Complete = false;
+ Status = StatDone;
+ Dequeue();
+
+ // check if we are the first to fail, otherwise we are done here
+ State = StateDoneDiff;
+ for (std::vector<pkgAcqIndexMergeDiffs *>::const_iterator I = allPatches->begin();
+ I != allPatches->end(); ++I)
+ if ((*I)->State == StateErrorDiff)
+ return;
+
+ // first failure means we should fallback
+ State = StateErrorDiff;
+ std::clog << "Falling back to normal index file aquire" << std::endl;
+ new pkgAcqIndex(Owner, RealURI, Description,Desc.ShortDesc,
+ ExpectedHash);
+}
+ /*}}}*/
+void pkgAcqIndexMergeDiffs::Done(string Message,unsigned long long Size,string Md5Hash, /*{{{*/
+ pkgAcquire::MethodConfig *Cnf)
+{
+ if(Debug)
+ std::clog << "pkgAcqIndexMergeDiffs::Done(): " << Desc.URI << std::endl;
+
+ Item::Done(Message,Size,Md5Hash,Cnf);
+
+ string const FinalFile = _config->FindDir("Dir::State::lists") + URItoFileName(RealURI);
+
+ if (State == StateFetchDiff)
+ {
+ // rred expects the patch as $FinalFile.ed.$patchname.gz
+ Rename(DestFile, FinalFile + ".ed." + patch.file + ".gz");
+
+ // check if this is the last completed diff
+ State = StateDoneDiff;
+ for (std::vector<pkgAcqIndexMergeDiffs *>::const_iterator I = allPatches->begin();
+ I != allPatches->end(); ++I)
+ if ((*I)->State != StateDoneDiff)
+ {
+ if(Debug)
+ std::clog << "Not the last done diff in the batch: " << Desc.URI << std::endl;
+ return;
+ }
+
+ // this is the last completed diff, so we are ready to apply now
+ State = StateApplyDiff;
+
+ if(Debug)
+ std::clog << "Sending to rred method: " << FinalFile << std::endl;
+
+ Local = true;
+ Desc.URI = "rred:" + FinalFile;
+ QueueURI(Desc);
+ Mode = "rred";
+ return;
+ }
+ // success in download/apply all diffs, clean up
+ else if (State == StateApplyDiff)
+ {
+ // see if we really got the expected file
+ if(!ExpectedHash.empty() && !ExpectedHash.VerifyFile(DestFile))
+ {
+ RenameOnError(HashSumMismatch);
+ return;
+ }
+
+ // move the result into place
+ if(Debug)
+ std::clog << "Moving patched file in place: " << std::endl
+ << DestFile << " -> " << FinalFile << std::endl;
+ Rename(DestFile, FinalFile);
+ chmod(FinalFile.c_str(), 0644);
+
+ // otherwise lists cleanup will eat the file
+ DestFile = FinalFile;
+
+ // all set and done
+ Complete = true;
+ if(Debug)
+ std::clog << "allDone: " << DestFile << "\n" << std::endl;
+ }
+}
+ /*}}}*/
// AcqIndex::AcqIndex - Constructor /*{{{*/
// ---------------------------------------------------------------------
/* The package file is added to the queue and a second class is