summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Julian Andres Klode <julian.klode@canonical.com> 2020-01-08 11:03:28 +0100
committer Julian Andres Klode <julian.klode@canonical.com> 2020-01-08 11:13:27 +0100
commit 6902792898a9fcc3bdff605e2097e6a5cd2d6bbc (patch)
tree 6206b42552cdabdf0c373a921edd46cef2e3dbc7
parent d3636f2666b77eb17b261300cb91eb912e2789c6 (diff)
Avoid extra out-of-cache hash table deduplication for package names
We were de-duplicating package name strings in StoreString, but most of them were already de-duplicated by virtue of belonging to groups, so we had extra hash table lookups that could be avoided in NewGroup().

To continue deduplicating names across binary packages and source packages, insert groups for source packages as well. This is also a good first step in allowing efficient lookup of packages by source package - we can extend Group later by a list of SourceVersion objects, or alternatively, simply add a by-source chain into pkgCache::Version.

This change improves performance by about 10% (913 to 814 ms), while having no significant overhead on the cache size:

--- before
+++ after
@@ -1,7 +1,7 @@
-Total package names: 109536 (2.191 k)
-Total package structures: 118689 (4.748 k)
+Total package names: 119642 (2.393 k)
+Total package structures: 118687 (4.747 k)
   Normal packages: 83309
-  Pure virtual packages: 3365
+  Pure virtual packages: 3363
   Single virtual packages: 17811
   Mixed virtual packages: 1973
   Missing: 12231
@@ -10,21 +10,21 @@ Total distinct descriptions: 149291 (3.583 k)
 Total dependencies: 484135/156650 (12,2 M)
 Total ver/file relations: 57421 (1.378 k)
 Total Desc/File relations: 18219 (437 k)
-Total Provides mappings: 29963 (719 k)
+Total Provides mappings: 29959 (719 k)
 Total globbed strings: 226993 (5.332 k)
 Total slack space: 26,8 k
-Total space accounted for: 38,1 M
+Total space accounted for: 38,3 M
 Total buckets in PkgHashTable: 50503
-  Unused: 5727
-  Used: 44776
-  Utilization: 88.6601%
-  Average entries: 2.65073
+  Unused: 5728
+  Used: 44775
+  Utilization: 88.6581%
+  Average entries: 2.65074
   Longest: 60
   Shortest: 1
 Total buckets in GrpHashTable: 50503
-  Unused: 5727
-  Used: 44776
-  Utilization: 88.6601%
-  Average entries: 2.44631
-  Longest: 10
+  Unused: 4649
+  Used: 45854
+  Utilization: 90.7946%
+  Average entries: 2.60919
+  Longest: 11
   Shortest: 1
-rw-r--r-- apt-pkg/deb/deblistparser.cc 7
-rw-r--r-- apt-pkg/pkgcachegen.cc 3
-rw-r--r-- apt-pkg/pkgcachegen.h 5
3 files changed, 8 insertions, 7 deletions
diff --git a/apt-pkg/deb/deblistparser.cc b/apt-pkg/deb/deblistparser.cc
index 75fc2d242..88b41ad30 100644
--- a/apt-pkg/deb/deblistparser.cc
+++ b/apt-pkg/deb/deblistparser.cc
@@ -211,9 +211,12 @@ bool debListParser::NewVersion(pkgCache::VerIterator &Ver)
}
if (V.end() == true)
{
- map_stringitem_t const idx = StoreString(pkgCacheGenerator::PKGNAME, pkgname);
+ pkgCache::GrpIterator SG;
+ if (not NewGroup(SG, pkgname))
+ return false;
+
G = Ver.ParentPkg().Group();
- Ver->SourcePkgName = idx;
+ Ver->SourcePkgName = SG->Name;
}
}
}
diff --git a/apt-pkg/pkgcachegen.cc b/apt-pkg/pkgcachegen.cc
index 030f4df0a..183750acb 100644
--- a/apt-pkg/pkgcachegen.cc
+++ b/apt-pkg/pkgcachegen.cc
@@ -559,7 +559,7 @@ bool pkgCacheGenerator::NewGroup(pkgCache::GrpIterator &Grp, StringView Name)
return false;
Grp = pkgCache::GrpIterator(Cache, Cache.GrpP + Group);
- map_stringitem_t const idxName = StoreString(PKGNAME, Name);
+ map_stringitem_t const idxName = WriteStringInMap(Name);
if (unlikely(idxName == 0))
return false;
Grp->Name = idxName;
@@ -1336,7 +1336,6 @@ map_stringitem_t pkgCacheGenerator::StoreString(enum StringType const type, cons
auto strings = &strMixed;
switch(type) {
case MIXED: strings = &strMixed; break;
- case PKGNAME: strings = &strPkgNames; break;
case VERSIONNUMBER: strings = &strVersions; break;
case SECTION: strings = &strSections; break;
default: _error->Fatal("Unknown enum type used for string storage of '%.*s'", Size, S); return 0;
diff --git a/apt-pkg/pkgcachegen.h b/apt-pkg/pkgcachegen.h
index 1f639c526..2db2237da 100644
--- a/apt-pkg/pkgcachegen.h
+++ b/apt-pkg/pkgcachegen.h
@@ -70,7 +70,6 @@ class APT_HIDDEN pkgCacheGenerator /*{{{*/
};
std::unordered_set<string_pointer, hash> strMixed;
- std::unordered_set<string_pointer, hash> strPkgNames;
std::unordered_set<string_pointer, hash> strVersions;
std::unordered_set<string_pointer, hash> strSections;
#endif
@@ -124,7 +123,7 @@ class APT_HIDDEN pkgCacheGenerator /*{{{*/
public:
- enum StringType { MIXED, PKGNAME, VERSIONNUMBER, SECTION };
+ enum StringType { MIXED, VERSIONNUMBER, SECTION };
map_stringitem_t StoreString(StringType const type, const char * S, unsigned int const Size);
inline map_stringitem_t StoreString(enum StringType const type, APT::StringView S) {return StoreString(type, S.data(),S.length());};
@@ -179,7 +178,7 @@ class APT_HIDDEN pkgCacheListParser
void * const d;
protected:
-
+ inline bool NewGroup(pkgCache::GrpIterator &Grp, APT::StringView Name) { return Owner->NewGroup(Grp, Name); }
inline map_stringitem_t StoreString(pkgCacheGenerator::StringType const type, const char *S,unsigned int Size) {return Owner->StoreString(type, S, Size);};
inline map_stringitem_t StoreString(pkgCacheGenerator::StringType const type, APT::StringView S) {return Owner->StoreString(type, S);};
inline map_stringitem_t WriteString(APT::StringView S) {return Owner->WriteStringInMap(S.data(), S.size());};