diff options
author | David Kalnischkies <david@kalnischkies.de> | 2014-05-10 11:24:44 +0200 |
---|---|---|
committer | David Kalnischkies <david@kalnischkies.de> | 2014-05-10 11:24:44 +0200 |
commit | 8710a36a01c0cb1648926792c2ad05185535558e (patch) | |
tree | 0f4bc04b87ae5926d9f855f7268ee84802232749 /apt-pkg/tagfile.h | |
parent | ffe3c68e494efc1c2c4d748fa9d69e150867e5c2 (diff) |
improve pkgTagSection scanning and parsing
Removes the 256 fields limit, deals consistently with spaces littered
all over the place and is even a tiny bit faster than before.
Even comes with a bunch of new tests to validate these claims.
Diffstat (limited to 'apt-pkg/tagfile.h')
-rw-r--r-- | apt-pkg/tagfile.h | 63 |
1 files changed, 42 insertions, 21 deletions
diff --git a/apt-pkg/tagfile.h b/apt-pkg/tagfile.h index d1a24ba45..b0cfab759 100644 --- a/apt-pkg/tagfile.h +++ b/apt-pkg/tagfile.h @@ -25,6 +25,8 @@ #include <stdio.h> #include <string> +#include <vector> +#include <list> #ifndef APT_8_CLEANER_HEADERS #include <apt-pkg/fileutl.h> @@ -35,23 +37,20 @@ class FileFd; class pkgTagSection { const char *Section; - // We have a limit of 256 tags per section. - unsigned int Indexes[256]; - unsigned int AlphaIndexes[0x100]; - unsigned int TagCount; + struct TagData { + unsigned int StartTag; + unsigned int EndTag; + unsigned int StartValue; + unsigned int NextInBucket; + + TagData(unsigned int const StartTag) : StartTag(StartTag), NextInBucket(0) {} + }; + std::vector<TagData> Tags; + unsigned int LookupTable[0x100]; + // dpointer placeholder (for later in case we need it) void *d; - /* This very simple hash function for the last 8 letters gives - very good performance on the debian package files */ - inline static unsigned long AlphaHash(const char *Text, const char *End = 0) - { - unsigned long Res = 0; - for (; Text != End && *Text != ':' && *Text != 0; Text++) - Res = ((unsigned long)(*Text) & 0xDF) ^ (Res << 1); - return Res & 0xFF; - } - protected: const char *Stop; @@ -69,17 +68,39 @@ class pkgTagSection unsigned long Flag) const; bool static FindFlag(unsigned long &Flags, unsigned long Flag, const char* Start, const char* Stop); - bool Scan(const char *Start,unsigned long MaxLength); + + /** \brief searches the boundaries of the current section + * + * While parameter Start marks the beginning of the section, this method + * will search for the first double newline in the data stream which marks + * the end of the section. It also does a first pass over the content of + * the section parsing it as encountered for processing later on by Find + * + * @param Start is the beginning of the section + * @param MaxLength is the size of valid data in the stream pointed to by Start + * @param Restart if enabled internal state will be cleared, otherwise it is + * assumed that now more data is available in the stream and the parsing will + * start were it encountered insufficent data the last time. + * + * @return \b true if section end was found, \b false otherwise. + * Beware that internal state will be inconsistent if \b false is returned! + */ + APT_MUSTCHECK bool Scan(const char *Start, unsigned long MaxLength, bool const Restart = true); inline unsigned long size() const {return Stop - Section;}; void Trim(); virtual void TrimRecord(bool BeforeRecord, const char* &End); - - inline unsigned int Count() const {return TagCount;}; - bool Exists(const char* const Tag); - + + /** \brief amount of Tags in the current section + * + * Note: if a Tag is mentioned repeatly it will be counted multiple + * times, but only the last occurance is available via Find methods. + */ + unsigned int Count() const; + bool Exists(const char* const Tag) const; + inline void Get(const char *&Start,const char *&Stop,unsigned int I) const - {Start = Section + Indexes[I]; Stop = Section + Indexes[I+1];} - + {Start = Section + Tags[I].StartTag; Stop = Section + Tags[I+1].StartTag;} + inline void GetSection(const char *&Start,const char *&Stop) const { Start = Section; |