From f741491272812ce20764eef0bcb1917beda3f309 Mon Sep 17 00:00:00 2001 From: Julian Andres Klode Date: Mon, 11 Mar 2019 10:46:40 +0100 Subject: Use system-provided triehash --- CMakeLists.txt | 2 + apt-pkg/CMakeLists.txt | 2 +- debian/control | 1 + triehash/.travis.yml | 14 - triehash/LICENSE.md | 17 - triehash/README.md | 58 --- triehash/tests/framework.sh | 84 ---- triehash/tests/run-tests.sh | 22 - triehash/tests/test-basic | 245 ---------- triehash/tests/test-case-insensitive | 109 ----- triehash/tests/test-enum-include-name-flags | 129 ------ triehash/tests/test-multi-byte-level | 427 ----------------- triehash/triehash.pl | 691 ---------------------------- 13 files changed, 4 insertions(+), 1797 deletions(-) delete mode 100644 triehash/.travis.yml delete mode 100644 triehash/LICENSE.md delete mode 100644 triehash/README.md delete mode 100644 triehash/tests/framework.sh delete mode 100755 triehash/tests/run-tests.sh delete mode 100755 triehash/tests/test-basic delete mode 100755 triehash/tests/test-case-insensitive delete mode 100755 triehash/tests/test-enum-include-name-flags delete mode 100755 triehash/tests/test-multi-byte-level delete mode 100755 triehash/triehash.pl diff --git a/CMakeLists.txt b/CMakeLists.txt index c65f99ccd..eed22e1a4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -38,6 +38,8 @@ find_package(Iconv REQUIRED) find_package(Perl REQUIRED) +find_program(TRIEHASH_EXECUTABLE NAMES triehash) + if(USE_NLS) find_package(Intl REQUIRED) link_libraries(${Intl_LIBRARIES}) diff --git a/apt-pkg/CMakeLists.txt b/apt-pkg/CMakeLists.txt index 64709ce34..256747935 100644 --- a/apt-pkg/CMakeLists.txt +++ b/apt-pkg/CMakeLists.txt @@ -4,7 +4,7 @@ include_directories(${PROJECT_BINARY_DIR}/include/apt-pkg) add_definitions("-DAPT_PKG_EXPOSE_STRING_VIEW") file(MAKE_DIRECTORY ${PROJECT_BINARY_DIR}/include/apt-pkg/) -execute_process(COMMAND ${PERL_EXECUTABLE} ${PROJECT_SOURCE_DIR}/triehash/triehash.pl +execute_process(COMMAND ${TRIEHASH_EXECUTABLE} --ignore-case --header ${PROJECT_BINARY_DIR}/include/apt-pkg/tagfile-keys.h --code ${CMAKE_CURRENT_BINARY_DIR}/tagfile-keys.cc diff --git a/debian/control b/debian/control index 4a5aed33c..1362bd984 100644 --- a/debian/control +++ b/debian/control @@ -26,6 +26,7 @@ Build-Depends: cmake (>= 3.4), ninja-build, pkg-config, po4a (>= 0.34-2), + triehash, xsltproc, zlib1g-dev Build-Depends-Indep: doxygen, graphviz, w3m diff --git a/triehash/.travis.yml b/triehash/.travis.yml deleted file mode 100644 index c66ea9697..000000000 --- a/triehash/.travis.yml +++ /dev/null @@ -1,14 +0,0 @@ -language: perl -perl: - - "5.24" - - "5.22" - - "5.20" - - "5.18" - - "5.16" - - "5.14" -install: - - cpanm --quiet --notest --skip-satisfied Devel::Cover Devel::Cover::Report::Codecov -script: - - ./tests/run-tests.sh -after_script: - - cover -report codecov diff --git a/triehash/LICENSE.md b/triehash/LICENSE.md deleted file mode 100644 index 89de35479..000000000 --- a/triehash/LICENSE.md +++ /dev/null @@ -1,17 +0,0 @@ -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in -all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -THE SOFTWARE. diff --git a/triehash/README.md b/triehash/README.md deleted file mode 100644 index d6f6e7fc3..000000000 --- a/triehash/README.md +++ /dev/null @@ -1,58 +0,0 @@ -# Order-preserving minimal perfect hash function generator - -Build order-preserving minimal perfect hash functions. - -[![codecov](https://codecov.io/gh/julian-klode/triehash/branch/master/graph/badge.svg)](https://codecov.io/gh/julian-klode/triehash) -[![Build Status](https://travis-ci.org/julian-klode/triehash.svg?branch=master)](https://travis-ci.org/julian-klode/triehash) - -## Performance - -Performance was evaluated against other hash functions. As an input set, the -fields of Debian Packages and Sources files was used, and each hash function -was run 1,000,000 times for each word. The byte count of the words were then -summed up and divided by the total number of nanoseconds each function ran, so -all speeds below are given in bytes per nanosecond, AKA gigabyte per second. - -arch/function|jak-x230 (amd64)|backup (amd64)|asachi.d.o (arm64)|asachi.d.o (armel)|asachi.d.o (armhf)|plummer.d.o (ppc64el)|eller.d.o (mipsel) --------------|----------------|--------------|------------------|------------------|------------------|---------------------|------------------ -Trie | 2.4 | 1.9 | 1.2 | 0.9 | 0.8 | 2.0 | 0.2 -Trie (*) | 2.2 | 1.7 | 0.8 | 0.7 | 0.7 | 1.8 | 0.2 -re2c | 1.7 | 1.3 | 0.9 | 0.9 | 0.7 | 1.6 | 0.2 -re2c (*) | 1.2 | 0.9 | 0.6 | 0.6 | 0.5 | 1.1 | 0.1 -gperf (*) | 0.7 | 0.5 | 0.2 | 0.2 | 0.2 | 0.5 | 0.1 -gperf | 1.3 | 0.9 | 0.3 | 0.3 | 0.2 | 0.4 | 0.1 -djb (*) | 0.7 | 0.5 | 0.3 | 0.3 | 0.3 | 0.5 | 0.1 -djb (**) | 1.0 | 0.7 | 0.4 | 0.5 | 0.5 | 0.6 | 0.2 -djb | 0.9 | 0.7 | 0.5 | 0.5 | 0.5 | 0.7 | 0.2 -apt (*) | 1.2 | 0.9 | 0.7 | 0.7 | 0.7 | 1.1 | 0.2 -apt (**) | 2.3 | 1.7 | 0.7 | 0.9 | 0.8 | 1.9 | 0.2 - -And transposed: - -function/arch |Trie |Trie (*) |re2c |re2c (*) |gperf (*)|gperf |djb (*) |djb (**) |djb |apt (*) |apt (**) ----------------------|---------|---------|---------|---------|---------|---------|---------|---------|---------|---------|--------- -jak-x230 (amd64) | 2.4| 2.2| 1.7| 1.2| 0.7| 1.3| 0.7| 1.0| 0.9| 1.2| 2.3 -backup (amd64) | 1.9| 1.7| 1.3| 0.9| 0.5| 0.9| 0.5| 0.7| 0.7| 0.9| 1.7 -asachi.d.o (arm64) | 1.2| 0.8| 0.9| 0.6| 0.2| 0.3| 0.3| 0.4| 0.5| 0.7| 0.7 -asachi.d.o (armel) | 0.9| 0.7| 0.9| 0.6| 0.2| 0.3| 0.3| 0.5| 0.5| 0.7| 0.9 -asachi.d.o (armhf) | 0.8| 0.7| 0.7| 0.5| 0.2| 0.2| 0.3| 0.5| 0.5| 0.7| 0.8 -plummer.d.o (ppc64el)| 2.0| 1.8| 1.6| 1.1| 0.5| 0.4| 0.5| 0.6| 0.7| 1.1| 1.9 -eller.d.o (mipsel) | 0.2| 0.2| 0.2| 0.1| 0.1| 0.1| 0.1| 0.2| 0.2| 0.2| 0.2 - - -Legend: - -* The (*) variants are case-insensitive, (**) are more optimised versions - of the (*) versions. -* DJB (*) is a DJB Hash with naive lowercase conversion, DJB (**) just ORs one - bit into each value to get alphabetical characters to be lowercase -* APT (*) is the AlphaHash function from APT which hashes the last 8 bytes in a - word in a case-insensitive manner. APT (**) is the same function unrolled. -* All hosts except the x230 are Debian porterboxes. The x230 has a Core i5-3320M, - barriere has an Opteron 23xx. - -Notes: - -* The overhead is larger than needed on some platforms due to gcc inserting - unneeded zero extend instructions, see: - https://gcc.gnu.org/bugzilla/show_bug.cgi?id=77729 diff --git a/triehash/tests/framework.sh b/triehash/tests/framework.sh deleted file mode 100644 index 51d4580a6..000000000 --- a/triehash/tests/framework.sh +++ /dev/null @@ -1,84 +0,0 @@ -#!/bin/sh -# Simple integration test framework - -set -e - - -cleanup() { - rm -f test.output test.c test.h test.tree -} - -dumpone() { - if [ -e "$@" ]; then - echo "Content of $@:" - cat "$@" | sed "s#^#\t#g" - fi - -} - -dump() { - dumpone test.output - dumpone test.c - dumpone test.h - dumpone test.tree - return 1 -} - -testsuccess() { - [ "$INNER" ] || cleanup - [ "$INNER" ] || echo "Testing success of $@" - if ! "$@" > test.output 2>&1; then - echo "ERROR: Running $@ failed with error $?, messages were:" >&2 - dump - return 1 - fi -} - -testfailure() { - [ "$INNER" ] || cleanup - [ "$INNER" ] || echo "Testing failure of $@" - if "$@" > test.output 2>&1; then - echo "ERROR: Running $@ unexpectedly succeeded, messages were:" >&2 - dump - return 1 - fi -} - -testfileequal() { - [ "$INNER" ] || echo "Testing output of $2" - printf "%b\n" "$1" > expected - if ! diff -u "expected" "$2" > test.diff; then - echo "ERROR: Differences between expected output and and $2:" >&2 - cat test.diff | sed "s#^#\t#g" - dump - return 1 - fi -} - -testgrep() { - [ "$INNER" ] || echo "Testing grep $@" - INNER=1 testsuccess grep "$@" - unset INNER -} - -testsuccessequal() { - expect="$1" - shift - cleanup - echo "Testing success and output of $@" - INNER=1 testsuccess "$@" - INNER=1 testfileequal "$expect" test.output - unset INNER -} - - -WORDS="Word-_0 -Word = 42 -VeryLongWord -Label ~ Word2 -= -9" - -triehash() { - printf "%b\n" "$WORDS" | perl -MDevel::Cover=-summary,0,-silent,1 $(dirname $(dirname $(readlink -f $0)))/triehash.pl "$@" || return $? - return $? -} diff --git a/triehash/tests/run-tests.sh b/triehash/tests/run-tests.sh deleted file mode 100755 index b9c1ec309..000000000 --- a/triehash/tests/run-tests.sh +++ /dev/null @@ -1,22 +0,0 @@ -#!/bin/sh -DIR=$(dirname $(readlink -f $0)) - -# Let's go into triehash.pl's dir -cd $(dirname "$DIR") - -rm -rf cover_db - -count=$(cd "$DIR" && echo test-* | wc -w) -i=1 - -for test in $DIR/test-*; do - echo "[$i/$count] Running testcase $test" - if ! $test > test.summary 2>&1; then - cat test.summary - exit 1 - fi - i=$((i + 1)) -done - - -cover diff --git a/triehash/tests/test-basic b/triehash/tests/test-basic deleted file mode 100755 index 19cb08684..000000000 --- a/triehash/tests/test-basic +++ /dev/null @@ -1,245 +0,0 @@ -#!/bin/sh -. $(dirname $(readlink -f $0))/framework.sh - -# Check for non-existing files -testfailure triehash -C /does/not/exist1 -H /does/not/exist1 /does/not/exist/input - -# Check that we can specify - for -C and -H -testsuccessequal "#ifndef TRIE_HASH_PerfectHash -#define TRIE_HASH_PerfectHash -#include -#include -enum PerfectKey { - Unknown = -1, -}; -static enum PerfectKey PerfectHash(const char *string, size_t length); -static enum PerfectKey PerfectHash(const char *string, size_t length) -{ - switch (length) { - default: - return Unknown; - } -} -#endif /* TRIE_HASH_PerfectHash */" triehash --multi-byte=0 -C - -H - - -# Check that split files work -testsuccess triehash -C test.c -H test.h --multi-byte=0 -testfileequal "#include \"test.h\" - enum PerfectKey PerfectHash(const char *string, size_t length) -{ - switch (length) { - default: - return Unknown; - } -}" test.c -testfileequal "#ifndef TRIE_HASH_PerfectHash -#define TRIE_HASH_PerfectHash -#include -#include -enum PerfectKey { - Unknown = -1, -}; - enum PerfectKey PerfectHash(const char *string, size_t length); -#endif /* TRIE_HASH_PerfectHash */" test.h - - -# Check the C code generator -testsuccess triehash -C test.c -H test.c /dev/stdin -testfileequal "#ifndef TRIE_HASH_PerfectHash -#define TRIE_HASH_PerfectHash -#include -#include -enum PerfectKey { - VeryLongWord = 43, - Word = 42, - Word___0 = 0, - Label = 44, - Unknown = -9, -}; -static enum PerfectKey PerfectHash(const char *string, size_t length); -#ifdef __GNUC__ -typedef uint16_t __attribute__((aligned (1))) triehash_uu16; -typedef char static_assert16[__alignof__(triehash_uu16) == 1 ? 1 : -1]; -typedef uint32_t __attribute__((aligned (1))) triehash_uu32; -typedef char static_assert32[__alignof__(triehash_uu32) == 1 ? 1 : -1]; -typedef uint64_t __attribute__((aligned (1))) triehash_uu64; -typedef char static_assert64[__alignof__(triehash_uu64) == 1 ? 1 : -1]; -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -#define onechar(c, s, l) (((uint64_t)(c)) << (s)) -#else -#define onechar(c, s, l) (((uint64_t)(c)) << (l-8-s)) -#endif -#if (!defined(__ARM_ARCH) || defined(__ARM_FEATURE_UNALIGNED)) && !defined(TRIE_HASH_NO_MULTI_BYTE) -#define TRIE_HASH_MULTI_BYTE -#endif -#endif /*GNUC */ -#ifdef TRIE_HASH_MULTI_BYTE -static enum PerfectKey PerfectHash4(const char *string) -{ - switch(*((triehash_uu32*) &string[0])) { - case 0| onechar('W', 0, 32)| onechar('o', 8, 32)| onechar('r', 16, 32)| onechar('d', 24, 32): - return Word; - } - return Unknown; -} -static enum PerfectKey PerfectHash5(const char *string) -{ - switch(*((triehash_uu32*) &string[0])) { - case 0| onechar('W', 0, 32)| onechar('o', 8, 32)| onechar('r', 16, 32)| onechar('d', 24, 32): - switch(string[4]) { - case 0| onechar('2', 0, 8): - return Label; - } - } - return Unknown; -} -static enum PerfectKey PerfectHash7(const char *string) -{ - switch(*((triehash_uu32*) &string[0])) { - case 0| onechar('W', 0, 32)| onechar('o', 8, 32)| onechar('r', 16, 32)| onechar('d', 24, 32): - switch(string[4]) { - case 0| onechar('-', 0, 8): - switch(string[5]) { - case 0| onechar('_', 0, 8): - switch(string[6]) { - case 0| onechar('0', 0, 8): - return Word___0; - } - } - } - } - return Unknown; -} -static enum PerfectKey PerfectHash12(const char *string) -{ - switch(*((triehash_uu64*) &string[0])) { - case 0| onechar('V', 0, 64)| onechar('e', 8, 64)| onechar('r', 16, 64)| onechar('y', 24, 64)| onechar('L', 32, 64)| onechar('o', 40, 64)| onechar('n', 48, 64)| onechar('g', 56, 64): - switch(*((triehash_uu32*) &string[8])) { - case 0| onechar('W', 0, 32)| onechar('o', 8, 32)| onechar('r', 16, 32)| onechar('d', 24, 32): - return VeryLongWord; - } - } - return Unknown; -} -#else -static enum PerfectKey PerfectHash4(const char *string) -{ - switch(string[0]) { - case 'W': - switch(string[1]) { - case 'o': - switch(string[2]) { - case 'r': - switch(string[3]) { - case 'd': - return Word; - } - } - } - } - return Unknown; -} -static enum PerfectKey PerfectHash5(const char *string) -{ - switch(string[0]) { - case 'W': - switch(string[1]) { - case 'o': - switch(string[2]) { - case 'r': - switch(string[3]) { - case 'd': - switch(string[4]) { - case '2': - return Label; - } - } - } - } - } - return Unknown; -} -static enum PerfectKey PerfectHash7(const char *string) -{ - switch(string[0]) { - case 'W': - switch(string[1]) { - case 'o': - switch(string[2]) { - case 'r': - switch(string[3]) { - case 'd': - switch(string[4]) { - case '-': - switch(string[5]) { - case '_': - switch(string[6]) { - case '0': - return Word___0; - } - } - } - } - } - } - } - return Unknown; -} -static enum PerfectKey PerfectHash12(const char *string) -{ - switch(string[0]) { - case 'V': - switch(string[1]) { - case 'e': - switch(string[2]) { - case 'r': - switch(string[3]) { - case 'y': - switch(string[4]) { - case 'L': - switch(string[5]) { - case 'o': - switch(string[6]) { - case 'n': - switch(string[7]) { - case 'g': - switch(string[8]) { - case 'W': - switch(string[9]) { - case 'o': - switch(string[10]) { - case 'r': - switch(string[11]) { - case 'd': - return VeryLongWord; - } - } - } - } - } - } - } - } - } - } - } - } - return Unknown; -} -#endif /* TRIE_HASH_MULTI_BYTE */ -static enum PerfectKey PerfectHash(const char *string, size_t length) -{ - switch (length) { - case 4: - return PerfectHash4(string); - case 5: - return PerfectHash5(string); - case 7: - return PerfectHash7(string); - case 12: - return PerfectHash12(string); - default: - return Unknown; - } -} -#endif /* TRIE_HASH_PerfectHash */" test.c diff --git a/triehash/tests/test-case-insensitive b/triehash/tests/test-case-insensitive deleted file mode 100755 index 25ab2dd78..000000000 --- a/triehash/tests/test-case-insensitive +++ /dev/null @@ -1,109 +0,0 @@ -#!/bin/sh -. $(dirname $(readlink -f $0))/framework.sh - -WORDS="Halllo\nH-lllo\nHalll1" - -# Case-insensitive test -testsuccessequal "#include \"/dev/null\" -#ifdef __GNUC__ -typedef uint16_t __attribute__((aligned (1))) triehash_uu16; -typedef char static_assert16[__alignof__(triehash_uu16) == 1 ? 1 : -1]; -typedef uint32_t __attribute__((aligned (1))) triehash_uu32; -typedef char static_assert32[__alignof__(triehash_uu32) == 1 ? 1 : -1]; -typedef uint64_t __attribute__((aligned (1))) triehash_uu64; -typedef char static_assert64[__alignof__(triehash_uu64) == 1 ? 1 : -1]; -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -#define onechar(c, s, l) (((uint64_t)(c)) << (s)) -#else -#define onechar(c, s, l) (((uint64_t)(c)) << (l-8-s)) -#endif -#if (!defined(__ARM_ARCH) || defined(__ARM_FEATURE_UNALIGNED)) && !defined(TRIE_HASH_NO_MULTI_BYTE) -#define TRIE_HASH_MULTI_BYTE -#endif -#endif /*GNUC */ -#ifdef TRIE_HASH_MULTI_BYTE -static enum PerfectKey PerfectHash6(const char *string) -{ - switch(string[0] | 0x20) { - case 0| onechar('h', 0, 8): - switch(string[1]) { - case 0| onechar('-', 0, 8): - switch(*((triehash_uu32*) &string[2]) | 0x20202020) { - case 0| onechar('l', 0, 32)| onechar('l', 8, 32)| onechar('l', 16, 32)| onechar('o', 24, 32): - return H_lllo; - } - break; - case 0| onechar('a', 0, 8): - case 0| onechar('A', 0, 8): - switch(*((triehash_uu16*) &string[2]) | 0x2020) { - case 0| onechar('l', 0, 16)| onechar('l', 8, 16): - switch(string[4] | 0x20) { - case 0| onechar('l', 0, 8): - switch(string[5]) { - case 0| onechar('1', 0, 8): - return Halll1; - break; - case 0| onechar('o', 0, 8): - case 0| onechar('O', 0, 8): - return Halllo; - } - } - } - } - } - return Unknown; -} -#else -static enum PerfectKey PerfectHash6(const char *string) -{ - switch(string[0] | 0x20) { - case 'h': - switch(string[1]) { - case '-': - switch(string[2] | 0x20) { - case 'l': - switch(string[3] | 0x20) { - case 'l': - switch(string[4] | 0x20) { - case 'l': - switch(string[5] | 0x20) { - case 'o': - return H_lllo; - } - } - } - } - break; - case 'a': - case 'A': - switch(string[2] | 0x20) { - case 'l': - switch(string[3] | 0x20) { - case 'l': - switch(string[4] | 0x20) { - case 'l': - switch(string[5]) { - case '1': - return Halll1; - break; - case 'o': - case 'O': - return Halllo; - } - } - } - } - } - } - return Unknown; -} -#endif /* TRIE_HASH_MULTI_BYTE */ - enum PerfectKey PerfectHash(const char *string, size_t length) -{ - switch (length) { - case 6: - return PerfectHash6(string); - default: - return Unknown; - } -}" triehash --multi-byte=3210 --ignore-case -H /dev/null /dev/stdin diff --git a/triehash/tests/test-enum-include-name-flags b/triehash/tests/test-enum-include-name-flags deleted file mode 100755 index 33bd97c0f..000000000 --- a/triehash/tests/test-enum-include-name-flags +++ /dev/null @@ -1,129 +0,0 @@ -#!/bin/sh -. $(dirname $(readlink -f $0))/framework.sh - -# Need a short word, we really just need to check if the labels work -WORDS=w - -testsuccessequal "\ -#ifndef TRIE_HASH_PerfectHash -#define TRIE_HASH_PerfectHash -#include -#include -#include -enum PerfectKey { - w = 0, - Unknown = -1, -}; - enum PerfectKey PerfectHash(const char *string, size_t length); -#endif /* TRIE_HASH_PerfectHash */" triehash --multi-byte=0 -C /dev/null --include="" /dev/stdin - -# Check for --enum-class support -testsuccessequal "\ -#ifndef TRIE_HASH_PerfectHash -#define TRIE_HASH_PerfectHash -#include -#include -enum class PerfectKey { - w = 0, - Unknown = -1, -}; -static enum PerfectKey PerfectHash(const char *string, size_t length); -static enum PerfectKey PerfectHash1(const char *string) -{ - switch(string[0]) { - case 'w': - return PerfectKey::w; - } - return PerfectKey::Unknown; -} -static enum PerfectKey PerfectHash(const char *string, size_t length) -{ - switch (length) { - case 1: - return PerfectHash1(string); - default: - return PerfectKey::Unknown; - } -} -#endif /* TRIE_HASH_PerfectHash */" triehash --multi-byte=0 --enum-class /dev/stdin - -# Check for --enum-name support -testsuccessequal "\ -#ifndef TRIE_HASH_PerfectHash -#define TRIE_HASH_PerfectHash -#include -#include -enum Foo { - Unknown = -1, -}; -static enum Foo PerfectHash(const char *string, size_t length); -static enum Foo PerfectHash(const char *string, size_t length) -{ - switch (length) { - default: - return Unknown; - } -} -#endif /* TRIE_HASH_PerfectHash */\ -" triehash --multi-byte=0 --enum-name="Foo" - -# Check for --enum-class support -testsuccessequal "\ -#ifndef TRIE_HASH_PerfectHash -#define TRIE_HASH_PerfectHash -#include -#include -enum class Foo::Bar { - Unknown = -1, -}; -static enum Foo::Bar PerfectHash(const char *string, size_t length); -static enum Foo::Bar PerfectHash(const char *string, size_t length) -{ - switch (length) { - default: - return Foo::Bar::Unknown; - } -} -#endif /* TRIE_HASH_PerfectHash */\ -" triehash --multi-byte=0 --enum-class --enum-name="Foo::Bar" - -# Check for --function-name support -testsuccessequal "\ -#ifndef TRIE_HASH_NonSense -#define TRIE_HASH_NonSense -#include -#include -enum PerfectKey { - Unknown = -1, -}; -static enum PerfectKey NonSense(const char *string, size_t length); -static enum PerfectKey NonSense(const char *string, size_t length) -{ - switch (length) { - default: - return Unknown; - } -} -#endif /* TRIE_HASH_NonSense */\ -" triehash --multi-byte=0 --function-name="NonSense" - -# Check for --counter-name support -testsuccessequal "\ -#ifndef TRIE_HASH_PerfectHash -#define TRIE_HASH_PerfectHash -#include -#include -enum { MyCounter = 0 }; -enum PerfectKey { - Unknown = -1, -}; -static enum PerfectKey PerfectHash(const char *string, size_t length); -static enum PerfectKey PerfectHash(const char *string, size_t length) -{ - switch (length) { - default: - return Unknown; - } -} -#endif /* TRIE_HASH_PerfectHash */\ -" triehash --multi-byte=0 --counter-name="MyCounter" diff --git a/triehash/tests/test-multi-byte-level b/triehash/tests/test-multi-byte-level deleted file mode 100755 index ddfb8cd1b..000000000 --- a/triehash/tests/test-multi-byte-level +++ /dev/null @@ -1,427 +0,0 @@ -#!/bin/sh -. $(dirname $(readlink -f $0))/framework.sh - -# Check that building a single-byte trie works -testsuccessequal "\ -┌────────────────────────────────────────────────────┐ -│ Initial trie │ -└────────────────────────────────────────────────────┘ - -├── V -│ ├── e -│ │ ├── r -│ │ │ ├── y -│ │ │ │ ├── L -│ │ │ │ │ ├── o -│ │ │ │ │ │ ├── n -│ │ │ │ │ │ │ ├── g -│ │ │ │ │ │ │ │ ├── W -│ │ │ │ │ │ │ │ │ ├── o -│ │ │ │ │ │ │ │ │ │ ├── r -│ │ │ │ │ │ │ │ │ │ │ ├── d → VeryLongWord -├── W -│ ├── o -│ │ ├── r -│ │ │ ├── d → Word -│ │ │ │ ├── - -│ │ │ │ │ ├── _ -│ │ │ │ │ │ ├── 0 → Word-_0 -│ │ │ │ ├── 2 → Label -┌────────────────────────────────────────────────────┐ -│ Rebuilt trie │ -└────────────────────────────────────────────────────┘ - -├── V -│ ├── e -│ │ ├── r -│ │ │ ├── y -│ │ │ │ ├── L -│ │ │ │ │ ├── o -│ │ │ │ │ │ ├── n -│ │ │ │ │ │ │ ├── g -│ │ │ │ │ │ │ │ ├── W -│ │ │ │ │ │ │ │ │ ├── o -│ │ │ │ │ │ │ │ │ │ ├── r -│ │ │ │ │ │ │ │ │ │ │ ├── d → VeryLongWord -├── W -│ ├── o -│ │ ├── r -│ │ │ ├── d → Word -│ │ │ │ ├── - -│ │ │ │ │ ├── _ -│ │ │ │ │ │ ├── 0 → Word-_0 -│ │ │ │ ├── 2 → Label -┌────────────────────────────────────────────────────┐ -│ Trie for words of length 4 │ -└────────────────────────────────────────────────────┘ - -├── W -│ ├── o -│ │ ├── r -│ │ │ ├── d → Word -┌────────────────────────────────────────────────────┐ -│ Trie for words of length 5 │ -└────────────────────────────────────────────────────┘ - -├── W -│ ├── o -│ │ ├── r -│ │ │ ├── d -│ │ │ │ ├── 2 → Label -┌────────────────────────────────────────────────────┐ -│ Trie for words of length 7 │ -└────────────────────────────────────────────────────┘ - -├── W -│ ├── o -│ │ ├── r -│ │ │ ├── d -│ │ │ │ ├── - -│ │ │ │ │ ├── _ -│ │ │ │ │ │ ├── 0 → Word-_0 -┌────────────────────────────────────────────────────┐ -│ Trie for words of length 12 │ -└────────────────────────────────────────────────────┘ - -├── V -│ ├── e -│ │ ├── r -│ │ │ ├── y -│ │ │ │ ├── L -│ │ │ │ │ ├── o -│ │ │ │ │ │ ├── n -│ │ │ │ │ │ │ ├── g -│ │ │ │ │ │ │ │ ├── W -│ │ │ │ │ │ │ │ │ ├── o -│ │ │ │ │ │ │ │ │ │ ├── r -│ │ │ │ │ │ │ │ │ │ │ ├── d → VeryLongWord" triehash --multi-byte=0 -l tree /dev/stdin - -# Two byte optimization -testsuccessequal "\ -┌────────────────────────────────────────────────────┐ -│ Initial trie │ -└────────────────────────────────────────────────────┘ - -├── Ve -│ ├── ry -│ │ ├── Lo -│ │ │ ├── ng -│ │ │ │ ├── Wo -│ │ │ │ │ ├── rd → VeryLongWord -├── Wo -│ ├── rd → Word -│ │ ├── -_ -│ │ │ ├── 0 → Word-_0 -│ │ ├── 2 → Label -┌────────────────────────────────────────────────────┐ -│ Rebuilt trie │ -└────────────────────────────────────────────────────┘ - -├── Ve -│ ├── ry -│ │ ├── Lo -│ │ │ ├── ng -│ │ │ │ ├── Wo -│ │ │ │ │ ├── rd → VeryLongWord -├── Wo -│ ├── rd → Word -│ │ ├── - -│ │ │ ├── _0 → Word-_0 -│ │ ├── 2 → Label -┌────────────────────────────────────────────────────┐ -│ Trie for words of length 4 │ -└────────────────────────────────────────────────────┘ - -├── Wo -│ ├── rd → Word -┌────────────────────────────────────────────────────┐ -│ Trie for words of length 5 │ -└────────────────────────────────────────────────────┘ - -├── Wo -│ ├── rd -│ │ ├── 2 → Label -┌────────────────────────────────────────────────────┐ -│ Trie for words of length 7 │ -└────────────────────────────────────────────────────┘ - -├── Wo -│ ├── rd -│ │ ├── -_ -│ │ │ ├── 0 → Word-_0 -┌────────────────────────────────────────────────────┐ -│ Trie for words of length 12 │ -└────────────────────────────────────────────────────┘ - -├── Ve -│ ├── ry -│ │ ├── Lo -│ │ │ ├── ng -│ │ │ │ ├── Wo -│ │ │ │ │ ├── rd → VeryLongWord" triehash --multi-byte=1 -l tree /dev/stdin -# Four byte optimization -testsuccessequal "\ -┌────────────────────────────────────────────────────┐ -│ Initial trie │ -└────────────────────────────────────────────────────┘ - -├── Very -│ ├── Long -│ │ ├── Word → VeryLongWord -├── Word → Word -│ ├── - -│ │ ├── _ -│ │ │ ├── 0 → Word-_0 -│ ├── 2 → Label -┌────────────────────────────────────────────────────┐ -│ Rebuilt trie │ -└────────────────────────────────────────────────────┘ - -├── Very -│ ├── Long -│ │ ├── Word → VeryLongWord -├── Word → Word -│ ├── - -│ │ ├── _ -│ │ │ ├── 0 → Word-_0 -│ ├── 2 → Label -┌────────────────────────────────────────────────────┐ -│ Trie for words of length 4 │ -└────────────────────────────────────────────────────┘ - -├── Word → Word -┌────────────────────────────────────────────────────┐ -│ Trie for words of length 5 │ -└────────────────────────────────────────────────────┘ - -├── Word -│ ├── 2 → Label -┌────────────────────────────────────────────────────┐ -│ Trie for words of length 7 │ -└────────────────────────────────────────────────────┘ - -├── Word -│ ├── - -│ │ ├── _ -│ │ │ ├── 0 → Word-_0 -┌────────────────────────────────────────────────────┐ -│ Trie for words of length 12 │ -└────────────────────────────────────────────────────┘ - -├── Very -│ ├── Long -│ │ ├── Word → VeryLongWord" triehash --multi-byte=2 -l tree /dev/stdin -# Eigh byte optimization -testsuccessequal "\ -┌────────────────────────────────────────────────────┐ -│ Initial trie │ -└────────────────────────────────────────────────────┘ - -├── VeryLong -│ ├── W -│ │ ├── o -│ │ │ ├── r -│ │ │ │ ├── d → VeryLongWord -├── W -│ ├── o -│ │ ├── r -│ │ │ ├── d → Word -│ │ │ │ ├── - -│ │ │ │ │ ├── _ -│ │ │ │ │ │ ├── 0 → Word-_0 -│ │ │ │ ├── 2 → Label -┌────────────────────────────────────────────────────┐ -│ Rebuilt trie │ -└────────────────────────────────────────────────────┘ - -├── V -│ ├── eryLongW -│ │ ├── o -│ │ │ ├── r -│ │ │ │ ├── d → VeryLongWord -├── W -│ ├── o -│ │ ├── r -│ │ │ ├── d → Word -│ │ │ │ ├── - -│ │ │ │ │ ├── _ -│ │ │ │ │ │ ├── 0 → Word-_0 -│ │ │ │ ├── 2 → Label -┌────────────────────────────────────────────────────┐ -│ Trie for words of length 4 │ -└────────────────────────────────────────────────────┘ - -├── W -│ ├── o -│ │ ├── r -│ │ │ ├── d → Word -┌────────────────────────────────────────────────────┐ -│ Trie for words of length 5 │ -└────────────────────────────────────────────────────┘ - -├── W -│ ├── o -│ │ ├── r -│ │ │ ├── d -│ │ │ │ ├── 2 → Label -┌────────────────────────────────────────────────────┐ -│ Trie for words of length 7 │ -└────────────────────────────────────────────────────┘ - -├── W -│ ├── o -│ │ ├── r -│ │ │ ├── d -│ │ │ │ ├── - -│ │ │ │ │ ├── _ -│ │ │ │ │ │ ├── 0 → Word-_0 -┌────────────────────────────────────────────────────┐ -│ Trie for words of length 12 │ -└────────────────────────────────────────────────────┘ - -├── VeryLong -│ ├── W -│ │ ├── o -│ │ │ ├── r -│ │ │ │ ├── d → VeryLongWord" triehash --multi-byte=3 -l tree /dev/stdin - - -# Check that building a multi-byte trie works -testsuccessequal "\ -┌────────────────────────────────────────────────────┐ -│ Initial trie │ -└────────────────────────────────────────────────────┘ - -├── VeryLong -│ ├── Word → VeryLongWord -├── Word → Word -│ ├── - -│ │ ├── _ -│ │ │ ├── 0 → Word-_0 -│ ├── 2 → Label -┌────────────────────────────────────────────────────┐ -│ Rebuilt trie │ -└────────────────────────────────────────────────────┘ - -├── Very -│ ├── LongWord → VeryLongWord -├── Word → Word -│ ├── - -│ │ ├── _ -│ │ │ ├── 0 → Word-_0 -│ ├── 2 → Label -┌────────────────────────────────────────────────────┐ -│ Trie for words of length 4 │ -└────────────────────────────────────────────────────┘ - -├── Word → Word -┌────────────────────────────────────────────────────┐ -│ Trie for words of length 5 │ -└────────────────────────────────────────────────────┘ - -├── Word -│ ├── 2 → Label -┌────────────────────────────────────────────────────┐ -│ Trie for words of length 7 │ -└────────────────────────────────────────────────────┘ - -├── Word -│ ├── - -│ │ ├── _ -│ │ │ ├── 0 → Word-_0 -┌────────────────────────────────────────────────────┐ -│ Trie for words of length 12 │ -└────────────────────────────────────────────────────┘ - -├── VeryLong -│ ├── Word → VeryLongWord" triehash -l tree /dev/stdin - - -###### CHANGE THE WORDS FOR THE FOLLOWING TESTS ####### -WORDS="Word" - -# Check that we are generating the proper multi-byte and fallback sessions -testsuccessequal "#include \"/dev/null\" -#ifdef __GNUC__ -typedef uint16_t __attribute__((aligned (1))) triehash_uu16; -typedef char static_assert16[__alignof__(triehash_uu16) == 1 ? 1 : -1]; -typedef uint32_t __attribute__((aligned (1))) triehash_uu32; -typedef char static_assert32[__alignof__(triehash_uu32) == 1 ? 1 : -1]; -typedef uint64_t __attribute__((aligned (1))) triehash_uu64; -typedef char static_assert64[__alignof__(triehash_uu64) == 1 ? 1 : -1]; -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ -#define onechar(c, s, l) (((uint64_t)(c)) << (s)) -#else -#define onechar(c, s, l) (((uint64_t)(c)) << (l-8-s)) -#endif -#if (!defined(__ARM_ARCH) || defined(__ARM_FEATURE_UNALIGNED)) && !defined(TRIE_HASH_NO_MULTI_BYTE) -#define TRIE_HASH_MULTI_BYTE -#endif -#endif /*GNUC */ -#ifdef TRIE_HASH_MULTI_BYTE -static enum PerfectKey PerfectHash4(const char *string) -{ - switch(*((triehash_uu32*) &string[0])) { - case 0| onechar('W', 0, 32)| onechar('o', 8, 32)| onechar('r', 16, 32)| onechar('d', 24, 32): - return Word; - } - return Unknown; -} -#else -static enum PerfectKey PerfectHash4(const char *string) -{ - switch(string[0]) { - case 'W': - switch(string[1]) { - case 'o': - switch(string[2]) { - case 'r': - switch(string[3]) { - case 'd': - return Word; - } - } - } - } - return Unknown; -} -#endif /* TRIE_HASH_MULTI_BYTE */ - enum PerfectKey PerfectHash(const char *string, size_t length) -{ - switch (length) { - case 4: - return PerfectHash4(string); - default: - return Unknown; - } -}" triehash -H /dev/null /dev/stdin - - -# Check that we are generating no multi-byte session -testsuccessequal "#include \"/dev/null\" -static enum PerfectKey PerfectHash4(const char *string) -{ - switch(string[0]) { - case 'W': - switch(string[1]) { - case 'o': - switch(string[2]) { - case 'r': - switch(string[3]) { - case 'd': - return Word; - } - } - } - } - return Unknown; -} - enum PerfectKey PerfectHash(const char *string, size_t length) -{ - switch (length) { - case 4: - return PerfectHash4(string); - default: - return Unknown; - } -}" triehash --multi-byte=0 -H /dev/null /dev/stdin diff --git a/triehash/triehash.pl b/triehash/triehash.pl deleted file mode 100755 index 841c0e7e2..000000000 --- a/triehash/triehash.pl +++ /dev/null @@ -1,691 +0,0 @@ -#!/usr/bin/perl -w -# -# Copyright (C) 2016 Julian Andres Klode -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. - - -=head1 NAME - -triehash - Generate a perfect hash function derived from a trie. - -=cut - -use strict; -use warnings; -use Getopt::Long; - -=head1 SYNOPSIS - -B [S>] [S>] - -=head1 DESCRIPTION - -triehash takes a list of words in input file and generates a function and -an enumeration to describe the word - -=head1 INPUT FILE FORMAT - -The file consists of multiple lines of the form: - - [label ~ ] word [= value] - -This maps word to value, and generates an enumeration with entries of the form: - - label = value - -If I