diff options
-rw-r--r-- | apt-pkg/endian.h | 118 | ||||
-rw-r--r-- | apt-pkg/getservbyport_r.cc | 59 | ||||
-rw-r--r-- | apt-pkg/memrchr.cc | 157 | ||||
-rw-r--r-- | apt-pkg/missing.h | 26 | ||||
-rw-r--r-- | apt-pkg/nameser_compat.h | 187 | ||||
-rw-r--r-- | apt-pkg/rawmemchr.cc | 139 | ||||
-rw-r--r-- | apt-pkg/strchrnul.cc | 147 |
7 files changed, 833 insertions, 0 deletions
diff --git a/apt-pkg/endian.h b/apt-pkg/endian.h new file mode 100644 index 000000000..e89694a44 --- /dev/null +++ b/apt-pkg/endian.h @@ -0,0 +1,118 @@ +// "License": Public Domain +// I, Mathias Panzenböck, place this file hereby into the public domain. Use it at your own risk for whatever you like. +// In case there are jurisdictions that don't support putting things in the public domain you can also consider it to +// be "dual licensed" under the BSD, MIT and Apache licenses, if you want to. This code is trivial anyway. Consider it +// an example on how to get the endian conversion functions on different platforms. + +#ifndef PORTABLE_ENDIAN_H__ +#define PORTABLE_ENDIAN_H__ + +#if (defined(_WIN16) || defined(_WIN32) || defined(_WIN64)) && !defined(__WINDOWS__) + +# define __WINDOWS__ + +#endif + +#if defined(__linux__) || defined(__CYGWIN__) + +# include <endian.h> + +#elif defined(__APPLE__) + +# include <libkern/OSByteOrder.h> + +# define htobe16(x) OSSwapHostToBigInt16(x) +# define htole16(x) OSSwapHostToLittleInt16(x) +# define be16toh(x) OSSwapBigToHostInt16(x) +# define le16toh(x) OSSwapLittleToHostInt16(x) + +# define htobe32(x) OSSwapHostToBigInt32(x) +# define htole32(x) OSSwapHostToLittleInt32(x) +# define be32toh(x) OSSwapBigToHostInt32(x) +# define le32toh(x) OSSwapLittleToHostInt32(x) + +# define htobe64(x) OSSwapHostToBigInt64(x) +# define htole64(x) OSSwapHostToLittleInt64(x) +# define be64toh(x) OSSwapBigToHostInt64(x) +# define le64toh(x) OSSwapLittleToHostInt64(x) + +# define __BYTE_ORDER BYTE_ORDER +# define __BIG_ENDIAN BIG_ENDIAN +# define __LITTLE_ENDIAN LITTLE_ENDIAN +# define __PDP_ENDIAN PDP_ENDIAN + +#elif defined(__OpenBSD__) + +# include <sys/endian.h> + +#elif defined(__NetBSD__) || defined(__FreeBSD__) || defined(__DragonFly__) + +# include <sys/endian.h> + +# define be16toh(x) betoh16(x) +# define le16toh(x) letoh16(x) + +# define be32toh(x) betoh32(x) +# define le32toh(x) letoh32(x) + +# define be64toh(x) betoh64(x) +# define 
le64toh(x) letoh64(x) + +#elif defined(__WINDOWS__) + +# include <winsock2.h> +# include <sys/param.h> + +# if BYTE_ORDER == LITTLE_ENDIAN + +# define htobe16(x) htons(x) +# define htole16(x) (x) +# define be16toh(x) ntohs(x) +# define le16toh(x) (x) + +# define htobe32(x) htonl(x) +# define htole32(x) (x) +# define be32toh(x) ntohl(x) +# define le32toh(x) (x) + +# define htobe64(x) htonll(x) +# define htole64(x) (x) +# define be64toh(x) ntohll(x) +# define le64toh(x) (x) + +# elif BYTE_ORDER == BIG_ENDIAN + + /* that would be xbox 360 */ +# define htobe16(x) (x) +# define htole16(x) __builtin_bswap16(x) +# define be16toh(x) (x) +# define le16toh(x) __builtin_bswap16(x) + +# define htobe32(x) (x) +# define htole32(x) __builtin_bswap32(x) +# define be32toh(x) (x) +# define le32toh(x) __builtin_bswap32(x) + +# define htobe64(x) (x) +# define htole64(x) __builtin_bswap64(x) +# define be64toh(x) (x) +# define le64toh(x) __builtin_bswap64(x) + +# else + +# error byte order not supported + +# endif + +# define __BYTE_ORDER BYTE_ORDER +# define __BIG_ENDIAN BIG_ENDIAN +# define __LITTLE_ENDIAN LITTLE_ENDIAN +# define __PDP_ENDIAN PDP_ENDIAN + +#else + +# error platform not supported + +#endif + +#endif diff --git a/apt-pkg/getservbyport_r.cc b/apt-pkg/getservbyport_r.cc new file mode 100644 index 000000000..cf78ad514 --- /dev/null +++ b/apt-pkg/getservbyport_r.cc @@ -0,0 +1,59 @@ +#define _GNU_SOURCE +#include <sys/types.h> +#include <sys/socket.h> +#include <netinet/in.h> +#include <netdb.h> +#include <inttypes.h> +#include <errno.h> +#include <string.h> + +#ifndef HAVE_GETSERVBYPORT_R + +extern "C" int getservbyport_r(int port, const char *prots, + struct servent *se, char *buf, size_t buflen, struct servent **res) +{ + int i; + struct sockaddr_in sin = { + .sin_family = AF_INET, + .sin_port = (in_port_t) port, + }; + + if (!prots) { + int r = getservbyport_r(port, "tcp", se, buf, buflen, res); + if (r) r = getservbyport_r(port, "udp", se, buf, buflen, res); + return r; 
+ } + + /* Align buffer */ + i = (uintptr_t)buf & sizeof(char *)-1; + if (!i) i = sizeof(char *); + if (buflen < 3*sizeof(char *)-i) + return ERANGE; + buf += sizeof(char *)-i; + buflen -= sizeof(char *)-i; + + if (strcmp(prots, "tcp") && strcmp(prots, "udp")) return EINVAL; + + se->s_port = port; + se->s_proto = (char *)prots; + se->s_aliases = (char **)buf; + buf += 2*sizeof(char *); + buflen -= 2*sizeof(char *); + se->s_aliases[1] = 0; + se->s_aliases[0] = se->s_name = buf; + + switch (getnameinfo((const struct sockaddr *) &sin, sizeof sin, 0, 0, buf, buflen, + strcmp(prots, "udp") ? 0 : NI_DGRAM)) { + case EAI_MEMORY: + case EAI_SYSTEM: + return ENOMEM; + default: + return ENOENT; + case 0: + break; + } + + *res = se; + return 0; +} +#endif diff --git a/apt-pkg/memrchr.cc b/apt-pkg/memrchr.cc new file mode 100644 index 000000000..edf8f346a --- /dev/null +++ b/apt-pkg/memrchr.cc @@ -0,0 +1,157 @@ +/* memrchr -- find the last occurrence of a byte in a memory block + + Copyright (C) 1991, 1993, 1996-1997, 1999-2000, 2003-2015 Free Software + Foundation, Inc. + + Based on strlen implementation by Torbjorn Granlund (tege@sics.se), + with help from Dan Sahlin (dan@sics.se) and + commentary by Jim Blandy (jimb@ai.mit.edu); + adaptation to memchr suggested by Dick Karpinski (dick@cca.ucsf.edu), + and implemented by Roland McGrath (roland@ai.mit.edu). + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. 
If not, see <http://www.gnu.org/licenses/>. */ + +#include <config.h> + +#ifndef HAVE_MEMRCHR +#define reg_char char + +#include <string.h> +#include <limits.h> + +#undef __memrchr +#ifdef _LIBC +# undef memrchr +#endif + +#ifndef weak_alias +# define __memrchr memrchr +#endif + +/* Search no more than N bytes of S for C. */ +extern "C" void * +memrchr (const void *s, int c_in, size_t n) +{ + /* On 32-bit hardware, choosing longword to be a 32-bit unsigned + long instead of a 64-bit uintmax_t tends to give better + performance. On 64-bit hardware, unsigned long is generally 64 + bits already. Change this typedef to experiment with + performance. */ + typedef unsigned long int longword; + + const unsigned char *char_ptr; + const longword *longword_ptr; + longword repeated_one; + longword repeated_c; + unsigned reg_char c; + + c = (unsigned char) c_in; + + /* Handle the last few bytes by reading one byte at a time. + Do this until CHAR_PTR is aligned on a longword boundary. */ + for (char_ptr = (const unsigned char *) s + n; + n > 0 && (size_t) char_ptr % sizeof (longword) != 0; + --n) + if (*--char_ptr == c) + return (void *) char_ptr; + + longword_ptr = (const longword *) char_ptr; + + /* All these elucidatory comments refer to 4-byte longwords, + but the theory applies equally well to any size longwords. */ + + /* Compute auxiliary longword values: + repeated_one is a value which has a 1 in every byte. + repeated_c has c in every byte. */ + repeated_one = 0x01010101; + repeated_c = c | (c << 8); + repeated_c |= repeated_c << 16; + if (0xffffffffU < (longword) -1) + { + repeated_one |= repeated_one << 31 << 1; + repeated_c |= repeated_c << 31 << 1; + if (8 < sizeof (longword)) + { + size_t i; + + for (i = 64; i < sizeof (longword) * 8; i *= 2) + { + repeated_one |= repeated_one << i; + repeated_c |= repeated_c << i; + } + } + } + + /* Instead of the traditional loop which tests each byte, we will test a + longword at a time. 
The tricky part is testing if *any of the four* + bytes in the longword in question are equal to c. We first use an xor + with repeated_c. This reduces the task to testing whether *any of the + four* bytes in longword1 is zero. + + We compute tmp = + ((longword1 - repeated_one) & ~longword1) & (repeated_one << 7). + That is, we perform the following operations: + 1. Subtract repeated_one. + 2. & ~longword1. + 3. & a mask consisting of 0x80 in every byte. + Consider what happens in each byte: + - If a byte of longword1 is zero, step 1 and 2 transform it into 0xff, + and step 3 transforms it into 0x80. A carry can also be propagated + to more significant bytes. + - If a byte of longword1 is nonzero, let its lowest 1 bit be at + position k (0 <= k <= 7); so the lowest k bits are 0. After step 1, + the byte ends in a single bit of value 0 and k bits of value 1. + After step 2, the result is just k bits of value 1: 2^k - 1. After + step 3, the result is 0. And no carry is produced. + So, if longword1 has only non-zero bytes, tmp is zero. + Whereas if longword1 has a zero byte, call j the position of the least + significant zero byte. Then the result has a zero at positions 0, ..., + j-1 and a 0x80 at position j. We cannot predict the result at the more + significant bytes (positions j+1..3), but it does not matter since we + already have a non-zero bit at position 8*j+7. + + So, the test whether any byte in longword1 is zero is equivalent to + testing whether tmp is nonzero. */ + + while (n >= sizeof (longword)) + { + longword longword1 = *--longword_ptr ^ repeated_c; + + if ((((longword1 - repeated_one) & ~longword1) + & (repeated_one << 7)) != 0) + { + longword_ptr++; + break; + } + n -= sizeof (longword); + } + + char_ptr = (const unsigned char *) longword_ptr; + + /* At this point, we know that either n < sizeof (longword), or one of the + sizeof (longword) bytes starting at char_ptr is == c. 
On little-endian + machines, we could determine the first such byte without any further + memory accesses, just by looking at the tmp result from the last loop + iteration. But this does not work on big-endian machines. Choose code + that works in both cases. */ + + while (n-- > 0) + { + if (*--char_ptr == c) + return (void *) char_ptr; + } + + return NULL; +} +#endif diff --git a/apt-pkg/missing.h b/apt-pkg/missing.h new file mode 100644 index 000000000..441b47ce1 --- /dev/null +++ b/apt-pkg/missing.h @@ -0,0 +1,26 @@
/* Declarations for libc extensions that are not available on every platform.
   The matching fallback implementations live in memrchr.cc, rawmemchr.cc,
   strchrnul.cc and getservbyport_r.cc.  */
#ifndef PKGLIB_MISSING_H
#define PKGLIB_MISSING_H

/* Only define the feature macro when the compiler or build system has not
   already done so; an unconditional #define triggers a redefinition
   diagnostic.  NOTE(review): it only affects system headers that have not
   been included yet, so this header should be included early. */
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <sys/socket.h>
#include <netinet/in.h>
#include <netdb.h>
#include <inttypes.h>
#include <errno.h>
#include <string.h>

extern "C" {
   /* Last occurrence of byte c within the first n bytes of s, or NULL. */
   void *memrchr(const void *s, int c, size_t n);
   /* First occurrence of byte c in s; the search has no length limit. */
   void *rawmemchr(const void *s, int c);
   /* First occurrence of c in s, or the terminating NUL if c is absent. */
   char *strchrnul(const char *s, int c);
   /* Reentrant service lookup by port; returns 0 or an errno value. */
   int getservbyport_r(int port, const char *prots, struct servent *se, char *buf, size_t buflen, struct servent **res);
}

/* Signal handler function pointer type. */
typedef void (*sighandler_t)(int);

/* Process environment vector. */
extern char **environ;

/* Keep the definition from <netdb.h> when the C library supplies one. */
#ifndef AI_IDN
#define AI_IDN 0x0040
#endif

#endif

diff --git a/apt-pkg/nameser_compat.h b/apt-pkg/nameser_compat.h new file mode 100644 index 000000000..b2cf2ffaa --- /dev/null +++ b/apt-pkg/nameser_compat.h @@ -0,0 +1,187 @@ +/* Copyright (c) 1983, 1989 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4.
Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/*% + * from nameser.h 8.1 (Berkeley) 6/2/93 + * $BINDId: nameser_compat.h,v 8.11 1999/01/02 08:00:58 vixie Exp $ + */ + +#ifndef _ARPA_NAMESER_COMPAT_ +#define _ARPA_NAMESER_COMPAT_ + +#define __BIND 19950621 /*%< (DEAD) interface version stamp. */ + +#include <apt-pkg/endian.h> + +/*% + * Structure for query header. The order of the fields is machine- and + * compiler-dependent, depending on the byte/bit order and the layout + * of bit fields. We use bit fields only in int variables, as this + * is all ANSI requires. This requires a somewhat confusing rearrangement. 
+ */ + +typedef struct { + unsigned id :16; /*%< query identification number */ +#if BYTE_ORDER == BIG_ENDIAN + /* fields in third byte */ + unsigned qr: 1; /*%< response flag */ + unsigned opcode: 4; /*%< purpose of message */ + unsigned aa: 1; /*%< authoritative answer */ + unsigned tc: 1; /*%< truncated message */ + unsigned rd: 1; /*%< recursion desired */ + /* fields in fourth byte */ + unsigned ra: 1; /*%< recursion available */ + unsigned unused :1; /*%< unused bits (MBZ as of 4.9.3a3) */ + unsigned ad: 1; /*%< authentic data from named */ + unsigned cd: 1; /*%< checking disabled by resolver */ + unsigned rcode :4; /*%< response code */ +#endif +#if BYTE_ORDER == LITTLE_ENDIAN || BYTE_ORDER == PDP_ENDIAN + /* fields in third byte */ + unsigned rd :1; /*%< recursion desired */ + unsigned tc :1; /*%< truncated message */ + unsigned aa :1; /*%< authoritative answer */ + unsigned opcode :4; /*%< purpose of message */ + unsigned qr :1; /*%< response flag */ + /* fields in fourth byte */ + unsigned rcode :4; /*%< response code */ + unsigned cd: 1; /*%< checking disabled by resolver */ + unsigned ad: 1; /*%< authentic data from named */ + unsigned unused :1; /*%< unused bits (MBZ as of 4.9.3a3) */ + unsigned ra :1; /*%< recursion available */ +#endif + /* remaining bytes */ + unsigned qdcount :16; /*%< number of question entries */ + unsigned ancount :16; /*%< number of answer entries */ + unsigned nscount :16; /*%< number of authority entries */ + unsigned arcount :16; /*%< number of resource entries */ +} HEADER; + +#define PACKETSZ NS_PACKETSZ +#define MAXDNAME NS_MAXDNAME +#define MAXCDNAME NS_MAXCDNAME +#define MAXLABEL NS_MAXLABEL +#define HFIXEDSZ NS_HFIXEDSZ +#define QFIXEDSZ NS_QFIXEDSZ +#define RRFIXEDSZ NS_RRFIXEDSZ +#define INT32SZ NS_INT32SZ +#define INT16SZ NS_INT16SZ +#define INT8SZ NS_INT8SZ +#define INADDRSZ NS_INADDRSZ +#define IN6ADDRSZ NS_IN6ADDRSZ +#define INDIR_MASK NS_CMPRSFLGS +#define NAMESERVER_PORT NS_DEFAULTPORT + +#define S_ZONE ns_s_zn
+#define S_PREREQ ns_s_pr +#define S_UPDATE ns_s_ud +#define S_ADDT ns_s_ar + +#define QUERY ns_o_query +#define IQUERY ns_o_iquery +#define STATUS ns_o_status +#define NS_NOTIFY_OP ns_o_notify +#define NS_UPDATE_OP ns_o_update + +#define NOERROR ns_r_noerror +#define FORMERR ns_r_formerr +#define SERVFAIL ns_r_servfail +#define NXDOMAIN ns_r_nxdomain +#define NOTIMP ns_r_notimpl +#define REFUSED ns_r_refused +#define YXDOMAIN ns_r_yxdomain +#define YXRRSET ns_r_yxrrset +#define NXRRSET ns_r_nxrrset +#define NOTAUTH ns_r_notauth +#define NOTZONE ns_r_notzone +/*#define BADSIG ns_r_badsig*/ +/*#define BADKEY ns_r_badkey*/ +/*#define BADTIME ns_r_badtime*/ + + +#define DELETE ns_uop_delete +#define ADD ns_uop_add + +#define T_A ns_t_a +#define T_NS ns_t_ns +#define T_MD ns_t_md +#define T_MF ns_t_mf +#define T_CNAME ns_t_cname +#define T_SOA ns_t_soa +#define T_MB ns_t_mb +#define T_MG ns_t_mg +#define T_MR ns_t_mr +#define T_NULL ns_t_null +#define T_WKS ns_t_wks +#define T_PTR ns_t_ptr +#define T_HINFO ns_t_hinfo +#define T_MINFO ns_t_minfo +#define T_MX ns_t_mx +#define T_TXT ns_t_txt +#define T_RP ns_t_rp +#define T_AFSDB ns_t_afsdb +#define T_X25 ns_t_x25 +#define T_ISDN ns_t_isdn +#define T_RT ns_t_rt +#define T_NSAP ns_t_nsap +#define T_NSAP_PTR ns_t_nsap_ptr +#define T_SIG ns_t_sig +#define T_KEY ns_t_key +#define T_PX ns_t_px +#define T_GPOS ns_t_gpos +#define T_AAAA ns_t_aaaa +#define T_LOC ns_t_loc +#define T_NXT ns_t_nxt +#define T_EID ns_t_eid +#define T_NIMLOC ns_t_nimloc +#define T_SRV ns_t_srv +#define T_ATMA ns_t_atma +#define T_NAPTR ns_t_naptr +#define T_A6 ns_t_a6 +#define T_DNAME ns_t_dname +#define T_TSIG ns_t_tsig +#define T_IXFR ns_t_ixfr +#define T_AXFR ns_t_axfr +#define T_MAILB ns_t_mailb +#define T_MAILA ns_t_maila +#define T_ANY ns_t_any + +#define C_IN ns_c_in +#define C_CHAOS ns_c_chaos +#define C_HS ns_c_hs +/* BIND_UPDATE */ +#define C_NONE ns_c_none +#define C_ANY ns_c_any + +#define GETSHORT NS_GET16 +#define GETLONG NS_GET32 
+#define PUTSHORT NS_PUT16 +#define PUTLONG NS_PUT32 + +#endif /* _ARPA_NAMESER_COMPAT_ */ +/*! \file */ diff --git a/apt-pkg/rawmemchr.cc b/apt-pkg/rawmemchr.cc new file mode 100644 index 000000000..8f7669b6c --- /dev/null +++ b/apt-pkg/rawmemchr.cc @@ -0,0 +1,139 @@ +/* Searching in a string. + Copyright (C) 2008-2015 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include <config.h> + +#ifndef HAVE_RAWMEMCHR + +/* Specification. */ +#include <string.h> + +/* Find the first occurrence of C in S. */ +extern "C" void * +rawmemchr (const void *s, int c_in) +{ + /* On 32-bit hardware, choosing longword to be a 32-bit unsigned + long instead of a 64-bit uintmax_t tends to give better + performance. On 64-bit hardware, unsigned long is generally 64 + bits already. Change this typedef to experiment with + performance. */ + typedef unsigned long int longword; + + const unsigned char *char_ptr; + const longword *longword_ptr; + longword repeated_one; + longword repeated_c; + unsigned char c; + + c = (unsigned char) c_in; + + /* Handle the first few bytes by reading one byte at a time. + Do this until CHAR_PTR is aligned on a longword boundary. 
*/ + for (char_ptr = (const unsigned char *) s; + (size_t) char_ptr % sizeof (longword) != 0; + ++char_ptr) + if (*char_ptr == c) + return (void *) char_ptr; + + longword_ptr = (const longword *) char_ptr; + + /* All these elucidatory comments refer to 4-byte longwords, + but the theory applies equally well to any size longwords. */ + + /* Compute auxiliary longword values: + repeated_one is a value which has a 1 in every byte. + repeated_c has c in every byte. */ + repeated_one = 0x01010101; + repeated_c = c | (c << 8); + repeated_c |= repeated_c << 16; + if (0xffffffffU < (longword) -1) + { + repeated_one |= repeated_one << 31 << 1; + repeated_c |= repeated_c << 31 << 1; + if (8 < sizeof (longword)) + { + size_t i; + + for (i = 64; i < sizeof (longword) * 8; i *= 2) + { + repeated_one |= repeated_one << i; + repeated_c |= repeated_c << i; + } + } + } + + /* Instead of the traditional loop which tests each byte, we will + test a longword at a time. The tricky part is testing if *any of + the four* bytes in the longword in question are equal to NUL or + c. We first use an xor with repeated_c. This reduces the task + to testing whether *any of the four* bytes in longword1 is zero. + + We compute tmp = + ((longword1 - repeated_one) & ~longword1) & (repeated_one << 7). + That is, we perform the following operations: + 1. Subtract repeated_one. + 2. & ~longword1. + 3. & a mask consisting of 0x80 in every byte. + Consider what happens in each byte: + - If a byte of longword1 is zero, step 1 and 2 transform it into 0xff, + and step 3 transforms it into 0x80. A carry can also be propagated + to more significant bytes. + - If a byte of longword1 is nonzero, let its lowest 1 bit be at + position k (0 <= k <= 7); so the lowest k bits are 0. After step 1, + the byte ends in a single bit of value 0 and k bits of value 1. + After step 2, the result is just k bits of value 1: 2^k - 1. After + step 3, the result is 0. And no carry is produced. 
+ So, if longword1 has only non-zero bytes, tmp is zero. + Whereas if longword1 has a zero byte, call j the position of the least + significant zero byte. Then the result has a zero at positions 0, ..., + j-1 and a 0x80 at position j. We cannot predict the result at the more + significant bytes (positions j+1..3), but it does not matter since we + already have a non-zero bit at position 8*j+7. + + The test whether any byte in longword1 is zero is equivalent + to testing whether tmp is nonzero. + + This test can read beyond the end of a string, depending on where + C_IN is encountered. However, this is considered safe since the + initialization phase ensured that the read will be aligned, + therefore, the read will not cross page boundaries and will not + cause a fault. */ + + while (1) + { + longword longword1 = *longword_ptr ^ repeated_c; + + if ((((longword1 - repeated_one) & ~longword1) + & (repeated_one << 7)) != 0) + break; + longword_ptr++; + } + + char_ptr = (const unsigned char *) longword_ptr; + + /* At this point, we know that one of the sizeof (longword) bytes + starting at char_ptr is == c. On little-endian machines, we + could determine the first such byte without any further memory + accesses, just by looking at the tmp result from the last loop + iteration. But this does not work on big-endian machines. + Choose code that works in both cases. */ + + char_ptr = (unsigned char *) longword_ptr; + while (*char_ptr != c) + char_ptr++; + return (void *) char_ptr; +} +#endif diff --git a/apt-pkg/strchrnul.cc b/apt-pkg/strchrnul.cc new file mode 100644 index 000000000..b68b8501b --- /dev/null +++ b/apt-pkg/strchrnul.cc @@ -0,0 +1,147 @@ +/* Searching in a string. + Copyright (C) 2003, 2007-2015 Free Software Foundation, Inc. 
+ + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include <config.h> + +#ifndef HAVE_STRCHRNUL + +/* Specification. */ +#include <string.h> + +#include <apt-pkg/missing.h> + +/* Find the first occurrence of C in S or the final NUL byte. */ +extern "C" char * +strchrnul (const char *s, int c_in) +{ + /* On 32-bit hardware, choosing longword to be a 32-bit unsigned + long instead of a 64-bit uintmax_t tends to give better + performance. On 64-bit hardware, unsigned long is generally 64 + bits already. Change this typedef to experiment with + performance. */ + typedef unsigned long int longword; + + const unsigned char *char_ptr; + const longword *longword_ptr; + longword repeated_one; + longword repeated_c; + unsigned char c; + + c = (unsigned char) c_in; + if (!c) + return (char*) rawmemchr (s, 0); + + /* Handle the first few bytes by reading one byte at a time. + Do this until CHAR_PTR is aligned on a longword boundary. */ + for (char_ptr = (const unsigned char *) s; + (size_t) char_ptr % sizeof (longword) != 0; + ++char_ptr) + if (!*char_ptr || *char_ptr == c) + return (char *) char_ptr; + + longword_ptr = (const longword *) char_ptr; + + /* All these elucidatory comments refer to 4-byte longwords, + but the theory applies equally well to any size longwords. */ + + /* Compute auxiliary longword values: + repeated_one is a value which has a 1 in every byte. 
+ repeated_c has c in every byte. */ + repeated_one = 0x01010101; + repeated_c = c | (c << 8); + repeated_c |= repeated_c << 16; + if (0xffffffffU < (longword) -1) + { + repeated_one |= repeated_one << 31 << 1; + repeated_c |= repeated_c << 31 << 1; + if (8 < sizeof (longword)) + { + size_t i; + + for (i = 64; i < sizeof (longword) * 8; i *= 2) + { + repeated_one |= repeated_one << i; + repeated_c |= repeated_c << i; + } + } + } + + /* Instead of the traditional loop which tests each byte, we will + test a longword at a time. The tricky part is testing if *any of + the four* bytes in the longword in question are equal to NUL or + c. We first use an xor with repeated_c. This reduces the task + to testing whether *any of the four* bytes in longword1 or + longword2 is zero. + + Let's consider longword1. We compute tmp = + ((longword1 - repeated_one) & ~longword1) & (repeated_one << 7). + That is, we perform the following operations: + 1. Subtract repeated_one. + 2. & ~longword1. + 3. & a mask consisting of 0x80 in every byte. + Consider what happens in each byte: + - If a byte of longword1 is zero, step 1 and 2 transform it into 0xff, + and step 3 transforms it into 0x80. A carry can also be propagated + to more significant bytes. + - If a byte of longword1 is nonzero, let its lowest 1 bit be at + position k (0 <= k <= 7); so the lowest k bits are 0. After step 1, + the byte ends in a single bit of value 0 and k bits of value 1. + After step 2, the result is just k bits of value 1: 2^k - 1. After + step 3, the result is 0. And no carry is produced. + So, if longword1 has only non-zero bytes, tmp is zero. + Whereas if longword1 has a zero byte, call j the position of the least + significant zero byte. Then the result has a zero at positions 0, ..., + j-1 and a 0x80 at position j. We cannot predict the result at the more + significant bytes (positions j+1..3), but it does not matter since we + already have a non-zero bit at position 8*j+7. 
+ + The test whether any byte in longword1 or longword2 is zero is equivalent + to testing whether tmp1 is nonzero or tmp2 is nonzero. We can combine + this into a single test, whether (tmp1 | tmp2) is nonzero. + + This test can read more than one byte beyond the end of a string, + depending on where the terminating NUL is encountered. However, + this is considered safe since the initialization phase ensured + that the read will be aligned, therefore, the read will not cross + page boundaries and will not cause a fault. */ + + while (1) + { + longword longword1 = *longword_ptr ^ repeated_c; + longword longword2 = *longword_ptr; + + if (((((longword1 - repeated_one) & ~longword1) + | ((longword2 - repeated_one) & ~longword2)) + & (repeated_one << 7)) != 0) + break; + longword_ptr++; + } + + char_ptr = (const unsigned char *) longword_ptr; + + /* At this point, we know that one of the sizeof (longword) bytes + starting at char_ptr is == 0 or == c. On little-endian machines, + we could determine the first such byte without any further memory + accesses, just by looking at the tmp result from the last loop + iteration. But this does not work on big-endian machines. + Choose code that works in both cases. */ + + char_ptr = (unsigned char *) longword_ptr; + while (*char_ptr && (*char_ptr != c)) + char_ptr++; + return (char *) char_ptr; +} +#endif |