
AK+Everywhere: Replace __builtin bit functions

In order to reduce our reliance on __builtin_{ffs, clz, ctz, popcount},
this commit removes all calls to these functions and replaces them with
the equivalent functions in AK/BuiltinWrappers.h.
Nick Johnson 2021-12-19 15:46:55 -06:00 committed by Andreas Kling
parent 26bb3e1acf
commit 08e4a1a4dc
20 changed files with 108 additions and 115 deletions
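As a concrete illustration of the replacement pattern (a minimal sketch, not part of this commit; the helper names used_bit_width and set_bit_count are invented for the example), call sites that previously reached for a width-specific __builtin_* variant now call the type-generic wrappers from AK/BuiltinWrappers.h:

#include <AK/BuiltinWrappers.h>
#include <AK/Types.h>

// Before: 64 - __builtin_clzll(mask), hard-wired to the 'll' builtin.
// After: the wrapper selects the right builtin (or a fallback) from the argument type.
constexpr size_t used_bit_width(u64 mask)
{
    return mask ? 64 - count_leading_zeroes(mask) : 0;
}

// Before: __builtin_popcount(mask).
constexpr int set_bit_count(u32 mask)
{
    return popcount(mask);
}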

View file

@@ -54,30 +54,30 @@ public:
byte &= bitmask_first_byte[start % 8];
if (first == last) {
byte &= bitmask_last_byte[(start + len) % 8];
count = __builtin_popcount(byte);
count = popcount(byte);
} else {
count = __builtin_popcount(byte);
count = popcount(byte);
// Don't access *last if it's out of bounds
if (last < &m_data[size_in_bytes()]) {
byte = *last;
byte &= bitmask_last_byte[(start + len) % 8];
count += __builtin_popcount(byte);
count += popcount(byte);
}
if (++first < last) {
const u32* ptr32 = (const u32*)(((FlatPtr)first + sizeof(u32) - 1) & ~(sizeof(u32) - 1));
if ((const u8*)ptr32 > last)
ptr32 = (const u32*)last;
while (first < (const u8*)ptr32) {
count += __builtin_popcount(*first);
const size_t* ptr_large = (const size_t*)(((FlatPtr)first + sizeof(size_t) - 1) & ~(sizeof(size_t) - 1));
if ((const u8*)ptr_large > last)
ptr_large = (const size_t*)last;
while (first < (const u8*)ptr_large) {
count += popcount(*first);
first++;
}
const u32* last32 = (const u32*)((FlatPtr)last & ~(sizeof(u32) - 1));
while (ptr32 < last32) {
count += __builtin_popcountl(*ptr32);
ptr32++;
const size_t* last_large = (const size_t*)((FlatPtr)last & ~(sizeof(size_t) - 1));
while (ptr_large < last_large) {
count += popcount(*ptr_large);
ptr_large++;
}
for (first = (const u8*)ptr32; first < last; first++)
count += __builtin_popcount(*first);
for (first = (const u8*)ptr_large; first < last; first++)
count += popcount(*first);
}
}
@@ -100,34 +100,34 @@ public:
// We will use hint as what it is: a hint. Because we try to
// scan over entire size_t-sized words, we may start searching before
// the hint!
const u32* ptr32 = (const u32*)((FlatPtr)&m_data[hint / 8] & ~(sizeof(u32) - 1));
if ((const u8*)ptr32 < &m_data[0]) {
ptr32++;
const size_t* ptr_large = (const size_t*)((FlatPtr)&m_data[hint / 8] & ~(sizeof(size_t) - 1));
if ((const u8*)ptr_large < &m_data[0]) {
ptr_large++;
// m_data isn't aligned, check first bytes
size_t start_ptr32 = (const u8*)ptr32 - &m_data[0];
size_t start_ptr_large = (const u8*)ptr_large - &m_data[0];
size_t i = 0;
u8 byte = VALUE ? 0x00 : 0xff;
while (i < start_ptr32 && m_data[i] == byte)
while (i < start_ptr_large && m_data[i] == byte)
i++;
if (i < start_ptr32) {
if (i < start_ptr_large) {
byte = m_data[i];
if constexpr (!VALUE)
byte = ~byte;
VERIFY(byte != 0);
return i * 8 + __builtin_ffs(byte) - 1;
return i * 8 + bit_scan_forward(byte) - 1;
}
}
u32 val32 = VALUE ? 0x0 : 0xffffffff;
const u32* end32 = (const u32*)((FlatPtr)end & ~(sizeof(u32) - 1));
while (ptr32 < end32 && *ptr32 == val32)
ptr32++;
size_t val_large = VALUE ? 0x0 : NumericLimits<size_t>::max();
const size_t* end_large = (const size_t*)((FlatPtr)end & ~(sizeof(size_t) - 1));
while (ptr_large < end_large && *ptr_large == val_large)
ptr_large++;
if (ptr32 == end32) {
if (ptr_large == end_large) {
// We didn't find anything, check the remaining few bytes (if any)
u8 byte = VALUE ? 0x00 : 0xff;
size_t i = (const u8*)ptr32 - &m_data[0];
size_t i = (const u8*)ptr_large - &m_data[0];
size_t byte_count = m_size / 8;
VERIFY(i <= byte_count);
while (i < byte_count && m_data[i] == byte)
@@ -137,7 +137,7 @@ public:
return {}; // We already checked from the beginning
// Try scanning before the hint
end = (const u8*)((FlatPtr)&m_data[hint / 8] & ~(sizeof(u32) - 1));
end = (const u8*)((FlatPtr)&m_data[hint / 8] & ~(sizeof(size_t) - 1));
hint = 0;
continue;
}
@@ -145,16 +145,16 @@ public:
if constexpr (!VALUE)
byte = ~byte;
VERIFY(byte != 0);
return i * 8 + __builtin_ffs(byte) - 1;
return i * 8 + bit_scan_forward(byte) - 1;
}
// NOTE: We don't really care about byte ordering. We found *one*
// free bit, just calculate the position and return it
val32 = *ptr32;
val_large = *ptr_large;
if constexpr (!VALUE)
val32 = ~val32;
VERIFY(val32 != 0);
return ((const u8*)ptr32 - &m_data[0]) * 8 + __builtin_ffsl(val32) - 1;
val_large = ~val_large;
VERIFY(val_large != 0);
return ((const u8*)ptr_large - &m_data[0]) * 8 + bit_scan_forward(val_large) - 1;
}
}
@@ -184,7 +184,7 @@ public:
if constexpr (!VALUE)
byte = ~byte;
VERIFY(byte != 0);
return i * 8 + __builtin_ffs(byte) - 1;
return i * 8 + bit_scan_forward(byte) - 1;
}
Optional<size_t> find_first_set() const { return find_first<true>(); }
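The hunks above widen the scan granularity from u32 to size_t. Below is a minimal standalone sketch of the same word-at-a-time counting idea (the count_set_bits helper and its exact head/tail handling are an assumption for illustration, not code from this commit):

#include <AK/BuiltinWrappers.h>
#include <AK/Types.h>

size_t count_set_bits(u8 const* data, size_t byte_count)
{
    u8 const* p = data;
    u8 const* end = data + byte_count;
    size_t count = 0;
    // Head: count byte-by-byte until p reaches a size_t-aligned address.
    while (p < end && (reinterpret_cast<FlatPtr>(p) & (sizeof(size_t) - 1)) != 0)
        count += popcount(*p++);
    // Middle: count whole size_t-sized words at a time.
    while (static_cast<size_t>(end - p) >= sizeof(size_t)) {
        count += popcount(*reinterpret_cast<size_t const*>(p));
        p += sizeof(size_t);
    }
    // Tail: count any remaining bytes.
    while (p < end)
        count += popcount(*p++);
    return count;
}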

View file

@@ -7,7 +7,6 @@
#pragma once
#include "Concepts.h"
#include "Platform.h"
template<Unsigned IntType>
inline constexpr int popcount(IntType value)
@@ -108,3 +107,25 @@ inline constexpr int count_leading_zeroes_safe(IntType value)
return 8 * sizeof(IntType);
return count_leading_zeroes(value);
}
// The function returns the 1-based index of the least significant set bit
// (ffs() semantics). If the given value is zero, this function returns 0.
template<Integral IntType>
inline constexpr int bit_scan_forward(IntType value)
{
#if defined(__GNUC__) || defined(__clang__)
static_assert(sizeof(IntType) <= sizeof(unsigned long long));
if constexpr (sizeof(IntType) <= sizeof(unsigned int))
return __builtin_ffs(value);
if constexpr (sizeof(IntType) == sizeof(unsigned long))
return __builtin_ffsl(value);
if constexpr (sizeof(IntType) == sizeof(unsigned long long))
return __builtin_ffsll(value);
VERIFY_NOT_REACHED();
#else
if (value == 0)
return 0;
return 1 + count_trailing_zeroes(static_cast<MakeUnsigned<IntType>>(value));
#endif
}
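A few illustrative values for the ffs-style contract of bit_scan_forward (an assumed usage sketch, not part of the commit): the result is the 1-based index of the least significant set bit, and 0 when no bit is set.

#include <AK/BuiltinWrappers.h>

static_assert(bit_scan_forward(0u) == 0);   // no bit set
static_assert(bit_scan_forward(1u) == 1);   // lowest set bit is bit 0
static_assert(bit_scan_forward(8u) == 4);   // 0b1000, lowest set bit is bit 3
static_assert(bit_scan_forward(12u) == 3);  // 0b1100, lowest set bit is bit 2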

View file

@@ -6,6 +6,7 @@
#pragma once
#include <AK/BuiltinWrappers.h>
#include <AK/Concepts.h>
#include <AK/StdLibExtraDetails.h>
#include <AK/Types.h>
@@ -45,21 +46,6 @@ constexpr size_t product_odd() { return value * product_odd<value - 2>(); }
return __builtin_##function##f(args); \
}
#define INTEGER_BUILTIN(name) \
template<Integral T> \
constexpr T name(T x) \
{ \
if constexpr (sizeof(T) == sizeof(long long)) \
return __builtin_##name##ll(x); \
if constexpr (sizeof(T) == sizeof(long)) \
return __builtin_##name##l(x); \
return __builtin_##name(x); \
}
INTEGER_BUILTIN(clz);
INTEGER_BUILTIN(ctz);
INTEGER_BUILTIN(popcnt);
namespace Division {
template<FloatingPoint T>
constexpr T fmod(T x, T y)
@@ -312,7 +298,7 @@ constexpr T log2(T x)
template<Integral T>
constexpr T log2(T x)
{
return x ? 8 * sizeof(T) - clz(x) : 0;
return x ? 8 * sizeof(T) - count_leading_zeroes(static_cast<MakeUnsigned<T>>(x)) : 0;
}
template<FloatingPoint T>
@@ -468,6 +454,5 @@ constexpr T pow(T x, T y)
}
#undef CONSTEXPR_STATE
#undef INTEGER_BUILTIN
}
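The integral log2 change above leans on the bit-width identity behind count_leading_zeroes: for a nonzero x of type T, 8 * sizeof(T) - count_leading_zeroes(x) is the number of significant bits in x. A few assumed example values (not part of the commit):

#include <AK/BuiltinWrappers.h>
#include <AK/Types.h>

static_assert(8 * sizeof(u32) - count_leading_zeroes(u32(1)) == 1);    // 0b1 occupies 1 bit
static_assert(8 * sizeof(u32) - count_leading_zeroes(u32(255)) == 8);  // 0b11111111 occupies 8 bits
static_assert(8 * sizeof(u32) - count_leading_zeroes(u32(256)) == 9);  // 0b100000000 occupies 9 bits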

View file

@@ -106,29 +106,6 @@ extern "C" {
# endif
#endif
#ifdef __cplusplus
ALWAYS_INLINE int count_trailing_zeroes_32(unsigned int val)
{
# if defined(__GNUC__) || defined(__clang__)
return __builtin_ctz(val);
# else
for (u8 i = 0; i < 32; ++i) {
if ((val >> i) & 1) {
return i;
}
}
return 0;
# endif
}
ALWAYS_INLINE int count_trailing_zeroes_32_safe(unsigned int val)
{
if (val == 0)
return 32;
return count_trailing_zeroes_32(val);
}
#endif
#ifdef AK_OS_BSD_GENERIC
# define CLOCK_MONOTONIC_COARSE CLOCK_MONOTONIC
# define CLOCK_REALTIME_COARSE CLOCK_REALTIME
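Callers of the removed helpers can get the same behavior from the wrappers; a minimal sketch (the helper name below is hypothetical, and real call sites may simply call count_trailing_zeroes directly):

#include <AK/BuiltinWrappers.h>

inline int count_trailing_zeroes_32_replacement(unsigned int val)
{
    // The removed count_trailing_zeroes_32_safe() returned 32 for a zero input.
    if (val == 0)
        return 32;
    return count_trailing_zeroes(val);
}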

View file

@@ -6,6 +6,7 @@
#pragma once
#include <AK/BuiltinWrappers.h>
#include <AK/Checked.h>
#include <AK/Concepts.h>
#include <AK/Format.h>
@@ -90,9 +91,9 @@ public:
constexpr size_t clz() const requires(IsSame<T, u64>)
{
if (m_high)
return __builtin_clzll(m_high);
return count_leading_zeroes(m_high);
else
return sizeof(T) * 8 + __builtin_clzll(m_low);
return sizeof(T) * 8 + count_leading_zeroes(m_low);
}
constexpr size_t clz() const requires(!IsSame<T, u64>)
{
@@ -104,9 +105,9 @@ public:
constexpr size_t ctz() const requires(IsSame<T, u64>)
{
if (m_low)
return __builtin_ctzll(m_low);
return count_trailing_zeroes(m_low);
else
return sizeof(T) * 8 + __builtin_ctzll(m_high);
return sizeof(T) * 8 + count_trailing_zeroes(m_high);
}
constexpr size_t ctz() const requires(!IsSame<T, u64>)
{
@@ -598,7 +599,7 @@ public:
R x1 = *this;
R x2 = *this * *this;
u64 exp_copy = exp;
for (ssize_t i = sizeof(u64) * 8 - __builtin_clzll(exp) - 2; i >= 0; --i) {
for (ssize_t i = sizeof(u64) * 8 - count_leading_zeroes(exp) - 2; i >= 0; --i) {
if (exp_copy & 1u) {
x2 *= x1;
x1 *= x1;
@@ -642,7 +643,7 @@ public:
U res = 1;
u64 exp_copy = exp;
for (size_t i = sizeof(u64) - __builtin_clzll(exp) - 1u; i < exp; ++i) {
for (size_t i = sizeof(u64) - count_leading_zeroes(exp) - 1u; i < exp; ++i) {
res *= res;
res %= mod;
if (exp_copy & 1u) {
@@ -682,7 +683,7 @@ public:
constexpr size_t logn(u64 base)
{
// FIXME: proper rounding
return log2() / (sizeof(u64) - __builtin_clzll(base));
return log2() / (sizeof(u64) - count_leading_zeroes(base));
}
template<Unsigned U>
requires(sizeof(U) > sizeof(u64)) constexpr size_t logn(U base)
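The clz()/ctz() hunks above compose per-word counts across the high and low 64-bit halves. A minimal standalone sketch of that split-word pattern (the clz128/ctz128 helpers are assumptions for illustration, not code from UFixedBigInt):

#include <AK/BuiltinWrappers.h>
#include <AK/Types.h>

size_t clz128(u64 high, u64 low)
{
    // Leading zeroes: the high word dominates; only fall through to the low
    // word when the high word is entirely zero.
    if (high != 0)
        return count_leading_zeroes(high);
    if (low != 0)
        return 64 + count_leading_zeroes(low);
    return 128; // both words zero
}

size_t ctz128(u64 high, u64 low)
{
    // Trailing zeroes: the low word dominates; only fall through to the high
    // word when the low word is entirely zero.
    if (low != 0)
        return count_trailing_zeroes(low);
    if (high != 0)
        return 64 + count_trailing_zeroes(high);
    return 128; // both words zero
}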