optimize .count() method of bounded_bitset to leverage popcount special instructions. Confirmed to work for gcc if the -msse4 flag is passed.

master
Francisco 4 years ago committed by Francisco Paisana
parent 7dcb703d06
commit 3b491ab06b

@ -164,10 +164,12 @@ public:
size_t result = 0; size_t result = 0;
for (size_t i = 0; i < nof_words_(); i++) { for (size_t i = 0; i < nof_words_(); i++) {
// result += __builtin_popcountl(buffer[i]); // result += __builtin_popcountl(buffer[i]);
word_t w = buffer[i]; // Note: use an "int" for count triggers popcount optimization if SSE instructions are enabled.
for (; w; w >>= 1u) { int c = 0;
result += (w & 1u); for (word_t w = buffer[i]; w > 0; c++) {
w &= w - 1;
} }
result += c;
} }
return result; return result;
} }

Loading…
Cancel
Save