### 为什么选择二叉树做搜索？

Realm 以二叉树结构存储数据。比如，假设 Realm 通过以下链表中的 IDs 存储对象：

“搜索过程从数组的中间元素开始，如果中间元素正好是要查找的元素，则搜索过程结束；如果某一特定元素大于或者小于中间元素，则在数组大于或小于中间元素的那一半中查找，而且跟开始一样从中间元素开始比较。如果在某一步骤数组为空，则代表找不到。” -Wikipedia

``````// For a sorted list, vec, return the index of the first element that compares
// greater to the value, or return vec.size() if no such element is found.
size_t match = std::upper_bound(vec.begin(), vec.end(), value) - vec.begin();
``````

### 基准

``````template <class T> INLINE size_t fast_upper_bound0(const vector<T>& vec, T value)
{
return std::upper_bound(vec.begin(), vec.end(), value) - vec.begin();
}
``````

``````while (__len > 0)
{
_DistanceType __half = __len >> 1;
_ForwardIterator __middle = __first;
if (__val < *__middle)
__len = __half;
else
{
__first = __middle;
++__first;
__len = __len - __half - 1;
}
}
return __first;
``````

… and benchmark it: … 测试结果如下：

### Version 1

``````__len = __half;
``````

``````__first = __middle;
++__first;
__len = __len - __half - 1;
``````

``````template <class T> INLINE size_t fast_upper_bound1(const vector<T>& vec, T value)
{
size_t size = vec.size();
size_t i = 0;
size_t size_2 = size;

while (size_2 > 0) {
size_t half = size_2 / 2;
size_t mid = i + half;
T probe = vec[mid];
if (value >= probe) {
i = mid + 1;
size_2 -= half + 1;
}
else {
size_2 = half;
}
}
return i;
}
``````

### Version 2

``````template <class T> INLINE size_t fast_upper_bound2(const vector<T>& vec, T value)
{
size_t m_len = vec.size();
size_t low = -1;
size_t high = m_len;
assert(m_len < std::numeric_limits<size_t>::max() / 2);
while (high - low > 1) {
size_t probe = (low + high) / 2;
T v = vec[probe];
if (v > value)
high = probe;
else
low = probe;
}

if (high == m_len)
return m_len;
else
return high;
}
``````

``````[true, false, true, false, true, false]
``````

### Version 3

``````template <class T> INLINE size_t fast_upper_bound3(const vector<T>& vec, T value)
{
size_t size = vec.size();
size_t low = 0;

while (size > 0) {
size_t half = size / 2;
size_t other_half = size - half;
size_t probe = low + half;
size_t other_low = low + other_half;
T v = vec[probe];
size = half;
low = v >= value ? other_low : low;
}

return low;
}
``````

``````template <class T> INLINE size_t choose(T a, T b, size_t src1, size_t src2)
{
#if defined(__clang__) && defined(__x86_64)
size_t res = src1;
asm("cmpq %1, %2; cmovaeq %4, %0"
:
"=q" (res)
:
"q" (a),
"q" (b),
"q" (src1),
"q" (src2),
"0" (res)
:
"cc");
return res;
#else
return b >= a ? src2 : src1;
#endif
}
``````

### Version 3 的循环展开

``````template <class T> INLINE size_t fast_upper_bound4(const vector<T>& vec, T value)
{
size_t size = vec.size();
size_t low = 0;

while (size >= 8) {
size_t half = size / 2;
size_t other_half = size - half;
size_t probe = low + half;
size_t other_low = low + other_half;
T v = vec[probe];
size = half;
low = choose(v, value, low, other_low);

half = size / 2;
other_half = size - half;
probe = low + half;
other_low = low + other_half;
v = vec[probe];
size = half;
low = choose(v, value, low, other_low);

half = size / 2;
other_half = size - half;
probe = low + half;
other_low = low + other_half;
v = vec[probe];
size = half;
low = choose(v, value, low, other_low);
}

while (size > 0) {
size_t half = size / 2;
size_t other_half = size - half;
size_t probe = low + half;
size_t other_low = low + other_half;
T v = vec[probe];
size = half;
low = choose(v, value, low, other_low);
};

return low;
}
``````

### 想法总结

``````(76% + 81% + 80% + 75% + 70%) / 5 = 76%
``````