Веб-сайт самохостера Lotigara

summaryrefslogtreecommitdiff
path: root/source/core/StarBTreeDatabase.cpp
diff options
context:
space:
mode:
authorKae <80987908+Novaenia@users.noreply.github.com>2023-06-20 14:33:09 +1000
committerKae <80987908+Novaenia@users.noreply.github.com>2023-06-20 14:33:09 +1000
commit6352e8e3196f78388b6c771073f9e03eaa612673 (patch)
treee23772f79a7fbc41bc9108951e9e136857484bf4 /source/core/StarBTreeDatabase.cpp
parent6741a057e5639280d85d0f88ba26f000baa58f61 (diff)
everything everywhere
all at once
Diffstat (limited to 'source/core/StarBTreeDatabase.cpp')
-rw-r--r--source/core/StarBTreeDatabase.cpp1188
1 files changed, 1188 insertions, 0 deletions
diff --git a/source/core/StarBTreeDatabase.cpp b/source/core/StarBTreeDatabase.cpp
new file mode 100644
index 0000000..175de9e
--- /dev/null
+++ b/source/core/StarBTreeDatabase.cpp
@@ -0,0 +1,1188 @@
+#include "StarBTreeDatabase.hpp"
+#include "StarSha256.hpp"
+#include "StarVlqEncoding.hpp"
+
+namespace Star {
+
+BTreeDatabase::BTreeDatabase() {
+ m_impl.parent = this;
+ m_open = false;
+ m_deviceSize = 0;
+ m_blockSize = 2048;
+ m_headFreeIndexBlock = InvalidBlockIndex;
+ m_keySize = 0;
+ m_autoCommit = true;
+ m_indexCache.setMaxSize(64);
+ m_root = InvalidBlockIndex;
+ m_rootIsLeaf = false;
+ m_usingAltRoot = false;
+}
+
+BTreeDatabase::BTreeDatabase(String const& contentIdentifier, size_t keySize)
+ : BTreeDatabase() {
+ setContentIdentifier(contentIdentifier);
+ setKeySize(keySize);
+}
+
+BTreeDatabase::~BTreeDatabase() {
+ close();
+}
+
+uint32_t BTreeDatabase::blockSize() const {
+ ReadLocker readLocker(m_lock);
+ return m_blockSize;
+}
+
+void BTreeDatabase::setBlockSize(uint32_t blockSize) {
+ WriteLocker writeLocker(m_lock);
+ checkIfOpen("setBlockSize", false);
+ m_blockSize = blockSize;
+}
+
+uint32_t BTreeDatabase::keySize() const {
+ ReadLocker readLocker(m_lock);
+ return m_keySize;
+}
+
+void BTreeDatabase::setKeySize(uint32_t keySize) {
+ WriteLocker writeLocker(m_lock);
+ checkIfOpen("setKeySize", false);
+ m_keySize = keySize;
+}
+
+String BTreeDatabase::contentIdentifier() const {
+ ReadLocker readLocker(m_lock);
+ return m_contentIdentifier;
+}
+
+void BTreeDatabase::setContentIdentifier(String contentIdentifier) {
+ WriteLocker writeLocker(m_lock);
+ checkIfOpen("setContentIdentifier", false);
+ m_contentIdentifier = move(contentIdentifier);
+}
+
+uint32_t BTreeDatabase::indexCacheSize() const {
+ SpinLocker lock(m_indexCacheSpinLock);
+ return m_indexCache.maxSize();
+}
+
+void BTreeDatabase::setIndexCacheSize(uint32_t indexCacheSize) {
+ SpinLocker lock(m_indexCacheSpinLock);
+ m_indexCache.setMaxSize(indexCacheSize);
+}
+
+bool BTreeDatabase::autoCommit() const {
+ ReadLocker readLocker(m_lock);
+ return m_autoCommit;
+}
+
+void BTreeDatabase::setAutoCommit(bool autoCommit) {
+ WriteLocker writeLocker(m_lock);
+ m_autoCommit = autoCommit;
+ if (m_autoCommit)
+ doCommit();
+}
+
+IODevicePtr BTreeDatabase::ioDevice() const {
+ ReadLocker readLocker(m_lock);
+ return m_device;
+}
+
+void BTreeDatabase::setIODevice(IODevicePtr device) {
+ WriteLocker writeLocker(m_lock);
+ checkIfOpen("setIODevice", false);
+ m_device = move(device);
+}
+
+bool BTreeDatabase::isOpen() const {
+ ReadLocker readLocker(m_lock);
+ return m_open;
+}
+
+bool BTreeDatabase::open() {
+ WriteLocker writeLocker(m_lock);
+ if (m_open)
+ return false;
+
+ if (!m_device)
+ throw DBException("BlockStorage::open called with no IODevice set");
+
+ if (!m_device->isOpen())
+ m_device->open(IOMode::ReadWrite);
+
+ m_open = true;
+
+ if (m_device->size() > 0) {
+ DataStreamIODevice ds(m_device);
+ ds.seek(0);
+
+ auto magic = ds.readBytes(VersionMagicSize);
+ if (magic != ByteArray::fromCString(VersionMagic))
+ throw DBException("Device is not a valid BTreeDatabase file");
+
+ m_blockSize = ds.read<uint32_t>();
+
+ auto contentIdentifier = ds.readBytes(ContentIdentifierStringSize);
+ contentIdentifier.appendByte('\0');
+ m_contentIdentifier = String(contentIdentifier.ptr());
+ m_keySize = ds.read<uint32_t>();
+
+ readRoot();
+
+ if (m_device->isWritable())
+ m_device->resize(m_deviceSize);
+
+ return false;
+
+ } else {
+ m_deviceSize = HeaderSize;
+ m_device->resize(m_deviceSize);
+ m_headFreeIndexBlock = InvalidBlockIndex;
+
+ DataStreamIODevice ds(m_device);
+ ds.seek(0);
+
+ ds.writeData(VersionMagic, VersionMagicSize);
+ ds.write<uint32_t>(m_blockSize);
+
+ if (m_contentIdentifier.empty())
+ throw DBException("Opening new database and no content identifier set!");
+
+ if (m_contentIdentifier.utf8Size() > ContentIdentifierStringSize)
+ throw DBException("contentIdentifier in BTreeDatabase implementation is greater than maximum identifier length");
+ if (m_keySize == 0)
+ throw DBException("key size is not set opening a new BTreeDatabase");
+
+ ByteArray contentIdentifier = m_contentIdentifier.utf8Bytes();
+ contentIdentifier.resize(ContentIdentifierStringSize, 0);
+ ds.writeBytes(contentIdentifier);
+ ds.write(m_keySize);
+
+ m_impl.createNewRoot();
+ doCommit();
+
+ return true;
+ }
+}
+
+bool BTreeDatabase::contains(ByteArray const& k) {
+ ReadLocker readLocker(m_lock);
+ checkKeySize(k);
+ return m_impl.contains(k);
+}
+
+Maybe<ByteArray> BTreeDatabase::find(ByteArray const& k) {
+ ReadLocker readLocker(m_lock);
+ checkKeySize(k);
+ return m_impl.find(k);
+}
+
+List<pair<ByteArray, ByteArray>> BTreeDatabase::find(ByteArray const& lower, ByteArray const& upper) {
+ ReadLocker readLocker(m_lock);
+ checkKeySize(lower);
+ checkKeySize(upper);
+ return m_impl.find(lower, upper);
+}
+
+void BTreeDatabase::forEach(ByteArray const& lower, ByteArray const& upper, function<void(ByteArray, ByteArray)> v) {
+ ReadLocker readLocker(m_lock);
+ checkKeySize(lower);
+ checkKeySize(upper);
+ m_impl.forEach(lower, upper, move(v));
+}
+
+void BTreeDatabase::forAll(function<void(ByteArray, ByteArray)> v) {
+ ReadLocker readLocker(m_lock);
+ m_impl.forAll(move(v));
+}
+
+bool BTreeDatabase::insert(ByteArray const& k, ByteArray const& data) {
+ WriteLocker writeLocker(m_lock);
+ checkKeySize(k);
+ return m_impl.insert(move(k), move(data));
+}
+
+bool BTreeDatabase::remove(ByteArray const& k) {
+ WriteLocker writeLocker(m_lock);
+ checkKeySize(k);
+ return m_impl.remove(k);
+}
+
+uint64_t BTreeDatabase::recordCount() {
+ ReadLocker readLocker(m_lock);
+ return m_impl.recordCount();
+}
+
+uint8_t BTreeDatabase::indexLevels() {
+ ReadLocker readLocker(m_lock);
+ return m_impl.indexLevels();
+}
+
+uint32_t BTreeDatabase::totalBlockCount() {
+ ReadLocker readLocker(m_lock);
+ checkIfOpen("totalBlockCount", true);
+ return (m_device->size() - HeaderSize) / m_blockSize;
+}
+
+uint32_t BTreeDatabase::freeBlockCount() {
+ ReadLocker readLocker(m_lock);
+ checkIfOpen("freeBlockCount", true);
+
+ // Go through every FreeIndexBlock in the chain and count all of the tracked
+ // free blocks.
+ BlockIndex count = 0;
+ BlockIndex indexBlockIndex = m_headFreeIndexBlock;
+ while (indexBlockIndex != InvalidBlockIndex) {
+ FreeIndexBlock indexBlock = readFreeIndexBlock(indexBlockIndex);
+ count += 1 + indexBlock.freeBlocks.size();
+ indexBlockIndex = indexBlock.nextFreeBlock;
+ }
+
+ count += m_availableBlocks.size() + m_pendingFree.size();
+
+ // Include untracked blocks at the end of the file in the free count.
+ count += (m_device->size() - m_deviceSize) / m_blockSize;
+
+ return count;
+}
+
+uint32_t BTreeDatabase::indexBlockCount() {
+ ReadLocker readLocker(m_lock);
+ checkIfOpen("indexBlockCount", true);
+ // Indexes are simply one index per block
+ return m_impl.indexCount();
+}
+
+uint32_t BTreeDatabase::leafBlockCount() {
+ WriteLocker writeLocker(m_lock);
+ checkIfOpen("leafBlockCount", true);
+
+ struct LeafBlocksVisitor {
+ bool operator()(shared_ptr<IndexNode> const&) {
+ return true;
+ }
+
+ bool operator()(shared_ptr<LeafNode> const& leaf) {
+ leafBlockCount += 1 + parent->leafTailBlocks(leaf->self).size();
+ return true;
+ }
+
+ BTreeDatabase* parent;
+ BlockIndex leafBlockCount = 0;
+ };
+
+ LeafBlocksVisitor visitor;
+ visitor.parent = this;
+ m_impl.forAllNodes(visitor);
+
+ return visitor.leafBlockCount;
+}
+
+void BTreeDatabase::commit() {
+ WriteLocker writeLocker(m_lock);
+ doCommit();
+}
+
+void BTreeDatabase::rollback() {
+ WriteLocker writeLocker(m_lock);
+
+ m_availableBlocks.clear();
+ m_indexCache.clear();
+ m_uncommitted.clear();
+ m_pendingFree.clear();
+
+ readRoot();
+
+ if (m_device->isWritable())
+ m_device->resize(m_deviceSize);
+}
+
+void BTreeDatabase::close(bool closeDevice) {
+ WriteLocker writeLocker(m_lock);
+ if (m_open) {
+ doCommit();
+
+ m_indexCache.clear();
+
+ m_open = false;
+ if (closeDevice && m_device && m_device->isOpen())
+ m_device->close();
+ }
+}
+
+BTreeDatabase::BlockIndex const BTreeDatabase::InvalidBlockIndex;
+uint32_t const BTreeDatabase::HeaderSize;
+char const* const BTreeDatabase::VersionMagic = "BTreeDB5";
+uint32_t const BTreeDatabase::VersionMagicSize;
+char const* const BTreeDatabase::IndexMagic = "II";
+char const* const BTreeDatabase::LeafMagic = "LL";
+char const* const BTreeDatabase::FreeIndexMagic = "FF";
+size_t const BTreeDatabase::BTreeRootSelectorBit;
+size_t const BTreeDatabase::BTreeRootInfoStart;
+size_t const BTreeDatabase::BTreeRootInfoSize;
+
+size_t BTreeDatabase::IndexNode::pointerCount() const {
+ // If no begin pointer is set then the index is simply uninitialized.
+ if (!beginPointer)
+ return 0;
+ else
+ return pointers.size() + 1;
+}
+
+auto BTreeDatabase::IndexNode::pointer(size_t i) const -> BlockIndex {
+ if (i == 0)
+ return *beginPointer;
+ else
+ return pointers.at(i - 1).pointer;
+}
+
+void BTreeDatabase::IndexNode::updatePointer(size_t i, BlockIndex p) {
+ if (i == 0)
+ *beginPointer = p;
+ else
+ pointers.at(i - 1).pointer = p;
+}
+
+ByteArray const& BTreeDatabase::IndexNode::keyBefore(size_t i) const {
+ return pointers.at(i - 1).key;
+}
+
+void BTreeDatabase::IndexNode::updateKeyBefore(size_t i, ByteArray k) {
+ pointers.at(i - 1).key = k;
+}
+
+void BTreeDatabase::IndexNode::removeBefore(size_t i) {
+ if (i == 0) {
+ beginPointer = pointers.at(0).pointer;
+ pointers.eraseAt(0);
+ } else {
+ pointers.eraseAt(i - 1);
+ }
+}
+
+void BTreeDatabase::IndexNode::insertAfter(size_t i, ByteArray k, BlockIndex p) {
+ pointers.insertAt(i, Element{k, p});
+}
+
+uint8_t BTreeDatabase::IndexNode::indexLevel() const {
+ return level;
+}
+
+void BTreeDatabase::IndexNode::setIndexLevel(uint8_t indexLevel) {
+ level = indexLevel;
+}
+
+void BTreeDatabase::IndexNode::shiftLeft(ByteArray const& mid, IndexNode& right, size_t count) {
+ count = std::min(right.pointerCount(), count);
+
+ if (count == 0)
+ return;
+
+ pointers.append(Element{mid, *right.beginPointer});
+
+ ElementList::iterator s = right.pointers.begin();
+ std::advance(s, count - 1);
+ pointers.insert(pointers.end(), right.pointers.begin(), s);
+
+ right.pointers.erase(right.pointers.begin(), s);
+ if (right.pointers.size() != 0) {
+ right.beginPointer = right.pointers.at(0).pointer;
+ right.pointers.eraseAt(0);
+ } else {
+ right.beginPointer.reset();
+ }
+}
+
+void BTreeDatabase::IndexNode::shiftRight(ByteArray const& mid, IndexNode& left, size_t count) {
+ count = std::min(left.pointerCount(), count);
+
+ if (count == 0)
+ return;
+ --count;
+
+ pointers.insert(pointers.begin(), Element{mid, *beginPointer});
+
+ ElementList::iterator s = left.pointers.begin();
+ std::advance(s, left.pointers.size() - count);
+ pointers.insert(pointers.begin(), s, left.pointers.end());
+
+ left.pointers.erase(s, left.pointers.end());
+ if (left.pointers.size() != 0) {
+ beginPointer = left.pointers.at(left.pointers.size() - 1).pointer;
+ left.pointers.eraseAt(left.pointers.size() - 1);
+ } else {
+ beginPointer = left.beginPointer.take();
+ }
+}
+
+ByteArray BTreeDatabase::IndexNode::split(IndexNode& right, size_t i) {
+ ElementList::iterator s = pointers.begin();
+ std::advance(s, i - 1);
+
+ right.beginPointer = s->pointer;
+ ByteArray midKey = s->key;
+ right.level = level;
+ ++s;
+
+ right.pointers.insert(right.pointers.begin(), s, pointers.end());
+ --s;
+
+ pointers.erase(s, pointers.end());
+
+ return midKey;
+}
+
+size_t BTreeDatabase::LeafNode::count() const {
+ return elements.size();
+}
+
+ByteArray const& BTreeDatabase::LeafNode::key(size_t i) const {
+ return elements.at(i).key;
+}
+
+ByteArray const& BTreeDatabase::LeafNode::data(size_t i) const {
+ return elements.at(i).data;
+}
+
+void BTreeDatabase::LeafNode::insert(size_t i, ByteArray k, ByteArray d) {
+ elements.insertAt(i, Element{move(k), move(d)});
+}
+
+void BTreeDatabase::LeafNode::remove(size_t i) {
+ elements.eraseAt(i);
+}
+
+void BTreeDatabase::LeafNode::shiftLeft(LeafNode& right, size_t count) {
+ count = std::min(right.count(), count);
+
+ if (count == 0)
+ return;
+
+ ElementList::iterator s = right.elements.begin();
+ std::advance(s, count);
+
+ elements.insert(elements.end(), right.elements.begin(), s);
+ right.elements.erase(right.elements.begin(), s);
+}
+
+void BTreeDatabase::LeafNode::shiftRight(LeafNode& left, size_t count) {
+ count = std::min(left.count(), count);
+
+ if (count == 0)
+ return;
+
+ ElementList::iterator s = left.elements.begin();
+ std::advance(s, left.elements.size() - count);
+
+ elements.insert(elements.begin(), s, left.elements.end());
+ left.elements.erase(s, left.elements.end());
+}
+
+void BTreeDatabase::LeafNode::split(LeafNode& right, size_t i) {
+ ElementList::iterator s = elements.begin();
+ std::advance(s, i);
+
+ right.elements.insert(right.elements.begin(), s, elements.end());
+ elements.erase(s, elements.end());
+}
+
+auto BTreeDatabase::BTreeImpl::rootPointer() -> Pointer {
+ return parent->m_root;
+}
+
+bool BTreeDatabase::BTreeImpl::rootIsLeaf() {
+ return parent->m_rootIsLeaf;
+}
+
+void BTreeDatabase::BTreeImpl::setNewRoot(Pointer pointer, bool isLeaf) {
+ parent->m_root = pointer;
+ parent->m_rootIsLeaf = isLeaf;
+
+ if (parent->m_autoCommit)
+ parent->doCommit();
+}
+
+auto BTreeDatabase::BTreeImpl::createIndex(Pointer beginPointer) -> Index {
+ auto index = make_shared<IndexNode>();
+ index->self = InvalidBlockIndex;
+ index->level = 0;
+ index->beginPointer = beginPointer;
+ return index;
+}
+
+auto BTreeDatabase::BTreeImpl::loadIndex(Pointer pointer) -> Index {
+ SpinLocker lock(parent->m_indexCacheSpinLock);
+ if (auto index = parent->m_indexCache.ptr(pointer))
+ return *index;
+ lock.unlock();
+
+ auto index = make_shared<IndexNode>();
+
+ DataStreamBuffer buffer(parent->readBlock(pointer));
+
+ if (buffer.readBytes(2) != ByteArray(IndexMagic, 2))
+ throw DBException("Error, incorrect index block signature.");
+
+ index->self = pointer;
+
+ index->level = buffer.read<uint8_t>();
+ uint32_t s = buffer.read<uint32_t>();
+ index->beginPointer = buffer.read<BlockIndex>();
+ index->pointers.resize(s);
+ for (uint32_t i = 0; i < s; ++i) {
+ auto& e = index->pointers[i];
+ e.key =buffer.readBytes(parent->m_keySize);
+ e.pointer = buffer.read<BlockIndex>();
+ }
+
+ lock.lock();
+ parent->m_indexCache.set(pointer, index);
+ return index;
+}
+
+bool BTreeDatabase::BTreeImpl::indexNeedsShift(Index const& index) {
+ return index->pointerCount() < (parent->maxIndexPointers() + 1) / 2;
+}
+
+bool BTreeDatabase::BTreeImpl::indexShift(Index const& left, Key const& mid, Index const& right) {
+ if (left->pointerCount() + right->pointerCount() <= parent->maxIndexPointers()) {
+ left->shiftLeft(mid, *right, right->pointerCount());
+ return true;
+ } else {
+ if (indexNeedsShift(right)) {
+ right->shiftRight(mid, *left, 1);
+ return true;
+ } else if (indexNeedsShift(left)) {
+ left->shiftLeft(mid, *right, 1);
+ return true;
+ } else {
+ return false;
+ }
+ }
+}
+
+auto BTreeDatabase::BTreeImpl::indexSplit(Index const& index) -> Maybe<pair<Key, Index>> {
+ if (index->pointerCount() <= parent->maxIndexPointers())
+ return {};
+
+ auto right = make_shared<IndexNode>();
+ right->self = InvalidBlockIndex;
+ Key k = index->split(*right, (index->pointerCount() + 1) / 2);
+ return make_pair(k, right);
+}
+
+auto BTreeDatabase::BTreeImpl::storeIndex(Index index) -> Pointer {
+ if (index->self != InvalidBlockIndex) {
+ if (!parent->m_uncommitted.contains(index->self)) {
+ parent->freeBlock(index->self);
+ parent->m_indexCache.remove(index->self);
+ index->self = InvalidBlockIndex;
+ }
+ }
+
+ if (index->self == InvalidBlockIndex)
+ index->self = parent->reserveBlock();
+
+ DataStreamBuffer buffer(parent->m_blockSize);
+ buffer.writeData(IndexMagic, 2);
+
+ buffer.write<uint8_t>(index->level);
+ buffer.write<uint32_t>(index->pointers.size());
+ buffer.write<BlockIndex>(*index->beginPointer);
+ for (auto i = index->pointers.begin(); i != index->pointers.end(); ++i) {
+ starAssert(i->key.size() == parent->m_keySize);
+ buffer.writeBytes(i->key);
+ buffer.write<BlockIndex>(i->pointer);
+ }
+
+ parent->updateBlock(index->self, buffer.data());
+
+ parent->m_indexCache.set(index->self, index);
+ return index->self;
+}
+
+void BTreeDatabase::BTreeImpl::deleteIndex(Index index) {
+ parent->m_indexCache.remove(index->self);
+ parent->freeBlock(index->self);
+}
+
+auto BTreeDatabase::BTreeImpl::createLeaf() -> Leaf {
+ auto leaf = make_shared<LeafNode>();
+ leaf->self = InvalidBlockIndex;
+ return leaf;
+}
+
+auto BTreeDatabase::BTreeImpl::loadLeaf(Pointer pointer) -> Leaf {
+ auto leaf = make_shared<LeafNode>();
+ leaf->self = pointer;
+
+ BlockIndex currentLeafBlock = leaf->self;
+ DataStreamBuffer leafBuffer;
+ leafBuffer.reset(parent->m_blockSize);
+ parent->readBlock(currentLeafBlock, 0, leafBuffer.ptr(), parent->m_blockSize);
+
+ if (leafBuffer.readBytes(2) != ByteArray(LeafMagic, 2))
+ throw DBException("Error, incorrect leaf block signature.");
+
+ DataStreamFunctions leafInput([&](char* data, size_t len) -> size_t {
+ size_t pos = 0;
+ size_t left = len;
+
+ while (left > 0) {
+ if (leafBuffer.pos() + left < parent->m_blockSize - sizeof(BlockIndex)) {
+ leafBuffer.readData(data + pos, left);
+ pos += left;
+ left = 0;
+ } else {
+ size_t toRead = parent->m_blockSize - sizeof(BlockIndex) - leafBuffer.pos();
+ leafBuffer.readData(data + pos, toRead);
+ pos += toRead;
+ left -= toRead;
+ }
+
+ if (leafBuffer.pos() == (parent->m_blockSize - sizeof(BlockIndex)) && left > 0) {
+ currentLeafBlock = leafBuffer.read<BlockIndex>();
+ if (currentLeafBlock != InvalidBlockIndex) {
+ leafBuffer.reset(parent->m_blockSize);
+ parent->readBlock(currentLeafBlock, 0, leafBuffer.ptr(), parent->m_blockSize);
+
+ if (leafBuffer.readBytes(2) != ByteArray(LeafMagic, 2))
+ throw DBException("Error, incorrect leaf block signature.");
+
+ } else {
+ throw DBException("Leaf read off end of Leaf list.");
+ }
+ }
+ }
+
+ return len;
+ }, {});
+
+ uint32_t count = leafInput.read<uint32_t>();
+ leaf->elements.resize(count);
+ for (uint32_t i = 0; i < count; ++i) {
+ auto& element = leaf->elements[i];
+ element.key = leafInput.readBytes(parent->m_keySize);
+ element.data = leafInput.read<ByteArray>();
+ }
+
+ return leaf;
+}
+
+bool BTreeDatabase::BTreeImpl::leafNeedsShift(Leaf const& l) {
+ return parent->leafSize(l) < parent->m_blockSize / 2;
+}
+
+bool BTreeDatabase::BTreeImpl::leafShift(Leaf& left, Leaf& right) {
+ if (left->count() == 0) {
+ left->shiftLeft(*right, right->count());
+ return true;
+ }
+
+ if (right->count() == 0)
+ return true;
+
+ uint32_t leftSize = parent->leafSize(left);
+ uint32_t rightSize = parent->leafSize(right);
+ if (leftSize + rightSize < parent->m_blockSize) {
+ left->shiftLeft(*right, right->count());
+ return true;
+ }
+
+ // TODO: Shifting algorithm is bad, could potentially want to shift more
+ // than one element here.
+ uint32_t rightBeginSize = parent->m_keySize + parent->dataSize(right->elements[0].data);
+ uint32_t leftEndSize = parent->m_keySize + parent->dataSize(left->elements[left->elements.size() - 1].data);
+ if (leftSize < rightSize - rightBeginSize && leftSize + rightBeginSize < parent->m_blockSize) {
+ left->shiftLeft(*right, 1);
+ return true;
+ } else if (rightSize < leftSize - leftEndSize && rightSize + leftEndSize < parent->m_blockSize) {
+ right->shiftRight(*left, 1);
+ return true;
+ }
+
+ return false;
+}
+
+auto BTreeDatabase::BTreeImpl::leafSplit(Leaf& leaf) -> Maybe<Leaf> {
+ if (leaf->elements.size() < 2)
+ return {};
+
+ uint32_t size = 6;
+ bool boundaryFound = false;
+ uint32_t boundary = 0;
+ for (uint32_t i = 0; i < leaf->elements.size(); ++i) {
+ size += parent->m_keySize;
+ size += parent->dataSize(leaf->elements[i].data);
+ if (size > parent->m_blockSize - sizeof(BlockIndex) && !boundaryFound) {
+ boundary = i;
+ boundaryFound = true;
+ }
+ }
+ if (boundary == 0)
+ boundary = 1;
+
+ if (size < parent->m_blockSize * 2 - 2 * sizeof(BlockIndex) - 4) {
+ return {};
+ } else {
+ auto right = make_shared<LeafNode>();
+ right->self = InvalidBlockIndex;
+ leaf->split(*right, boundary);
+ return right;
+ }
+}
+
+auto BTreeDatabase::BTreeImpl::storeLeaf(Leaf leaf) -> Pointer {
+ if (leaf->self != InvalidBlockIndex) {
+ List<BlockIndex> tailBlocks = parent->leafTailBlocks(leaf->self);
+ for (uint32_t i = 0; i < tailBlocks.size(); ++i)
+ parent->freeBlock(tailBlocks[i]);
+
+ if (!parent->m_uncommitted.contains(leaf->self)) {
+ parent->freeBlock(leaf->self);
+ leaf->self = InvalidBlockIndex;
+ }
+ }
+
+ if (leaf->self == InvalidBlockIndex)
+ leaf->self = parent->reserveBlock();
+
+ BlockIndex currentLeafBlock = leaf->self;
+ DataStreamBuffer leafBuffer;
+ leafBuffer.reset(parent->m_blockSize);
+ leafBuffer.writeData(LeafMagic, 2);
+
+ DataStreamFunctions leafOutput({}, [&](char const* data, size_t len) -> size_t {
+ size_t pos = 0;
+ size_t left = len;
+
+ while (true) {
+ size_t toWrite = left;
+ if (toWrite > parent->m_blockSize - leafBuffer.pos() - sizeof(BlockIndex))
+ toWrite = parent->m_blockSize - leafBuffer.pos() - sizeof(BlockIndex);
+
+ if (toWrite != 0) {
+ leafBuffer.writeData(data + pos, toWrite);
+ left -= toWrite;
+ pos += toWrite;
+ }
+
+ if (left == 0)
+ break;
+
+ if (leafBuffer.pos() == (parent->m_blockSize - sizeof(BlockIndex))) {
+ BlockIndex nextBlock = parent->reserveBlock();
+ leafBuffer.write<BlockIndex>(nextBlock);
+ parent->updateBlock(currentLeafBlock, leafBuffer.data());
+ currentLeafBlock = nextBlock;
+ leafBuffer.reset(parent->m_blockSize);
+ leafBuffer.writeData(LeafMagic, 2);
+ }
+ }
+
+ return len;
+ });
+
+ leafOutput.write<uint32_t>(leaf->elements.size());
+
+ for (LeafNode::ElementList::iterator i = leaf->elements.begin(); i != leaf->elements.end(); ++i) {
+ starAssert(i->key.size() == parent->m_keySize);
+ leafOutput.writeBytes(i->key);
+ leafOutput.write(i->data);
+ }
+
+ leafBuffer.seek(parent->m_blockSize - sizeof(BlockIndex));
+ leafBuffer.write<BlockIndex>(InvalidBlockIndex);
+ parent->updateBlock(currentLeafBlock, leafBuffer.data());
+
+ return leaf->self;
+}
+
+void BTreeDatabase::BTreeImpl::deleteLeaf(Leaf leaf) {
+ List<BlockIndex> tailBlocks = parent->leafTailBlocks(leaf->self);
+ for (uint32_t i = 0; i < tailBlocks.size(); ++i)
+ parent->freeBlock(tailBlocks[i]);
+
+ parent->freeBlock(leaf->self);
+}
+
+size_t BTreeDatabase::BTreeImpl::indexPointerCount(Index const& index) {
+ return index->pointerCount();
+}
+
+auto BTreeDatabase::BTreeImpl::indexPointer(Index const& index, size_t i) -> Pointer {
+ return index->pointer(i);
+}
+
+void BTreeDatabase::BTreeImpl::indexUpdatePointer(Index& index, size_t i, Pointer p) {
+ index->updatePointer(i, p);
+}
+
+auto BTreeDatabase::BTreeImpl::indexKeyBefore(Index const& index, size_t i) -> Key {
+ return index->keyBefore(i);
+}
+
+void BTreeDatabase::BTreeImpl::indexUpdateKeyBefore(Index& index, size_t i, Key k) {
+ index->updateKeyBefore(i, k);
+}
+
+void BTreeDatabase::BTreeImpl::indexRemoveBefore(Index& index, size_t i) {
+ index->removeBefore(i);
+}
+
+void BTreeDatabase::BTreeImpl::indexInsertAfter(Index& index, size_t i, Key k, Pointer p) {
+ index->insertAfter(i, k, p);
+}
+
+size_t BTreeDatabase::BTreeImpl::indexLevel(Index const& index) {
+ return index->indexLevel();
+}
+
+void BTreeDatabase::BTreeImpl::setIndexLevel(Index& index, size_t indexLevel) {
+ index->setIndexLevel(indexLevel);
+}
+
+size_t BTreeDatabase::BTreeImpl::leafElementCount(Leaf const& leaf) {
+ return leaf->count();
+}
+
+auto BTreeDatabase::BTreeImpl::leafKey(Leaf const& leaf, size_t i) -> Key {
+ return leaf->key(i);
+}
+
+auto BTreeDatabase::BTreeImpl::leafData(Leaf const& leaf, size_t i) -> Data {
+ return leaf->data(i);
+}
+
+void BTreeDatabase::BTreeImpl::leafInsert(Leaf& leaf, size_t i, Key k, Data d) {
+ leaf->insert(i, move(k), move(d));
+}
+
+void BTreeDatabase::BTreeImpl::leafRemove(Leaf& leaf, size_t i) {
+ leaf->remove(i);
+}
+
+auto BTreeDatabase::BTreeImpl::nextLeaf(Leaf const&) -> Maybe<Pointer> {
+ return {};
+}
+
+void BTreeDatabase::BTreeImpl::setNextLeaf(Leaf&, Maybe<Pointer>) {}
+
+void BTreeDatabase::readBlock(BlockIndex blockIndex, size_t blockOffset, char* block, size_t size) const {
+ checkBlockIndex(blockIndex);
+ rawReadBlock(blockIndex, blockOffset, block, size);
+}
+
+ByteArray BTreeDatabase::readBlock(BlockIndex blockIndex) const {
+ ByteArray block(m_blockSize, 0);
+ readBlock(blockIndex, 0, block.ptr(), m_blockSize);
+ return block;
+}
+
+void BTreeDatabase::updateBlock(BlockIndex blockIndex, ByteArray const& block) {
+ checkBlockIndex(blockIndex);
+ rawWriteBlock(blockIndex, 0, block.ptr(), block.size());
+}
+
+void BTreeDatabase::rawReadBlock(BlockIndex blockIndex, size_t blockOffset, char* block, size_t size) const {
+ if (blockOffset > m_blockSize || size > m_blockSize - blockOffset)
+ throw DBException::format("Read past end of block, offset: %s size %s", blockOffset, size);
+
+ if (size <= 0)
+ return;
+
+ m_device->readFullAbsolute(HeaderSize + blockIndex * (StreamOffset)m_blockSize + blockOffset, block, size);
+}
+
+void BTreeDatabase::rawWriteBlock(BlockIndex blockIndex, size_t blockOffset, char const* block, size_t size) const {
+ if (blockOffset > m_blockSize || size > m_blockSize - blockOffset)
+ throw DBException::format("Write past end of block, offset: %s size %s", blockOffset, size);
+
+ if (size <= 0)
+ return;
+
+ m_device->writeFullAbsolute(HeaderSize + blockIndex * (StreamOffset)m_blockSize + blockOffset, block, size);
+}
+
+auto BTreeDatabase::readFreeIndexBlock(BlockIndex blockIndex) -> FreeIndexBlock {
+ checkBlockIndex(blockIndex);
+
+ ByteArray magic(2, 0);
+ rawReadBlock(blockIndex, 0, magic.ptr(), 2);
+ if (magic != ByteArray(FreeIndexMagic, 2))
+ throw DBException::format("Internal exception! block %s missing free index block marker!", blockIndex);
+
+ FreeIndexBlock freeIndexBlock;
+ DataStreamBuffer buffer(max(sizeof(BlockIndex), (size_t)4));
+
+ rawReadBlock(blockIndex, 2, buffer.ptr(), sizeof(BlockIndex));
+ buffer.seek(0);
+ freeIndexBlock.nextFreeBlock = buffer.read<BlockIndex>();
+
+ rawReadBlock(blockIndex, 2 + sizeof(BlockIndex), buffer.ptr(), 4);
+ buffer.seek(0);
+ size_t numFree = buffer.read<uint32_t>();
+
+ for (size_t i = 0; i < numFree; ++i) {
+ rawReadBlock(blockIndex, 6 + sizeof(BlockIndex) + sizeof(BlockIndex) * i, buffer.ptr(), sizeof(BlockIndex));
+ buffer.seek(0);
+ freeIndexBlock.freeBlocks.append(buffer.read<BlockIndex>());
+ }
+
+ return freeIndexBlock;
+}
+
+void BTreeDatabase::writeFreeIndexBlock(BlockIndex blockIndex, FreeIndexBlock indexBlock) {
+ checkBlockIndex(blockIndex);
+
+ rawWriteBlock(blockIndex, 0, FreeIndexMagic, 2);
+ DataStreamBuffer buffer(max(sizeof(BlockIndex), (size_t)4));
+
+ buffer.seek(0);
+ buffer.write<BlockIndex>(indexBlock.nextFreeBlock);
+ rawWriteBlock(blockIndex, 2, buffer.ptr(), sizeof(BlockIndex));
+
+ buffer.seek(0);
+ buffer.write<uint32_t>(indexBlock.freeBlocks.size());
+ rawWriteBlock(blockIndex, 2 + sizeof(BlockIndex), buffer.ptr(), 4);
+
+ for (size_t i = 0; i < indexBlock.freeBlocks.size(); ++i) {
+ buffer.seek(0);
+ buffer.write<BlockIndex>(indexBlock.freeBlocks[i]);
+ rawWriteBlock(blockIndex, 6 + sizeof(BlockIndex) + sizeof(BlockIndex) * i, buffer.ptr(), sizeof(BlockIndex));
+ }
+}
+
+uint32_t BTreeDatabase::leafSize(shared_ptr<LeafNode> const& leaf) const {
+ size_t s = 6;
+ for (LeafNode::ElementList::iterator i = leaf->elements.begin(); i != leaf->elements.end(); ++i) {
+ s += m_keySize;
+ s += dataSize(i->data);
+ }
+ return s;
+}
+
+uint32_t BTreeDatabase::maxIndexPointers() const {
+ // 2 for magic, 1 byte for level, sizeof(BlockIndex) for beginPointer, 4
+ // for size.
+ return (m_blockSize - 2 - 1 - sizeof(BlockIndex) - 4) / (m_keySize + sizeof(BlockIndex)) + 1;
+}
+
+uint32_t BTreeDatabase::dataSize(ByteArray const& d) const {
+ return vlqUSize(d.size()) + d.size();
+}
+
+auto BTreeDatabase::leafTailBlocks(BlockIndex leafPointer) -> List<BlockIndex> {
+ List<BlockIndex> tailBlocks;
+ DataStreamBuffer pointerBuffer(sizeof(BlockIndex));
+ while (leafPointer != InvalidBlockIndex) {
+ readBlock(leafPointer, m_blockSize - sizeof(BlockIndex), pointerBuffer.ptr(), sizeof(BlockIndex));
+ pointerBuffer.seek(0);
+ leafPointer = pointerBuffer.read<BlockIndex>();
+ if (leafPointer != InvalidBlockIndex)
+ tailBlocks.append(leafPointer);
+ }
+ return tailBlocks;
+}
+
+void BTreeDatabase::freeBlock(BlockIndex b) {
+ if (m_uncommitted.contains(b)) {
+ m_uncommitted.remove(b);
+ m_availableBlocks.add(b);
+ } else {
+ m_pendingFree.append(b);
+ }
+}
+
+auto BTreeDatabase::reserveBlock() -> BlockIndex {
+ if (m_availableBlocks.empty()) {
+ if (m_headFreeIndexBlock != InvalidBlockIndex) {
+ // If available, make available all the blocks in the first free index
+ // block.
+ FreeIndexBlock indexBlock = readFreeIndexBlock(m_headFreeIndexBlock);
+ for (auto const& b : indexBlock.freeBlocks)
+ m_availableBlocks.add(b);
+ // We cannot make available the block itself, because we must maintain
+ // atomic consistency. We will need to free this block later and commit
+ // the new free index block chain.
+ m_pendingFree.append(m_headFreeIndexBlock);
+ m_headFreeIndexBlock = indexBlock.nextFreeBlock;
+ }
+
+ if (m_availableBlocks.empty()) {
+ // If we still don't have any available blocks, just add a block to the
+ // end of the file.
+ m_availableBlocks.add(makeEndBlock());
+ }
+ }
+
+ BlockIndex block = m_availableBlocks.takeFirst();
+ m_uncommitted.add(block);
+ return block;
+}
+
+auto BTreeDatabase::makeEndBlock() -> BlockIndex {
+ BlockIndex blockCount = (m_deviceSize - HeaderSize) / m_blockSize;
+ m_deviceSize += m_blockSize;
+ m_device->resize(m_deviceSize);
+ return blockCount;
+}
+
+void BTreeDatabase::writeRoot() {
+ DataStreamIODevice ds(m_device);
+ // First write the root info to whichever section we are not currently using
+ ds.seek(BTreeRootInfoStart + (m_usingAltRoot ? 0 : BTreeRootInfoSize));
+ ds.write<BlockIndex>(m_headFreeIndexBlock);
+ ds.write<StreamOffset>(m_deviceSize);
+ ds.write<BlockIndex>(m_root);
+ ds.write<bool>(m_rootIsLeaf);
+
+ // Then flush all the pending changes.
+ m_device->sync();
+
+ // Then switch headers by writing the single bit that switches them
+ m_usingAltRoot = !m_usingAltRoot;
+ ds.seek(BTreeRootSelectorBit);
+ ds.write(m_usingAltRoot);
+
+ // Then flush this single bit write to make sure it happens before anything
+ // else.
+ m_device->sync();
+}
+
+void BTreeDatabase::readRoot() {
+ DataStreamIODevice ds(m_device);
+ ds.seek(BTreeRootSelectorBit);
+ ds.read(m_usingAltRoot);
+
+ ds.seek(BTreeRootInfoStart + (m_usingAltRoot ? BTreeRootInfoSize : 0));
+ m_headFreeIndexBlock = ds.read<BlockIndex>();
+ m_deviceSize = ds.read<StreamOffset>();
+ m_root = ds.read<BlockIndex>();
+ m_rootIsLeaf = ds.read<bool>();
+}
+
+void BTreeDatabase::doCommit() {
+ if (m_availableBlocks.empty() && m_pendingFree.empty() && m_uncommitted.empty())
+ return;
+
+ if (!m_availableBlocks.empty() || !m_pendingFree.empty()) {
+ // First, read the existing head FreeIndexBlock, if it exists
+ FreeIndexBlock indexBlock = FreeIndexBlock{InvalidBlockIndex, {}};
+ if (m_headFreeIndexBlock != InvalidBlockIndex) {
+ indexBlock = readFreeIndexBlock(m_headFreeIndexBlock);
+ if (indexBlock.freeBlocks.size() >= maxFreeIndexLength()) {
+ // If the existing head free index block is full, then we should start a
+ // new one and leave it alone
+ indexBlock.nextFreeBlock = m_headFreeIndexBlock;
+ indexBlock.freeBlocks.clear();
+ } else {
+ // If we are copying an existing free index block, the old free index
+ // block will be a newly freed block
+ indexBlock.freeBlocks.append(m_headFreeIndexBlock);
+ }
+ }
+
+ // Then, we need to write all the available blocks, which are safe to write
+ // to, and the pending free blocks, which are NOT safe to write to, to the
+ // FreeIndexBlock chain.
+ while (true) {
+ if (indexBlock.freeBlocks.size() < maxFreeIndexLength() && (!m_availableBlocks.empty() || !m_pendingFree.empty())) {
+ // If we have room on our current FreeIndexblock, just add a block to
+ // it. Prioritize the pending free blocks, because we cannot use those
+ // to write to.
+ BlockIndex toAdd;
+ if (m_pendingFree.empty())
+ toAdd = m_availableBlocks.takeFirst();
+ else
+ toAdd = m_pendingFree.takeFirst();
+
+ indexBlock.freeBlocks.append(toAdd);
+ } else {
+ // If our index block is full OR we are out of blocks to free, then
+ // need to write a new head free index block.
+ if (m_availableBlocks.empty())
+ m_headFreeIndexBlock = makeEndBlock();
+ else
+ m_headFreeIndexBlock = m_availableBlocks.takeFirst();
+ writeFreeIndexBlock(m_headFreeIndexBlock, indexBlock);
+
+ // If we're out of blocks to free, then we're done
+ if (m_availableBlocks.empty() && m_pendingFree.empty())
+ break;
+
+ indexBlock.nextFreeBlock = m_headFreeIndexBlock;
+ indexBlock.freeBlocks.clear();
+ }
+ }
+ }
+
+ writeRoot();
+
+ m_uncommitted.clear();
+}
+
+void BTreeDatabase::checkIfOpen(char const* methodName, bool shouldBeOpen) const {
+ if (shouldBeOpen && !m_open)
+ throw DBException::format("BTreeDatabase method '%s' called when not open, must be open.", methodName);
+ else if (!shouldBeOpen && m_open)
+ throw DBException::format("BTreeDatabase method '%s' called when open, cannot call when open.", methodName);
+}
+
+void BTreeDatabase::checkBlockIndex(size_t blockIndex) const {
+ BlockIndex blockCount = (m_deviceSize - HeaderSize) / m_blockSize;
+ if (blockIndex >= blockCount)
+ throw DBException::format("blockIndex: %s out of block range", blockIndex);
+}
+
+void BTreeDatabase::checkKeySize(ByteArray const& k) const {
+ if (k.size() != m_keySize)
+ throw DBException::format("Wrong key size %s", k.size());
+}
+
+uint32_t BTreeDatabase::maxFreeIndexLength() const {
+ return (m_blockSize - 2 - sizeof(BlockIndex) - 4) / sizeof(BlockIndex);
+}
+
+BTreeSha256Database::BTreeSha256Database() {
+ setKeySize(32);
+}
+
+BTreeSha256Database::BTreeSha256Database(String const& contentIdentifier) {
+ setKeySize(32);
+ setContentIdentifier(contentIdentifier);
+}
+
+bool BTreeSha256Database::contains(ByteArray const& key) {
+ return BTreeDatabase::contains(sha256(key));
+}
+
+Maybe<ByteArray> BTreeSha256Database::find(ByteArray const& key) {
+ return BTreeDatabase::find(sha256(key));
+}
+
+bool BTreeSha256Database::insert(ByteArray const& key, ByteArray const& value) {
+ return BTreeDatabase::insert(sha256(key), value);
+}
+
+bool BTreeSha256Database::remove(ByteArray const& key) {
+ return BTreeDatabase::remove(sha256(key));
+}
+
+bool BTreeSha256Database::contains(String const& key) {
+ return BTreeDatabase::contains(sha256(key));
+}
+
+Maybe<ByteArray> BTreeSha256Database::find(String const& key) {
+ return BTreeDatabase::find(sha256(key));
+}
+
+bool BTreeSha256Database::insert(String const& key, ByteArray const& value) {
+ return BTreeDatabase::insert(sha256(key), value);
+}
+
+bool BTreeSha256Database::remove(String const& key) {
+ return BTreeDatabase::remove(sha256(key));
+}
+
+}