diff options
Diffstat (limited to 'source/core/StarBTreeDatabase.hpp')
-rw-r--r-- | source/core/StarBTreeDatabase.hpp | 344 |
1 files changed, 344 insertions, 0 deletions
diff --git a/source/core/StarBTreeDatabase.hpp b/source/core/StarBTreeDatabase.hpp new file mode 100644 index 0000000..aff45d3 --- /dev/null +++ b/source/core/StarBTreeDatabase.hpp @@ -0,0 +1,344 @@ +#ifndef STAR_BTREE_DATABASE_HPP +#define STAR_BTREE_DATABASE_HPP + +#include "StarSet.hpp" +#include "StarBTree.hpp" +#include "StarLruCache.hpp" +#include "StarDataStreamDevices.hpp" +#include "StarThread.hpp" + +namespace Star { + +STAR_EXCEPTION(DBException, IOException); + +class BTreeDatabase { +public: + uint32_t const ContentIdentifierStringSize = 16; + + BTreeDatabase(); + BTreeDatabase(String const& contentIdentifier, size_t keySize); + ~BTreeDatabase(); + + // The underlying device will be allocated in "blocks" of this size. + // The larger the block size, the larger that index and leaf nodes can be + // before they need to be split, but it also means that more space is wasted + // for index or leaf nodes that are not completely full. Cannot be changed + // once the database is opened. Defaults to 2048. + uint32_t blockSize() const; + void setBlockSize(uint32_t blockSize); + + // Constant size of the database keys. Should be much smaller than the block + // size, cannot be changed once a database is opened. Defaults zero, which + // is invalid, so must be set if opening a new database. + uint32_t keySize() const; + void setKeySize(uint32_t keySize); + + // Must be no greater than ContentIdentifierStringSize large. May not be + // called when the database is opened. + String contentIdentifier() const; + void setContentIdentifier(String contentIdentifier); + + // Cache size for index nodes, defaults to 64 + uint32_t indexCacheSize() const; + void setIndexCacheSize(uint32_t indexCacheSize); + + // If true, very write operation will immediately result in a commit. + // Defaults to true. + bool autoCommit() const; + void setAutoCommit(bool autoCommit); + + IODevicePtr ioDevice() const; + void setIODevice(IODevicePtr device); + + // If an existing database is opened, this will update the key size, block + // size, and content identifier with those from the opened database. + // Otherwise, it will use the currently set values. Returns true if a new + // database was created, false if an existing database was found and opened. + bool open(); + + bool isOpen() const; + + bool contains(ByteArray const& k); + + Maybe<ByteArray> find(ByteArray const& k); + List<pair<ByteArray, ByteArray>> find(ByteArray const& lower, ByteArray const& upper); + + void forEach(ByteArray const& lower, ByteArray const& upper, function<void(ByteArray, ByteArray)> v); + void forAll(function<void(ByteArray, ByteArray)> v); + + // Returns true if a value was overwritten + bool insert(ByteArray const& k, ByteArray const& data); + + // Returns true if the element was found and removed + bool remove(ByteArray const& k); + + // Remove all elements in the given range, returns keys removed. + List<ByteArray> remove(ByteArray const& lower, ByteArray const& upper); + + uint64_t recordCount(); + + // The depth of the index nodes in this database + uint8_t indexLevels(); + + uint32_t totalBlockCount(); + uint32_t freeBlockCount(); + uint32_t indexBlockCount(); + uint32_t leafBlockCount(); + + void commit(); + void rollback(); + + void close(bool closeDevice = false); + +private: + typedef uint32_t BlockIndex; + static BlockIndex const InvalidBlockIndex = (BlockIndex)(-1); + static uint32_t const HeaderSize = 512; + + // 8 byte magic file identifier + static char const* const VersionMagic; + static uint32_t const VersionMagicSize = 8; + // 2 byte leaf and index start markers. + static char const* const FreeIndexMagic; + static char const* const IndexMagic; + static char const* const LeafMagic; + // static uint32_t const BlockMagicSize = 2; + static size_t const BTreeRootSelectorBit = 32; + static size_t const BTreeRootInfoStart = 33; + static size_t const BTreeRootInfoSize = 17; + + struct FreeIndexBlock { + BlockIndex nextFreeBlock; + List<BlockIndex> freeBlocks; + }; + + struct IndexNode { + size_t pointerCount() const; + BlockIndex pointer(size_t i) const; + void updatePointer(size_t i, BlockIndex p); + + ByteArray const& keyBefore(size_t i) const; + void updateKeyBefore(size_t i, ByteArray k); + + void removeBefore(size_t i); + void insertAfter(size_t i, ByteArray k, BlockIndex p); + + uint8_t indexLevel() const; + void setIndexLevel(uint8_t indexLevel); + + // count is number of elements to shift left *including* right's beginPointer + void shiftLeft(ByteArray const& mid, IndexNode& right, size_t count); + + // count is number of elements to shift right + void shiftRight(ByteArray const& mid, IndexNode& left, size_t count); + + // i should be index of pointer that will be the new beginPointer of right + // node (cannot be 0). + ByteArray split(IndexNode& right, size_t i); + + struct Element { + ByteArray key; + BlockIndex pointer; + }; + typedef List<Element> ElementList; + + BlockIndex self; + uint8_t level; + Maybe<BlockIndex> beginPointer; + ElementList pointers; + }; + + struct LeafNode { + size_t count() const; + ByteArray const& key(size_t i) const; + ByteArray const& data(size_t i) const; + + void insert(size_t i, ByteArray k, ByteArray d); + void remove(size_t i); + + // count is number of elements to shift left + void shiftLeft(LeafNode& right, size_t count); + + // count is number of elements to shift right + void shiftRight(LeafNode& left, size_t count); + + // i should be index of element that will be the new start of right node. + // Returns right index node. + void split(LeafNode& right, size_t i); + + struct Element { + ByteArray key; + ByteArray data; + }; + typedef List<Element> ElementList; + + BlockIndex self; + ElementList elements; + }; + + struct BTreeImpl { + typedef ByteArray Key; + typedef ByteArray Data; + typedef BlockIndex Pointer; + + typedef shared_ptr<IndexNode> Index; + typedef shared_ptr<LeafNode> Leaf; + + Pointer rootPointer(); + bool rootIsLeaf(); + void setNewRoot(Pointer pointer, bool isLeaf); + + Index createIndex(Pointer beginPointer); + Index loadIndex(Pointer pointer); + bool indexNeedsShift(Index const& index); + bool indexShift(Index const& left, Key const& mid, Index const& right); + Maybe<pair<Key, Index>> indexSplit(Index const& index); + Pointer storeIndex(Index index); + void deleteIndex(Index index); + + Leaf createLeaf(); + Leaf loadLeaf(Pointer pointer); + bool leafNeedsShift(Leaf const& l); + bool leafShift(Leaf& left, Leaf& right); + Maybe<Leaf> leafSplit(Leaf& leaf); + Pointer storeLeaf(Leaf leaf); + void deleteLeaf(Leaf leaf); + + size_t indexPointerCount(Index const& index); + Pointer indexPointer(Index const& index, size_t i); + void indexUpdatePointer(Index& index, size_t i, Pointer p); + Key indexKeyBefore(Index const& index, size_t i); + void indexUpdateKeyBefore(Index& index, size_t i, Key k); + void indexRemoveBefore(Index& index, size_t i); + void indexInsertAfter(Index& index, size_t i, Key k, Pointer p); + size_t indexLevel(Index const& index); + void setIndexLevel(Index& index, size_t indexLevel); + + size_t leafElementCount(Leaf const& leaf); + Key leafKey(Leaf const& leaf, size_t i); + Data leafData(Leaf const& leaf, size_t i); + void leafInsert(Leaf& leaf, size_t i, Key k, Data d); + void leafRemove(Leaf& leaf, size_t i); + Maybe<Pointer> nextLeaf(Leaf const& leaf); + void setNextLeaf(Leaf& leaf, Maybe<Pointer> n); + + BTreeDatabase* parent; + }; + + void readBlock(BlockIndex blockIndex, size_t blockOffset, char* block, size_t size) const; + ByteArray readBlock(BlockIndex blockIndex) const; + void updateBlock(BlockIndex blockIndex, ByteArray const& block); + + void rawReadBlock(BlockIndex blockIndex, size_t blockOffset, char* block, size_t size) const; + void rawWriteBlock(BlockIndex blockIndex, size_t blockOffset, char const* block, size_t size) const; + + void updateHeadFreeIndexBlock(BlockIndex newHead); + + FreeIndexBlock readFreeIndexBlock(BlockIndex blockIndex); + void writeFreeIndexBlock(BlockIndex blockIndex, FreeIndexBlock indexBlock); + + uint32_t leafSize(shared_ptr<LeafNode> const& leaf) const; + uint32_t maxIndexPointers() const; + + uint32_t dataSize(ByteArray const& d) const; + List<BlockIndex> leafTailBlocks(BlockIndex leafPointer); + + void freeBlock(BlockIndex b); + BlockIndex reserveBlock(); + BlockIndex makeEndBlock(); + + void dirty(); + void writeRoot(); + void readRoot(); + void doCommit(); + + void checkIfOpen(char const* methodName, bool shouldBeOpen) const; + void checkBlockIndex(size_t blockIndex) const; + void checkKeySize(ByteArray const& k) const; + uint32_t maxFreeIndexLength() const; + + mutable ReadersWriterMutex m_lock; + + BTreeMixin<BTreeImpl> m_impl; + + IODevicePtr m_device; + bool m_open; + + uint32_t m_blockSize; + String m_contentIdentifier; + uint32_t m_keySize; + + bool m_autoCommit; + + // Reading values can mutate the index cache, so the index cache is kept + // using a different lock. It is only necessary to acquire this lock when + // NOT holding the main writer lock, because if the main writer lock is held + // then no other method would be loading an index anyway. + mutable SpinLock m_indexCacheSpinLock; + LruCache<BlockIndex, shared_ptr<IndexNode>> m_indexCache; + + BlockIndex m_headFreeIndexBlock; + StreamOffset m_deviceSize; + BlockIndex m_root; + bool m_rootIsLeaf; + bool m_usingAltRoot; + bool m_dirty; + + // Blocks that can be freely allocated and written to without violating + // atomic consistency + Set<BlockIndex> m_availableBlocks; + + // Blocks to be freed on next commit. + Deque<BlockIndex> m_pendingFree; + + // Blocks that have been written in uncommitted portions of the tree. + Set<BlockIndex> m_uncommitted; +}; + +// Version of BTreeDatabase that hashes keys with SHA-256 to produce a unique +// constant size key. +class BTreeSha256Database : private BTreeDatabase { +public: + BTreeSha256Database(); + BTreeSha256Database(String const& contentIdentifier); + + // Keys can be arbitrary size, actual key is the SHA-256 checksum of the key. + bool contains(ByteArray const& key); + Maybe<ByteArray> find(ByteArray const& key); + bool insert(ByteArray const& key, ByteArray const& value); + bool remove(ByteArray const& key); + + // Convenience string versions of access methods. Equivalent to the utf8 + // bytes of the string minus the null terminator. + bool contains(String const& key); + Maybe<ByteArray> find(String const& key); + bool insert(String const& key, ByteArray const& value); + bool remove(String const& key); + + using BTreeDatabase::ContentIdentifierStringSize; + using BTreeDatabase::blockSize; + using BTreeDatabase::setBlockSize; + using BTreeDatabase::contentIdentifier; + using BTreeDatabase::setContentIdentifier; + using BTreeDatabase::indexCacheSize; + using BTreeDatabase::setIndexCacheSize; + using BTreeDatabase::autoCommit; + using BTreeDatabase::setAutoCommit; + using BTreeDatabase::ioDevice; + using BTreeDatabase::setIODevice; + using BTreeDatabase::open; + using BTreeDatabase::isOpen; + using BTreeDatabase::recordCount; + using BTreeDatabase::indexLevels; + using BTreeDatabase::totalBlockCount; + using BTreeDatabase::freeBlockCount; + using BTreeDatabase::indexBlockCount; + using BTreeDatabase::leafBlockCount; + using BTreeDatabase::commit; + using BTreeDatabase::rollback; + using BTreeDatabase::close; +}; + +} + +#endif |