可変長配列データベースのまとめ
かなり間が空きましたが、可変長配列データベースのまとめです。今まで個別に説明してきたものをまとめただけでなく幾つか修正しています。一つバグがあったので修正した他は、以下の点を改良しています。
- データベースの生成とオープンのメソッドを分離
- データベースのロックのメソッドを追加
- stringの登録追加のメソッドを追加
以下コードをそのまま一つのヘッダーファイルにすれば利用できます。
#ifndef ARRAYDATABASE #define ARRAYDATABASE #include <fcntl.h> #include <errno.h> #include <sys/mman.h> #include <sys/shm.h> #include <assert.h> #include <iostream> #include <exception> class Exception : public std::exception { public: Exception(const std::string &msg) { message = msg; } virtual ~Exception() throw() {} const char *what() { return message.c_str(); } std::string message; }; class Mutex { public: Mutex(){}; ~Mutex(){}; void lock(); void unlock(); void wait(); void signal(); void broadcast(); void create(); void destroy(); protected: pthread_mutex_t mutex; pthread_cond_t condition; pthread_mutexattr_t mutex_attr; }; inline void Mutex::create() { int stat = 0; if (pthread_mutexattr_setpshared(&mutex_attr, PTHREAD_PROCESS_SHARED) != 0) { throw Exception(strerror(stat)); } if ((stat = pthread_mutex_init(&mutex, &mutex_attr)) != 0) { throw Exception(strerror(stat)); } if ((stat = pthread_cond_init(&condition, NULL)) != 0) { pthread_mutex_destroy(&mutex); throw Exception(strerror(stat)); } } inline void Mutex::destroy() { pthread_mutex_destroy(&mutex); pthread_cond_destroy(&condition); } inline void Mutex::lock() { int stat; if ((stat = pthread_mutex_lock(&mutex)) != 0) { throw Exception(strerror(stat)); } } inline void Mutex::unlock() { int stat; if ((stat = pthread_mutex_unlock(&mutex)) != 0) { throw Exception(strerror(stat)); } } inline void Mutex::signal() { int stat; if ((stat = pthread_cond_signal(&condition)) != 0) { throw Exception(strerror(stat)); } } inline void Mutex::wait() { int stat; if ((stat = pthread_cond_wait(&condition, &mutex)) != 0) { throw Exception(strerror(stat)); } } inline void Mutex::broadcast() { int stat; if ((stat = pthread_cond_broadcast(&condition)) != 0) { throw Exception(strerror(stat)); } } template <class T> class Array { public: class Header { public: void initialize() { headerSize = 256; pageSize = sysconf(_SC_PAGE_SIZE); elementSize = sizeof(T); arraySize = 0; validArraySize = 0; size = headerSize; } size_t updateSize(size_t arraysize) { size = ((headerSize + arraysize * elementSize - 1) / pageSize + 1) * pageSize; arraySize = (size - headerSize) / elementSize; return size; } Mutex &getMutex() { return mutex; } size_t headerSize; size_t pageSize; // page size size_t elementSize; // size of an element size_t arraySize; // number of elements size_t validArraySize; // number of valid elements size_t size; // total size Mutex mutex; // mutex }; Array():fd(-1) {} Array(const std::string &f):fd(-1) { initialize(f); } ~Array() { if (fd != -1) { msync(header, initialSize, MS_SYNC); munmap(header, initialSize); close(); } } void initialize(const std::string &f) { file = f; if ((fd = open(file.c_str(), O_RDWR, 0666)) == -1) { throw Exception(strerror(errno)); } struct stat fs; fstat(fd, &fs); map(fs.st_size); reservedSize = 0; } void map(size_t sz) { if (fd == -1) { throw Exception("Not ready"); } initialSize = sz; header = (Header*)mmap(0, sz, PROT_READ|PROT_WRITE,MAP_SHARED, fd, 0); if ((long long)header == -1){ close(); throw Exception(strerror(errno)); } array = (T*)((unsigned char*)header + header->headerSize); if (sz != header->size) { close(); throw Exception("Sizes are inconsistency"); } } void resize(size_t arraysize) { resizeCapacity(arraysize); if (arraysize != header->validArraySize) { header->validArraySize = arraysize; } } T &at(size_t idx) { if (fd == -1) { throw Exception("Not ready"); } if (idx >= header->validArraySize) { throw Exception("Invalid element"); } return array[idx]; } T &set(size_t idx, T& v) { if (idx >= header->arraySize) { resizeCapacity(idx + 1); } array[idx] = v; if (idx >= header->validArraySize) { header->validArraySize = idx + 1; } T &data = array[idx]; return data; } size_t reserved(size_t rs) { return reservedSize += rs; } size_t getReservedSize() { return reservedSize; } void *getReservedVariable(size_t offset) { return (void*)((unsigned char*)(Array<T>::array) - offset); } size_t getSize() { return header->validArraySize; } Header &getHeader() { return *header; } Mutex &getMutex() { return header->mutex; } static void create(const std::string &file) { int fd = -1; if ((fd = open(file.c_str(), O_RDWR|O_CREAT, 0666)) == -1) { throw Exception(strerror(errno)); } Header h; h.initialize(); if (write(fd, &h, sizeof(Header)) == -1) { close(fd); throw Exception(strerror(errno)); } unsigned char c = 0; for (size_t i = 0; i < h.headerSize - sizeof(Header); i++) { if (write(fd, &c, sizeof(c)) == -1) { close(fd); throw Exception(strerror(errno)); } } close(fd); fd = -1; Array<T> array(file); array.header->getMutex().create(); } protected: void remap(size_t sz) { msync(header, initialSize, MS_SYNC); munmap(header, initialSize); map(sz); } void close() { close(fd); } static void close(int fd) { if (fd != -1) { ::close(fd); } fd = -1; } void resizeCapacity(size_t arraysize) { if (fd == -1) { throw Exception("Not ready"); } if (initialSize != header->size) { remap(header->size); } Header h; h = *header; size_t s = h.size; h.updateSize(arraysize); if (h.size == s) { return; } if (fd != -1) { msync(header, header->size, MS_SYNC); munmap(header, header->size); close(); } if ((fd = open(file.c_str(), O_RDWR, 0666)) == -1) { throw Exception(strerror(errno)); } if (ftruncate(fd, h.size) == -1) { close(); throw Exception(strerror(errno)); } if (write(fd, &h, sizeof(Header)) == -1) { close(); throw Exception(strerror(errno)); } map(h.size); if (arraysize < header->validArraySize) { header->validArraySize = arraysize; } } protected: Header *header; int fd; T *array; std::string file; size_t initialSize; size_t reservedSize; }; template <class T> class Stack : public Array<T> { public: Stack(const std::string &f):Array<T>(f) {} Stack() {} void initialize(const std::string &f) { Array<T>::initialize(f); } void push(T &t) { set(Array<T>::getSize(), t); } T front() { if (Array<T>::getSize() <= 0) { throw Exception("No data"); } return Array<T>::at(Array<T>::getSize() - 1); } void pop() { Array<T>::resize(Array<T>::getSize() - 1); } }; template <class T> class Pool { public: Pool() {} Pool(const std::string &f) { initialize(f); } void initialize(const std::string &f) { std::string str = f; str.append("_p"); pool.initialize(str); reservedOffset = pool.reserved(sizeof(size_t)); str = f; str.append("_i"); invalidObjects.initialize(str); } static void create(const std::string &f) { std::string str = f; str.append("_p"); Array<T>::create(str); str = f; str.append("_i"); Stack<size_t>::create(str); } size_t insert() { size_t idx; try { idx = invalidObjects.front(); invalidObjects.pop(); } catch(...) { getPoolSize()++; idx = getPoolSize(); if (idx >= pool.getSize()) { pool.resize(idx + 1); } } return idx; } size_t insert(T &t) { size_t idx = insert(); pool.set(idx - 1, t); return idx; } void remove(size_t idx) { invalidObjects.push(idx); } void set(size_t idx, T &t) { pool.set(idx - 1, t); } T &at(size_t idx) { return pool.at(idx - 1); } protected: Array<T> pool; Stack<size_t> invalidObjects; size_t reservedOffset; size_t &getPoolSize() { return *(size_t*)pool.getReservedVariable(reservedOffset); } }; class Codec { public: Codec(){} static size_t encode(size_t v, unsigned char *buf) { size_t mask = 0xffffffffffffff80; size_t cnt; for (cnt = 0; cnt < sizeof(size_t); cnt++) { if ((mask & v) == 0) { break; } mask <<= 7; } if (cnt == sizeof(size_t)) { valcpy(&buf[1], v, sizeof(size_t)); buf[0] = 0xff; } else { valcpy(buf, v, cnt + 1); buf[0] |= (0xff << (sizeof(size_t) - cnt)); } size_t validSize = cnt + 1; return validSize; } static size_t decode(unsigned char *buf, size_t &size) { size_t cnt; unsigned char mask = 0x80; unsigned char head = buf[0]; for (cnt = 0; cnt < sizeof(size_t); cnt++) { if ((mask & head) == 0) { break; } mask >>= 1; } size_t v = 0; if (cnt > sizeof(size_t)) { valcpy(v, &buf[1], sizeof(size_t)); size = 9; } else { valcpy(v, buf, cnt + 1); *(((unsigned char*)&v) + cnt) &= (0x7f >> cnt); size = cnt + 1; } return v; } private: static void valcpy(unsigned char *buf, size_t &v, int s) { unsigned char *vptr = (unsigned char*)&v + s - 1; for (int i = 0; i < s; i++) { *buf++ = *vptr--; } } static void valcpy(size_t &v, unsigned char *buf, int s) { v = 0; unsigned char *vptr = (unsigned char*)&v + s - 1; for (int i = 0; i < s; i++) { *vptr-- = *buf++; } } }; class VariableArray { public: static const size_t headSize = 128; static const size_t bodySize = 256; // Format:|size(encoded)|body index(encoded)|data...| class Head { public: Head() { *record = 0; } unsigned char record[headSize]; }; // Format:|next index(encoded)|data...| class Body { public: unsigned char record[bodySize]; }; VariableArray(const std::string &f) { initialize(f); } void initialize(const std::string &f) { std::string str = f; str.append("_h"); head.initialize(str); str = f; str.append("_t"); body.initialize(str); } static void create(const std::string &f) { std::string str = f; str.append("_h"); Array<Head>::create(str); str = f; str.append("_t"); Pool<Body>::create(str); } void set(size_t idx, std::vector<unsigned char> &val) { set(idx, &val[0], val.size()); } void set(size_t idx, const string &val) { set(idx, (unsigned char*)val.c_str(), val.size() + 1); } void set(size_t idx, unsigned char *val, size_t size) { erase(idx); if (idx >= head.getSize()) { head.resize(idx + 1); } unsigned char *ptr = (unsigned char*)&head.at(idx); int sizeSize = Codec::encode(size, ptr); size_t bodyIndexSize = 1; size_t totalSize = size + sizeSize + bodyIndexSize; ptr += sizeSize; size_t bodyIndex; int len; if (totalSize > headSize) { bodyIndex = body.insert(); bodyIndexSize = Codec::encode(bodyIndex, ptr); len = headSize - sizeSize - bodyIndexSize; } else { bodyIndex = 0; //bodyIndexSize = Codec::encode(bodyIndex, ptr); bodyIndexSize = 1; len = size; } ptr += bodyIndexSize; memcpy(ptr, val, len); while (bodyIndex != 0) { size -= len; val += len; bodyIndexSize = 1; totalSize = size + bodyIndexSize; if (totalSize > bodySize) { size_t ti = body.insert(); ptr = (unsigned char*)&body.at(bodyIndex); bodyIndex = ti; bodyIndexSize = Codec::encode(bodyIndex, ptr); len = bodySize - bodyIndexSize; } else { ptr = (unsigned char*)&body.at(bodyIndex); bodyIndex = 0; *ptr = 0; len = size; } ptr += bodyIndexSize; memcpy(ptr, val, len); } } void get(size_t idx, string &str) { size_t strsize = 0; char *val = (char*)get(idx, strsize); if (val[strsize] != 0) { throw Exception("Not string!"); } if (strsize == 0) { str = ""; } else { str.assign(val, strsize - 1); } return; } unsigned char *get(size_t idx, size_t &size) { unsigned char *ptr = (unsigned char*)&head.at(idx); size_t sizeSize; size = Codec::decode(ptr, sizeSize); if (size == 0) { throw Exception("The specified element is empty"); } size_t restSize = size; unsigned char *buf = new unsigned char[size]; unsigned char *bufptr = buf; ptr += sizeSize; size_t bodyIndexSize; size_t bodyIndex = Codec::decode(ptr, bodyIndexSize); ptr += bodyIndexSize; size_t len = headSize - sizeSize - bodyIndexSize; len = restSize > len ? len : restSize; memcpy(bufptr, ptr, len); while (bodyIndex != 0) { restSize -= len; assert(restSize > 0); bufptr += len; ptr = (unsigned char*)&body.at(bodyIndex); bodyIndex = Codec::decode(ptr, bodyIndexSize); ptr += bodyIndexSize; len = bodySize - bodyIndexSize; len = restSize > len ? len : restSize;; memcpy(bufptr, ptr, len); } return buf; } void erase(size_t idx) { if (idx >= head.getSize()) { return; } unsigned char *headPtr = (unsigned char*)&head.at(idx); unsigned char *ptr = headPtr; size_t sizeSize; size_t size = Codec::decode(ptr, sizeSize); if (size == 0) { // already cleared return; } ptr += sizeSize; size_t bodyIndexSize; size_t bodyIndex = Codec::decode(ptr, bodyIndexSize); headPtr[0] = 0; // size <- 0 headPtr[1] = 0; // index <- 0 invalid index while (bodyIndex != 0) { ptr = (unsigned char*)&body.at(bodyIndex); size_t nextIndex = Codec::decode(ptr, bodyIndexSize); body.remove(bodyIndex); bodyIndex = nextIndex; } return; } size_t getSize() { return head.getSize(); } void lock() { getHeader().getMutex().lock(); } void unlock() { getHeader().getMutex().unlock(); } Array<Head>::Header &getHeader() { return head.getHeader(); } protected: Array<Head> head; Pool<Body> body; }; #endif
使い方の例です。簡単ですね。
void array_write() { VariableArray::create("sample_db"); VariableArray array("sample_db"); const string str = "abcdefghijklmn"; array.lock(); try { for (int i = 0; i < 10; i++) { string val = str.substr(0, i + 1); cerr << "val=" << val << endl; array.set(i, val); } } catch (Exception &e) { array.unlock(); std::cerr << e.what() << std::endl; } array.unlock(); } void array_read() { VariableArray array("sample_db"); array.lock(); try { for (int i = 0; i < 10; i++) { string val; array.get(i, val); std::cerr << "array[" << i << "]=" << val << std::endl; } } catch (Exception &e) { array.unlock(); std::cerr << e.what() << std::endl; } array.unlock(); } int main() { array_write(); array_read(); }