可変長配列データベースのまとめ

かなり間が空きましたが、可変長配列データベースのまとめです。今まで個別に説明してきたものをまとめただけでなく幾つか修正しています。一つバグがあったので修正した他は、以下の点を改良しています。

  • データベースの生成とオープンのメソッドを分離
  • データベースのロックのメソッドを追加
  • stringの登録追加のメソッドを追加

以下コードをそのまま一つのヘッダーファイルにすれば利用できます。

#ifndef ARRAYDATABASE
#define ARRAYDATABASE

#include <fcntl.h>
#include <errno.h>
#include <sys/mman.h>
#include <sys/shm.h>
#include <assert.h>

#include <iostream>
#include <exception>

class Exception : public std::exception {
 public:
  Exception(const std::string &msg)
    {
      message = msg;
    }

  virtual ~Exception() throw() {}

  const char *what() {
    return message.c_str();
  }

  std::string message;
};

class Mutex {
 public:
  Mutex(){};
  ~Mutex(){};
  void lock();
  void unlock();
  void wait();
  void signal();
  void broadcast();
  void create();
  void destroy();
 protected:
  pthread_mutex_t       mutex;
  pthread_cond_t        condition;
  pthread_mutexattr_t   mutex_attr;
};

inline void Mutex::create() {
  int stat = 0;
  if (pthread_mutexattr_setpshared(&mutex_attr, PTHREAD_PROCESS_SHARED) != 0) {
    throw Exception(strerror(stat));
  }
  if ((stat = pthread_mutex_init(&mutex, &mutex_attr)) != 0) {
    throw Exception(strerror(stat));
  }
  if ((stat = pthread_cond_init(&condition, NULL)) != 0) {
    pthread_mutex_destroy(&mutex);
    throw Exception(strerror(stat));
  }
}

inline void Mutex::destroy() {
  pthread_mutex_destroy(&mutex);
  pthread_cond_destroy(&condition);
}

inline void
Mutex::lock() {
  int stat;
  if ((stat = pthread_mutex_lock(&mutex)) != 0) {
    throw Exception(strerror(stat));
  }
}

inline void
Mutex::unlock() {
  int stat;
  if ((stat = pthread_mutex_unlock(&mutex)) != 0) {
    throw Exception(strerror(stat));
  }
}

inline void
Mutex::signal() {
  int stat;
  if ((stat = pthread_cond_signal(&condition)) != 0) {
    throw Exception(strerror(stat));
  }
}

inline void
Mutex::wait() {
  int stat;
  if ((stat = pthread_cond_wait(&condition, &mutex)) != 0) {
    throw Exception(strerror(stat));
  }
}

inline void
Mutex::broadcast() {
  int stat;
  if ((stat = pthread_cond_broadcast(&condition)) != 0) {
    throw Exception(strerror(stat));
  }
}

template <class T> class Array {
 public:
  class Header {
  public:
    void initialize() {
      headerSize        = 256;
      pageSize          = sysconf(_SC_PAGE_SIZE);
      elementSize       = sizeof(T);
      arraySize         = 0;
      validArraySize    = 0;
      size              = headerSize;
    }

    size_t updateSize(size_t arraysize) {
      size = ((headerSize + arraysize * elementSize - 1) / pageSize + 1) *
        pageSize;
      arraySize = (size - headerSize) / elementSize;
      return size;
    }
    Mutex &getMutex() { return mutex; }

    size_t headerSize;
    size_t pageSize;            // page size
    size_t elementSize;         // size of an element
    size_t arraySize;           // number of elements
    size_t validArraySize;      // number of valid elements
    size_t size;                // total size
    Mutex  mutex;               // mutex
  };

 Array():fd(-1) {}

 Array(const std::string &f):fd(-1) {
    initialize(f);
  }

  ~Array() {
    if (fd != -1) {
      msync(header, initialSize, MS_SYNC);
      munmap(header, initialSize);
      close();
    }
  }

  void initialize(const std::string &f) {
    file = f;
    if ((fd = open(file.c_str(), O_RDWR, 0666)) == -1) {
      throw Exception(strerror(errno));
    }
    struct stat fs;
    fstat(fd, &fs);
    map(fs.st_size);
    reservedSize = 0;
  }

  void map(size_t sz) {
    if (fd == -1) {
      throw Exception("Not ready");
    }
    initialSize = sz;
    header = (Header*)mmap(0, sz, PROT_READ|PROT_WRITE,MAP_SHARED,
                           fd, 0);
    if ((long long)header == -1){
      close();
      throw Exception(strerror(errno));
    }
    array = (T*)((unsigned char*)header + header->headerSize);
    if (sz != header->size) {
      close();
      throw Exception("Sizes are inconsistency");
    }
  }

  void resize(size_t arraysize) {
    resizeCapacity(arraysize);
    if (arraysize != header->validArraySize) {
      header->validArraySize = arraysize;
    }
  }

  T &at(size_t idx) {
    if (fd == -1) {
      throw Exception("Not ready");
    }
    if (idx >= header->validArraySize) {
      throw Exception("Invalid element");
    }
    return array[idx];
  }

  T &set(size_t idx, T& v) {
    if (idx >= header->arraySize) {
      resizeCapacity(idx + 1);
    }
    array[idx] = v;
    if (idx >= header->validArraySize) {
      header->validArraySize = idx + 1;
    }
    T &data = array[idx];
    return data;
  }

  size_t reserved(size_t rs) {
    return reservedSize += rs;
  }

  size_t getReservedSize() {
    return reservedSize;
  }

  void *getReservedVariable(size_t offset) {
    return (void*)((unsigned char*)(Array<T>::array) - offset);
  }

  size_t getSize() { return header->validArraySize; }

  Header &getHeader() { return *header; }
  Mutex &getMutex() { return header->mutex; }

  static void
    create(const std::string &file) {
    int fd = -1;
    if ((fd = open(file.c_str(), O_RDWR|O_CREAT, 0666)) == -1) {
      throw Exception(strerror(errno));
    }
    Header h;
    h.initialize();
    if (write(fd, &h, sizeof(Header)) == -1) {
      close(fd);
      throw Exception(strerror(errno));
    }
    unsigned char c = 0;
    for (size_t i = 0; i < h.headerSize - sizeof(Header); i++) {
      if (write(fd, &c, sizeof(c)) == -1) {
        close(fd);
        throw Exception(strerror(errno));
      }
    }
    close(fd);
    fd = -1;
    Array<T> array(file);
    array.header->getMutex().create();
  }

 protected:
  void remap(size_t sz) {
    msync(header, initialSize, MS_SYNC);
    munmap(header, initialSize);
    map(sz);
  }

  void close() {
    close(fd);
  }

  static void close(int fd) {
    if (fd != -1) {
      ::close(fd);
    }
    fd = -1;
  }

  void resizeCapacity(size_t arraysize) {
    if (fd == -1) {
      throw Exception("Not ready");
    }

    if (initialSize != header->size) {
      remap(header->size);
    }

    Header h;
    h = *header;
    size_t s = h.size;
    h.updateSize(arraysize);
    if (h.size == s) {
      return;
    }

    if (fd != -1) {
      msync(header, header->size, MS_SYNC);
      munmap(header, header->size);
      close();
    }
    if ((fd = open(file.c_str(), O_RDWR, 0666)) == -1) {
      throw Exception(strerror(errno));
    }
    if (ftruncate(fd, h.size) == -1) {
      close();
      throw Exception(strerror(errno));
    }
    if (write(fd, &h, sizeof(Header)) == -1) {
      close();
      throw Exception(strerror(errno));
    }
    map(h.size);
    if (arraysize < header->validArraySize) {
      header->validArraySize = arraysize;
    }
  }


 protected:
  Header        *header;
  int           fd;
  T             *array;
  std::string   file;
  size_t        initialSize;
  size_t        reservedSize;

};

template <class T> class Stack : public Array<T>  {
 public:
  Stack(const std::string &f):Array<T>(f) {}
  Stack() {}

  void initialize(const std::string &f) {
    Array<T>::initialize(f);
  }

  void push(T &t) {
    set(Array<T>::getSize(), t);
  }
  T front() {
    if (Array<T>::getSize() <= 0) {
      throw Exception("No data");
    }
    return Array<T>::at(Array<T>::getSize() - 1);
  }
  void pop() {
    Array<T>::resize(Array<T>::getSize() - 1);
  }
};

template <class T> class Pool  {
 public:
  Pool() {}
  Pool(const std::string &f) {
    initialize(f);
  }

  void
    initialize(const std::string &f) {
    std::string str = f;
    str.append("_p");
    pool.initialize(str);
    reservedOffset = pool.reserved(sizeof(size_t));
    str = f;
    str.append("_i");
    invalidObjects.initialize(str);
  }

  static void
    create(const std::string &f) {
    std::string str = f;
    str.append("_p");
    Array<T>::create(str);
    str = f;
    str.append("_i");
    Stack<size_t>::create(str);
  }

  size_t insert() {
    size_t idx;
    try {
      idx = invalidObjects.front();
      invalidObjects.pop();
    } catch(...) {
      getPoolSize()++;
      idx = getPoolSize();
      if (idx >= pool.getSize()) {
        pool.resize(idx + 1);
      }
    }
    return idx;
  }

  size_t insert(T &t) {
    size_t idx = insert();
    pool.set(idx - 1, t);
    return idx;
  }

  void remove(size_t idx) {
    invalidObjects.push(idx);
  }

  void set(size_t idx, T &t) {
    pool.set(idx - 1, t);
  }

  T &at(size_t idx) {
    return pool.at(idx - 1);
  }
 protected:
  Array<T> pool;
  Stack<size_t> invalidObjects;
  size_t reservedOffset;
  size_t &getPoolSize() {
    return *(size_t*)pool.getReservedVariable(reservedOffset);
  }

};

class Codec {
 public:
  Codec(){}
  static size_t encode(size_t v, unsigned char *buf) {
    size_t mask = 0xffffffffffffff80;
    size_t cnt;
    for (cnt = 0; cnt < sizeof(size_t); cnt++) {
      if ((mask & v) == 0) {
        break;
      }
      mask <<= 7;
    }

    if (cnt == sizeof(size_t)) {
      valcpy(&buf[1], v, sizeof(size_t));
      buf[0] = 0xff;
    } else {
      valcpy(buf, v, cnt + 1);
      buf[0] |= (0xff << (sizeof(size_t) - cnt));
    }

    size_t validSize = cnt + 1;
    return validSize;
  }
  static size_t decode(unsigned char *buf, size_t &size) {
    size_t cnt;
    unsigned char mask = 0x80;
    unsigned char head = buf[0];
    for (cnt = 0; cnt < sizeof(size_t); cnt++) {
      if ((mask & head) == 0) {
        break;
      }
      mask >>= 1;
    }

    size_t v = 0;
    if (cnt > sizeof(size_t)) {
      valcpy(v, &buf[1], sizeof(size_t));
      size = 9;
    } else {
      valcpy(v, buf, cnt + 1);
      *(((unsigned char*)&v) + cnt) &= (0x7f >> cnt);
      size = cnt + 1;
    }
    return v;
  }

 private:
  static void
    valcpy(unsigned char *buf, size_t &v, int s) {
    unsigned char *vptr = (unsigned char*)&v + s - 1;
    for (int i = 0; i < s; i++) {
      *buf++ = *vptr--;
    }
  }

  static void
    valcpy(size_t &v, unsigned char *buf, int s) {
    v = 0;
    unsigned char *vptr = (unsigned char*)&v + s - 1;
    for (int i = 0; i < s; i++) {
      *vptr-- = *buf++;
    }
  }

};

class VariableArray {
 public:
  static const size_t headSize = 128;
  static const size_t bodySize = 256;

  // Format:|size(encoded)|body index(encoded)|data...|
  class Head {
  public:
    Head() {
      *record = 0;
    }
    unsigned char record[headSize];
  };
  // Format:|next index(encoded)|data...|
  class Body {
  public:
    unsigned char record[bodySize];
  };

  VariableArray(const std::string &f) {
    initialize(f);
  }

  void initialize(const std::string &f) {
    std::string str = f;
    str.append("_h");
    head.initialize(str);
    str = f;
    str.append("_t");
    body.initialize(str);
  }

  static void
    create(const std::string &f) {
    std::string str = f;
    str.append("_h");
    Array<Head>::create(str);
    str = f;
    str.append("_t");
    Pool<Body>::create(str);
  }

  void set(size_t idx, std::vector<unsigned char> &val) {
    set(idx, &val[0], val.size());
  }

  void set(size_t idx, const string &val) {
    set(idx, (unsigned char*)val.c_str(), val.size() + 1);
  }

  void set(size_t idx, unsigned char *val, size_t size) {
    erase(idx);
    if (idx >= head.getSize()) {
      head.resize(idx + 1);
    }
    unsigned char *ptr = (unsigned char*)&head.at(idx);
    int sizeSize = Codec::encode(size, ptr);
    size_t bodyIndexSize = 1;
    size_t totalSize = size + sizeSize + bodyIndexSize;
    ptr += sizeSize;
    size_t bodyIndex;
    int len;
    if (totalSize > headSize) {
      bodyIndex = body.insert();
      bodyIndexSize = Codec::encode(bodyIndex, ptr);
      len = headSize - sizeSize - bodyIndexSize;
    } else {
      bodyIndex = 0;
      //bodyIndexSize = Codec::encode(bodyIndex, ptr);
      bodyIndexSize = 1;
      len = size;
    }
    ptr += bodyIndexSize;
    memcpy(ptr, val, len);
    while (bodyIndex != 0) {
      size -= len;
      val += len;
      bodyIndexSize = 1;
      totalSize = size + bodyIndexSize;
      if (totalSize > bodySize) {
        size_t ti = body.insert();
        ptr = (unsigned char*)&body.at(bodyIndex);
        bodyIndex = ti;
        bodyIndexSize = Codec::encode(bodyIndex, ptr);
        len = bodySize - bodyIndexSize;
      } else {
        ptr = (unsigned char*)&body.at(bodyIndex);
        bodyIndex = 0;
        *ptr = 0;
        len = size;
      }
      ptr += bodyIndexSize;
      memcpy(ptr, val, len);

    }
  }

  void
  get(size_t idx, string &str) {
    size_t strsize = 0;
    char *val = (char*)get(idx, strsize);
    if (val[strsize] != 0) {
      throw Exception("Not string!");
    }
    if (strsize == 0) {
      str = "";
    } else {
      str.assign(val, strsize - 1);
    }
    return;
  }

  unsigned char *get(size_t idx, size_t &size) {
    unsigned char *ptr = (unsigned char*)&head.at(idx);

    size_t sizeSize;
    size = Codec::decode(ptr, sizeSize);
    if (size == 0) {
      throw Exception("The specified element is empty");
    }
    size_t restSize = size;
    unsigned char *buf = new unsigned char[size];
    unsigned char *bufptr = buf;
    ptr += sizeSize;
    size_t bodyIndexSize;
    size_t bodyIndex = Codec::decode(ptr, bodyIndexSize);
    ptr += bodyIndexSize;
    size_t len = headSize - sizeSize - bodyIndexSize;
    len = restSize > len ? len : restSize;
    memcpy(bufptr, ptr, len);
    while (bodyIndex != 0) {
      restSize -= len;
      assert(restSize > 0);
      bufptr += len;
      ptr = (unsigned char*)&body.at(bodyIndex);
      bodyIndex = Codec::decode(ptr, bodyIndexSize);
      ptr += bodyIndexSize;
      len = bodySize - bodyIndexSize;
      len = restSize > len ? len : restSize;;
      memcpy(bufptr, ptr, len);
    }

    return buf;
  }

  void erase(size_t idx) {
    if (idx >= head.getSize()) {
      return;
    }
    unsigned char *headPtr = (unsigned char*)&head.at(idx);
    unsigned char *ptr = headPtr;
    size_t sizeSize;
    size_t size = Codec::decode(ptr, sizeSize);
    if (size == 0) {
      // already cleared
      return;
    }
    ptr += sizeSize;
    size_t bodyIndexSize;
    size_t bodyIndex = Codec::decode(ptr, bodyIndexSize);
    headPtr[0] = 0;  // size <- 0
    headPtr[1] = 0;  // index <- 0 invalid index
    while (bodyIndex != 0) {
      ptr = (unsigned char*)&body.at(bodyIndex);
      size_t nextIndex = Codec::decode(ptr, bodyIndexSize);
      body.remove(bodyIndex);
      bodyIndex = nextIndex;
    }
    return;
  }
  size_t getSize() { return head.getSize(); }
  void lock() { getHeader().getMutex().lock(); }
  void unlock() { getHeader().getMutex().unlock(); }
  Array<Head>::Header &getHeader() { return head.getHeader(); }

 protected:
  Array<Head>   head;
  Pool<Body>    body;

};

#endif

使い方の例です。簡単ですね。

void
array_write()
{

  VariableArray::create("sample_db");

  VariableArray array("sample_db");

  const string str = "abcdefghijklmn";
  array.lock();
  try {
    for (int i = 0; i < 10; i++) {
      string val = str.substr(0, i + 1);
      cerr << "val=" << val << endl;
      array.set(i, val);
    }
  } catch (Exception &e) {
    array.unlock();
    std::cerr << e.what() << std::endl;
  }
  array.unlock();

}

void
array_read()
{
  VariableArray array("sample_db");

  array.lock();
  try {
    for (int i = 0; i < 10; i++) {
      string val;
      array.get(i, val);
      std::cerr << "array[" << i << "]=" << val << std::endl;
    }
  } catch (Exception &e) {
    array.unlock();
    std::cerr << e.what() << std::endl;
  }
  array.unlock();
}

int
main()
{
  array_write();
  array_read();
}

前:可変長配列データベース