leveldb 学习记录(七) SSTable构造

时间:2024-06-22 23:33:56

使用TableBuilder构造一个Table

 struct TableBuilder::Rep {                       // TableBuilder内部使用的结构,记录当前的一些状态等
Options options;
Options index_block_options;
WritableFile* file; // 对应的.sst文件
uint64_t offset;
Status status;
BlockBuilder data_block; // Data Block
BlockBuilder index_block; // Index Block
std::string last_key; // 添加的最后一个key,一方面用于key是否排序的判断,另一方面当写入一个Data
//+ Block时记录index Block中索引项(last_key+offset+size)
int64_t num_entries; // .sst文件中已经添加的key/value数量
bool closed; // Either Finish() or Abandon() has been called. // Add下一Block的第一个key/value时,才根据这个key构造一个FindShortSuccessor,
// 写入Index Block中的一个entry(max_key+offset+size),是为了能够找到
// 一个更短的分割2个Block的key,从而减少存储容量;
// 只有Finish中是根据最后一个Block的最后一个key构造的。
// We do not emit the index entry for a block until we have seen the
// first key for the next data block. This allows us to use shorter
// keys in the index block. For example, consider a block boundary
// between the keys "the quick brown fox" and "the who". We can use
// "the r" as the key for the index block entry since it is >= all
// entries in the first block and < all entries in subsequent
// blocks.
//
// Invariant: r->pending_index_entry is true only if data_block is empty.
bool pending_index_entry; // 标识是否刚写入一个Data Block,控制在Index
//+ Block中添加一项索引信息(last_key+offset+size)
BlockHandle pending_handle; // Handle to add to index block std::string compressed_output; // 数据压缩 Rep(const Options& opt, WritableFile* f) // 构造函数
: options(opt),
index_block_options(opt),
file(f),
offset(),
data_block(&options),
index_block(&index_block_options),
num_entries(),
closed(false),
pending_index_entry(false)
{
index_block_options.block_restart_interval = ; // Index Block中每个restart块只有一个record,查找方便
}
};// struct TableBuilder::Rep ;

TableBuilder头文件

 class TableBuilder {
public:
// Create a builder that will store the contents of the table it is
// building in *file. Does not close the file. It is up to the
// caller to close the file after calling Finish().
//创建一个基于file的builder,存储table. 使用期间不能关闭文件,在调用Finish()后调用方关闭文件
TableBuilder(const Options& options, WritableFile* file); // REQUIRES: Either Finish() or Abandon() has been called.
~TableBuilder(); // Change the options used by this builder. Note: only some of the
// option fields can be changed after construction. If a field is
// not allowed to change dynamically and its value in the structure
// passed to the constructor is different from its value in the
// structure passed to this method, this method will return an error
// without changing any fields.
Status ChangeOptions(const Options& options); // Add key,value to the table being constructed.
// REQUIRES: key is after any previously added key according to comparator.
// REQUIRES: Finish(), Abandon() have not been called
//添加key value 稍后查看代码
void Add(const Slice& key, const Slice& value); // Advanced operation: flush any buffered key/value pairs to file.
// Can be used to ensure that two adjacent entries never live in
// the same data block. Most clients should not need to use this method.
// REQUIRES: Finish(), Abandon() have not been called
void Flush(); // Return non-ok iff some error has been detected.
Status status() const; // Finish building the table. Stops using the file passed to the
// constructor after this function returns.
// REQUIRES: Finish(), Abandon() have not been called Status Finish(); // Indicate that the contents of this builder should be abandoned. Stops
// using the file passed to the constructor after this function returns.
// If the caller is not going to call Finish(), it must call Abandon()
// before destroying this builder.
// REQUIRES: Finish(), Abandon() have not been called
void Abandon(); // Number of calls to Add() so far.
uint64_t NumEntries() const; // Size of the file generated so far. If invoked after a successful
// Finish() call, returns the size of the final generated file.
uint64_t FileSize() const; private:
bool ok() const { return status().ok(); }
void WriteBlock(BlockBuilder* block, BlockHandle* handle); struct Rep;
Rep* rep_; // No copying allowed
TableBuilder(const TableBuilder&);
void operator=(const TableBuilder&);
};

主要是按照格式填充  这里做了简单的注释

// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors. #include "leveldb/table_builder.h" #include <assert.h>
#include <stdio.h>
#include "leveldb/comparator.h"
#include "leveldb/env.h"
#include "table/block_builder.h"
#include "table/format.h"
#include "util/coding.h"
#include "util/crc32c.h"
#include "util/logging.h" namespace leveldb { struct TableBuilder::Rep {
Options options;
Options index_block_options;
WritableFile* file;
uint64_t offset;
Status status;
BlockBuilder data_block;
BlockBuilder index_block;
std::string last_key;
int64_t num_entries;
bool closed; // Either Finish() or Abandon() has been called. // We do not emit the index entry for a block until we have seen the
// first key for the next data block. This allows us to use shorter
// keys in the index block. For example, consider a block boundary
// between the keys "the quick brown fox" and "the who". We can use
// "the r" as the key for the index block entry since it is >= all
// entries in the first block and < all entries in subsequent
// blocks.
//
// Invariant: r->pending_index_entry is true only if data_block is empty.
bool pending_index_entry;
BlockHandle pending_handle; // Handle to add to index block std::string compressed_output; Rep(const Options& opt, WritableFile* f)
: options(opt),
index_block_options(opt),
file(f),
offset(),
data_block(&options),
index_block(&index_block_options),
num_entries(),
closed(false),
pending_index_entry(false) {
index_block_options.block_restart_interval = ;
}
}; TableBuilder::TableBuilder(const Options& options, WritableFile* file)
: rep_(new Rep(options, file)) {
} TableBuilder::~TableBuilder() {
assert(rep_->closed); // Catch errors where caller forgot to call Finish()
delete rep_;
} Status TableBuilder::ChangeOptions(const Options& options) {
// Note: if more fields are added to Options, update
// this function to catch changes that should not be allowed to
// change in the middle of building a Table.
if (options.comparator != rep_->options.comparator) {
return Status::InvalidArgument("changing comparator while building table");
} // Note that any live BlockBuilders point to rep_->options and therefore
// will automatically pick up the updated options.
rep_->options = options;
rep_->index_block_options = options;
rep_->index_block_options.block_restart_interval = ;
return Status::OK();
} void TableBuilder::Add(const Slice& key, const Slice& value) {
Rep* r = rep_;
assert(!r->closed);
if (!ok()) return; //确保Rep没有关闭 并且状态正常 //如果不是添加的table本身的属性 添加的key 必然是有序的的 否则报错
if (r->num_entries > ) {
assert(r->options.comparator->Compare(key, Slice(r->last_key)) > );
} //pending_index_entry标记是否是新创建的一个block
//当新创建一个block时 才可能确认上一个block和新block之间的key的一个分割字符串 记录在lastkey和index_block 方便以后查找key 定位 if (r->pending_index_entry) {
assert(r->data_block.empty());
//comparator 中有 FindShortestSeparator() / FindShortSuccessor()两个接口,
//FindShortestSeparator(start, limit)是获得大于 start 但小于 limit 的最小值。
//FindShortSuccessor(start)是获得比 start 大的最小值。比较都基于 user - commparator,二者会被
//用来确定 sstable 中 block 的 end - key。
r->options.comparator->FindShortestSeparator(&r->last_key, key);
std::string handle_encoding;
r->pending_handle.EncodeTo(&handle_encoding);
r->index_block.Add(r->last_key, Slice(handle_encoding));
r->pending_index_entry = false;
}
//更新lastkey 跟新记录计数 添加data block
r->last_key.assign(key.data(), key.size());
r->num_entries++;
r->data_block.Add(key, value); //data block 大于指定size 进行flush操作
const size_t estimated_block_size = r->data_block.CurrentSizeEstimate();
if (estimated_block_size >= r->options.block_size) {
Flush();
}
} //block flush落盘
void TableBuilder::Flush() {
Rep* r = rep_;
assert(!r->closed);
if (!ok()) return;
if (r->data_block.empty()) return;
assert(!r->pending_index_entry);
WriteBlock(&r->data_block, &r->pending_handle);
if (ok()) {
r->pending_index_entry = true;
r->status = r->file->Flush();
}
} //每个block data 包含 n个字节内容 以及type 1个字节 crc 4个字节
void TableBuilder::WriteBlock(BlockBuilder* block, BlockHandle* handle) {
// File format contains a sequence of blocks where each block has:
// block_data: uint8[n]
// type: uint8
// crc: uint32
assert(ok());
Rep* r = rep_;
Slice raw = block->Finish(); Slice block_contents;
CompressionType type = r->options.compression;
// TODO(postrelease): Support more compression options: zlib?
switch (type) {
case kNoCompression:
block_contents = raw;
break; case kSnappyCompression: {
std::string* compressed = &r->compressed_output;
if (port::Snappy_Compress(raw.data(), raw.size(), compressed) &&
compressed->size() < raw.size() - (raw.size() / 8u)) {
block_contents = *compressed;
} else {
// Snappy not supported, or compressed less than 12.5%, so just
// store uncompressed form
block_contents = raw;
type = kNoCompression;
}
break;
}
}
handle->set_offset(r->offset);
handle->set_size(block_contents.size());
r->status = r->file->Append(block_contents);
if (r->status.ok()) {
char trailer[kBlockTrailerSize];
trailer[] = type;
uint32_t crc = crc32c::Value(block_contents.data(), block_contents.size());
crc = crc32c::Extend(crc, trailer, ); // Extend crc to cover block type
EncodeFixed32(trailer+, crc32c::Mask(crc));
r->status = r->file->Append(Slice(trailer, kBlockTrailerSize));
if (r->status.ok()) {
r->offset += block_contents.size() + kBlockTrailerSize;
}
}
r->compressed_output.clear();
block->Reset();
} Status TableBuilder::status() const {
return rep_->status;
} Status TableBuilder::Finish() {
Rep* r = rep_;
Flush();
assert(!r->closed);
r->closed = true;
BlockHandle metaindex_block_handle;
BlockHandle index_block_handle;
if (ok()) {
BlockBuilder meta_index_block(&r->options);
// TODO(postrelease): Add stats and other meta blocks
WriteBlock(&meta_index_block, &metaindex_block_handle);
}
if (ok()) {
if (r->pending_index_entry) {
r->options.comparator->FindShortSuccessor(&r->last_key);
std::string handle_encoding;
r->pending_handle.EncodeTo(&handle_encoding);
r->index_block.Add(r->last_key, Slice(handle_encoding));
r->pending_index_entry = false;
}
WriteBlock(&r->index_block, &index_block_handle);
}
if (ok()) {
Footer footer;
footer.set_metaindex_handle(metaindex_block_handle);
footer.set_index_handle(index_block_handle);
std::string footer_encoding;
footer.EncodeTo(&footer_encoding);
r->status = r->file->Append(footer_encoding);
if (r->status.ok()) {
r->offset += footer_encoding.size();
}
}
return r->status;
} void TableBuilder::Abandon() {
Rep* r = rep_;
assert(!r->closed);
r->closed = true;
} uint64_t TableBuilder::NumEntries() const {
return rep_->num_entries;
} uint64_t TableBuilder::FileSize() const {
return rep_->offset;
} }

参考

https://blog.****.net/tankles/article/details/7663918

《leveldb实现解析》淘宝 那岩