diff --git a/db.go b/db.go
index 64735c61..314b6669 100755
--- a/db.go
+++ b/db.go
@@ -582,10 +582,10 @@ func (db *DB) DropColumnFamily(c *ColumnFamilyHandle) error {
 //
 // The keys counted will begin at Range.Start and end on the key before
 // Range.Limit.
-func (db *DB) GetApproximateSizes(ranges []Range) []uint64 {
+func (db *DB) GetApproximateSizes(ranges []Range) ([]uint64, error) {
 	sizes := make([]uint64, len(ranges))
 	if len(ranges) == 0 {
-		return sizes
+		return sizes, nil
 	}
 
 	cStarts := make([]*C.char, len(ranges))
@@ -606,6 +606,7 @@ func (db *DB) GetApproximateSizes(ranges []Range) []uint64 {
 		}
 	}()
 
+	var cErr *C.char
 	C.rocksdb_approximate_sizes(
 		db.c,
 		C.int(len(ranges)),
@@ -613,9 +614,15 @@ func (db *DB) GetApproximateSizes(ranges []Range) []uint64 {
 		&cStartLens[0],
 		&cLimits[0],
 		&cLimitLens[0],
-		(*C.uint64_t)(&sizes[0]))
+		(*C.uint64_t)(&sizes[0]),
+		&cErr)
+
+	if cErr != nil {
+		defer C.rocksdb_free(unsafe.Pointer(cErr))
+		return nil, errors.New(C.GoString(cErr))
+	}
 
-	return sizes
+	return sizes, nil
 }
 
 // GetApproximateSizesCF returns the approximate number of bytes of file system
@@ -623,10 +630,10 @@ func (db *DB) GetApproximateSizes(ranges []Range) []uint64 {
 //
 // The keys counted will begin at Range.Start and end on the key before
 // Range.Limit.
-func (db *DB) GetApproximateSizesCF(cf *ColumnFamilyHandle, ranges []Range) []uint64 {
+func (db *DB) GetApproximateSizesCF(cf *ColumnFamilyHandle, ranges []Range) ([]uint64, error) {
 	sizes := make([]uint64, len(ranges))
 	if len(ranges) == 0 {
-		return sizes
+		return sizes, nil
 	}
 
 	cStarts := make([]*C.char, len(ranges))
@@ -647,6 +654,7 @@ func (db *DB) GetApproximateSizesCF(cf *ColumnFamilyHandle, ranges []Range) []ui
 		}
 	}()
 
+	var cErr *C.char
 	C.rocksdb_approximate_sizes_cf(
 		db.c,
 		cf.c,
@@ -655,9 +663,15 @@ func (db *DB) GetApproximateSizesCF(cf *ColumnFamilyHandle, ranges []Range) []ui
 		&cStartLens[0],
 		&cLimits[0],
 		&cLimitLens[0],
-		(*C.uint64_t)(&sizes[0]))
+		(*C.uint64_t)(&sizes[0]),
+		&cErr)
+
+	if cErr != nil {
+		defer C.rocksdb_free(unsafe.Pointer(cErr))
+		return nil, errors.New(C.GoString(cErr))
+	}
 
-	return sizes
+	return sizes, nil
 }
 
 // SetOptions dynamically changes options through the SetOptions API.
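Both size queries now surface RocksDB errors instead of silently returning zeroes, because rocksdb_approximate_sizes takes an error out-parameter in newer RocksDB releases. A minimal caller sketch of the new two-value signature; the import path and database path below are placeholders and not something this diff prescribes:

```go
package main

import (
	"fmt"
	"log"

	"github.com/tecbot/gorocksdb" // assumed import path; use this fork's actual module path
)

func main() {
	opts := gorocksdb.NewDefaultOptions()
	opts.SetCreateIfMissing(true)
	db, err := gorocksdb.OpenDb(opts, "/tmp/approx_sizes_example") // placeholder path
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// The error return is new; older callers had no way to see failures.
	sizes, err := db.GetApproximateSizes([]gorocksdb.Range{
		{Start: []byte("a"), Limit: []byte("z")},
	})
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("approximate bytes in [a, z):", sizes[0])
}
```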
diff --git a/filter_policy.go b/filter_policy.go
index a9c222b0..afe6696e 100644
--- a/filter_policy.go
+++ b/filter_policy.go
@@ -46,49 +46,11 @@ func (fp nativeFilterPolicy) Name() string { retur
 // FilterPolicy (like NewBloomFilterPolicy) that does not ignore
 // trailing spaces in keys.
 func NewBloomFilter(bitsPerKey int) FilterPolicy {
-	return NewNativeFilterPolicy(C.rocksdb_filterpolicy_create_bloom(C.int(bitsPerKey)))
+	return NewNativeFilterPolicy(C.rocksdb_filterpolicy_create_bloom(C.double(bitsPerKey)))
 }
 
 // NewBloomFilterFull returns a new filter policy created with use_block_based_builder=false
 // (use full or partitioned filter).
 func NewBloomFilterFull(bitsPerKey int) FilterPolicy {
-	return NewNativeFilterPolicy(C.rocksdb_filterpolicy_create_bloom_full(C.int(bitsPerKey)))
-}
-
-// Hold references to filter policies.
-var filterPolicies = NewCOWList()
-
-type filterPolicyWrapper struct {
-	name         *C.char
-	filterPolicy FilterPolicy
-}
-
-func registerFilterPolicy(fp FilterPolicy) int {
-	return filterPolicies.Append(filterPolicyWrapper{C.CString(fp.Name()), fp})
-}
-
-//export gorocksdb_filterpolicy_create_filter
-func gorocksdb_filterpolicy_create_filter(idx int, cKeys **C.char, cKeysLen *C.size_t, cNumKeys C.int, cDstLen *C.size_t) *C.char {
-	rawKeys := charSlice(cKeys, cNumKeys)
-	keysLen := sizeSlice(cKeysLen, cNumKeys)
-	keys := make([][]byte, int(cNumKeys))
-	for i, len := range keysLen {
-		keys[i] = charToByte(rawKeys[i], len)
-	}
-
-	dst := filterPolicies.Get(idx).(filterPolicyWrapper).filterPolicy.CreateFilter(keys)
-	*cDstLen = C.size_t(len(dst))
-	return cByteSlice(dst)
-}
-
-//export gorocksdb_filterpolicy_key_may_match
-func gorocksdb_filterpolicy_key_may_match(idx int, cKey *C.char, cKeyLen C.size_t, cFilter *C.char, cFilterLen C.size_t) C.uchar {
-	key := charToByte(cKey, cKeyLen)
-	filter := charToByte(cFilter, cFilterLen)
-	return boolToChar(filterPolicies.Get(idx).(filterPolicyWrapper).filterPolicy.KeyMayMatch(key, filter))
-}
-
-//export gorocksdb_filterpolicy_name
-func gorocksdb_filterpolicy_name(idx int) *C.char {
-	return filterPolicies.Get(idx).(filterPolicyWrapper).name
+	return NewNativeFilterPolicy(C.rocksdb_filterpolicy_create_bloom_full(C.double(bitsPerKey)))
 }
diff --git a/gorocksdb.c b/gorocksdb.c
index efebbe51..74f0bbf0 100644
--- a/gorocksdb.c
+++ b/gorocksdb.c
@@ -25,22 +25,6 @@ rocksdb_compactionfilter_t* gorocksdb_compactionfilter_create(uintptr_t idx) {
         (const char *(*)(void*))(gorocksdb_compactionfilter_name));
 }
 
-/* Filter Policy */
-
-rocksdb_filterpolicy_t* gorocksdb_filterpolicy_create(uintptr_t idx) {
-    return rocksdb_filterpolicy_create(
-        (void*)idx,
-        gorocksdb_destruct_handler,
-        (char* (*)(void*, const char* const*, const size_t*, int, size_t*))(gorocksdb_filterpolicy_create_filter),
-        (unsigned char (*)(void*, const char*, size_t, const char*, size_t))(gorocksdb_filterpolicy_key_may_match),
-        gorocksdb_filterpolicy_delete_filter,
-        (const char *(*)(void*))(gorocksdb_filterpolicy_name));
-}
-
-void gorocksdb_filterpolicy_delete_filter(void* state, const char* v, size_t s) {
-    free((char*)v);
-}
-
 /* Merge Operator */
 
 rocksdb_mergeoperator_t* gorocksdb_mergeoperator_create(uintptr_t idx) {
diff --git a/gorocksdb.h b/gorocksdb.h
index 4a9968f0..c435a116 100644
--- a/gorocksdb.h
+++ b/gorocksdb.h
@@ -15,11 +15,6 @@ extern rocksdb_compactionfilter_t* gorocksdb_compactionfilter_create(uintptr_t i
 
 extern rocksdb_comparator_t* gorocksdb_comparator_create(uintptr_t idx);
 
-/* Filter Policy */
-
-extern rocksdb_filterpolicy_t* gorocksdb_filterpolicy_create(uintptr_t idx);
-extern void gorocksdb_filterpolicy_delete_filter(void* state, const char* v, size_t s);
-
 /* Merge Operator */
 
 extern rocksdb_mergeoperator_t* gorocksdb_mergeoperator_create(uintptr_t idx);
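With the Go-side callback shims removed from filter_policy.go, gorocksdb.c and gorocksdb.h, only filter policies built by the RocksDB C API remain available. A hedged sketch of the two constructors this change keeps, assuming the same import as the previous example; the 10 bits-per-key value is only illustrative:

```go
// Only native (C-side) filter policies can be used now; there is no longer a
// way to register a custom Go FilterPolicy, since the registerFilterPolicy /
// gorocksdb_filterpolicy_create trampoline has been removed.
func newBloomPolicies() (blockBased, full gorocksdb.FilterPolicy) {
	blockBased = gorocksdb.NewBloomFilter(10) // block-based filter builder
	full = gorocksdb.NewBloomFilterFull(10)   // full or partitioned filter builder
	return blockBased, full
}
```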
diff --git a/options.go b/options.go
index 07000215..e43647b9 100644
--- a/options.go
+++ b/options.go
@@ -234,7 +234,8 @@ func (opts *Options) SetParanoidChecks(value bool) {
 //
 // For example, you have a flash device with 10GB allocated for the DB,
 // as well as a hard drive of 2TB, you should config it to be:
-// [{"/flash_path", 10GB}, {"/hard_drive", 2TB}]
+//
+//	[{"/flash_path", 10GB}, {"/hard_drive", 2TB}]
 //
 // The system will try to guarantee data under each path is close to but
 // not larger than the target size. But current and future file sizes used
@@ -485,19 +486,6 @@ func (opts *Options) SetLevel0StopWritesTrigger(value int) {
 	C.rocksdb_options_set_level0_stop_writes_trigger(opts.c, C.int(value))
 }
 
-// SetMaxMemCompactionLevel sets the maximum level
-// to which a new compacted memtable is pushed if it does not create overlap.
-//
-// We try to push to level 2 to avoid the
-// relatively expensive level 0=>1 compactions and to avoid some
-// expensive manifest file operations. We do not push all the way to
-// the largest level since that can generate a lot of wasted disk
-// space if the same key space is being repeatedly overwritten.
-// Default: 2
-func (opts *Options) SetMaxMemCompactionLevel(value int) {
-	C.rocksdb_options_set_max_mem_compaction_level(opts.c, C.int(value))
-}
-
 // SetTargetFileSizeBase sets the target file size for compaction.
 //
 // Target file size is per-file size for level-1.
@@ -540,17 +528,18 @@ func (opts *Options) SetMaxBytesForLevelMultiplier(value float64) {
 	C.rocksdb_options_set_max_bytes_for_level_multiplier(opts.c, C.double(value))
 }
 
-// SetLevelCompactiondynamiclevelbytes specifies whether to pick
+// SetLevelCompactionDynamiclevelbytes specifies whether to pick
 // target size of each level dynamically.
 //
 // We will pick a base level b >= 1. L0 will be directly merged into level b,
 // instead of always into level 1. Level 1 to b-1 need to be empty.
 // We try to pick b and its target size so that
-// 1. target size is in the range of
-//    (max_bytes_for_level_base / max_bytes_for_level_multiplier,
-//    max_bytes_for_level_base]
-// 2. target size of the last level (level num_levels-1) equals to extra size
-//    of the level.
+//  1. target size is in the range of
+//     (max_bytes_for_level_base / max_bytes_for_level_multiplier,
+//     max_bytes_for_level_base]
+//  2. target size of the last level (level num_levels-1) equals to extra size
+//     of the level.
+//
 // At the same time max_bytes_for_level_multiplier and
 // max_bytes_for_level_multiplier_additional are still satisfied.
 //
@@ -739,34 +728,6 @@ func (opts *Options) SetKeepLogFileNum(value int) {
 	C.rocksdb_options_set_keep_log_file_num(opts.c, C.size_t(value))
 }
 
-// SetSoftRateLimit sets the soft rate limit.
-//
-// Puts are delayed 0-1 ms when any level has a compaction score that exceeds
-// soft_rate_limit. This is ignored when == 0.0.
-// CONSTRAINT: soft_rate_limit <= hard_rate_limit. If this constraint does not
-// hold, RocksDB will set soft_rate_limit = hard_rate_limit
-// Default: 0.0 (disabled)
-func (opts *Options) SetSoftRateLimit(value float64) {
-	C.rocksdb_options_set_soft_rate_limit(opts.c, C.double(value))
-}
-
-// SetHardRateLimit sets the hard rate limit.
-//
-// Puts are delayed 1ms at a time when any level has a compaction score that
-// exceeds hard_rate_limit. This is ignored when <= 1.0.
-// Default: 0.0 (disabled)
-func (opts *Options) SetHardRateLimit(value float64) {
-	C.rocksdb_options_set_hard_rate_limit(opts.c, C.double(value))
-}
-
-// SetRateLimitDelayMaxMilliseconds sets the max time
-// a put will be stalled when hard_rate_limit is enforced.
-// If 0, then there is no limit.
-// Default: 1000
-func (opts *Options) SetRateLimitDelayMaxMilliseconds(value uint) {
-	C.rocksdb_options_set_rate_limit_delay_max_milliseconds(opts.c, C.uint(value))
-}
-
 // SetMaxManifestFileSize sets the maximal manifest file size until is rolled over.
 // The older manifest file be deleted.
 // Default: MAX_INT so that roll-over does not take place.
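SetSoftRateLimit, SetHardRateLimit and SetRateLimitDelayMaxMilliseconds disappear here because the corresponding setters were dropped from the RocksDB C API; background write throttling is done through a rate limiter object instead. A hedged migration sketch, assuming this package still ships the NewRateLimiter and SetRateLimiter wrappers that upstream gorocksdb has; the numbers are illustrative only:

```go
// Replaces the removed Set{Soft,Hard}RateLimit knobs with a shared limiter
// for flush and compaction writes. NewRateLimiter/SetRateLimiter are assumed
// to exist in this fork as they do upstream.
func throttleBackgroundWrites(opts *gorocksdb.Options) {
	limiter := gorocksdb.NewRateLimiter(
		16<<20, // rate_bytes_per_sec: ~16 MiB/s of background writes
		100000, // refill_period_us: refill the token bucket every 100 ms
		10,     // fairness between low- and high-priority requests
	)
	opts.SetRateLimiter(limiter)
}
```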
@@ -780,19 +741,6 @@ func (opts *Options) SetTableCacheNumshardbits(value int) {
 	C.rocksdb_options_set_table_cache_numshardbits(opts.c, C.int(value))
 }
 
-// SetTableCacheRemoveScanCountLimit sets the count limit during a scan.
-//
-// During data eviction of table's LRU cache, it would be inefficient
-// to strictly follow LRU because this piece of memory will not really
-// be released unless its refcount falls to zero. Instead, make two
-// passes: the first pass will release items with refcount = 1,
-// and if not enough space releases after scanning the number of
-// elements specified by this parameter, we will remove items in LRU order.
-// Default: 16
-func (opts *Options) SetTableCacheRemoveScanCountLimit(value int) {
-	C.rocksdb_options_set_table_cache_remove_scan_count_limit(opts.c, C.int(value))
-}
-
 // SetArenaBlockSize sets the size of one block in arena memory allocation.
 //
 // If <= 0, a proper value is automatically calculated (usually 1/10 of
@@ -821,17 +769,18 @@ func (opts *Options) SetWALRecoveryMode(mode WALRecoveryMode) {
 // SetWALTtlSeconds sets the WAL ttl in seconds.
 //
 // The following two options affect how archived logs will be deleted.
-// 1. If both set to 0, logs will be deleted asap and will not get into
-//    the archive.
-// 2. If wal_ttl_seconds is 0 and wal_size_limit_mb is not 0,
-//    WAL files will be checked every 10 min and if total size is greater
-//    then wal_size_limit_mb, they will be deleted starting with the
-//    earliest until size_limit is met. All empty files will be deleted.
-// 3. If wal_ttl_seconds is not 0 and wall_size_limit_mb is 0, then
-//    WAL files will be checked every wal_ttl_seconds / 2 and those that
-//    are older than wal_ttl_seconds will be deleted.
-// 4. If both are not 0, WAL files will be checked every 10 min and both
-//    checks will be performed with ttl being first.
+//  1. If both set to 0, logs will be deleted asap and will not get into
+//     the archive.
+//  2. If wal_ttl_seconds is 0 and wal_size_limit_mb is not 0,
+//     WAL files will be checked every 10 min and if total size is greater
+//     then wal_size_limit_mb, they will be deleted starting with the
+//     earliest until size_limit is met. All empty files will be deleted.
+//  3. If wal_ttl_seconds is not 0 and wall_size_limit_mb is 0, then
+//     WAL files will be checked every wal_ttl_seconds / 2 and those that
+//     are older than wal_ttl_seconds will be deleted.
+//  4. If both are not 0, WAL files will be checked every 10 min and both
+//     checks will be performed with ttl being first.
+//
 // Default: 0
 func (opts *Options) SetWALTtlSeconds(value uint64) {
 	C.rocksdb_options_set_WAL_ttl_seconds(opts.c, C.uint64_t(value))
@@ -864,13 +813,6 @@ func (opts *Options) SetManifestPreallocationSize(value int) {
 	C.rocksdb_options_set_manifest_preallocation_size(opts.c, C.size_t(value))
 }
 
-// SetPurgeRedundantKvsWhileFlush enable/disable purging of
-// duplicate/deleted keys when a memtable is flushed to storage.
-// Default: true
-func (opts *Options) SetPurgeRedundantKvsWhileFlush(value bool) {
-	C.rocksdb_options_set_purge_redundant_kvs_while_flush(opts.c, boolToChar(value))
-}
-
 // SetAllowMmapReads enable/disable mmap reads for reading sst tables.
 // Default: false
 func (opts *Options) SetAllowMmapReads(value bool) {
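The reflowed SetWALTtlSeconds comment lists four archival behaviours; the ttl-only case (3) is the most common. A small sketch of that configuration, assuming SetWALSizeLimitMb exists alongside the SetWALTtlSeconds shown above, as it does in upstream gorocksdb:

```go
// Keep archived WAL files for about an hour with no size-based cap
// (case 3 in the comment above); the archive is checked roughly every
// wal_ttl_seconds / 2. SetWALSizeLimitMb is assumed to exist in this fork.
func archiveWALByAge(opts *gorocksdb.Options) {
	opts.SetWALTtlSeconds(3600)
	opts.SetWALSizeLimitMb(0) // 0 disables the size-based deletion check
}
```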
@@ -902,14 +844,6 @@ func (opts *Options) SetIsFdCloseOnExec(value bool) {
 	C.rocksdb_options_set_is_fd_close_on_exec(opts.c, boolToChar(value))
 }
 
-// SetSkipLogErrorOnRecovery enable/disable skipping of
-// log corruption error on recovery (If client is ok with
-// losing most recent changes)
-// Default: false
-func (opts *Options) SetSkipLogErrorOnRecovery(value bool) {
-	C.rocksdb_options_set_skip_log_error_on_recovery(opts.c, boolToChar(value))
-}
-
 // SetStatsDumpPeriodSec sets the stats dump period in seconds.
 //
 // If not zero, dump stats to LOG every stats_dump_period_sec
@@ -1036,7 +970,9 @@ func (opts *Options) SetInplaceUpdateNumLocks(value int) {
 // If <=0, it won't allocate from huge page but from malloc.
 // Users are responsible to reserve huge pages for it to be allocated. For
 // example:
-// sysctl -w vm.nr_hugepages=20
+//
+//	sysctl -w vm.nr_hugepages=20
+//
 // See linux doc Documentation/vm/hugetlbpage.txt
 // If there isn't enough free huge page available, it will fall back to
 // malloc.
@@ -1104,7 +1040,8 @@ func (opts *Options) SetMemtableVectorRep() {
 // bucketCount: number of fixed array buckets
 // skiplistHeight: the max height of the skiplist
 // skiplistBranchingFactor: probabilistic size ratio between adjacent
-// link lists in the skiplist
+//
+//	link lists in the skiplist
 func (opts *Options) SetHashSkipListRep(bucketCount int, skiplistHeight, skiplistBranchingFactor int32) {
 	C.rocksdb_options_set_hash_skip_list_rep(opts.c, C.size_t(bucketCount), C.int32_t(skiplistHeight), C.int32_t(skiplistBranchingFactor))
 }
@@ -1119,26 +1056,6 @@ func (opts *Options) SetHashLinkListRep(bucketCount int) {
 	C.rocksdb_options_set_hash_link_list_rep(opts.c, C.size_t(bucketCount))
 }
 
-// SetPlainTableFactory sets a plain table factory with prefix-only seek.
-//
-// For this factory, you need to set prefix_extractor properly to make it
-// work. Look-up will starts with prefix hash lookup for key prefix. Inside the
-// hash bucket found, a binary search is executed for hash conflicts. Finally,
-// a linear search is used.
-//
-// keyLen: plain table has optimization for fix-sized keys,
-// which can be specified via keyLen.
-// bloomBitsPerKey: the number of bits used for bloom filer per prefix. You
-// may disable it by passing a zero.
-// hashTableRatio: the desired utilization of the hash table used for prefix
-// hashing. hashTableRatio = number of prefixes / #buckets
-// in the hash table
-// indexSparseness: inside each prefix, need to build one index record for how
-// many keys for binary search inside each hash bucket.
-func (opts *Options) SetPlainTableFactory(keyLen uint32, bloomBitsPerKey int, hashTableRatio float64, indexSparseness int) {
-	C.rocksdb_options_set_plain_table_factory(opts.c, C.uint32_t(keyLen), C.int(bloomBitsPerKey), C.double(hashTableRatio), C.size_t(indexSparseness))
-}
-
 // SetCreateIfMissingColumnFamilies specifies whether the column families
 // should be created if they are missing.
 func (opts *Options) SetCreateIfMissingColumnFamilies(value bool) {
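With SetPlainTableFactory removed, the prefix-based memtable representations above are the remaining reason to configure a prefix extractor in options.go. A sketch using the SetHashSkipListRep signature shown in this hunk; NewFixedPrefixTransform and SetPrefixExtractor are assumed from the rest of the package, and the parameter values are illustrative:

```go
// Pair the hash-skip-list memtable with a fixed-size prefix extractor, which
// it needs in order to bucket keys. SetPrefixExtractor/NewFixedPrefixTransform
// are assumed to exist in this fork as they do upstream.
func useHashSkipListMemtable(opts *gorocksdb.Options) {
	opts.SetPrefixExtractor(gorocksdb.NewFixedPrefixTransform(8))
	opts.SetHashSkipListRep(
		1<<20, // bucketCount: number of fixed array buckets
		4,     // skiplistHeight: max height of the per-bucket skiplist
		4,     // skiplistBranchingFactor
	)
}
```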
diff --git a/options_block_based_table.go b/options_block_based_table.go
index 80244132..40e04b4c 100644
--- a/options_block_based_table.go
+++ b/options_block_based_table.go
@@ -144,18 +144,18 @@ func (opts *BlockBasedTableOptions) SetUseDeltaEncoding(value bool) {
 	C.rocksdb_block_based_options_set_use_delta_encoding(opts.c, boolToChar(value))
 }
 
-// SetFilterPolicy sets the filter policy opts reduce disk reads.
+// SetFilterPolicy sets the filter policy to reduce disk reads.
 // Many applications will benefit from passing the result of
 // NewBloomFilterPolicy() here.
 // Default: nil
 func (opts *BlockBasedTableOptions) SetFilterPolicy(fp FilterPolicy) {
 	if nfp, ok := fp.(nativeFilterPolicy); ok {
 		opts.cFp = nfp.c
+		C.rocksdb_block_based_options_set_filter_policy(opts.c, opts.cFp)
 	} else {
-		idx := registerFilterPolicy(fp)
-		opts.cFp = C.gorocksdb_filterpolicy_create(C.uintptr_t(idx))
+		// Custom filter policies are not supported in this RocksDB version
+		panic("custom filter policies are not supported, use NewBloomFilter() or NewBloomFilterFull()")
 	}
-	C.rocksdb_block_based_options_set_filter_policy(opts.c, opts.cFp)
 }
 
 // SetNoBlockCache specify whether block cache should be used or not.
@@ -175,14 +175,6 @@ func (opts *BlockBasedTableOptions) SetBlockCache(cache *Cache) {
 	C.rocksdb_block_based_options_set_block_cache(opts.c, cache.c)
 }
 
-// SetBlockCacheCompressed sets the cache for compressed blocks.
-// If nil, rocksdb will not use a compressed block cache.
-// Default: nil
-func (opts *BlockBasedTableOptions) SetBlockCacheCompressed(cache *Cache) {
-	opts.compCache = cache
-	C.rocksdb_block_based_options_set_block_cache_compressed(opts.c, cache.c)
-}
-
 // SetWholeKeyFiltering specify if whole keys in the filter (not just prefixes)
 // should be placed.
 // This must generally be true for gets opts be efficient.
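Taken together with the filter_policy.go change, block-based table setup now looks like the sketch below: only native bloom policies are accepted (anything else hits the new panic), and with the compressed block cache gone, SetBlockCache is the single cache hook left. NewLRUCache, NewDefaultBlockBasedTableOptions and SetBlockBasedTableFactory are assumed from the rest of the package:

```go
// Wire a full bloom filter and an LRU block cache into the table factory.
// Passing a custom Go FilterPolicy would panic in SetFilterPolicy, so only
// NewBloomFilter/NewBloomFilterFull results are used here.
func newBlockBasedOptions() *gorocksdb.Options {
	bbto := gorocksdb.NewDefaultBlockBasedTableOptions()
	bbto.SetFilterPolicy(gorocksdb.NewBloomFilterFull(10))
	bbto.SetBlockCache(gorocksdb.NewLRUCache(128 << 20)) // ~128 MiB block cache

	opts := gorocksdb.NewDefaultOptions()
	opts.SetBlockBasedTableFactory(bbto)
	return opts
}
```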