Skip to content

Commit d7ded04

Browse files
Initial support for keys following zipfian (#166)
* Initial support for keys following zipfian This patch allows memtier to generate keys that follow a Zipfian distribution. An additional parameter --key-zipf-exp is introduced, meaning P(Key = n) ~ n^{-exp}, which is bounded to (0, 5) to be sane. The range of keys is limited to positive values in this version. Signed-off-by: Su Lifan <[email protected]> * update bash-completion for zipf argument Signed-off-by: Su Lifan <[email protected]> * Add test to check Zipfian key distribution - tracks and counts command execution using redis-py Monitor class * Add test to validate impact of Zipfian's exponent on key distribution - Refactor Zipfian tests to avoid repetition * Enhance --help docs for key-zipf-exp option to clarify impact of higher exponents --------- Signed-off-by: Su Lifan <[email protected]> Co-authored-by: Paulo Sousa <[email protected]>
1 parent fef9718 commit d7ded04

File tree

9 files changed

+387
-4
lines changed

9 files changed

+387
-4
lines changed

bash-completion/memtier_benchmark

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ _memtier_completions()
2727

2828
options_no_args=("--debug" "--show-config" "--hide-histogram" "--distinct-client-seed" "--randomize"\
2929
"--random-data" "--data-verify" "--verify-only" "--generate-keys" "--key-stddev"\
30-
"--key-median" "--no-expiry" "--cluster-mode" "--help" "--version"\
30+
"--key-median" "--key-zipf-exp" "--no-expiry" "--cluster-mode" "--help" "--version"\
3131
"-D" "-R" "-h" "-v")
3232

3333
options_comp=("--protocol" "-P" "--key-pattern" "--data-size-pattern" "--command-key-pattern")

client.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -153,6 +153,8 @@ class client : public connections_manager {
153153
return OBJECT_GENERATOR_KEY_RANDOM;
154154
} else if (cfg->key_pattern[index] == 'G') {
155155
return OBJECT_GENERATOR_KEY_GAUSSIAN;
156+
} else if (cfg->key_pattern[index] == 'Z') {
157+
return OBJECT_GENERATOR_KEY_ZIPFIAN;
156158
} else {
157159
if (index == key_pattern_set)
158160
return OBJECT_GENERATOR_KEY_SET_ITER;
@@ -167,6 +169,8 @@ class client : public connections_manager {
167169
return OBJECT_GENERATOR_KEY_RANDOM;
168170
} else if (cmd.key_pattern == 'G') {
169171
return OBJECT_GENERATOR_KEY_GAUSSIAN;
172+
} else if (cmd.key_pattern == 'Z') {
173+
return OBJECT_GENERATOR_KEY_ZIPFIAN;
170174
} else {
171175
return index;
172176
}

memtier_benchmark.1

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,7 @@ Key ID maximum value (default: 10000000)
209209
\fB\-\-key\-pattern\fR=\fI\,PATTERN\/\fR
210210
Set:Get pattern (default: R:R)
211211
G for Gaussian distribution.
212+
Z for Zipfian distribution (will limit keys to positive).
212213
R for uniform Random.
213214
S for Sequential.
214215
P for Parallel (Sequential were each client has a subset of the key\-range).
@@ -220,6 +221,10 @@ The standard deviation used in the Gaussian distribution
220221
\fB\-\-key\-median\fR
221222
The median point used in the Gaussian distribution
222223
(default is the center of the key range)
224+
.TP
225+
\fB\-\-key\-zipf\-exp\fR
226+
The exponent used in the zipf distribution, limit to (0, 5)
227+
(default is 1, though any number >2 seems insane)
223228
.SS "WAIT Options:"
224229
.TP
225230
\fB\-\-wait\-ratio\fR=\fI\,RATIO\/\fR

memtier_benchmark.cpp

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,7 @@ static void config_print_to_json(json_handler * jsonhandler, struct benchmark_co
261261
jsonhandler->write_obj("key_pattern" ,"\"%s\"", cfg->key_pattern);
262262
jsonhandler->write_obj("key_stddev" ,"%f", cfg->key_stddev);
263263
jsonhandler->write_obj("key_median" ,"%f", cfg->key_median);
264+
jsonhandler->write_obj("key_zipf_exp" ,"%f", cfg->key_zipf_exp);
264265
jsonhandler->write_obj("reconnect_interval","%u", cfg->reconnect_interval);
265266
jsonhandler->write_obj("multi_key_get" ,"%u", cfg->multi_key_get);
266267
jsonhandler->write_obj("authenticate" ,"\"%s\"", cfg->authenticate ? cfg->authenticate : "");
@@ -403,6 +404,7 @@ static int config_parse_args(int argc, char *argv[], struct benchmark_config *cf
403404
o_key_pattern,
404405
o_key_stddev,
405406
o_key_median,
407+
o_key_zipf_exp,
406408
o_show_config,
407409
o_hide_histogram,
408410
o_print_percentiles,
@@ -486,6 +488,7 @@ static int config_parse_args(int argc, char *argv[], struct benchmark_config *cf
486488
{ "key-pattern", 1, 0, o_key_pattern },
487489
{ "key-stddev", 1, 0, o_key_stddev },
488490
{ "key-median", 1, 0, o_key_median },
491+
{ "key-zipf-exp", 1, 0, o_key_zipf_exp},
489492
{ "reconnect-interval", 1, 0, o_reconnect_interval },
490493
{ "multi-key-get", 1, 0, o_multi_key_get },
491494
{ "authenticate", 1, 0, 'a' },
@@ -754,19 +757,29 @@ static int config_parse_args(int argc, char *argv[], struct benchmark_config *cf
754757
return -1;
755758
}
756759
break;
760+
case o_key_zipf_exp:
761+
endptr = NULL;
762+
cfg->key_zipf_exp = strtod(optarg, &endptr);
763+
if (cfg->key_zipf_exp <= 0 || cfg->key_zipf_exp >= 5 || !endptr || *endptr != '\0') {
764+
fprintf(stderr, "error: key-zipf-exp must be within interval (0, 5).\n");
765+
return -1;
766+
}
767+
break;
757768
case o_key_pattern:
758769
cfg->key_pattern = optarg;
759770

760771
if (strlen(cfg->key_pattern) != 3 || cfg->key_pattern[key_pattern_delimiter] != ':' ||
761772
(cfg->key_pattern[key_pattern_set] != 'R' &&
762773
cfg->key_pattern[key_pattern_set] != 'S' &&
763774
cfg->key_pattern[key_pattern_set] != 'G' &&
775+
cfg->key_pattern[key_pattern_set] != 'Z' &&
764776
cfg->key_pattern[key_pattern_set] != 'P') ||
765777
(cfg->key_pattern[key_pattern_get] != 'R' &&
766778
cfg->key_pattern[key_pattern_get] != 'S' &&
767779
cfg->key_pattern[key_pattern_get] != 'G' &&
780+
cfg->key_pattern[key_pattern_get] != 'Z' &&
768781
cfg->key_pattern[key_pattern_get] != 'P')) {
769-
fprintf(stderr, "error: key-pattern must be in the format of [S/R/G/P]:[S/R/G/P].\n");
782+
fprintf(stderr, "error: key-pattern must be in the format of [S/R/G/P/Z]:[S/R/G/P/Z].\n");
770783
return -1;
771784
}
772785

@@ -1047,12 +1060,16 @@ void usage() {
10471060
" --key-pattern=PATTERN Set:Get pattern (default: R:R)\n"
10481061
" G for Gaussian distribution.\n"
10491062
" R for uniform Random.\n"
1063+
" Z for zipf distribution (will limit keys to positive).\n"
10501064
" S for Sequential.\n"
10511065
" P for Parallel (Sequential were each client has a subset of the key-range).\n"
10521066
" --key-stddev The standard deviation used in the Gaussian distribution\n"
10531067
" (default is key range / 6)\n"
10541068
" --key-median The median point used in the Gaussian distribution\n"
10551069
" (default is the center of the key range)\n"
1070+
" --key-zipf-exp The exponent used in the zipf distribution, limit to (0, 5)\n"
1071+
" Higher exponents result in higher concentration in top keys\n"
1072+
" (default is 1, though any number >2 seems insane)\n"
10561073
"\n"
10571074
"WAIT Options:\n"
10581075
" --wait-ratio=RATIO Set:Wait ratio (default is no WAIT commands - 1:0)\n"
@@ -1611,6 +1628,13 @@ int main(int argc, char *argv[])
16111628
obj_gen->set_key_distribution(cfg.key_stddev, cfg.key_median);
16121629
}
16131630
obj_gen->set_expiry_range(cfg.expiry_range.min, cfg.expiry_range.max);
1631+
if (cfg.key_pattern[key_pattern_set] == 'Z' || cfg.key_pattern[key_pattern_get] == 'Z') {
1632+
if (cfg.key_zipf_exp == 0.0) {
1633+
// user can't specify 0.0, so 0.0 means unset
1634+
cfg.key_zipf_exp = 1.0;
1635+
}
1636+
obj_gen->set_key_zipf_distribution(cfg.key_zipf_exp);
1637+
}
16141638

16151639
// Prepare output file
16161640
FILE *outfile;

memtier_benchmark.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ struct benchmark_config {
8989
unsigned long long key_maximum;
9090
double key_stddev;
9191
double key_median;
92+
double key_zipf_exp;
9293
const char *key_pattern;
9394
unsigned int reconnect_interval;
9495
int multi_key_get;

obj_gen.cpp

Lines changed: 105 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,6 +150,14 @@ object_generator::object_generator(size_t n_key_iterators/*= OBJECT_GENERATOR_KE
150150
m_key_max(0),
151151
m_key_stddev(0),
152152
m_key_median(0),
153+
m_key_zipf_min(0),
154+
m_key_zipf_max(0),
155+
m_key_zipf_exp(1),
156+
m_key_zipf_1mexp(0),
157+
m_key_zipf_1mexpInv(0),
158+
m_key_zipf_Hmin(0),
159+
m_key_zipf_Hmax(0),
160+
m_key_zipf_s(0),
153161
m_value_buffer(NULL),
154162
m_random_fd(-1),
155163
m_value_buffer_size(0),
@@ -172,6 +180,14 @@ object_generator::object_generator(const object_generator& copy) :
172180
m_key_max(copy.m_key_max),
173181
m_key_stddev(copy.m_key_stddev),
174182
m_key_median(copy.m_key_median),
183+
m_key_zipf_min(copy.m_key_zipf_min),
184+
m_key_zipf_max(copy.m_key_zipf_max),
185+
m_key_zipf_exp(copy.m_key_zipf_exp),
186+
m_key_zipf_1mexp(copy.m_key_zipf_1mexp),
187+
m_key_zipf_1mexpInv(copy.m_key_zipf_1mexpInv),
188+
m_key_zipf_Hmin(copy.m_key_zipf_Hmin),
189+
m_key_zipf_Hmax(copy.m_key_zipf_Hmax),
190+
m_key_zipf_s(copy.m_key_zipf_s),
175191
m_value_buffer(NULL),
176192
m_random_fd(-1),
177193
m_value_buffer_size(0),
@@ -348,6 +364,47 @@ void object_generator::set_key_distribution(double key_stddev, double key_median
348364
m_key_median = key_median;
349365
}
350366

367+
// should be called after set_key_range in memtier_benchmark.cpp
368+
void object_generator::set_key_zipf_distribution(double key_exp)
369+
{
370+
const double eps = 1e-4;
371+
372+
if (key_exp < eps)
373+
m_key_zipf_exp = 0.;
374+
else if (fabs(key_exp - 1) < eps)
375+
m_key_zipf_exp = 1.;
376+
else
377+
m_key_zipf_exp = key_exp;
378+
379+
if (m_key_min == 0)
380+
m_key_zipf_min = 1;
381+
else
382+
m_key_zipf_min = m_key_min;
383+
384+
if (m_key_max <= m_key_zipf_min)
385+
m_key_zipf_max = m_key_zipf_min;
386+
else
387+
m_key_zipf_max = m_key_max;
388+
389+
if (m_key_zipf_exp < eps)
390+
return; // degenerated to uniform distribution
391+
else if (fabs(key_exp - 1) < eps) {
392+
m_key_zipf_Hmin = log(m_key_zipf_min + 0.5) - 1. / m_key_zipf_min;
393+
m_key_zipf_Hmax = log(m_key_zipf_max + 0.5);
394+
double t = log(m_key_zipf_min + 1.5) - 1. / (m_key_zipf_min + 1);
395+
m_key_zipf_s = m_key_zipf_min + 1 - exp(t);
396+
} else {
397+
m_key_zipf_1mexp = 1. - m_key_zipf_exp;
398+
m_key_zipf_1mexpInv = 1. / m_key_zipf_1mexp;
399+
m_key_zipf_Hmin = pow(m_key_zipf_min + 0.5, m_key_zipf_1mexp) -
400+
m_key_zipf_1mexp * pow(m_key_zipf_min, -m_key_zipf_exp);
401+
m_key_zipf_Hmax = pow(m_key_zipf_max + 0.5, m_key_zipf_1mexp);
402+
double t = pow(m_key_zipf_min + 1.5, m_key_zipf_1mexp) -
403+
m_key_zipf_1mexp * pow(m_key_zipf_min + 1, -m_key_zipf_exp);
404+
m_key_zipf_s = m_key_zipf_min + 1 - pow(t, m_key_zipf_1mexpInv);
405+
}
406+
}
407+
351408
// return a random number between r_min and r_max
352409
unsigned long long object_generator::random_range(unsigned long long r_min, unsigned long long r_max)
353410
{
@@ -361,15 +418,62 @@ unsigned long long object_generator::normal_distribution(unsigned long long r_mi
361418
return m_random.gaussian_distribution_range(r_stddev, r_median, r_min, r_max);
362419
}
363420

421+
// following sampler is based on:
422+
// Rejection-inversion to generate variates from monotone discrete distributions
423+
// ACM Transactions on Modeling and Computer Simulation.
424+
// Volume 6 Issue 3 July 1996 pp 169–184
425+
// https://doi.org/10.1145/235025.235029
426+
unsigned long long object_generator::zipf_distribution()
427+
{
428+
const double eps = 1e-4;
429+
430+
if (m_key_zipf_exp < eps)
431+
return random_range(m_key_zipf_min, m_key_zipf_max);
432+
else if (fabs(m_key_zipf_exp - 1.0) < eps) {
433+
while (true) {
434+
double p = m_random.get_random() / (double)(m_random.get_random_max());
435+
double u = p * (m_key_zipf_Hmax - m_key_zipf_Hmin) + m_key_zipf_Hmin;
436+
double x = exp(u);
437+
if (x < m_key_zipf_min - 0.5)
438+
x = m_key_zipf_min + 0.5;
439+
if (x >= m_key_zipf_max + 0.5)
440+
x = m_key_zipf_max;
441+
double k = floor(x + 0.5);
442+
if (k - x <= m_key_zipf_s)
443+
return k;
444+
if (u > log(k + 0.5) - 1. / k)
445+
return k;
446+
}
447+
} else {
448+
while (true) {
449+
double p = m_random.get_random() / (double)(m_random.get_random_max());
450+
double u = p * (m_key_zipf_Hmax - m_key_zipf_Hmin) + m_key_zipf_Hmin;
451+
double x = pow(u, m_key_zipf_1mexpInv);
452+
if (x < m_key_zipf_min - 0.5)
453+
x = m_key_zipf_min + 0.5;
454+
if (x >= m_key_zipf_max + 0.5)
455+
x = m_key_zipf_max;
456+
double k = floor(x + 0.5);
457+
if (k - x <= m_key_zipf_s)
458+
return k;
459+
double t = (u - pow(k + 0.5, m_key_zipf_1mexp));
460+
if (m_key_zipf_1mexpInv * t > -pow(k, -m_key_zipf_exp))
461+
return k;
462+
}
463+
}
464+
}
465+
364466
unsigned long long object_generator::get_key_index(int iter)
365467
{
366-
assert(iter < static_cast<int>(m_next_key.size()) && iter >= OBJECT_GENERATOR_KEY_GAUSSIAN);
468+
assert(iter < static_cast<int>(m_next_key.size()) && iter >= OBJECT_GENERATOR_KEY_ZIPFIAN);
367469

368470
unsigned long long k;
369471
if (iter==OBJECT_GENERATOR_KEY_RANDOM) {
370472
k = random_range(m_key_min, m_key_max);
371473
} else if(iter==OBJECT_GENERATOR_KEY_GAUSSIAN) {
372474
k = normal_distribution(m_key_min, m_key_max, m_key_stddev, m_key_median);
475+
} else if(iter == OBJECT_GENERATOR_KEY_ZIPFIAN) {
476+
k = zipf_distribution();
373477
} else {
374478
if (m_next_key[iter] < m_key_min)
375479
m_next_key[iter] = m_key_min;

obj_gen.h

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,14 +47,15 @@ class gaussian_noise: public random_generator {
4747
private:
4848
double gaussian_distribution(const double &stddev);
4949
bool m_hasSpare;
50-
double m_spare;
50+
double m_spare;
5151
};
5252

5353
#define OBJECT_GENERATOR_KEY_ITERATORS 2 /* number of iterators */
5454
#define OBJECT_GENERATOR_KEY_SET_ITER 1
5555
#define OBJECT_GENERATOR_KEY_GET_ITER 0
5656
#define OBJECT_GENERATOR_KEY_RANDOM -1
5757
#define OBJECT_GENERATOR_KEY_GAUSSIAN -2
58+
#define OBJECT_GENERATOR_KEY_ZIPFIAN -3
5859

5960
class object_generator {
6061
public:
@@ -79,6 +80,18 @@ class object_generator {
7980
double m_key_stddev;
8081
double m_key_median;
8182

83+
// zipf will only be used for key generation
84+
// adjusted min and max key for zipf, may differ from the user-specified range
85+
unsigned long long m_key_zipf_min;
86+
unsigned long long m_key_zipf_max;
87+
// other data persisted across generations
88+
double m_key_zipf_exp;
89+
double m_key_zipf_1mexp;
90+
double m_key_zipf_1mexpInv;
91+
double m_key_zipf_Hmin;
92+
double m_key_zipf_Hmax;
93+
double m_key_zipf_s;
94+
8295
std::vector<unsigned long long> m_next_key;
8396

8497
unsigned long long m_key_index;
@@ -102,6 +115,7 @@ class object_generator {
102115

103116
unsigned long long random_range(unsigned long long r_min, unsigned long long r_max);
104117
unsigned long long normal_distribution(unsigned long long r_min, unsigned long long r_max, double r_stddev, double r_median);
118+
unsigned long long zipf_distribution();
105119

106120
void set_random_data(bool random_data);
107121
void set_data_size_fixed(unsigned int size);
@@ -112,6 +126,7 @@ class object_generator {
112126
void set_key_prefix(const char *key_prefix);
113127
void set_key_range(unsigned long long key_min, unsigned long long key_max);
114128
void set_key_distribution(double key_stddev, double key_median);
129+
void set_key_zipf_distribution(double key_exp);
115130
void set_random_seed(int seed);
116131
unsigned long long get_key_index(int iter);
117132
void generate_key(unsigned long long key_index);

0 commit comments

Comments
 (0)