diff --git a/algos.yaml b/algos.yaml index e98086ec4..72f581366 100644 --- a/algos.yaml +++ b/algos.yaml @@ -55,9 +55,9 @@ float: constructor: LuceneBatch base-args: ["@metric", "@dimension"] run-groups: - M-4: + M-16: arg-groups: - - {"M": 4, "efConstruction": 500} + - {"M": 16, "efConstruction": 100} query-args: [[10, 20, 40, 80, 120, 200, 400, 600, 800]] luceneknn: diff --git a/ann_benchmarks/algorithms/luceneknn.py b/ann_benchmarks/algorithms/luceneknn.py index cb74b8601..4247a4531 100644 --- a/ann_benchmarks/algorithms/luceneknn.py +++ b/ann_benchmarks/algorithms/luceneknn.py @@ -15,15 +15,13 @@ class LuceneBatch(BaseANN): """ def __init__(self, metric: str, dimension: int, param): - self.name = f"luceneknn dim={dimension} {param}" self.metric = metric self.dimension = dimension self.param = param self.short_name = f"luceneknn-{dimension}-{param['M']}-{param['efConstruction']}" self.n_iters = -1 self.train_size = -1 - #if self.metric not in ("euclidean", "angular"): - if self.metric != "angular": + if self.metric not in ("euclidean", "angular"): raise NotImplementedError(f"Not implemented for metric {self.metric}") def fit(self, X): @@ -44,6 +42,7 @@ def fit(self, X): def set_query_arguments(self, fanout): self.fanout = fanout + self.name = f"luceneknn dim={self.dimension} {self.param} fanout={fanout}" def query(self, q, n): raise NotImplementedError(f"Single query testing not implemented: use -batch mode only") @@ -82,12 +81,14 @@ def get_batch_results(self): assert len(batch_res) == self.n_iters return batch_res + def knn_tester(self, *args): cmd = ['java', '-cp', 'lib/*:classes', '-Xmx2g', '-Xms2g', 'org.apache.lucene.util.hnsw.KnnGraphTester', - '-dim', str(self.dimension) + '-dim', str(self.dimension), + '-metric', str(self.metric) ] + list(args) sys.stderr.write(str(cmd)) subprocess.run(cmd)