Skip to content

Commit e397b6b

Browse files
authored
Finalize testing for gcs filesystem (#1400)
* filesystem with the same config should share the same random prefix * `fs` fixture returns `uri` instead of each `{filesystem}_fs` * fix matching fields for gcs testbench * add more tests for `gcs` filesystem
1 parent 22f4103 commit e397b6b

File tree

2 files changed

+74
-48
lines changed

2 files changed

+74
-48
lines changed

tests/test_filesystem.py

Lines changed: 48 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929
ROOT_PREFIX = f"tf-io-root-{int(time.time())}/"
3030

3131
# This is the number of attributes each filesystem should return in `*_fs`.
32-
NUM_ATR_FS = 7
32+
NUM_ATR_FS = 6
3333

3434
S3_URI = "s3"
3535
AZ_URI = "az"
@@ -127,7 +127,7 @@ def mkdirs(path):
127127
path += "/"
128128
write(path, b"")
129129

130-
yield S3_URI, path_to, read, write, mkdirs, posixpath.join, (client, bucket_name)
130+
yield path_to, read, write, mkdirs, posixpath.join, (client, bucket_name)
131131
monkeypatch.undo()
132132

133133

@@ -182,7 +182,7 @@ def mkdirs(path):
182182
if path[-1] == "/":
183183
write(path, b"")
184184

185-
yield AZ_URI, path_to, read, write, mkdirs, posixpath.join, (
185+
yield path_to, read, write, mkdirs, posixpath.join, (
186186
client,
187187
container_name,
188188
account,
@@ -196,13 +196,13 @@ def az_dsn_fs(az_fs):
196196
yield [None] * NUM_ATR_FS
197197
return
198198

199-
uri, _, read, write, mkdirs, join, fs_internal = az_fs
199+
_, read, write, mkdirs, join, fs_internal = az_fs
200200
_, container_name, account = fs_internal
201201

202202
def path_to_dsn(*args):
203203
return f"{AZ_URI}://{account}.blob.core.windows.net/{container_name}/{posixpath.join(ROOT_PREFIX, *args)}"
204204

205-
yield uri, path_to_dsn, read, write, mkdirs, join, fs_internal
205+
yield path_to_dsn, read, write, mkdirs, join, fs_internal
206206

207207

208208
@pytest.fixture(scope="module")
@@ -223,10 +223,9 @@ def write(*_):
223223
def mkdirs(_):
224224
pass
225225

226-
yield HTTPS_URI, path_to, read, write, mkdirs, posixpath.join, None
226+
yield path_to, read, write, mkdirs, posixpath.join, None
227227

228228

229-
# TODO(vnvo2409): some tests with `gcs` are falling.
230229
@pytest.fixture(scope="module")
231230
def gcs_fs():
232231
if should_skip(GCS_URI):
@@ -275,29 +274,41 @@ def mkdirs(path):
275274
path += "/"
276275
write(path, b"")
277276

278-
yield GCS_URI, path_to, read, write, mkdirs, posixpath.join, None
277+
yield path_to, read, write, mkdirs, posixpath.join, None
279278
monkeypatch.undo()
280279

281280

282281
@pytest.fixture
def fs(request, s3_fs, az_fs, az_dsn_fs, https_fs, gcs_fs):
    """Yield the helper tuple for the filesystem named by `request.param`.

    The yielded tuple is
    `(uri, path_to_rand, read, write, mkdirs, join, internal)`.
    `path_to_rand` is the backend's `path_to` with a random first path
    component already bound. It is created, and its root directory made,
    only once per `(request.param, patchs)` pair; later requests with the
    same pair reuse it through `fs.path_to_rand_cache`.
    """
    requested_uri = request.param
    should_skip(requested_uri, check_only=False)

    # (requested scheme, scheme reported to the test, backend helpers).
    # The first matching row wins; the DSN flavour of Azure is reported
    # to tests under the plain Azure scheme.
    backends = (
        (S3_URI, S3_URI, s3_fs),
        (AZ_URI, AZ_URI, az_fs),
        (AZ_DSN_URI, AZ_URI, az_dsn_fs),
        (HTTPS_URI, HTTPS_URI, https_fs),
        (GCS_URI, GCS_URI, gcs_fs),
    )
    real_uri, helpers = next(
        (
            (reported, backend)
            for wanted, reported, backend in backends
            if wanted == requested_uri
        ),
        (requested_uri, [None] * NUM_ATR_FS),
    )
    path_to, read, write, mkdirs, join, internal = helpers

    cache_key = (requested_uri, request.getfixturevalue("patchs"))
    path_to_rand = fs.path_to_rand_cache.get(cache_key)
    if path_to_rand is None:
        # First request for this configuration: choose the random prefix,
        # create its directory and remember the bound helper.
        path_to_rand = functools.partial(path_to, str(random.getrandbits(32)))
        mkdirs(path_to_rand(""))
        fs.path_to_rand_cache[cache_key] = path_to_rand

    yield real_uri, path_to_rand, read, write, mkdirs, join, internal


# Maps (request.param, patchs) to the random-prefixed `path_to` helper.
fs.path_to_rand_cache = {}
301312

302313

303314
@pytest.mark.parametrize(
@@ -328,7 +339,9 @@ def test_io_read_file(fs, patchs, monkeypatch):
328339

329340

330341
@pytest.mark.parametrize(
331-
"fs, patchs", [(S3_URI, None), (AZ_URI, None), (AZ_DSN_URI, None)], indirect=["fs"]
342+
"fs, patchs",
343+
[(S3_URI, None), (AZ_URI, None), (AZ_DSN_URI, None), (GCS_URI, None)],
344+
indirect=["fs"],
332345
)
333346
def test_io_write_file(fs, patchs, monkeypatch):
334347
_, path_to, read, _, _, _, _ = fs
@@ -459,7 +472,7 @@ def test_dataset_from_remote_filename(fs, patchs, monkeypatch):
459472

460473

461474
@pytest.mark.parametrize(
462-
"fs, patchs", [(S3_URI, None), (AZ_URI, None)], indirect=["fs"]
475+
"fs, patchs", [(S3_URI, None), (AZ_URI, None), (GCS_URI, None)], indirect=["fs"]
463476
)
464477
def test_gfile_GFile_writable(fs, patchs, monkeypatch):
465478
uri, path_to, read, _, _, _, _ = fs
@@ -488,7 +501,7 @@ def test_gfile_GFile_writable(fs, patchs, monkeypatch):
488501

489502

490503
@pytest.mark.parametrize(
491-
"fs, patchs", [(S3_URI, None), (AZ_URI, None)], indirect=["fs"]
504+
"fs, patchs", [(S3_URI, None), (AZ_URI, None), (GCS_URI, None)], indirect=["fs"]
492505
)
493506
def test_gfile_isdir(fs, patchs, monkeypatch):
494507
_, path_to, _, write, mkdirs, join, _ = fs
@@ -506,10 +519,10 @@ def test_gfile_isdir(fs, patchs, monkeypatch):
506519

507520

508521
@pytest.mark.parametrize(
509-
"fs, patchs", [(S3_URI, None), (AZ_URI, None)], indirect=["fs"]
522+
"fs, patchs", [(S3_URI, None), (AZ_URI, None), (GCS_URI, None)], indirect=["fs"]
510523
)
511524
def test_gfile_listdir(fs, patchs, monkeypatch):
512-
_, path_to, _, write, mkdirs, join, _ = fs
525+
uri, path_to, _, write, mkdirs, join, _ = fs
513526
mock_patchs(monkeypatch, patchs)
514527

515528
root_path = "test_gfile_listdir"
@@ -519,6 +532,10 @@ def test_gfile_listdir(fs, patchs, monkeypatch):
519532
num_childs = 5
520533
childrens = [None] * num_childs
521534
childrens[0] = join(dname, "subdir")
535+
# TODO(vnvo2409): `gs` filesystem requires `/` at the end of directory's path.
536+
# Consider if we could change the behavior for matching the other filesystems.
537+
if uri == GCS_URI:
538+
childrens[0] += "/"
522539
mkdirs(childrens[0])
523540

524541
body = b"123456789"
@@ -532,7 +549,7 @@ def test_gfile_listdir(fs, patchs, monkeypatch):
532549

533550

534551
@pytest.mark.parametrize(
535-
"fs, patchs", [(S3_URI, None), (AZ_URI, None)], indirect=["fs"]
552+
"fs, patchs", [(S3_URI, None), (AZ_URI, None), (GCS_URI, None)], indirect=["fs"]
536553
)
537554
def test_gfile_makedirs(fs, patchs, monkeypatch):
538555
_, path_to, _, write, _, join, _ = fs
@@ -569,7 +586,7 @@ def test_gfile_remove(fs, patchs, monkeypatch):
569586

570587

571588
@pytest.mark.parametrize(
572-
"fs, patchs", [(S3_URI, None), (AZ_URI, None)], indirect=["fs"]
589+
"fs, patchs", [(S3_URI, None), (AZ_URI, None), (GCS_URI, None)], indirect=["fs"]
573590
)
574591
def test_gfile_rmtree(fs, patchs, monkeypatch):
575592
_, path_to, _, write, mkdirs, join, _ = fs
@@ -646,7 +663,7 @@ def test_gfile_rename(fs, patchs, monkeypatch):
646663

647664

648665
@pytest.mark.parametrize(
649-
"fs, patchs", [(S3_URI, None), (AZ_URI, None)], indirect=["fs"]
666+
"fs, patchs", [(S3_URI, None), (AZ_URI, None), (GCS_URI, None)], indirect=["fs"]
650667
)
651668
def test_gfile_glob(fs, patchs, monkeypatch):
652669
_, path_to, _, write, _, join, _ = fs

tests/test_gcloud/testbench/testbench_utils.py

Lines changed: 26 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,9 @@
1818
import hashlib
1919
import json
2020
import random
21+
import re
22+
23+
# One selector of a `fields` string: either `parent(child1,child2,...)`
# (groups 1 and 2) or a plain top-level `key` (group 3).
field_match = re.compile(r"(?:(\w+)\((\w+(?:,\w+)*)\))|(\w+)")


def filter_fields_from_response(fields, response):
    """Serialise `response` to JSON, keeping only the requested fields.

    `fields` is a comma-separated list of selectors. A selector is either
    a plain top-level key (`kind`) or a key followed by a parenthesised
    list of sub-keys (`items(name,size)`). Spaces in `fields` are ignored.
    Top-level keys that are absent from `response` are skipped.

    :param fields: selector string, or None to keep the whole response.
    :param response: dict to filter and serialise.
    :return: the (possibly filtered) response as a JSON string.
    :raises KeyError: if a requested sub-key is missing from a selected
        child object.
    """
    if fields is None:
        return json.dumps(response)

    # str.replace returns a new string, so the result has to be rebound.
    # Without this, "items(name, size)" is not recognised as a nested
    # selector and is split into three unrelated top-level keys.
    fields = fields.replace(" ", "")

    filtered = {}
    for parent_key, child_keys, plain_key in field_match.findall(fields):
        if plain_key:
            if plain_key in response:
                filtered[plain_key] = response[plain_key]
            continue

        if parent_key not in response:
            continue
        wanted = child_keys.split(",")
        selected = response[parent_key]
        if isinstance(selected, list):
            filtered[parent_key] = [
                {key: item[key] for key in wanted} for item in selected
            ]
        elif isinstance(selected, dict):
            # Index the selected object itself; the list branch's loop
            # variable does not exist (or is stale) on this path.
            filtered[parent_key] = {key: selected[key] for key in wanted}
    return json.dumps(filtered)
5463

5564

0 commit comments

Comments
 (0)