From 9e795910079d3277cba44f49b008ec093617ae57 Mon Sep 17 00:00:00 2001 From: halloju Date: Thu, 28 Oct 2021 17:46:44 +0800 Subject: [PATCH 1/3] feat. add get_item_links method --- src/solid/solid_api.py | 8 +++++++- tests/test_solid_api.py | 6 +++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/solid/solid_api.py b/src/solid/solid_api.py index 2b7c1d0..ed3b15d 100644 --- a/src/solid/solid_api.py +++ b/src/solid/solid_api.py @@ -219,7 +219,13 @@ def read_folder(self, url, options: ReadFolderOptions = ReadFolderOptions()) -> return parsed_folder def get_item_links(self, url, options: Dict = None) -> Response: - raise Exception('Not implemented') + if not self.item_exists(url): + raise Exception(f'Item not found: {url}') + + response = self.get(url) + + return response.links + def copy_file(self, _from, to, options: WriteOptions = None) -> Response: raise Exception('Not implemented') diff --git a/tests/test_solid_api.py b/tests/test_solid_api.py index 36f9748..02fbc93 100644 --- a/tests/test_solid_api.py +++ b/tests/test_solid_api.py @@ -171,6 +171,6 @@ def test_file(): patchedBody = '<> dct:title "This is a test file"; contact:personalTitle "Dr.".' assert parse_turtle(resp.text) == patchedBody - - - + # get item links + links = api.get_item_links(patchedUrl) + assert links['type'] == {'rel': 'type', 'url': 'http://www.w3.org/ns/ldp#Resource'} \ No newline at end of file From d5977a036c5584e789e57194cf6371078c940eb1 Mon Sep 17 00:00:00 2001 From: halloju Date: Sun, 31 Oct 2021 20:27:56 +0800 Subject: [PATCH 2/3] fix. get item links from parsing header --- src/solid/solid_api.py | 22 ++++++-- src/solid/utils/api_util.py | 105 ++++++++++++++++++++++++++++++++++++ tests/test_solid_api.py | 2 +- 3 files changed, 123 insertions(+), 6 deletions(-) diff --git a/src/solid/solid_api.py b/src/solid/solid_api.py index a8c1d99..e6ee461 100644 --- a/src/solid/solid_api.py +++ b/src/solid/solid_api.py @@ -5,7 +5,7 @@ from httpx import Response, HTTPStatusError from solid.auth import Auth -from solid.utils.api_util import get_root_url, LINK, get_parent_url, get_item_name +from solid.utils.api_util import get_root_url, LINK, get_parent_url, get_item_name, get_links_from_response from solid.utils.folder_utils import parse_folder_response @@ -215,14 +215,26 @@ def read_folder(self, url, options: ReadFolderOptions = ReadFolderOptions()) -> return parsed_folder - def get_item_links(self, url, options: Dict = None) -> Response: + def get_item_links(self, url, options: Dict = {}) -> Response: if not self.item_exists(url): raise Exception(f'Item not found: {url}') - - response = self.get(url) - return response.links + options.update({ + "links": LINKS.INCLUDE_POSSIBLE, + "withAcl": True, + "withMeta": True, + }) + if (options['links'] == LINKS.EXCLUDE): + raise Exception(f'Invalid option LINKS.EXCLUDE for getItemLinks') + + response = self.head(url) + links = get_links_from_response(response) + if (options['links'] == LINKS.INCLUDE): + if (not options.withAcl): del links['acl'] + if (not options.withMeta): del links['meta'] + + return links def copy_file(self, _from, to, options: WriteOptions = None) -> Response: raise Exception('Not implemented') diff --git a/src/solid/utils/api_util.py b/src/solid/utils/api_util.py index caefd5b..e9f6285 100644 --- a/src/solid/utils/api_util.py +++ b/src/solid/utils/api_util.py @@ -1,5 +1,6 @@ from enum import Enum from typing import List +import re class LINK(Enum): @@ -59,3 +60,107 @@ def are_folders(urls: List) -> bool: def are_files(urls: List) -> bool: pass + +def get_links_from_response(response) -> dict: + link_header = response.headers.get('link') + if (not link_header) or (link_header == ''): + return {} + else: + return parse_link_header(link_header, response.url) + +def parse_link_header(link_header: str, item_url) -> dict: + link_dict = {} + link_header_list = parse_link_header_to_array(link_header) + if len(link_header_list) > 0: + for link in link_header_list: + url = link[link.index('<')+ 1:link.index('>')] + original_rel = link[link.index('rel="')+ 5:link.rindex('"')] + if (original_rel.lower() == 'describedby'): + rel = 'meta' + else: + rel = original_rel + + if rel in ['meta', 'acl']: + link_dict[rel] = url_join(url, item_url) + + return link_dict + +def parse_link_header_to_array(link_header: str) -> list: + if (not link_header): return + linkexp = '<[^>]*>\s*(\s*;\s*[^()<>@,;:"/[\]?={} \t]+=(([^\(\)<>@,;:"\/\[\]\?={} \t]+)|("[^"]*")))*(,|$)' + match = re.finditer(linkexp, link_header) + links = [x.group() for x in match] + return links + +def url_join(given, base): + base = str(base) + base_hash = base.find('#') + if (base_hash > 0): + base = base[0:base_hash] + + if (len(given) == 0): + return base + + if (given.find('#') == 0): + return base + given + + colon = given.find(':') + if (colon >= 0) : + return given + + base_colon = base.find(':') + if (len(base) == 0) : + return given + + if (base_colon < 0) : + return given + + if (+ base_colon + 1): + end_index = +base_colon + 1 + else: + end_index = 9e9 + + base_scheme = base[:end_index] + if (given.find('//') == 0) : + return base_scheme + given + + if (base.find('//', base_colon) == base_colon + 1): + base_single = base.find('/', base_colon + 3) + if (base_single < 0): + if (len(base) - base_colon - 3 > 0): + return base + '/' + given + else: + return base_scheme + given + + else: + base_single = base.find('/', base_colon + 1) + if (base_single < 0): + if (len(base) - base_colon - 1 > 0) : + return base + '/' + given + else: + return base_scheme + given + + if (given.find('/') == 0) : + return base[:base_single] + given + + path = base[base_single:] + try: + last_slash = path.rindex('/') + except: + return base_scheme + given + + if (last_slash >= 0 and last_slash < len(path) - 1) : + if (+last_slash + 1): + end_index = +last_slash + 1 + else: + end_index = 9e9 + path = path[:end_index] + + path += given + while (re.match("[^\/]*\/\.\.\/", path)) : + path = re.sub("[^\/]*\/\.\.\/", '', path, 1) + + path = re.sub("\.\/", '', path) + path = re.sub("\/\.$", '/', path) + return base[:base_single] + path + diff --git a/tests/test_solid_api.py b/tests/test_solid_api.py index abccf71..167a9d0 100644 --- a/tests/test_solid_api.py +++ b/tests/test_solid_api.py @@ -169,4 +169,4 @@ def test_file(): # get item links links = api.get_item_links(patchedUrl) - assert links['type'] == {'rel': 'type', 'url': 'http://www.w3.org/ns/ldp#Resource'} \ No newline at end of file + assert links == {'acl': "{}.acl".format(patchedUrl), 'meta': "{}.meta".format(patchedUrl)} \ No newline at end of file From 2dbca658df0ef8e89854d36e03130aed04e55c02 Mon Sep 17 00:00:00 2001 From: halloju Date: Tue, 9 Nov 2021 20:41:47 +0800 Subject: [PATCH 3/3] fix. typing and f-string --- src/solid/solid_api.py | 2 +- src/solid/utils/api_util.py | 8 ++++---- tests/integration_test_solid_api.py | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/solid/solid_api.py b/src/solid/solid_api.py index e6ee461..52fb28c 100644 --- a/src/solid/solid_api.py +++ b/src/solid/solid_api.py @@ -215,7 +215,7 @@ def read_folder(self, url, options: ReadFolderOptions = ReadFolderOptions()) -> return parsed_folder - def get_item_links(self, url, options: Dict = {}) -> Response: + def get_item_links(self, url, options: Dict = {}) -> Links: if not self.item_exists(url): raise Exception(f'Item not found: {url}') diff --git a/src/solid/utils/api_util.py b/src/solid/utils/api_util.py index e9f6285..a36567c 100644 --- a/src/solid/utils/api_util.py +++ b/src/solid/utils/api_util.py @@ -1,5 +1,5 @@ from enum import Enum -from typing import List +from typing import List, Dict import re @@ -68,7 +68,7 @@ def get_links_from_response(response) -> dict: else: return parse_link_header(link_header, response.url) -def parse_link_header(link_header: str, item_url) -> dict: +def parse_link_header(link_header: str, item_url) -> Dict: link_dict = {} link_header_list = parse_link_header_to_array(link_header) if len(link_header_list) > 0: @@ -85,14 +85,14 @@ def parse_link_header(link_header: str, item_url) -> dict: return link_dict -def parse_link_header_to_array(link_header: str) -> list: +def parse_link_header_to_array(link_header: str) -> List: if (not link_header): return linkexp = '<[^>]*>\s*(\s*;\s*[^()<>@,;:"/[\]?={} \t]+=(([^\(\)<>@,;:"\/\[\]\?={} \t]+)|("[^"]*")))*(,|$)' match = re.finditer(linkexp, link_header) links = [x.group() for x in match] return links -def url_join(given, base): +def url_join(given, base) -> str: base = str(base) base_hash = base.find('#') if (base_hash > 0): diff --git a/tests/integration_test_solid_api.py b/tests/integration_test_solid_api.py index 167a9d0..b545181 100644 --- a/tests/integration_test_solid_api.py +++ b/tests/integration_test_solid_api.py @@ -169,4 +169,4 @@ def test_file(): # get item links links = api.get_item_links(patchedUrl) - assert links == {'acl': "{}.acl".format(patchedUrl), 'meta': "{}.meta".format(patchedUrl)} \ No newline at end of file + assert links == {'acl': f'{patchedUrl}.acl', 'meta': f'{patchedUrl}.meta'} \ No newline at end of file