From 3691a4248d7fab7842f05164760395f1351bd05a Mon Sep 17 00:00:00 2001 From: Gerardo Bort Date: Sun, 18 Feb 2018 11:31:13 -0300 Subject: [PATCH] Adding properties param for tokensregex, semgrex and regex --- example.py | 5 +++++ pycorenlp/corenlp.py | 20 ++++++++++++-------- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/example.py b/example.py index 4b7a358..82b3430 100644 --- a/example.py +++ b/example.py @@ -14,3 +14,8 @@ print(output) output = nlp.semgrex(text, pattern='{tag: VBD}', filter=False) print(output) + output = nlp.semgrex(text, pattern='{ner: PERS}', filter=False, properties={ + 'annotators': 'tokenize,ssplit,ner,depparse', + 'pipelineLanguage': 'en', + }) + print(output) diff --git a/pycorenlp/corenlp.py b/pycorenlp/corenlp.py index 6eb2175..7176ea3 100644 --- a/pycorenlp/corenlp.py +++ b/pycorenlp/corenlp.py @@ -36,21 +36,25 @@ def annotate(self, text, properties=None): pass return output - def tokensregex(self, text, pattern, filter): - return self.regex('/tokensregex', text, pattern, filter) + def tokensregex(self, text, pattern, filter, properties = None): + return self.regex('/tokensregex', text, pattern, filter, properties) - def semgrex(self, text, pattern, filter): - return self.regex('/semgrex', text, pattern, filter) + def semgrex(self, text, pattern, filter, properties = None): + return self.regex('/semgrex', text, pattern, filter, properties) - def regex(self, endpoint, text, pattern, filter): + def regex(self, endpoint, text, pattern, filter, properties = None): + assert isinstance(text, str) + data = text.encode() r = requests.get( self.server_url + endpoint, params={ - 'pattern': pattern, + 'pattern': pattern, + 'properties': str(properties or {}), 'filter': filter - }, data=text) + }, data=data) + r.encoding = 'utf-8' output = r.text try: - output = json.loads(r.text) + output = json.loads(r.text, encoding='utf-8', strict=True) except: pass return output