diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6778a55 --- /dev/null +++ b/.gitignore @@ -0,0 +1,11 @@ +*.swp +*.pyc +*.o +._* +*.bak +.DS_Store +build +__pycache__ +*.norm +*.egg-info* +docs/build diff --git a/barchybrid/src/arc_hybrid.py b/barchybrid/src/arc_hybrid.py index d40e182..05eaef9 100644 --- a/barchybrid/src/arc_hybrid.py +++ b/barchybrid/src/arc_hybrid.py @@ -8,7 +8,7 @@ class ArcHybridLSTM: def __init__(self, words, pos, rels, w2i, options): - self.model = Model() + self.model = ParameterCollection() self.trainer = AdamTrainer(self.model) random.seed(1) @@ -155,7 +155,7 @@ def Save(self, filename): def Load(self, filename): - self.model.load(filename) + self.model.populate(filename) def Init(self): evec = self.elookup[1] if self.external_embedding is not None else None @@ -406,5 +406,5 @@ def Train(self, conll_path): renew_cg() - self.trainer.update_epoch() + self.trainer.update() print "Loss: ", mloss/iSentence diff --git a/barchybrid/src/parser.py b/barchybrid/src/parser.py index 5d0f75f..7b3015d 100644 --- a/barchybrid/src/parser.py +++ b/barchybrid/src/parser.py @@ -35,6 +35,7 @@ (options, args) = parser.parse_args() print 'Using external embedding:', options.external_embedding + root_dir = os.path.dirname(os.path.abspath(sys.argv[0])) if not options.predictFlag: if not (options.rlFlag or options.rlMostFlag or options.headFlag): print 'You must use either --userlmost or --userl or --usehead (you can use multiple)' @@ -58,10 +59,10 @@ utils.write_conll(devpath, parser.Predict(options.conll_dev)) if not conllu: - os.system('perl src/utils/eval.pl -g ' + options.conll_dev + ' -s ' + devpath + ' > ' + devpath + '.txt') + os.system('perl %s/utils/eval.pl -g ' % root_dir + options.conll_dev + ' -s ' + devpath + ' > ' + devpath + '.txt') else: - os.system('python src/utils/evaluation_script/conll17_ud_eval.py -v -w src/utils/evaluation_script/weights.clas ' + options.conll_dev + ' ' + devpath + ' > ' + devpath + '.txt') - + 
os.system('python %s/utils/evaluation_script/conll17_ud_eval.py -v -w %s/utils/evaluation_script/weights.clas ' % (root_dir, root_dir) + options.conll_dev + ' ' + devpath + ' > ' + devpath + '.txt') + print 'Finished predicting dev' parser.Save(os.path.join(options.output, options.model + str(epoch+1))) else: @@ -70,19 +71,31 @@ stored_opt.external_embedding = options.external_embedding + print 'Initialize ArcHybridLSTM parser' parser = ArcHybridLSTM(words, pos, rels, w2i, stored_opt) + print 'Start loading parameters' parser.Load(options.model) + print 'End of parameters loading' conllu = (os.path.splitext(options.conll_test.lower())[1] == '.conllu') tespath = os.path.join(options.output, 'test_pred.conll' if not conllu else 'test_pred.conllu') ts = time.time() - pred = list(parser.Predict(options.conll_test)) + + print 'Starting to parse' + pred = [] + for sentence in parser.Predict(options.conll_test): + pred.append(sentence) + if len(pred) % 100 == 0 and len(pred) > 0: + print '%i parsed sentences' % len(pred) + + #pred = list(parser.Predict(options.conll_test)) te = time.time() + print 'End of parsing, sending into %s' % tespath utils.write_conll(tespath, pred) if not conllu: - os.system('perl src/utils/eval.pl -g ' + options.conll_test + ' -s ' + tespath + ' > ' + tespath + '.txt') + os.system('perl %s/utils/eval.pl -g ' % root_dir + options.conll_test + ' -s ' + tespath + ' > ' + tespath + '.txt') else: - os.system('python src/utils/evaluation_script/conll17_ud_eval.py -v -w src/utils/evaluation_script/weights.clas ' + options.conll_test + ' ' + tespath + ' > ' + testpath + '.txt') - + os.system('python %s/utils/evaluation_script/conll17_ud_eval.py -v -w %s/utils/evaluation_script/weights.clas ' % (root_dir, root_dir) + options.conll_test + ' ' + tespath + ' > ' + tespath + '.txt') + print 'Finished predicting test',te-ts diff --git a/bmstparser/src/mstlstm.py b/bmstparser/src/mstlstm.py index 2fa2209..0a3c4c6 100644 --- a/bmstparser/src/mstlstm.py +++ 
b/bmstparser/src/mstlstm.py @@ -7,7 +7,7 @@ class MSTParserLSTM: def __init__(self, vocab, pos, rels, w2i, options): - self.model = Model() + self.model = ParameterCollection() random.seed(1) self.trainer = AdamTrainer(self.model) @@ -136,7 +136,7 @@ def Save(self, filename): def Load(self, filename): - self.model.load(filename) + self.model.populate(filename) def Predict(self, conll_path): @@ -320,5 +320,5 @@ def Train(self, conll_path): renew_cg() - self.trainer.update_epoch() + self.trainer.update() print "Loss: ", mloss/iSentence diff --git a/bmstparser/src/parser.py b/bmstparser/src/parser.py index 28ba87c..d0a0ad7 100644 --- a/bmstparser/src/parser.py +++ b/bmstparser/src/parser.py @@ -1,5 +1,5 @@ from optparse import OptionParser -import pickle, utils, mstlstm, os, os.path, time +import pickle, utils, mstlstm, os, os.path, time, sys if __name__ == '__main__': @@ -33,6 +33,7 @@ print 'Using external embedding:', options.external_embedding + root_dir = os.path.dirname(os.path.abspath(sys.argv[0])) if options.predictFlag: with open(options.params, 'r') as paramsfp: words, w2i, pos, rels, stored_opt = pickle.load(paramsfp) @@ -53,9 +54,9 @@ utils.write_conll(tespath, test_res) if not conllu: - os.system('perl src/utils/eval.pl -g ' + options.conll_test + ' -s ' + tespath + ' > ' + tespath + '.txt') + os.system('perl %s/utils/eval.pl -g ' % root_dir + options.conll_test + ' -s ' + tespath + ' > ' + tespath + '.txt') else: - os.system('python src/utils/evaluation_script/conll17_ud_eval.py -v -w src/utils/evaluation_script/weights.clas ' + options.conll_test + ' ' + tespath + ' > ' + testpath + '.txt') + os.system('python %s/utils/evaluation_script/conll17_ud_eval.py -v -w %s/utils/evaluation_script/weights.clas ' % (root_dir, root_dir) + options.conll_test + ' ' + tespath + ' > ' + tespath + '.txt') else: print 'Preparing vocab' words, w2i, pos, rels = utils.vocab(options.conll_train) @@ -76,7 +77,7 @@ parser.Save(os.path.join(options.output, 
os.path.basename(options.model) + str(epoch+1))) if not conllu: - os.system('perl src/utils/eval.pl -g ' + options.conll_dev + ' -s ' + devpath + ' > ' + devpath + '.txt') + os.system('perl %s/utils/eval.pl -g ' % root_dir + options.conll_dev + ' -s ' + devpath + ' > ' + devpath + '.txt') else: - os.system('python src/utils/evaluation_script/conll17_ud_eval.py -v -w src/utils/evaluation_script/weights.clas ' + options.conll_dev + ' ' + devpath + ' > ' + devpath + '.txt') + os.system('python %s/utils/evaluation_script/conll17_ud_eval.py -v -w %s/utils/evaluation_script/weights.clas ' % (root_dir, root_dir) + options.conll_dev + ' ' + devpath + ' > ' + devpath + '.txt')