diff --git a/README.md b/README.md
index 0eac12e..8cdf0af 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@
### [gerrit-rechecks](gerrit-rechecks/README.md): Scripts to get the last comment in Gerrit by Date, Submit Changes to Gerrit Projects with "Depends-On" and Reverify Gerrit Changes.
### [EoD-stuff](EoD-stuff/README.md): Scripts for making life easier to the Engineer on Duty :)
### [provision](provision/README.md): Different implementation of ansible roles and playbooks to automate things.
+### [similarity-comparison](similarity-comparison/README.md): Scripts for comparing infrared-based Jenkins jobs.
Engineer on Duty
-

diff --git a/similarity-comparison/.gitignore b/similarity-comparison/.gitignore
new file mode 100644
index 0000000..513aea0
--- /dev/null
+++ b/similarity-comparison/.gitignore
@@ -0,0 +1,3 @@
+jjs.db
+jjs.xlsx
+venv/**
diff --git a/similarity-comparison/README.md b/similarity-comparison/README.md
new file mode 100644
index 0000000..fc854cf
--- /dev/null
+++ b/similarity-comparison/README.md
@@ -0,0 +1,15 @@
+HOWTO
+-----
+    virtualenv venv
+    . ./venv/bin/activate
+    pip install -r requirements.txt
+    python similarity_comparison.py
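+
+The script reads Jenkins credentials from
+`~/.config/jenkins_jobs/jenkins_jobs.ini`; for example (placeholder
+values):
+
+    [jenkins]
+    url=https://jenkins.example.com/
+    user=example-user
+    password=example-api-token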
diff --git a/similarity-comparison/infrared_args_patch b/similarity-comparison/infrared_args_patch
new file mode 100644
index 0000000..26c532c
--- /dev/null
+++ b/similarity-comparison/infrared_args_patch
@@ -0,0 +1,36 @@
+diff --git a/infrared/api.py b/infrared/api.py
+index e88b2949..6fc7f77a 100644
+--- a/infrared/api.py
++++ b/infrared/api.py
+@@ -116,6 +116,12 @@ class InfraredPluginsSpec(SpecObject):
+ # unpack parsed arguments
+ nested_args, control_args, custom_args = parsed_args
+
++    # print to stdout and serialize the nested arguments, then terminate
++    import pickle
++    print(nested_args)
++ with open('nested_args.pkl', 'wb') as f:
++ pickle.dump(nested_args, f)
++ exit(0)
+ if control_args.get('debug', None):
+ logger.LOG.setLevel(logging.DEBUG)
+
+@@ -198,12 +204,18 @@ class SpecManager(object):
+
+ def run_specs(self, args=None):
+ spec_args = vars(self.parser.parse_args(args))
++ print(spec_args)
+ subcommand = spec_args.get('subcommand', '')
+ if not spec_args.get('no_log_commands'):
+ if self.execution_logger is None:
+ self.execution_logger = CoreServices.execution_logger_manager()
+ self.execution_logger.command()
+
++ # serialize subcommand
++ import pickle
++ with open('subcommand.pkl', 'wb') as f:
++ pickle.dump(subcommand, f)
++
+ if subcommand in self.spec_objects:
+ return self.spec_objects[subcommand].spec_handler(
+ self.parser, args=args)
diff --git a/similarity-comparison/requirements.txt b/similarity-comparison/requirements.txt
new file mode 100644
index 0000000..9162eaf
--- /dev/null
+++ b/similarity-comparison/requirements.txt
@@ -0,0 +1,4 @@
+gitpython
+requests
+scikit-learn
+xlsxwriter
diff --git a/similarity-comparison/similarity_comparison.py b/similarity-comparison/similarity_comparison.py
new file mode 100644
index 0000000..a786400
--- /dev/null
+++ b/similarity-comparison/similarity_comparison.py
@@ -0,0 +1,367 @@
+import configparser
+import json
+import logging
+import os.path
+import re
+import requests
+import sqlite3
+import subprocess
+import sys
+import xlsxwriter
+
+from git import Repo
+from io import StringIO
+from os.path import expanduser
+from sklearn.feature_extraction.text import TfidfVectorizer
+
+
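+# Jenkins JSON API queries; the {jobName} and {artifactPath} placeholders
+# are filled in per job before a request is sent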
+httpRequest = {
+ 'requestJobsAndBuildInfo':
+ "/api/json/?tree=jobs[name,lastBuild[result,number,timestamp]]",
+ 'requestJobs':
+ "/api/json?tree=jobs[name]",
+ 'requestStableBuildArtifact':
+ "/job/{jobName}/lastStableBuild/artifact/{artifactPath}",
+ 'requestArtifact':
+ "/job/{jobName}/lastSuccessfulBuild/artifact/{artifactPath}"
+}
+
+
+def get_base_prefix_compat():
+ """Get base/real prefix, or sys.prefix if there is none."""
+ return getattr(sys, "base_prefix", None) or getattr(sys, "real_prefix",
+ None) or sys.prefix
+
+def in_virtualenv():
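+    """Return True when running inside a virtualenv."""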
+ return get_base_prefix_compat() != sys.prefix
+
+# JJSC - Jenkins Jobs Similarity Computation
+class JJSC(object):
+ def __init__(self, credentialsPath, artifactPath):
+ configParser = configparser.RawConfigParser()
+ print(configParser.read(credentialsPath))
+ sectionName = "jenkins"
+ dictionary = dict(configParser.items(sectionName))
+
+ self.url = dictionary['url']
+ self.artifactPath = artifactPath
+ self.credentials = (dictionary['user'], dictionary['password'])
+
+ # create (if !exists) a db to store
+ self.dbcon = sqlite3.connect('jjs.db')
+ print("Connected to SQLite jjs.db")
+ cursor = self.dbcon.cursor()
+        cursor.execute('''CREATE TABLE IF NOT EXISTS jjs
+                          ( jobName text,
+                            artifactContent text,
+                            artifactContentNormalized text )''')
+ self.dbcon.commit()
+ cursor.close()
+ print("jjs table exists in jjs.db")
+
+ self.workbook = xlsxwriter.Workbook('jjs.xlsx')
+
+ logging.basicConfig(filename='sc.log',
+ level=logging.WARNING,
+ format='%(levelname)s %(asctime)s\n%(message)s\n',
+ datefmt='%Y-%m-%d %H:%M:%S')
+
+ def __del__(self):
+ if self.dbcon:
+ self.dbcon.close()
+ print("The SQLite connection is closed")
+ self.workbook.close()
+
+ def _prepare_arg_parsing_and_serialization(self):
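+        """Clone infrared, apply the argument-serialization patch and pip
+        install it (plus all plugins) into the active virtualenv."""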
+ # clone infrared
+ git_url = "https://github.com/redhat-openstack/infrared.git"
+ repo_dir = "/tmp/infrared"
+
+        # reuse an existing clone (the patch below is already applied)
+        if os.path.exists(repo_dir):
+            return
+        Repo.clone_from(git_url, repo_dir)
+
+ # apply the arg serialization patch
+ command = "cp infrared_agrs_patch " + repo_dir + ";" + \
+ "cd " + repo_dir + ";" + \
+ "git apply infrared_agrs_patch"
+ subprocess.call(command, shell=True)
+
+        # install infrared in a virtual environment
+        if not in_virtualenv():
+            raise Exception("This code installs pip packages and should "
+                            "be executed in a virtual environment")
+
+ command = "cd " + repo_dir + ";" + \
+ "pip install - U pip;" + \
+ "pip install ."
+ subprocess.call(command, shell=True)
+
+ # add additional plugins for enhanced parsing
+ subprocess.call("infrared plugin add all", shell=True)
+
+    def _extract_ir_commands(self, file_content: str):
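+        """Find infrared/ir invocations in a job script and run each one
+        through the patched infrared, which pickles the parsed arguments."""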
+ i = 0
+ extracts = []
+ REGEXP_START = r"\s*(infrared|ir)"
+ pattern_start = re.compile(REGEXP_START)
+
+ # reformat file content to un-split multiline bash commands
+        file = StringIO(file_content.replace("\\\n", " "))
+
+ for line in file:
+ i += 1 # line counting starts with 1
+ if pattern_start.match(line):
+ extracts.append((i, line.rstrip('\n')))
+
+        for line in extracts:
+            status, output = subprocess.getstatusoutput(line[1])
+            output = line[1].strip() + "\n" + output
+            print(output)
+            if status != 0:
+                logging.warning(output)
+
+    def _print_parsed_parameters(self):
+        # fetch unified jobs
+        sql_command = \
+            'SELECT DISTINCT * FROM jjs WHERE jobName LIKE ' + \
+            '\'%DFG%\' AND jobName LIKE \'%unified%\' ORDER BY jobName'
+        unifiedJobs = self._fetch_jobs_from_DB(sql_command)
+        print("Total unified jobs:", len(unifiedJobs))
+
+        for rowUnified in unifiedJobs:
+            file_content = str(rowUnified[1])
+            self._extract_ir_commands(file_content)
+
+
+    def _insertDataIntoTable(self, jobName, artifactContent):
+        try:
+            cursor = self.dbcon.cursor()
+            sqlite_insert_with_param = """INSERT INTO jjs
+                                          (jobName, artifactContent)
+                                          VALUES (?, ?);"""
+            data_tuple = (jobName, artifactContent)
+ cursor.execute(sqlite_insert_with_param, data_tuple)
+ self.dbcon.commit()
+ cursor.close()
+ return 0
+
+ except sqlite3.Error as error:
+ print("Failed to insert into sqlite table", error)
+ return -1
+
+ def populateDB(self):
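+        """Fetch the configured artifact of every Jenkins job (minus the
+        skip list) and store it in the jjs table."""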
+ # get all Jobs
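+        # verify=False skips TLS certificate verification for the Jenkins host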
+ request = requests.get(self.url + httpRequest['requestJobs'],
+ verify=False,
+ auth=self.credentials)
+ jobsInJSON = json.loads(request.text)
+ print(json.dumps(jobsInJSON, indent=4, sort_keys=True))
+
+ skipList = ["util"]
+
+ # get and store an artifact (if found)
+ okCounter = 0
+ insertCounter = 0
+ for element in jobsInJSON['jobs']:
+ print(element['name'])
+ jobName = element['name']
+ if jobName in skipList:
+ continue
+ requestStr = self.url + httpRequest['requestArtifact'].format(
+ jobName=jobName,
+ artifactPath=self.artifactPath)
+ request = requests.get(requestStr, verify=False,
+ auth=self.credentials)
+ print(requestStr)
+ if request.ok:
+ okCounter = okCounter + 1
+ if self._insertDataIntoTable(jobName, request.text) >= 0:
+ insertCounter = insertCounter + 1
+
+ print("From populateDB")
+ print("okCounter: " + str(okCounter))
+ print("insertCounter: " + str(insertCounter))
+ print("number of jobs: " + str(len(jobsInJSON['jobs'])))
+ assert (okCounter == insertCounter)
+
+    def _normalizeArtifact(self, artifact):
+        # match an infrared plugin invocation plus the four lines after it
+        plugin_names = "(tripleo-undercloud|tripleo-overcloud)"
+        regex = r".*infrared " + plugin_names + r" .*(([\r\n]*).*){4}"
+        matches = re.finditer(regex, artifact, re.MULTILINE)
+ normalizedArtifact = ""
+ for matchNum, match in enumerate(matches, start=1):
+ print(
+ "Match {matchNum} was found at {start}-{end}: {match}".format(
+ matchNum=matchNum,
+ start=match.start(),
+ end=match.end(),
+ match=match.group()))
+ normalizedArtifact = normalizedArtifact + "\n" + match.group()
+
+        return normalizedArtifact
+
+ def _extractVersionFromJobName(self, jobName):
+ # matches XY.Z XY XY_Z in job names
+        REGEXP = r'\s*([\d._]+)(_compact|-compact|_director|-director)\s*'
+
+ version = re.search(REGEXP, jobName).group(1)
+ version = version.replace("_", ".") # for jobs with XY_Z
+
+ return version
+
+ def _extractIPVersionFromJobName(self, jobName):
+        # matches the digits of ipvX in job names
+ REGEXP = r".*ipv([\d]+).*"
+
+ try:
+ version = re.search(REGEXP, jobName).group(1)
+ except AttributeError:
+ version = "NA"
+
+ return version
+
+    # return True if the artifact matches any of the filter-out criteria
+    def _isFilteredOut(self, artifact):
+        filters = ["infrared tripleo-inventory",
+                   "infrared workspace import",
+                   "sshpass -p stack ssh -o UserKnownHostsFile=/dev/null",
+                   "infrared tripleo-upgrade"]
+
+        artifactString = str(artifact)
+
+        intersection = [value for value in filters if value in artifactString]
+
+        return len(intersection) > 0
+
+ def _fetch_jobs_from_DB(self, sql_command):
+ cursor = self.dbcon.cursor()
+ cursor.execute(sql_command)
+ jobs = cursor.fetchall()
+ cursor.close()
+ return jobs
+
+ def analyseJJSTable(self):
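+        """Compare each unified job against director jobs of the same
+        release and IP version via tf-idf cosine similarity, writing one
+        xlsx worksheet per unified job."""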
+ # fetch unified jobs
+ sql_command = \
+ 'SELECT DISTINCT * FROM jjs WHERE jobName LIKE ' + \
+ '\'%unified%\' AND jobName LIKE \'%director%\' ORDER BY jobName'
+ unifiedJobs = self._fetch_jobs_from_DB(sql_command)
+ print("Total of unified jobs are: ", len(unifiedJobs))
+
+ # fetch other director jobs (including unified ones) to compare
+ # against the unified jobs
+ sql_command = \
+ 'SELECT DISTINCT * FROM jjs WHERE jobName LIKE ' + \
+ '\'%director%\' AND jobName NOT LIKE \'%compact%\''
+ directorJobs = self._fetch_jobs_from_DB(sql_command)
+ print("Total of director jobs are: ", len(directorJobs))
+
+ unifiedJobsCounter = 0
+ cell_format = self.workbook.add_format(
+ {'bold': True, 'font_color': 'red'})
+        for rowUnified in unifiedJobs:
+            jobNameUnified = str(rowUnified[0])
+ try:
+ unifiedJobsCounter += 1
+ worksheet = self.workbook.add_worksheet(
+ jobNameUnified[1:28] + "--" + str(unifiedJobsCounter))
+ worksheet.set_column(0, 0, len(jobNameUnified))
+ worksheet.write(0, 0, jobNameUnified, cell_format)
+ row = 1
+ except xlsxwriter.exceptions.DuplicateWorksheetName:
+ continue
+ for rowDirector in directorJobs:
+ jobNameDirector = str(rowDirector[0])
+ releaseUnified = self._extractVersionFromJobName(
+ jobNameUnified)
+ releaseDirector = self._extractVersionFromJobName(
+ jobNameDirector)
+                ipVersionUnified = self._extractIPVersionFromJobName(
+ jobNameUnified)
+ ipVersionDirector = self._extractIPVersionFromJobName(
+ jobNameDirector)
+ # if releaseUnified not in ["16.1", "16.2"]:
+ # continue
+
+ if jobNameUnified != jobNameDirector and \
+ releaseUnified == releaseDirector and \
+                        ipVersionUnified == ipVersionDirector:
+ artifactUnified = str(rowUnified[1])
+ artifactDirector = str(rowDirector[1])
+ if self._isFilteredOut(artifactDirector):
+ continue
+                    normalizedUnified = self._normalizeArtifact(
+                        artifactUnified)
+                    normalizedDirector = self._normalizeArtifact(
+                        artifactDirector)
+ try:
+ tfidf = TfidfVectorizer().fit_transform(
+ [normalizedUnified, normalizedDirector])
+ # no need to normalize, since Vectorizer will return
+ # normalized tf-idf
+ pairwise_similarity = tfidf * tfidf.T
+                    except Exception:
+                        print("Cannot compare " + rowUnified[0] + " and " +
+                              rowDirector[0] + "\n")
+                        continue
+                    threshold = pairwise_similarity.data.min()
+
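+                    # pairwise_similarity is a 2x2 cosine-similarity matrix;
+                    # its smallest entry is the cross-document score, and
+                    # tf-idf scores are non-negative, so the check always holds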
+ if threshold >= 0.0:
+ wordsUnified = set(normalizedUnified.split())
+ wordsDirector = set(normalizedDirector.split())
+                        unifiedUniques = wordsUnified - wordsDirector
+                        directorUniques = wordsDirector - wordsUnified
+ uniques = unifiedUniques.union(directorUniques)
+ print(jobNameUnified + "," + str(unifiedUniques))
+ print(jobNameDirector + "," + str(directorUniques))
+ fstr = 'Total uniques: {}, Pairwise Similarity: {}\n'
+ print(fstr.format(len(uniques), threshold))
+ try:
+ worksheet.set_column(row, 0, len(jobNameDirector))
+ worksheet.write(row, 0, jobNameDirector)
+
+ threshold = round(threshold, 3)
+ worksheet.set_column(row, 1, len(str(threshold)))
+ worksheet.write(row, 1, str(threshold))
+
+ row = row + 1
+ except Exception as e:
+ print(e)
+ continue
+
+
+
+credentialsPath = expanduser("~") + '/.config/jenkins_jobs/jenkins_jobs.ini'
+artifactPath = '.sh/run.sh'
+jjsc = JJSC(credentialsPath, artifactPath)
+
+
+
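+# enable these to (re)populate jjs.db and run the xlsx similarity report: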
+# jjsc.populateDB()
+# jjsc.analyseJJSTable()
+
+jjsc._prepare_arg_parsing_and_serialization()
+jjsc._print_parsed_parameters()
+
+
+del jjsc