diff --git a/README.md b/README.md
index 0eac12e..8cdf0af 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@
 ### [gerrit-rechecks](gerrit-rechecks/README.md): Scripts to get the last comment in Gerrit by Data, Submit Changes to Gerrit Projects with "Depends-On" and Reverify Gerrit Changes.
 ### [EoD-stuff](EoD-stuff/README.md): Scripts for making life easier to the Engineer on Duty :)
 ### [provision](provision/README.md): Different implementation of ansible roles and playbooks to automate things.
+### [similarity-comparison](similarity-comparison/README.md): Scripts for comparing InfraRed based Jenkins jobs.
-
 
 ![](https://github.com/RedHatCRE/toolbox/workflows/tests/badge.svg)
 
diff --git a/similarity-comparison/.gitignore b/similarity-comparison/.gitignore
new file mode 100644
index 0000000..513aea0
--- /dev/null
+++ b/similarity-comparison/.gitignore
@@ -0,0 +1,3 @@
+jjs.db
+jjs.xlsx
+venv/**
diff --git a/similarity-comparison/README.md b/similarity-comparison/README.md
new file mode 100644
index 0000000..fc854cf
--- /dev/null
+++ b/similarity-comparison/README.md
@@ -0,0 +1,15 @@
+HOWTO
+-----
+virtualenv venv
+. ./venv/bin/activate
+pip install -r requirements.txt
+python similarity_comparison.py
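+
+The script reads Jenkins credentials from
+~/.config/jenkins_jobs/jenkins_jobs.ini ([jenkins] section with url, user
+and password keys). A minimal example, with placeholder values:
+
+    [jenkins]
+    url=https://jenkins.example.com/
+    user=jenkins-user
+    password=secret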
diff --git a/similarity-comparison/infrared_args_patch b/similarity-comparison/infrared_args_patch
new file mode 100644
index 0000000..26c532c
--- /dev/null
+++ b/similarity-comparison/infrared_args_patch
@@ -0,0 +1,36 @@
+diff --git a/infrared/api.py b/infrared/api.py
+index e88b2949..6fc7f77a 100644
+--- a/infrared/api.py
++++ b/infrared/api.py
+@@ -116,6 +116,12 @@ class InfraredPluginsSpec(SpecObject):
+         # unpack parsed arguments
+         nested_args, control_args, custom_args = parsed_args
+ 
++        # print and serialize the parsed nested arguments, then terminate
++        import pickle
++        print(nested_args)
++        with open('nested_args.pkl', 'wb') as f:
++            pickle.dump(nested_args, f)
++        exit(0)
+         if control_args.get('debug', None):
+             logger.LOG.setLevel(logging.DEBUG)
+ 
+@@ -198,12 +204,18 @@ class SpecManager(object):
+ 
+     def run_specs(self, args=None):
+         spec_args = vars(self.parser.parse_args(args))
++        print(spec_args)
+         subcommand = spec_args.get('subcommand', '')
+         if not spec_args.get('no_log_commands'):
+             if self.execution_logger is None:
+                 self.execution_logger = CoreServices.execution_logger_manager()
+             self.execution_logger.command()
+ 
++        # serialize the subcommand name
++        import pickle
++        with open('subcommand.pkl', 'wb') as f:
++            pickle.dump(subcommand, f)
++
+         if subcommand in self.spec_objects:
+             return self.spec_objects[subcommand].spec_handler(
+                 self.parser, args=args)
diff --git a/similarity-comparison/requirements.txt b/similarity-comparison/requirements.txt
new file mode 100644
index 0000000..9162eaf
--- /dev/null
+++ b/similarity-comparison/requirements.txt
@@ -0,0 +1,4 @@
+gitpython
+requests
+scikit-learn
+xlsxwriter
diff --git a/similarity-comparison/similarity_comparison.py b/similarity-comparison/similarity_comparison.py
new file mode 100644
index 0000000..a786400
--- /dev/null
+++ b/similarity-comparison/similarity_comparison.py
@@ -0,0 +1,362 @@
+import configparser
+import json
+import logging
+import os.path
+import re
+import sqlite3
+import subprocess
+import sys
+from io import StringIO
+from os.path import expanduser
+
+import requests
+import xlsxwriter
+from git import Repo
+from sklearn.feature_extraction.text import TfidfVectorizer
+
+
+httpRequest = {
+    'requestJobsAndBuildInfo':
+        "/api/json/?tree=jobs[name,lastBuild[result,number,timestamp]]",
+    'requestJobs':
+        "/api/json?tree=jobs[name]",
+    'requestStableBuildArtifact':
+        "/job/{jobName}/lastStableBuild/artifact/{artifactPath}",
+    'requestArtifact':
+        "/job/{jobName}/lastSuccessfulBuild/artifact/{artifactPath}"
+}
+
+
+def get_base_prefix_compat():
+    """Get base/real prefix, or sys.prefix if there is none."""
+    return getattr(sys, "base_prefix", None) or getattr(sys, "real_prefix",
+                                                        None) or sys.prefix
+
+
+def in_virtualenv():
+    return get_base_prefix_compat() != sys.prefix
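+
+
+# The infrared_args_patch above makes a patched "infrared <plugin> ..." run
+# dump its parsed arguments to nested_args.pkl and subcommand.pkl. A minimal
+# sketch of how those pickles can be read back (helper added for
+# illustration; nothing calls it yet):
+def load_serialized_args(pkl_dir="."):
+    import pickle
+    with open(os.path.join(pkl_dir, 'nested_args.pkl'), 'rb') as f:
+        nested_args = pickle.load(f)
+    with open(os.path.join(pkl_dir, 'subcommand.pkl'), 'rb') as f:
+        subcommand = pickle.load(f)
+    return subcommand, nested_args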
subprocess.call("rm -rf " + repo_dir, shell=True) + Repo.clone_from(git_url, repo_dir) + + # apply the arg serialization patch + command = "cp infrared_agrs_patch " + repo_dir + ";" + \ + "cd " + repo_dir + ";" + \ + "git apply infrared_agrs_patch" + subprocess.call(command, shell=True) + + #install infarred in a virtual environment + if (not in_virtualenv()): + raise Exception("This code installs pip packages and is " + \ + "adviced to be executed in a virtual environment") + + command = "cd " + repo_dir + ";" + \ + "pip install - U pip;" + \ + "pip install ." + subprocess.call(command, shell=True) + + # add additional plugins for enhanced parsing + subprocess.call("infrared plugin add all", shell=True) + + def _extract_ir_commands(self, file_context: str): + i = 0 + extracts = [] + REGEXP_START = r"\s*(infrared|ir)" + pattern_start = re.compile(REGEXP_START) + + # reformat file content to un-split multiline bash commands + file = StringIO(file_context.replace("\\\n", " ")) + + for line in file: + i += 1 # line counting starts with 1 + if pattern_start.match(line): + extracts.append((i, line.rstrip('\n'))) + + for line in extracts: + + status, output = subprocess.getstatusoutput(line[1]) + output = line[1].strip() + "\n" + output + print (output) + if status != 0: + logging.warning(output) + def _print_parsed_paramters(self): + # fetch unified jobs + sql_command = \ + 'SELECT DISTINCT * FROM jjs WHERE jobName LIKE ' + \ + '\'%DFG%\' AND jobName LIKE \'%unified%\' ORDER BY jobName' + unifiedJobs = self._fetch_jobs_from_DB(sql_command) + print("Total of unified jobs are: ", len(unifiedJobs)) + + for rowUnified in unifiedJobs: + jobNameUnified = str(rowUnified[0]) + print(len(unifiedJobs)) + + releaseUnified = self._extractVersionFromJobName( + jobNameUnified) + ipVersionUnifed = self._extractIPVersionFromJobName( + jobNameUnified) + + file_content = str(rowUnified[1]) + + print(self._extract_ir_commands(file_content)) + + + def _insertDataIntoTable(self, jobName, artifatcContent): + try: + cursor = self.dbcon.cursor() + sqlite_insert_with_param = """INSERT INTO jjs + (jobName, artifatcContent) + VALUES (?, ?);""" + data_tuple = (jobName, artifatcContent) + cursor.execute(sqlite_insert_with_param, data_tuple) + self.dbcon.commit() + cursor.close() + return 0 + + except sqlite3.Error as error: + print("Failed to insert into sqlite table", error) + return -1 + + def populateDB(self): + # get all Jobs + request = requests.get(self.url + httpRequest['requestJobs'], + verify=False, + auth=self.credentials) + jobsInJSON = json.loads(request.text) + print(json.dumps(jobsInJSON, indent=4, sort_keys=True)) + + skipList = ["util"] + + # get and store an artifact (if found) + okCounter = 0 + insertCounter = 0 + for element in jobsInJSON['jobs']: + print(element['name']) + jobName = element['name'] + if jobName in skipList: + continue + requestStr = self.url + httpRequest['requestArtifact'].format( + jobName=jobName, + artifactPath=self.artifactPath) + request = requests.get(requestStr, verify=False, + auth=self.credentials) + print(requestStr) + if request.ok: + okCounter = okCounter + 1 + if self._insertDataIntoTable(jobName, request.text) >= 0: + insertCounter = insertCounter + 1 + + print("From populateDB") + print("okCounter: " + str(okCounter)) + print("insertCounter: " + str(insertCounter)) + print("number of jobs: " + str(len(jobsInJSON['jobs']))) + assert (okCounter == insertCounter) + + def _normilizeArtifact(self, artifact): + regex = r".*infrared (tripleo-undercloud|tripleo-overcloud) .*\\*" 
+    def _normalizeArtifact(self, artifact):
+        plugin_names = "(tripleo-undercloud|tripleo-overcloud)"
+        regex = r".*infrared " + plugin_names + r" .*(([\r\n]*).*){4}"
+        matches = re.finditer(regex, artifact, re.MULTILINE)
+        normalizedArtifact = ""
+        for matchNum, match in enumerate(matches, start=1):
+            print(
+                "Match {matchNum} was found at {start}-{end}: {match}".format(
+                    matchNum=matchNum,
+                    start=match.start(),
+                    end=match.end(),
+                    match=match.group()))
+            normalizedArtifact = normalizedArtifact + "\n" + match.group()
+
+        return normalizedArtifact
+
+    def _extractVersionFromJobName(self, jobName):
+        # matches XY.Z XY XY_Z in job names
+        REGEXP = r'\s*([\d(.|_)]+)(_compact|-compact|_director|-director)\s*'
+
+        version = re.search(REGEXP, jobName).group(1)
+        version = version.replace("_", ".")  # for jobs with XY_Z
+
+        return version
+
+    def _extractIPVersionFromJobName(self, jobName):
+        # matches the IP version (e.g. ipv4, ipv6) in job names
+        REGEXP = r".*ipv([\d]+).*"
+
+        try:
+            version = re.search(REGEXP, jobName).group(1)
+        except AttributeError:
+            version = "NA"
+
+        return version
+
+    # return True if the artifact contains any of the filter-out criteria
+    def _isFilteredOut(self, artifact):
+        filters = ["infrared tripleo-inventory",
+                   "infrared workspace import",
+                   "sshpass -p stack ssh -o UserKnownHostsFile=/dev/null",
+                   "infrared tripleo-upgrade"]
+
+        artifactString = str(artifact)
+
+        intersection = [value for value in filters if value in artifactString]
+
+        return len(intersection) > 0
+
+    def _fetch_jobs_from_DB(self, sql_command):
+        cursor = self.dbcon.cursor()
+        cursor.execute(sql_command)
+        jobs = cursor.fetchall()
+        cursor.close()
+        return jobs
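+
+    # TfidfVectorizer L2-normalizes its rows by default, so the product
+    # tfidf * tfidf.T in analyseJJSTable below is the pairwise cosine
+    # similarity of the vectorized artifacts. The same idea in isolation:
+    #
+    #     tfidf = TfidfVectorizer().fit_transform(["ir deploy a",
+    #                                              "ir deploy b"])
+    #     similarity = (tfidf * tfidf.T).toarray()[0, 1]  # value in [0, 1]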
+
+    def analyseJJSTable(self):
+        # fetch unified jobs
+        sql_command = \
+            'SELECT DISTINCT * FROM jjs WHERE jobName LIKE ' + \
+            '\'%unified%\' AND jobName LIKE \'%director%\' ORDER BY jobName'
+        unifiedJobs = self._fetch_jobs_from_DB(sql_command)
+        print("Total number of unified jobs: ", len(unifiedJobs))
+
+        # fetch other director jobs (including unified ones) to compare
+        # against the unified jobs
+        sql_command = \
+            'SELECT DISTINCT * FROM jjs WHERE jobName LIKE ' + \
+            '\'%director%\' AND jobName NOT LIKE \'%compact%\''
+        directorJobs = self._fetch_jobs_from_DB(sql_command)
+        print("Total number of director jobs: ", len(directorJobs))
+
+        unifiedJobsCounter = 0
+        cell_format = self.workbook.add_format(
+            {'bold': True, 'font_color': 'red'})
+        for rowUnified in unifiedJobs:
+            jobNameUnified = str(rowUnified[0])
+            print(len(unifiedJobs))
+            try:
+                unifiedJobsCounter += 1
+                worksheet = self.workbook.add_worksheet(
+                    jobNameUnified[1:28] + "--" + str(unifiedJobsCounter))
+                worksheet.set_column(0, 0, len(jobNameUnified))
+                worksheet.write(0, 0, jobNameUnified, cell_format)
+                row = 1
+            except xlsxwriter.exceptions.DuplicateWorksheetName:
+                continue
+            for rowDirector in directorJobs:
+                jobNameDirector = str(rowDirector[0])
+                releaseUnified = self._extractVersionFromJobName(
+                    jobNameUnified)
+                releaseDirector = self._extractVersionFromJobName(
+                    jobNameDirector)
+                ipVersionUnified = self._extractIPVersionFromJobName(
+                    jobNameUnified)
+                ipVersionDirector = self._extractIPVersionFromJobName(
+                    jobNameDirector)
+                # if releaseUnified not in ["16.1", "16.2"]:
+                #     continue
+
+                if jobNameUnified != jobNameDirector and \
+                        releaseUnified == releaseDirector and \
+                        ipVersionUnified == ipVersionDirector:
+                    artifactUnified = str(rowUnified[1])
+                    artifactDirector = str(rowDirector[1])
+                    if self._isFilteredOut(artifactDirector):
+                        continue
+                    normalizedUnified = self._normalizeArtifact(
+                        artifactUnified)
+                    normalizedDirector = self._normalizeArtifact(
+                        artifactDirector)
+                    try:
+                        tfidf = TfidfVectorizer().fit_transform(
+                            [normalizedUnified, normalizedDirector])
+                        # no need to normalize, since the vectorizer returns
+                        # normalized tf-idf rows
+                        pairwise_similarity = tfidf * tfidf.T
+                    except Exception:
+                        print("Cannot compare " + rowUnified[0] + " and " +
+                              rowDirector[0] + "\n")
+                        continue
+                    threshold = pairwise_similarity.data.min()
+
+                    if threshold >= 0.0:
+                        wordsUnified = set(normalizedUnified.split())
+                        wordsDirector = set(normalizedDirector.split())
+                        unifiedUniques = set(
+                            sorted(wordsUnified.difference(wordsDirector)))
+                        directorUniques = set(
+                            sorted(wordsDirector.difference(wordsUnified)))
+                        uniques = unifiedUniques.union(directorUniques)
+                        print(jobNameUnified + "," + str(unifiedUniques))
+                        print(jobNameDirector + "," + str(directorUniques))
+                        fstr = 'Total uniques: {}, Pairwise Similarity: {}\n'
+                        print(fstr.format(len(uniques), threshold))
+                        try:
+                            worksheet.set_column(row, 0, len(jobNameDirector))
+                            worksheet.write(row, 0, jobNameDirector)
+
+                            threshold = round(threshold, 3)
+                            worksheet.set_column(row, 1, len(str(threshold)))
+                            worksheet.write(row, 1, str(threshold))
+
+                            row += 1
+                        except Exception as e:
+                            print(e)
+                            continue
+
+
+credentialsPath = expanduser("~") + '/.config/jenkins_jobs/jenkins_jobs.ini'
+artifactPath = '.sh/run.sh'
+jjsc = JJSC(credentialsPath, artifactPath)
+
+# jjsc.populateDB()
+# jjsc.analyseJJSTable()
+
+jjsc._prepare_arg_parsing_and_serialization()
+jjsc._print_parsed_parameters()
+
+del jjsc
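+
+# NOTE: populateDB()/analyseJJSTable() are the original Jenkins-to-DB/XLSX
+# workflow; the two active calls above exercise the newer argument
+# serialization path. Uncomment the former (and comment the latter) to
+# regenerate jjs.db and jjs.xlsx instead.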