
Machine learning #1


Open: wants to merge 113 commits into base: master

Commits (113), showing changes from all commits
71d5db7
Resolved compile errors and made the project runnable
Jul 24, 2019
bdd8119
connected bayesian optimizer of python to bo.Main
Jul 24, 2019
e97a47a
added a machinery for input/output generation and evaluation
Jul 25, 2019
0c1f154
change scala call from python
vrupp Jul 25, 2019
3084af9
resolved merge conflicts sbt call
vrupp Jul 25, 2019
a15f26d
added scaladoc comments for BOUtil
Jul 25, 2019
ff25c6e
call sbt package
vrupp Jul 25, 2019
145a6c5
call sbt package
vrupp Jul 25, 2019
ab35e31
added params to BO
vrupp Jul 26, 2019
6bdd2d3
added the basic tool for calculating adapted Lyapunov exponent
Jul 29, 2019
a38758f
refactored params into simulation class
vrupp Jul 29, 2019
cfdf226
added simple visualization tools
Jul 29, 2019
a94b2ef
read params from json
vrupp Jul 30, 2019
40473e9
added json support to bayesian_optimizer.py
Jul 30, 2019
85d49f6
added a train/test model for evaluating bayesian optimization perform…
Jul 31, 2019
97c4199
added a train/test model for evaluating bayesian optimization perform…
Jul 31, 2019
de18b53
Merge remote-tracking branch 'origin/master'
Jul 31, 2019
d04063d
added params to simulation
vrupp Jul 31, 2019
b097d13
changed error function
vrupp Jul 31, 2019
98ee926
added factory params
vrupp Aug 2, 2019
e2d2cc1
tuned the params for meaningful output, added two-mode plotting from …
Aug 5, 2019
34389a6
fixed error calculation and log with buffer
vrupp Aug 6, 2019
2c645f1
added partial-parameter optimization to python
Aug 6, 2019
d0a8cc6
plot multiple lines with same interval, fix param intervals for BO
vrupp Aug 6, 2019
0987cfa
simplified optimization process and removed inputs
Aug 7, 2019
925ebbf
modified bayesian optimization loop
Aug 7, 2019
cb1ec06
added params for gender and capital, normalized viz output
vrupp Aug 8, 2019
31423fd
removed output ranges
vrupp Aug 8, 2019
90c74d9
added observables, fixed bug with empty pl
vrupp Aug 9, 2019
1becf3d
fixed viz bugs
vrupp Aug 9, 2019
e86e3f2
write data to csv before plotting
vrupp Aug 9, 2019
e7f2367
set higher resolution for viz
vrupp Aug 9, 2019
58ed859
generate csv for train set
vrupp Aug 12, 2019
0c88406
added the first implementation of rnn
Aug 12, 2019
23bc72c
created time series train csv data
vrupp Aug 12, 2019
aec76f5
added training from csv to rnn.py
Aug 12, 2019
a864170
created csv for x and y variable
vrupp Aug 12, 2019
91195b0
added cross fold validation
vrupp Aug 13, 2019
a8bf578
made it able to run on gpu
vrupp Aug 13, 2019
8d2d9fc
added simple version of rnn for test/debug
Aug 14, 2019
a0af531
removed all unused classes and methods
vrupp Aug 22, 2019
4cbb3c9
fixed salary bug
vrupp Aug 22, 2019
2817c0c
added factories and params for their pls
vrupp Aug 23, 2019
b00dae5
added agent.py
Aug 26, 2019
abed14c
create csv for training a person agent
vrupp Aug 26, 2019
25e1869
added env.py
Aug 26, 2019
d43c827
added example for using environment/agents
Aug 26, 2019
e3b5036
scaled input
vrupp Aug 26, 2019
1b9cd5d
implemented a high-level(relationship graph)/low-level(bunch of netwo…
Aug 27, 2019
7fb4699
mapped values between env and graph
vrupp Aug 28, 2019
065ac9b
correlation matrix for agent
vrupp Aug 29, 2019
1a366b1
derivative matrix
vrupp Aug 29, 2019
70cdecd
Setting up the experiment: cleaning up parameters
Aug 30, 2019
72da1f6
Setting up the experiment: specialized factories into different agent…
Aug 30, 2019
d6a78fc
Setting up the experiment: added global statistics for joint training
Aug 31, 2019
7f6224a
implemented group train
vrupp Sep 1, 2019
c5eda0c
added two testing scenarios for the network model
Sep 2, 2019
8b40281
changed double type of variables to function
Aug 30, 2019
6684340
updated generation and evaluation data for BO
vrupp Sep 2, 2019
2f63ce9
renamed params and added params in BO python file
vrupp Sep 3, 2019
8180a38
added single_nn.py for comparison with our network
Sep 3, 2019
b6bcd0e
fixed typo
vrupp Sep 3, 2019
2bb77b7
added the option of configuring low-level network while defining an a…
Sep 3, 2019
068efd6
Merge remote-tracking branch 'origin/master'
Sep 3, 2019
3dcbccc
set up environment configuration for our simulation plus solo trainin…
Sep 4, 2019
e8b5cf7
added analysis tool for simdata/debugged random initialization bug
Sep 5, 2019
0d25b97
learn input values with tf optimizer
vrupp Sep 5, 2019
b2696ea
divided state indicators into (initializable) variables and (over time)…
Sep 5, 2019
2ec3975
added saving and loading ability to nn models
Sep 5, 2019
8f3656a
implemented group train for simulation
vrupp Sep 5, 2019
f7a74cd
group train for simulation
vrupp Sep 6, 2019
81fce6f
implemented input learning for simulation
vrupp Sep 6, 2019
b3e41ab
implemented prediction over time/added scale save-load
Sep 9, 2019
a703278
resolved a bug with global statistics (Source is a sim but not an agent)
Sep 9, 2019
e293602
Partially updated Readme
alirezamoosio Sep 9, 2019
6ce7e52
python code done! except debugging
Sep 9, 2019
a1a1b5d
added recursion depth for better debugging
vrupp Sep 9, 2019
bbeca8c
added python docstring for some files (not complete)
Sep 10, 2019
60c60f5
Update README.md
alirezamoosio Sep 10, 2019
47259ff
Update README.md
alirezamoosio Sep 10, 2019
e82ab97
Update README.md - fixing link bugs
alirezamoosio Sep 10, 2019
3bff5e7
Merge remote-tracking branch 'origin/master'
Sep 10, 2019
827cd3e
fixed bug with copying agents' constants
vrupp Sep 10, 2019
be73f6b
changed BO for new json format and equalized param ranges
vrupp Sep 10, 2019
efb0364
simplified bayesian optimization
vrupp Sep 11, 2019
2c1b7d7
fixing typo
vrupp Sep 11, 2019
46a9fd6
saving intermediate results BO
vrupp Sep 11, 2019
3b1ea8b
Another update to the README.md!
alirezamoosio Sep 11, 2019
567974d
added learning rate for group training and input learning
Sep 11, 2019
0093912
added supplementary files and changed readme
Sep 11, 2019
70049c7
write bo target in result json
vrupp Sep 11, 2019
04541a0
refactored network and bo for supplementary directory
vrupp Sep 11, 2019
d7621d6
moved params.json to supplementary
vrupp Sep 11, 2019
0b0ae97
added sigma variables to data generation and network configuration
Sep 12, 2019
5aa12a8
tuned single_nn's config and added 0 sd support
Sep 12, 2019
3bd9d62
wrote results of input learning in json file
vrupp Sep 12, 2019
aacc814
minimum 2 agents per type
vrupp Sep 12, 2019
0e7e7ea
added batch-prediction
Sep 12, 2019
7e18cdf
removed todos and added docstring
vrupp Sep 12, 2019
c940bc5
fixed the 0 sd problem of single_nn.py
Sep 12, 2019
2cace38
Merge branch 'master' of https://github.com/alirezamoosio/economic_si…
Sep 12, 2019
db8762d
adding datasets with 3000 lines
vrupp Sep 13, 2019
d334845
models with new dataset
vrupp Sep 13, 2019
4797c1e
commented out assertion because of imprecise rounding
vrupp Sep 16, 2019
b8fc91a
added global stat scaling for error measurement in scala
Sep 19, 2019
bd5f5c1
split dataset into differently generated train and test data
Sep 27, 2019
87acd82
split dataset into differently generated train and test data
Sep 27, 2019
99f33c4
Merge remote-tracking branch 'origin/master'
Sep 27, 2019
b48de67
changed all group metrics to mae for consistency
Sep 28, 2019
9c95be4
fixed an input-learning bug, the same input parameters for different …
Sep 29, 2019
62abfb3
added the ability to run the simulation directly from bo.Main
Sep 30, 2019
3bd7d43
moved files to ml subdirectory
Oct 3, 2019
bb2af50
moved everything to machine-learning branch
Nov 25, 2019
18 changes: 18 additions & 0 deletions .gitignore
@@ -0,0 +1,18 @@
.idea/
project/
target/

# python
__pycache__/

# conda env config file
*.cfg

# models and parameters
*.json
!supplementary/params/params.json
!supplementary/simulation.json
!data_vec.json

# debug
debug/
1 change: 0 additions & 1 deletion README.md

This file was deleted.

206 changes: 206 additions & 0 deletions ml/README.md

Large diffs are not rendered by default.

12 changes: 11 additions & 1 deletion build.sbt → ml/build.sbt
@@ -5,14 +5,24 @@ lazy val root = (project in file(".")).settings(
  scalaVersion := "2.11.8"
)

mainClass in (Compile, packageBin) := Some("ml_supplements.Main")
mainClass in (Compile, run) := Some("ml_supplements.Main")


// libraryDependencies += "com.quantifind" %% "wisp" % "0.0.4"

libraryDependencies ++= Seq(
  // "com.github.fommil.netlib" % "all" % "1.1.2",
  "org.scalanlp" %% "breeze" % "0.12",
  // "org.scalanlp" %% "breeze-natives" % "0.12",
  "org.scalanlp" %% "breeze-viz" % "0.12",
  "org.scalatest" %% "scalatest" % "3.0.0" % "test"
  "org.scalatest" %% "scalatest" % "3.0.0" % "test",

  "io.spray" %% "spray-json" % "1.3.5",

  // https://mvnrepository.com/artifact/org.apache.commons/commons-csv
  "org.apache.commons" % "commons-csv" % "1.7"
)

resolvers += "Sonatype Releases" at "https://oss.sonatype.org/content/repositories/releases/"
66 changes: 66 additions & 0 deletions ml/src/main/python/aggregator.py
@@ -0,0 +1,66 @@
from abc import ABC, abstractmethod

import pandas as pd
import tensorflow as tf


class Aggregator(ABC):
    def __init__(self, output_names):
        super().__init__()
        self._output_names = output_names

    @abstractmethod
    def aggregate(self, output_tensors, n_samples, indices):
        pass

    def aggregate_pd(self, agent_outputs):
        output_tensors = {agent: tf.constant(agent_outputs[agent].to_numpy()) for agent in
                          agent_outputs}
        n_samples = agent_outputs[list(agent_outputs.keys())[0]].shape[0]
        indices = {agent: {name: i for i, name in enumerate(agent_outputs[agent].columns.values)} for agent in agent_outputs}
        return pd.DataFrame(tf.keras.backend.eval(self.aggregate(output_tensors, n_samples, indices)), columns=self._output_names)


class GlobalStateAggregator(Aggregator):
    def __init__(self, np_population, output_names):
        super().__init__(output_names)
        self._np_population = np_population

    def aggregate(self, output_tensors, n_samples, indices):
        """Aggregate per-agent outputs into global statistics ("agents" here may also refer to nodes)."""
        dtype = output_tensors[list(output_tensors.keys())[0]].dtype
        average_names = ["capital", "total_value_destroyed", "happiness", "valueProduced", "goodwill"]
        result = {
            average_name: tf.concat([
                tf.reshape(output_tensors[agent][:, indices[agent]["{}.var_{}Mu".format(agent.name, average_name)]], (n_samples, 1))
                for agent in output_tensors if "{}.var_{}Mu".format(agent.name, average_name) in indices[agent]
            ], axis=1)
            for average_name in average_names
        }
        result = {average_name: tf.math.reduce_mean(result[average_name], axis=1) for average_name in average_names}
        employees = tf.concat([tf.reshape(output_tensors[agent][:, indices[agent][agent.name + ".var_employeesMu"]], (n_samples, 1))
                               for agent in output_tensors if agent.name + ".var_employeesMu" in indices[agent]], axis=1)
        employees = tf.math.reduce_sum(employees, axis=1)
        # unemployment rate = 1 - (total employees / population)
        result["unemploymentRate"] = tf.ones(n_samples, dtype) - tf.math.divide(employees, tf.constant(self._np_population.values, dtype))
        return tf.concat([tf.reshape(result[output_name], (n_samples, 1)) for output_name in result], axis=1)


class DummyAggregator(Aggregator):
    def __init__(self, output_names):
        super().__init__(output_names)

    def aggregate(self, output_tensors, n_samples, indices):
        dtype = output_tensors[list(output_tensors.keys())[0]].dtype
        result = tf.zeros(shape=(n_samples, 2), dtype=dtype)
        p3 = tf.zeros(n_samples, dtype=dtype)
        for agent in output_tensors:
            i_s = indices[agent]
            result += tf.slice(output_tensors[agent], [0, 0], result.shape)
            if "p3" in i_s:
                p3 += output_tensors[agent][:, i_s["p3"]]
        s1 = tf.reshape(result[:, 0], shape=(n_samples, 1))
        s2 = tf.reshape(tf.math.multiply(result[:, 1], p3), shape=(n_samples, 1))
        result = tf.concat([s1, s2], axis=1)
        return result
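
For orientation, a minimal usage sketch of the aggregate_pd API above. The Agent stand-in (a namedtuple with only a name field) is hypothetical; the real agent handles come from the surrounding codebase, and any hashable object with a .name attribute would do:

import collections

import pandas as pd

# Hypothetical stand-in for a real agent handle; the aggregators only rely on
# .name and dict hashability.
Agent = collections.namedtuple("Agent", ["name"])

person = Agent(name="Person")
# Two samples; DummyAggregator sums the first two columns across agents and
# scales the second output by the summed p3 column.
agent_outputs = {person: pd.DataFrame({"s1": [1.0, 2.0], "s2": [3.0, 4.0], "p3": [0.5, 0.5]})}

aggregator = DummyAggregator(output_names=["s1", "s2"])
print(aggregator.aggregate_pd(agent_outputs))  # 2x2 DataFrame of aggregated outputs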
132 changes: 132 additions & 0 deletions ml/src/main/python/bayesian_optimizer.py
@@ -0,0 +1,132 @@
import json
import os
import subprocess

import numpy as np
import sys
from bayes_opt import BayesianOptimization, UtilityFunction


def toJson(params):
    agentTypes = {
        "constants": set(),
        "variables": set()
    }
    for key in params:
        split = key.split("-")
        agentTypes[split[0]].add(split[1])
    return {
        type: {
            agentType: {
                key.split("-")[2]: params[key] for key in params if key.split("-")[0:2] == [type, agentType]
            } for agentType in agentTypes[type]
        } for type in agentTypes
    }
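

# Illustrative example of the flat-key convention handled above (made-up
# values, not taken from the real params.json):
#   toJson({"constants-Person-eduRate": 0.3, "variables-Person-capital": 5000.0})
#   == {"constants": {"Person": {"eduRate": 0.3}},
#       "variables": {"Person": {"capital": 5000.0}}}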


def black_box_function(stepSize, entry, **params):
    all_params.update(toJson(params))
    f = open(json_temp, "w")
    f.write(json.dumps(all_params))
    f.close()

    result = runCmd('sbt --warn "run evaluate {} {} {}"'.format(json_temp, stepSize, entry))
    target = -float(result.decode("utf-8")[:-1])
    print(target)
    return target


def runCmd(cmd):
    print("Run command:", cmd)
    process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    (result, error) = process.communicate()
    rc = process.wait()
    if rc != 0:
        print("Error: failed to execute command:", cmd)
        print(result.decode("utf-8"), error.decode("utf-8"))
        sys.exit(rc)
    return result


if __name__ == '__main__':
    if len(sys.argv) != 2:
        raise Exception("number of bo iterations required")

    os.chdir('../../..')  # going to the root of the project
    json_original = 'supplementary/params/params.json'
    json_optimize = "supplementary/params/optimize.json"
    json_temp = 'supplementary/params/temp.json'
    json_result = 'supplementary/params/bo-result.json'

    f = open(json_original, "r")
    all_params = json.loads(f.read())
    f.close()

    f = open(json_optimize, "r")
    params = json.loads(f.read())
    f.close()

    pbounds = {}
    constants = params["constants"]
    variables = params["variables"]
    for agentType in constants:
        append = {
            "number": (100, 200) if agentType == "Person" else (2, 10),
            **{gender: (0, 1) for gender in constants[agentType] if "gender" in gender.lower()},
            **{units: (1, 10) for units in constants[agentType] if units in ["buyMu", "buySigma", "consumeMu", "consumeSigma"]},
            **{buyFood: (0, 1) for buyFood in constants[agentType] if "buy" in buyFood.lower() and buyFood not in ["buyMu", "buySigma"]},
            **{consumeFood: (0, 1) for consumeFood in constants[agentType] if "consume" in consumeFood.lower()
               and consumeFood not in ["consumeMu", "consumeSigma"]},
            **{enjoyment: (0, 1000) for enjoyment in constants[agentType] if "enjoy" in enjoyment.lower()},
            **{edu: (0, 10) for edu in constants[agentType] if "edu" in edu.lower()},
            **{bonusSal: (0, 1000) for bonusSal in constants[agentType] if "bonussal" in bonusSal.lower()},
            **{active: (0, 1) for active in constants[agentType] if "active" in active.lower()},
            **{salary: (10 ** 4, 10 ** 6) for salary in constants[agentType] if "salary" in salary.lower()},
            **{iters: (0, 20) for iters in constants[agentType] if "iters" in iters.lower()},
            **{tactics: (0, 1) for tactics in constants[agentType] if "tactics" in tactics.lower()},
            **{plUnits: (1, 20) for plUnits in constants[agentType] if plUnits in ["required", "consumed", "produced", "time"]},
            **{units: (1, 1000) for units in ["units"] if agentType == "Landlord"},
            **{price: (10 ** 5, 10 ** 7) for price in ["price"] if agentType == "Landlord"}
        }
        pbounds = {**pbounds, **{("constants-{}-{}".format(agentType, key)): append[key] for key in append}}
    for agentType in variables:
        append = {
            **{capital: (0, 10000) for capital in variables[agentType] if "capital" in capital.lower()},
            **{value_destroyed: (0, 100) for value_destroyed in variables[agentType] if "value_destroyed" in value_destroyed.lower()},
            **{happiness: (0, 100) for happiness in variables[agentType] if "happiness" in happiness.lower()},
            **{salary: (10 ** 4, 10 ** 6) for salary in variables[agentType] if "salary" in salary.lower()}
        }
        pbounds = {**pbounds, **{("variables-{}-{}".format(agentType, key)): append[key] for key in append}}

    stepSizes = [20, 50, 100]
    sampleSize = 5
    nSteps = 1
    optimization_iters = int(sys.argv[1])
    params_result = {"stepSize-{}".format(stepSize): {"entry-{}".format(entry): {} for entry in range(sampleSize)} for stepSize in stepSizes}

    runCmd("sbt clean compile")
    for stepSize in stepSizes:
        runCmd('sbt "run generate {} {} {} {}"'.format(json_original, sampleSize, nSteps, stepSize))
        targets = np.empty(shape=sampleSize)
        for entry in range(sampleSize):
            optimizer = BayesianOptimization(None, pbounds, 10)
            utility = UtilityFunction(kind="ei", kappa=2.5, xi=0.0)
            for i in range(optimization_iters):
                print("stepSize:", stepSize, "entry:", entry, "iteration:", i)
                next_point = optimizer.suggest(utility)
                target = black_box_function(stepSize, entry, **next_point)
                optimizer.register(params=next_point, target=target)
            print()

            all_params.update(toJson(optimizer.max['params']))
            params_result["stepSize-{}".format(stepSize)]["entry-{}".format(entry)].update(all_params)
            targets[entry] = optimizer.max["target"]
            params_result["stepSize-{}".format(stepSize)]["entry-{}".format(entry)]["target"] = targets[entry]

            f = open(json_result, "w")
            f.write(json.dumps(params_result))
            f.close()
        params_result["stepSize-{}".format(stepSize)]["mean-target"] = targets.mean()
        f = open(json_result, "w")
        f.write(json.dumps(params_result))
        f.close()
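
The suggest/register pair above is bayes_opt's ask-tell pattern: the optimizer proposes the next point from its surrogate model, the caller evaluates the black box externally (here, an sbt run of the simulation), and the observation is registered back. A self-contained toy version of the same loop, with made-up bounds and objective, might look like this:

from bayes_opt import BayesianOptimization, UtilityFunction

def toy_target(x, y):
    # Made-up black box with its maximum at (2, -1).
    return -((x - 2) ** 2 + (y + 1) ** 2)

optimizer = BayesianOptimization(f=None, pbounds={"x": (-5, 5), "y": (-5, 5)}, random_state=10)
utility = UtilityFunction(kind="ei", kappa=2.5, xi=0.0)
for _ in range(20):
    next_point = optimizer.suggest(utility)                # ask: where to probe next
    target = toy_target(**next_point)                      # evaluate the black box
    optimizer.register(params=next_point, target=target)   # tell: record the observation
print(optimizer.max)  # best params/target found so far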