diff --git a/ChangeLog.md b/ChangeLog.md index b6e1f127..be66ab2a 100644 --- a/ChangeLog.md +++ b/ChangeLog.md @@ -5,6 +5,7 @@ Starting with v1.31.6, this file will contain a record of major features and upd ## Upcoming - Added `--explain-type` option to `%%gremlin` ([Link to PR](https://github.com/aws/graph-notebook/pull/503)) - Fixed kernel crashing with ZMQ errors on magic execution ([Link to PR](https://github.com/aws/graph-notebook/pull/517)) +- Added Memgraph as an additional graph database and the supply chain analysis notebook ([Link to PR](https://github.com/aws/graph-notebook/pull/522)) ## Release 3.8.2 (June 5, 2023) - New Sample Applications - Healthcare and Life Sciences notebooks ([Link to PR](https://github.com/aws/graph-notebook/pull/484)) diff --git a/README.md b/README.md index 4418d613..4c88e8c5 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,7 @@ Instructions for connecting to the following graph databases: | [Blazegraph](#blazegraph) | RDF | SPARQL | |[Amazon Neptune](#amazon-neptune)| property graph or RDF | Gremlin or SPARQL | | [Neo4J](#neo4j) | property graph | Cypher | +| [Memgraph](#memgraph) | property graph | Cypher | We encourage others to contribute configurations they find useful. There is an [`additional-databases`](https://github.com/aws/graph-notebook/blob/main/additional-databases) folder where more information can be found. 
@@ -192,6 +193,7 @@ Configuration options can be set using the `%graph_notebook_config` magic comman | sparql | SPARQL connection object | ``` { "path": "sparql" } ``` | string | | gremlin | Gremlin connection object | ``` { "username": "", "password": "", "traversal_source": "g", "message_serializer": "graphsonv3" } ```| string | | neo4j | Neo4J connection object |``` { "username": "neo4j", "password": "password", "auth": true, "database": null } ``` | string | +| memgraph | Memgraph connection object |``` { "username": "", "password": "", "auth": false, "database": "memgraph" } ``` | string | ### Gremlin Server @@ -345,6 +347,31 @@ Ensure that you also specify the `%%oc bolt` option when submitting queries to t To setup a new local Neo4J Desktop database for use with the graph notebook, check out the [Neo4J Desktop User Interface Guide](https://neo4j.com/developer/neo4j-desktop/). +### Memgraph + +Change the configuration using `%%graph_notebook_config` and modify the fields for `host` and `port`, `ssl`. + +After local setup of Memgraph is complete, set the following configuration to connect from graph-notebook: + +``` +%%graph_notebook_config +{ + "host": "localhost", + "port": 7687, + "ssl": false +} +``` + +Ensure that you specify the `%%oc bolt` option when submitting queries to the Bolt endpoint. For example, a correct way of running a Cypher query via Bolt protocol is: + +``` +%%oc bolt +MATCH (n) +RETURN count(n) +``` + +For more details on how to run Memgraph, refer to its [notebook guide](./additional-databases/memgraph/README.md). 
+ ## Building From Source A pre-release distribution can be built from the graph-notebook repository via the following steps: diff --git a/additional-databases/memgraph/README.md b/additional-databases/memgraph/README.md new file mode 100644 index 00000000..9556a02c --- /dev/null +++ b/additional-databases/memgraph/README.md @@ -0,0 +1,49 @@ +## Connecting graph notebook to Memgraph Bolt Endpoint + +[Memgraph](https://memgraph.com/) is an open-source in-memory graph database built for highly performant and advanced analytical insights. Memgraph is Neo4J Bolt protocol compatible and it uses the standardized Cypher query language. + +For a quick start, run the following command in your terminal to start Memgraph Platform in a Docker container: + +``` +docker run -it -p 7687:7687 -p 7444:7444 -p 3000:3000 -e MEMGRAPH="--bolt-server-name-for-init=Neo4j/" memgraph/memgraph-platform +``` + +The above command started Memgraph database, MAGE (graph algorithms library) and Memgraph Lab (visual user interface). For additional instructions on setting up and running Memgraph locally, refer to the [Memgraph documentation](https://memgraph.com/docs/memgraph/installation). Connection to the graph notebook works if the `--bolt-server-name-for-init` setting is modified. For more information on changing configuration settings, refer to our [how-to guide](https://memgraph.com/docs/memgraph/how-to-guides/config-logs). + + +After local setup of Memgraph is complete, set the following configuration to connect from graph-notebook: + +``` +%%graph_notebook_config +{ + "host": "localhost", + "port": 7687, + "ssl": false +} +``` + +If you set up an authentication on your Memgraph instance, you can provide login details via configuration. 
For example, if you created user `username` identified by `password`, then the following configuration is the correct one: + +%%graph_notebook_config +{ + "host": "localhost", + "port": 7687, + "ssl": false, + "memgraph": { + "username": "username", + "password": "password", + "auth": true + } +} + +To learn how to manage users in Memgraph, refer to [Memgraph documentation](https://memgraph.com/docs/memgraph/reference-guide/users). + +You can query Memgraph via Bolt protocol which was designed for efficient communication with graph databases. Memgraph supports versions 1 and 4 of the protocol. Ensure that you specify the `%%oc bolt` option when submitting queries to the Bolt endpoint. For example, a correct way of running a Cypher query via Bolt protocol is: + +``` +%%oc bolt +MATCH (n) +RETURN count(n) +``` + +Another way of ensuring that Memgraph is running, head to `localhost:3000` and check out Memgraph Lab, a visual user interface. You can see node and relationship count there, explore, query and visualize data. If you get stuck and have more questions, [let's talk at Memgraph Discord community](https://www.discord.gg/memgraph). 
diff --git a/src/graph_notebook/configuration/generate_config.py b/src/graph_notebook/configuration/generate_config.py index d8720952..0f527f7c 100644 --- a/src/graph_notebook/configuration/generate_config.py +++ b/src/graph_notebook/configuration/generate_config.py @@ -10,6 +10,7 @@ from graph_notebook.neptune.client import SPARQL_ACTION, DEFAULT_PORT, DEFAULT_REGION, DEFAULT_GREMLIN_SERIALIZER, \ DEFAULT_GREMLIN_TRAVERSAL_SOURCE, DEFAULT_NEO4J_USERNAME, DEFAULT_NEO4J_PASSWORD, DEFAULT_NEO4J_DATABASE, \ + DEFAULT_MEMGRAPH_USERNAME, DEFAULT_MEMGRAPH_PASSWORD, DEFAULT_MEMGRAPH_DATABASE, \ NEPTUNE_CONFIG_HOST_IDENTIFIERS, is_allowed_neptune_host, false_str_variants, \ GRAPHSONV3_VARIANTS, GRAPHSONV2_VARIANTS, GRAPHBINARYV1_VARIANTS @@ -115,6 +116,34 @@ def to_dict(self): return self.__dict__ +class MemgraphSection(object): + """ + Used for Memgraph-specific settings in a notebook's configuration + """ + + def __init__(self, username: str = "", password: str = "", auth: bool = False, database: str = ""): + """ + :param username: login user for the Memgraph endpoint + :param password: login password for the Memgraph endpoint + :param auth: authentication switch for the Memgraph endpoint + :param database: database used at Memgraph endpoint + """ + + if username == "": + username = DEFAULT_MEMGRAPH_USERNAME + if password == "": + password = DEFAULT_MEMGRAPH_PASSWORD + if database == "": + database = DEFAULT_MEMGRAPH_DATABASE + + self.username = username + self.password = password + self.auth = True if auth in [True, "True", "true", "TRUE"] else False + self.database = database + + def to_dict(self): + return self.__dict__ + class Configuration(object): def __init__(self, host: str, port: int, auth_mode: AuthModeEnum = DEFAULT_AUTH_MODE, @@ -122,6 +151,7 @@ def __init__(self, host: str, port: int, proxy_host: str = '', proxy_port: int = DEFAULT_PORT, sparql_section: SparqlSection = None, gremlin_section: GremlinSection = None, neo4j_section: Neo4JSection = None, + 
memgraph_section: MemgraphSection = None, neptune_hosts: list = NEPTUNE_CONFIG_HOST_IDENTIFIERS): self._host = host.strip() self.port = port @@ -140,10 +170,12 @@ def __init__(self, host: str, port: int, self.aws_region = aws_region self.gremlin = GremlinSection() self.neo4j = Neo4JSection() + self.memgraph = MemgraphSection() else: self.is_neptune_config = False self.gremlin = gremlin_section if gremlin_section is not None else GremlinSection() self.neo4j = neo4j_section if neo4j_section is not None else Neo4JSection() + self.memgraph = memgraph_section if memgraph_section is not None else MemgraphSection() @property def host(self): @@ -175,7 +207,8 @@ def to_dict(self) -> dict: 'aws_region': self.aws_region, 'sparql': self.sparql.to_dict(), 'gremlin': self.gremlin.to_dict(), - 'neo4j': self.neo4j.to_dict() + 'neo4j': self.neo4j.to_dict(), + 'memgraph': self.memgraph.to_dict() } else: return { @@ -187,7 +220,8 @@ def to_dict(self) -> dict: 'ssl_verify': self.ssl_verify, 'sparql': self.sparql.to_dict(), 'gremlin': self.gremlin.to_dict(), - 'neo4j': self.neo4j.to_dict() + 'neo4j': self.neo4j.to_dict(), + 'memgraph': self.memgraph.to_dict() } def write_to_file(self, file_path=DEFAULT_CONFIG_LOCATION): @@ -202,11 +236,11 @@ def generate_config(host, port, auth_mode: AuthModeEnum = AuthModeEnum.DEFAULT, ssl_verify: bool = True, load_from_s3_arn='', aws_region: str = DEFAULT_REGION, proxy_host: str = '', proxy_port: int = DEFAULT_PORT, sparql_section: SparqlSection = SparqlSection(), gremlin_section: GremlinSection = GremlinSection(), - neo4j_section=Neo4JSection(), neptune_hosts: list = NEPTUNE_CONFIG_HOST_IDENTIFIERS): + neo4j_section=Neo4JSection(), memgraph_section=MemgraphSection(), neptune_hosts: list = NEPTUNE_CONFIG_HOST_IDENTIFIERS): use_ssl = False if ssl in false_str_variants else True verify_ssl = False if ssl_verify in false_str_variants else True c = Configuration(host, port, auth_mode, load_from_s3_arn, use_ssl, verify_ssl, aws_region, proxy_host, 
proxy_port, - sparql_section, gremlin_section, neo4j_section, neptune_hosts) + sparql_section, gremlin_section, neo4j_section, memgraph_section, neptune_hosts) return c @@ -256,6 +290,14 @@ def generate_default_config(): default=True) parser.add_argument("--neo4j_database", help="the name of the database to use for Neo4J", default=DEFAULT_NEO4J_DATABASE) + parser.add_argument("--memgraph_username", help="the username to use for Memgraph connections", + default=DEFAULT_MEMGRAPH_USERNAME) + parser.add_argument("--memgraph_password", help="the password to use for Memgraph connections", + default=DEFAULT_MEMGRAPH_PASSWORD) + parser.add_argument("--memgraph_auth", help="whether to use auth for Memgraph connections or not [True|False]", + default=False) + parser.add_argument("--memgraph_database", help="the name of the database to use for Memgraph", + default=DEFAULT_MEMGRAPH_DATABASE) args = parser.parse_args() auth_mode_arg = args.auth_mode if args.auth_mode != '' else AuthModeEnum.DEFAULT.value @@ -266,6 +308,8 @@ def generate_default_config(): args.gremlin_password, args.gremlin_serializer), Neo4JSection(args.neo4j_username, args.neo4j_password, args.neo4j_auth, args.neo4j_database), + MemgraphSection(args.memgraph_username, args.memgraph_password, + args.memgraph_auth, args.memgraph_database), args.neptune_hosts) config.write_to_file(args.config_destination) diff --git a/src/graph_notebook/configuration/get_config.py b/src/graph_notebook/configuration/get_config.py index 5a8dad1e..e01f23eb 100644 --- a/src/graph_notebook/configuration/get_config.py +++ b/src/graph_notebook/configuration/get_config.py @@ -6,9 +6,10 @@ import json from graph_notebook.configuration.generate_config import DEFAULT_CONFIG_LOCATION, Configuration, AuthModeEnum, \ - SparqlSection, GremlinSection, Neo4JSection + SparqlSection, GremlinSection, Neo4JSection, MemgraphSection from graph_notebook.neptune.client import NEPTUNE_CONFIG_HOST_IDENTIFIERS, is_allowed_neptune_host, false_str_variants, \
- DEFAULT_NEO4J_USERNAME, DEFAULT_NEO4J_PASSWORD, DEFAULT_NEO4J_DATABASE + DEFAULT_NEO4J_USERNAME, DEFAULT_NEO4J_PASSWORD, DEFAULT_NEO4J_DATABASE, DEFAULT_MEMGRAPH_USERNAME, DEFAULT_MEMGRAPH_PASSWORD, \ + DEFAULT_MEMGRAPH_DATABASE neptune_params = ['auth_mode', 'load_from_s3_arn', 'aws_region'] @@ -21,6 +22,7 @@ def get_config_from_dict(data: dict, neptune_hosts: list = NEPTUNE_CONFIG_HOST_I sparql_section = SparqlSection(**data['sparql']) if 'sparql' in data else SparqlSection('') gremlin_section = GremlinSection(**data['gremlin']) if 'gremlin' in data else GremlinSection() neo4j_section = Neo4JSection(**data['neo4j']) if 'neo4j' in data else Neo4JSection('', '', True, '') + memgraph_section = (MemgraphSection(**data["memgraph"]) if "memgraph" in data else MemgraphSection("", "", False, "")) proxy_host = str(data['proxy_host']) if 'proxy_host' in data else '' proxy_port = int(data['proxy_port']) if 'proxy_port' in data else 8182 @@ -34,10 +36,19 @@ def get_config_from_dict(data: dict, neptune_hosts: list = NEPTUNE_CONFIG_HOST_I print('Ignoring Neo4J custom authentication, Amazon Neptune does not support this functionality.\n') if neo4j_section.to_dict()['database'] != DEFAULT_NEO4J_DATABASE: print('Ignoring Neo4J custom database, Amazon Neptune does not support multiple databases.\n') + if memgraph_section.to_dict()["username"] != DEFAULT_MEMGRAPH_USERNAME \ + or memgraph_section.to_dict()["password"] != DEFAULT_MEMGRAPH_PASSWORD: + print( + "Ignoring Memgraph custom authentication, Amazon Neptune does not support this functionality.\n" + ) + if memgraph_section.to_dict()["database"] != DEFAULT_MEMGRAPH_DATABASE: + print( + "Ignoring Memgraph custom database, Amazon Neptune does not support multiple databases.\n" + ) config = Configuration(host=data['host'], port=data['port'], auth_mode=AuthModeEnum(data['auth_mode']), ssl=data['ssl'], ssl_verify=ssl_verify, load_from_s3_arn=data['load_from_s3_arn'], aws_region=data['aws_region'], sparql_section=sparql_section, - 
gremlin_section=gremlin_section, neo4j_section=neo4j_section, + gremlin_section=gremlin_section, neo4j_section=neo4j_section, memgraph_section=memgraph_section, proxy_host=proxy_host, proxy_port=proxy_port, neptune_hosts=neptune_hosts) else: excluded_params = [] @@ -50,7 +61,7 @@ def get_config_from_dict(data: dict, neptune_hosts: list = NEPTUNE_CONFIG_HOST_I config = Configuration(host=data['host'], port=data['port'], ssl=data['ssl'], ssl_verify=ssl_verify, sparql_section=sparql_section, gremlin_section=gremlin_section, neo4j_section=neo4j_section, - proxy_host=proxy_host, proxy_port=proxy_port) + memgraph_section=memgraph_section, proxy_host=proxy_host, proxy_port=proxy_port) return config diff --git a/src/graph_notebook/magics/graph_magic.py b/src/graph_notebook/magics/graph_magic.py index 1eb3d90f..976bd497 100644 --- a/src/graph_notebook/magics/graph_magic.py +++ b/src/graph_notebook/magics/graph_magic.py @@ -320,7 +320,9 @@ def _generate_client_from_config(self, config: Configuration): .with_gremlin_login(config.gremlin.username, config.gremlin.password) \ .with_gremlin_serializer(config.gremlin.message_serializer) \ .with_neo4j_login(config.neo4j.username, config.neo4j.password, config.neo4j.auth, - config.neo4j.database) + config.neo4j.database) \ + .with_memgraph_login(config.memgraph.username, config.memgraph.password, config.memgraph.auth, + config.memgraph.database) self.client = builder.build() diff --git a/src/graph_notebook/neptune/client.py b/src/graph_notebook/neptune/client.py index b4e868c6..b4239beb 100644 --- a/src/graph_notebook/neptune/client.py +++ b/src/graph_notebook/neptune/client.py @@ -37,6 +37,9 @@ DEFAULT_NEO4J_USERNAME = 'neo4j' DEFAULT_NEO4J_PASSWORD = 'password' DEFAULT_NEO4J_DATABASE = DEFAULT_DATABASE +DEFAULT_MEMGRAPH_USERNAME = "" +DEFAULT_MEMGRAPH_PASSWORD = "" +DEFAULT_MEMGRAPH_DATABASE = "memgraph" NEPTUNE_SERVICE_NAME = 'neptune-db' logger = logging.getLogger('client') @@ -143,6 +146,8 @@ def __init__(self, host: str, port: 
int = DEFAULT_PORT, ssl: bool = True, ssl_ve gremlin_serializer: str = DEFAULT_GREMLIN_SERIALIZER, neo4j_username: str = DEFAULT_NEO4J_USERNAME, neo4j_password: str = DEFAULT_NEO4J_PASSWORD, neo4j_auth: bool = True, neo4j_database: str = DEFAULT_NEO4J_DATABASE, + memgraph_username: str = DEFAULT_MEMGRAPH_USERNAME, memgraph_password: str = DEFAULT_MEMGRAPH_PASSWORD, + memgraph_auth: bool = False, memgraph_database: str = DEFAULT_MEMGRAPH_DATABASE, auth=None, session: Session = None, proxy_host: str = '', proxy_port: int = DEFAULT_PORT, neptune_hosts: list = None): @@ -161,6 +166,10 @@ def __init__(self, host: str, port: int = DEFAULT_PORT, ssl: bool = True, ssl_ve self.neo4j_password = neo4j_password self.neo4j_auth = neo4j_auth self.neo4j_database = neo4j_database + self.memgraph_username = memgraph_username + self.memgraph_password = memgraph_password + self.memgraph_auth = memgraph_auth + self.memgraph_database = memgraph_database self.region = region self._auth = auth self._session = session @@ -370,6 +379,7 @@ def opencypher_http(self, query: str, headers: dict = None, explain: str = None, res = self._http_session.send(req, verify=self.ssl_verify) return res + # TODO Check this for Memgraph + typo on Cypher def opencyper_bolt(self, query: str, **kwargs): driver = self.get_opencypher_driver() with driver.session(database=self.neo4j_database) as session: @@ -417,11 +427,13 @@ def get_opencypher_driver(self): password = DEFAULT_NEO4J_PASSWORD auth_final = (user, password) else: - if self.neo4j_auth: + # user changed default Memgraph auth to True + if self.memgraph_auth: + auth_final = (self.memgraph_username, self.memgraph_password) + elif self.neo4j_auth: auth_final = (self.neo4j_username, self.neo4j_password) else: auth_final = None - driver = GraphDatabase.driver(url, auth=auth_final, encrypted=self.ssl) return driver @@ -865,6 +877,13 @@ def with_neo4j_login(self, username: str, password: str, auth: bool, database: s self.args['neo4j_database'] = database 
return ClientBuilder(self.args) + def with_memgraph_login(self, username: str, password: str, auth: bool, database: str): + self.args["memgraph_username"] = username + self.args["memgraph_password"] = password + self.args["memgraph_auth"] = auth + self.args["memgraph_database"] = database + return ClientBuilder(self.args) + def with_tls(self, tls: bool): self.args['ssl'] = tls return ClientBuilder(self.args) diff --git a/src/graph_notebook/notebooks/01-Getting-Started/06-Supply-Chain-Analysis-with-Memgraph.ipynb b/src/graph_notebook/notebooks/01-Getting-Started/06-Supply-Chain-Analysis-with-Memgraph.ipynb new file mode 100644 index 00000000..93146621 --- /dev/null +++ b/src/graph_notebook/notebooks/01-Getting-Started/06-Supply-Chain-Analysis-with-Memgraph.ipynb @@ -0,0 +1,462 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Supply Chain Analysis with Memgraph\n", + "\n", + "## Table of contents \n", + "1. [Introduction](#introduction)\n", + "2. [Connect to Memgraph](#connect-to-memgraph)\n", + "3. [Create the dataset](#create-the-dataset)\n", + "4. [Supply Chain Analysis](#supply-chain-analysis)\n", + " - [Acquiring critical hubs in the network with betweenness centrality](#critical-hubs)\n", + " - [Get ingredients provided by the supplier](#get-ingredients)\n", + " - [Pathfinding for necessary ingredients](#pathfinding)\n", + " - [Checking dependencies of the product with ancestors](#ancestors)\n", + " - [Ancestors graph](#ancestors-graph)\n", + " - [Checking possible products for production with descendants](#descendants)\n", + " - [Descendants graph](#descendants-graph)\n", + " - [Getting the order of execution with topological sort](#topological-sort)\n", + "5. [Conclusion](#conclusion)\n", + "\n", + "## 1. Introduction\n", + "\n", + "In supply chain management, a network of process steps is drawn to minimize product delivery time from production to shipping. 
Up to this day, optimizations in process steps are mostly carried out by staff members, who can be prone to errors and under-optimized solutions. Moreover, it takes them a reasonable amount of time to design an optimal schedule when they could have been utilized for processes requiring more expertise and knowledge with an automated process scheduling the supply chain.\n", + "\n", + "In this notebook, you'll learn how to start Memgraph, connect to it and run Cypher queries to explore the supply chain and learn more about the power of graphs in that domain. \n", + "\n", + "## 2. Connect to Memgraph\n", + "\n", + "[Memgraph](https://memgraph.com/) is an open-source in-memory graph database built for highly performant and advanced analytical insights. Memgraph is Neo4j Bolt protocol compatible and uses the standardized Cypher query language. \n", + "\n", + "For a quick start, run the following command in your terminal to start the Memgraph Platform in a Docker container: \n", + "\n", + "```\n", + "docker run -it -p 7687:7687 -p 7444:7444 -p 3000:3000 --name memgraph memgraph/memgraph-platform\n", + "```\n", + "\n", + "The above command starts the Memgraph database, MAGE (graph algorithms library) and Memgraph Lab (visual user interface). For additional instructions on setting up and running Memgraph locally, refer to the [Memgraph documentation](https://memgraph.com/docs/memgraph/installation). \n", + "\n", + "For Memgraph < 2.11, in order for the Neo4j driver to work, you need to [modify the configuration setting](https://memgraph.com/docs/configuration/configuration-settings) `--bolt-server-name-for-init`. When running Memgraph, set `--bolt-server-name-for-init=Neo4j/5.2.0`.
If you use another version of the Neo4j driver, make sure to put the appropriate version number.\n", + "\n", + "After the local setup of Memgraph is complete, set the following configuration to connect from the Graph Notebook:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true, + "tags": [] + }, + "outputs": [], + "source": [ + "%%graph_notebook_config\n", + "{\n", + " \"host\": \"localhost\",\n", + " \"port\": 7687,\n", + " \"ssl\": false\n", + "}\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Test the connection" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%oc bolt\n", + "MATCH (n)\n", + "RETURN count(n);\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Create the dataset\n", + "\n", + "You can query Memgraph via Bolt protocol designed for efficient communication with graph databases. Memgraph supports versions 1.0, 4.0, 4.1, 4.3, 5.2 of the protocol. Specify the `%%oc bolt` option when submitting queries to the Bolt endpoint.\n", + "\n", + "Before we analyze the dataset, we have to import it. The easiest way to do that with the `graph-notebook` is to run `CREATE` Cypher queries. Once you run the code cell below, the Memgraph database will be populated with a supply chain dataset. 
" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "%%oc bolt\n", + "CREATE (sup1:Supplier {id: 1, name: \"Supplissimus\"})\n", + "CREATE (sup2:Supplier {id: 2, name: \"Supplionis\"})\n", + "CREATE (sup3:Supplier {id: 3, name: \"MegaSupplies\"})\n", + "CREATE (sup4:Supplier {id: 4, name: \"Supplies4you\"})\n", + "CREATE (ing1:Ingredient {id: 1, name: \"Ingredient 1\"})\n", + "CREATE (ing2:Ingredient {id: 2, name: \"Ingredient 2\"})\n", + "CREATE (ing3:Ingredient {id: 3, name: \"Ingredient 3\"})\n", + "CREATE (ing4:Ingredient {id: 4, name: \"Ingredient 4\"})\n", + "CREATE (ing5:Ingredient {id: 5, name: \"Ingredient 5\"})\n", + "CREATE (ing6:Ingredient {id: 6, name: \"Ingredient 6\"})\n", + "CREATE (ing7:Ingredient {id: 7, name: \"Ingredient 7\"})\n", + "CREATE (ing8:Ingredient {id: 8, name: \"Ingredient 8\"})\n", + "CREATE (ing9:Ingredient {id: 9, name: \"Ingredient 9\"})\n", + "CREATE (ing10:Ingredient {id: 10, name: \"Ingredient 10\"})\n", + "CREATE (pro1:Product {id: 1, name: \"Intermediate product 1\"})\n", + "CREATE (pro2:Product {id: 2, name: \"Intermediate product 2\"})\n", + "CREATE (pro3:Product {id: 3, name: \"Intermediate product 3\"})\n", + "CREATE (pro4:Product {id: 4, name: \"Intermediate product 4\"})\n", + "CREATE (pro5:Product {id: 5, name: \"Intermediate product 5\"})\n", + "CREATE (pro6:FinalProduct:Product {id: 6, name: \"Final product 1\"})\n", + "CREATE (pro7:FinalProduct:Product {id: 7, name: \"Final product 2\"})\n", + "CREATE (pro8:FinalProduct:Product {id: 8, name: \"Final product 3\"})\n", + "CREATE (shi1:Shipping {id: 1, name: \"Shipping point 1\"})\n", + "CREATE (shi2:Shipping {id: 2, name: \"Shipping point 2\"})\n", + "CREATE (rec1:Recipe {id: 1, name: \"Recipe for product 1\"})\n", + "CREATE (rec2:Recipe {id: 2, name: \"Recipe for product 2\"})\n", + "CREATE (rec3:Recipe {id: 3, name: \"Recipe for product 3\"})\n", + "CREATE (rec4:Recipe {id: 4, name: 
\"Recipe for product 4\"})\n", + "CREATE (rec5:Recipe {id: 5, name: \"Recipe for product 5\"})\n", + "CREATE (rec6:Recipe {id: 6, name: \"Recipe for final product 1\"})\n", + "CREATE (rec7:Recipe {id: 7, name: \"Recipe for final product 2\"})\n", + "CREATE (rec8:Recipe {id: 8, name: \"Recipe for final product 3 - variant 1\"})\n", + "CREATE (rec9:Recipe {id: 9, name: \"Recipe for final product 3 - variant 2\"})\n", + "CREATE (rec10:Recipe {id: 10, name: \"Recipe for final product 3 - variant 3\"})\n", + "CREATE (sup1)-[:SUPPLIES]->(ing1)\n", + "CREATE (sup1)-[:SUPPLIES]->(ing2)\n", + "CREATE (sup1)-[:SUPPLIES]->(ing3)\n", + "CREATE (sup1)-[:SUPPLIES]->(ing4)\n", + "CREATE (sup2)-[:SUPPLIES]->(ing5)\n", + "CREATE (sup2)-[:SUPPLIES]->(ing6)\n", + "CREATE (sup2)-[:SUPPLIES]->(ing7)\n", + "CREATE (sup3)-[:SUPPLIES]->(ing8)\n", + "CREATE (sup3)-[:SUPPLIES]->(ing9)\n", + "CREATE (sup4)-[:SUPPLIES]->(ing10)\n", + "CREATE (pro1)-[:FORMS {quantity: 15}]->(rec6)\n", + "CREATE (pro2)-[:FORMS {quantity: 25}]->(rec6)\n", + "CREATE (pro2)-[:FORMS {quantity: 65}]->(rec7)\n", + "CREATE (pro2)-[:FORMS {quantity: 100}]->(rec9)\n", + "CREATE (pro3)-[:FORMS {quantity: 35}]->(rec6)\n", + "CREATE (pro3)-[:FORMS {quantity: 120}]->(rec7)\n", + "CREATE (pro4)-[:FORMS {quantity: 130}]->(rec7)\n", + "CREATE (pro4)-[:FORMS {quantity: 140}]->(rec8)\n", + "CREATE (pro5)-[:FORMS {quantity: 85}]->(rec8)\n", + "CREATE (ing1)-[:FORMS {quantity: 30}]->(rec1)\n", + "CREATE (ing2)-[:FORMS {quantity: 50}]->(rec1)\n", + "CREATE (ing2)-[:FORMS {quantity: 100}]->(rec2)\n", + "CREATE (ing2)-[:FORMS {quantity: 50}]->(rec10)\n", + "CREATE (ing3)-[:FORMS {quantity: 80}]->(rec1)\n", + "CREATE (ing3)-[:FORMS {quantity: 200}]->(rec2)\n", + "CREATE (ing4)-[:FORMS {quantity: 150}]->(rec2)\n", + "CREATE (ing4)-[:FORMS {quantity: 70}]->(rec10)\n", + "CREATE (ing5)-[:FORMS {quantity: 10}]->(rec3)\n", + "CREATE (ing6)-[:FORMS {quantity: 90}]->(rec3)\n", + "CREATE (ing7)-[:FORMS {quantity: 100}]->(rec3)\n", + "CREATE 
(ing8)-[:FORMS {quantity: 200}]->(rec3)\n", + "CREATE (ing9)-[:FORMS {quantity: 300}]->(rec4)\n", + "CREATE (ing9)-[:FORMS {quantity: 80}]->(rec5)\n", + "CREATE (ing10)-[:FORMS {quantity: 120}]->(rec4)\n", + "CREATE (ing10)-[:FORMS {quantity: 5}]->(rec5)\n", + "CREATE (ing10)-[:FORMS {quantity: 100}]->(rec9)\n", + "CREATE (pro6)-[:SHIPS_WITH]->(shi1)\n", + "CREATE (pro7)-[:SHIPS_WITH]->(shi1)\n", + "CREATE (pro8)-[:SHIPS_WITH]->(shi2)\n", + "CREATE (rec1)-[:PRODUCES {quantity: 1}]->(pro1)\n", + "CREATE (rec2)-[:PRODUCES {quantity: 1}]->(pro2)\n", + "CREATE (rec3)-[:PRODUCES {quantity: 1}]->(pro3)\n", + "CREATE (rec4)-[:PRODUCES {quantity: 1}]->(pro4)\n", + "CREATE (rec5)-[:PRODUCES {quantity: 1}]->(pro5)\n", + "CREATE (rec6)-[:PRODUCES {quantity: 1}]->(pro6)\n", + "CREATE (rec7)-[:PRODUCES {quantity: 1}]->(pro7)\n", + "CREATE (rec8)-[:PRODUCES {quantity: 1}]->(pro8)\n", + "CREATE (rec9)-[:PRODUCES {quantity: 1}]->(pro8)\n", + "CREATE (rec10)-[:PRODUCES {quantity: 1}]->(pro8)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To ensure the data is stored in Memgraph, head to `localhost:3000` and check out Memgraph Lab, a visual user interface. You can see node and relationship count there, explore, query and visualize data. Besides that, you can head over to the Graph Schema tab to check if the imported data is appropriately modeled.\n", + "\n", + "\"drawing\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Great! The data is imported into Memgraph, and we can start analyzing it!\n", + "\n", + "## 4. Supply Chain Analysis\n", + "\n", + "### Acquiring critical hubs in the network with betweenness centrality\n", + "\n", + "If, at some point, a critical path of the pipeline fails, it could mean that some products won't get constructed. Some pipeline failures don't affect as many products and don't need much attention fixing (if the priority isn't high). Some, on the other hand, need immediate attention. 
\n", + "\n", + "An algorithm like *betweenness centrality* does just that. It detects hubs on the network based on the number of paths that cross a node from all the pairs of nodes in the graph. \n", + "\n", + "By running the query below, we can see that some Intermediate products, if missing, could result in having all of the final products not produced, which is a massive error in the pipeline, and needs extra care to prevent that from happening (by having some alternative measures, additional monitoring of intermediate product production, etc.).\n", + "\n", + "Memgraph's support of betweenness centrality is done through the **betweenness_centrality_online.set()** method, which also works in streaming examples." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true, + "tags": [] + }, + "outputs": [], + "source": [ + "%%oc bolt\n", + "CALL betweenness_centrality.get() YIELD betweenness_centrality, node\n", + "SET node.centrality = betweenness_centrality;\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "%%oc bolt\n", + "MATCH (n:Ingredient)-[r]->(m)\n", + "RETURN n, r, m\n", + "ORDER BY n.centrality DESC;\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Get ingredients provided by the supplier\n", + "\n", + "Since a graph database can be the ultimate source of truth between different data sources, it makes sense if all the information about our suppliers is stored in Memgraph.\n", + "\n", + "From there, we can query, for example, which ingredients are supplied by the supplier *Supplissimus*." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "%%oc bolt\n", + "MATCH (s:Supplier {name:\"Supplissimus\"})-[r:SUPPLIES]->(i:Ingredient)\n", + "RETURN i;\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Pathfinding for necessary ingredients\n", + "\n", + "We have seen a 1-hop query, which is essentially looking for the nearest neighbors in the network.\n", + "\n", + "Memgraph supports graph traversals, e.g., **Breadth-first search (BFS)**. With it, we can see which ingredients are used to form the product with the ID of 6." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "%%oc bolt\n", + "MATCH p=(i:Ingredient)-[*BFS]->(f:FinalProduct {id:6})\n", + "RETURN p\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Checking dependencies of the product with ancestors\n", + "\n", + "But traversals are not only a part of graph databases, as whole graph algorithms can be exploited on graph storage like Memgraph. \n", + "\n", + "This query determines what happens before the **:FinalProduct** with the ID 6 gets produced. It is done using the **graph_util.ancestors** procedure, which captures all the nodes from which a path to the destination node (FinalProduct) exists. " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "%%oc bolt\n", + "MATCH (f:FinalProduct {id:6})\n", + "CALL graph_util.ancestors(f) YIELD ancestors\n", + "UNWIND ancestors AS ancestor\n", + "RETURN ancestor;\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Ancestors graph\n", + "\n", + "The previous procedure has yielded us all the precedent nodes, but it only means a little since we don't know how they are connected. 
\n", + "\n", + "To connect the nodes, we can use another MAGE extension procedure called **graph_util.connect_nodes**, which will connect the nodes with corresponding relationships between them." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "%%oc bolt\n", + "MATCH (f:FinalProduct {id:6})\n", + "CALL graph_util.ancestors(f) YIELD ancestors\n", + "WITH ancestors + [f] AS nodes\n", + "CALL graph_util.connect_nodes(nodes) YIELD connections\n", + "UNWIND nodes + connections AS graph\n", + "RETURN graph;\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Checking possible products for production with descendants\n", + "\n", + "We might look at the pipeline from the other direction. From the supplier's view, we can see how many products or operations in the pipeline are affected by him. In case he is unavailable, this information could be helpful to minimize the risk.\n", + "\n", + "Just as with ancestors, we use the procedure **graph_util.descendants**, which yields all the nodes to which a path exists from the source node (supplier *Supplissimus* in this case)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true, + "tags": [] + }, + "outputs": [], + "source": [ + "%%oc bolt\n", + "MATCH (s:Supplier {name: \"Supplissimus\"})\n", + "CALL graph_util.descendants(s) YIELD descendants\n", + "UNWIND descendants AS descendant\n", + "RETURN descendant;\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Descendants graph\n", + "\n", + "We do the same as before and connect the nodes with the **graph_util.connect_nodes** procedure." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true, + "tags": [] + }, + "outputs": [], + "source": [ + "%%oc bolt\n", + "MATCH (s:Supplier {name: \"Supplissimus\"})\n", + "CALL graph_util.descendants(s) YIELD descendants\n", + "WITH descendants + [s] AS nodes\n", + "CALL graph_util.connect_nodes(nodes) YIELD connections\n", + "UNWIND nodes + connections AS graph\n", + "RETURN graph;\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Getting the order of execution with topological sort\n", + "\n", + "There are cases when some operations can't start before others finish, which causes problems because it blocks the pipeline until the process or a job with no dependencies or bottlenecks finishes. Then, some jobs are released and resolved of their dependencies, and they can start executing again. \n", + "\n", + "In graph theory, that's precisely what topological sort does. It sorts the nodes to yield the ones (jobs, operations, or products) that get executed or produced first, followed by those that can start after the previous ones have started.\n", + "\n", + "For sorting the nodes topologically, we will use the **graph_util.topological_sort** procedure." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "scrolled": true, + "tags": [] + }, + "outputs": [], + "source": [ + "%%oc bolt\n", + "MATCH p=(r:Recipe)-[*bfs]->(f:FinalProduct)\n", + "WITH project(p) AS graph\n", + "CALL graph_util.topological_sort(graph) YIELD sorted_nodes\n", + "UNWIND sorted_nodes AS nodes\n", + "RETURN nodes.name;\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 5. Conclusion\n", + "\n", + "Hopefully, you learned about Memgraph, supply chains and how it's intuitive to analyze them with Cypher queries. 
If you want to understand why graph databases are the future of network resource optimization, head over to [Memgraph's blog post](https://memgraph.com/blog/graphs-databases-are-the-future-for-network-resource-optimization). For any questions regarding this notebook, Cypher, Memgraph or graphs in general, [join our Discord community](https://discord.gg/memgraph). \n", + "\n", + "

⬆️ GO TO TOP ⬆️

" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.12" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/test/integration/IntegrationTest.py b/test/integration/IntegrationTest.py index abf7e577..7939206e 100644 --- a/test/integration/IntegrationTest.py +++ b/test/integration/IntegrationTest.py @@ -26,7 +26,8 @@ def setup_client_builder(config: Configuration) -> ClientBuilder: .with_sparql_path(config.sparql.path) \ .with_gremlin_traversal_source(config.gremlin.traversal_source) \ .with_gremlin_serializer(config.gremlin.message_serializer) \ - .with_neo4j_login(config.neo4j.username, config.neo4j.password, config.neo4j.auth, config.neo4j.database) + .with_neo4j_login(config.neo4j.username, config.neo4j.password, config.neo4j.auth, config.neo4j.database) \ + .with_memgraph_login(config.memgraph.username, config.memgraph.password, config.memgraph.auth, config.memgraph.database) if config.auth_mode == AuthModeEnum.IAM: builder = builder.with_iam(get_session()) else: @@ -41,7 +42,8 @@ def setup_client_builder(config: Configuration) -> ClientBuilder: .with_gremlin_traversal_source(config.gremlin.traversal_source) \ .with_gremlin_login(config.gremlin.username, config.gremlin.password) \ .with_gremlin_serializer(config.gremlin.message_serializer) \ - .with_neo4j_login(config.neo4j.username, config.neo4j.password, config.neo4j.auth, config.neo4j.database) + .with_neo4j_login(config.neo4j.username, config.neo4j.password, config.neo4j.auth, config.neo4j.database) \ + .with_memgraph_login(config.memgraph.username, config.memgraph.password, config.memgraph.auth, config.memgraph.database) return builder diff --git 
a/test/integration/iam/ml/__init__.py b/test/integration/iam/ml/__init__.py index ec2a1f6b..27633ed5 100644 --- a/test/integration/iam/ml/__init__.py +++ b/test/integration/iam/ml/__init__.py @@ -23,6 +23,7 @@ def setup_iam_client(config: Configuration) -> Client: .with_gremlin_login(config.gremlin.username, config.gremlin.password) \ .with_gremlin_serializer(config.gremlin.message_serializer) \ .with_neo4j_login(config.neo4j.username, config.neo4j.password, config.neo4j.auth, config.neo4j.database) \ + .with_memgraph_login(config.memgraph.username, config.memgraph.password, config.memgraph.auth, config.memgraph.database) \ .with_iam(get_session()) \ .build() @@ -40,6 +41,10 @@ def setup_iam_client(config: Configuration) -> Client: assert client.neo4j_password == config.neo4j.password assert client.neo4j_auth == config.neo4j.auth assert client.neo4j_database == config.neo4j.database + assert client.memgraph_username == config.memgraph.username + assert client.memgraph_password == config.memgraph.password + assert client.memgraph_auth == config.memgraph.auth + assert client.memgraph_database == config.memgraph.database assert client.ssl is config.ssl assert client.ssl_verify is config.ssl_verify return client diff --git a/test/integration/iam/notebook/test_open_cypher_graph_notebook.py b/test/integration/iam/notebook/test_open_cypher_graph_notebook.py index 15d28a30..01ecf03c 100644 --- a/test/integration/iam/notebook/test_open_cypher_graph_notebook.py +++ b/test/integration/iam/notebook/test_open_cypher_graph_notebook.py @@ -123,6 +123,12 @@ def test_load_opencypher_config(self): "auth": true, "database": "" } + "memgraph": { + "username": "", + "password": "", + "auth": false, + "database": "memgraph" + } }''' self.ip.run_cell_magic('graph_notebook_config', '', config)