diff --git a/notebooks/integrations/gemini/google-vertex-ai-chat-completion-notebook.ipynb b/notebooks/integrations/gemini/google-vertex-ai-chat-completion-notebook.ipynb new file mode 100644 index 000000000..3f967821a --- /dev/null +++ b/notebooks/integrations/gemini/google-vertex-ai-chat-completion-notebook.ipynb @@ -0,0 +1,639 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IHR_5ZfW69Mq" + }, + "source": [ + "# Google Vertex AI Chat completion with Elastic\n", + "\n", + "This notebook shows how to use Elastic Inference API to interact with Google Vertex AI models to perform Chat completion tasks.\n", + "\n", + "You will need access to a Google Cloud project and enable the Vertex AI APIs, the GCP console will help you do that, follow the instructions. Please note that costs could derive from the use of Vertex AI.\n", + "\n", + "For more info please refer to https://cloud.google.com/vertex-ai" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eq9qSQkFsa3H" + }, + "source": [ + "# Install dependencies" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KROQ28gAsoqa" + }, + "source": [ + "**Install Python dependencies**\n", + "\n", + "We will use the `elasticsearch` python library to create the inference endpoint and the `requests` library to make HTTP Calls to the Elastic Inference API." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Vrs0aI1fstxJ", + "outputId": "3fc26e2b-e381-4c3d-fc3b-7c0334496229" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Collecting elasticsearch\n", + " Downloading elasticsearch-9.0.1-py3-none-any.whl.metadata (8.5 kB)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (2.32.3)\n", + "Collecting elastic-transport<9,>=8.15.1 (from elasticsearch)\n", + " Downloading elastic_transport-8.17.1-py3-none-any.whl.metadata (3.8 kB)\n", + "Requirement already satisfied: python-dateutil in /usr/local/lib/python3.11/dist-packages (from elasticsearch) (2.9.0.post0)\n", + "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.11/dist-packages (from elasticsearch) (4.13.2)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests) (3.4.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests) (2.4.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests) (2025.4.26)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil->elasticsearch) (1.17.0)\n", + "Downloading elasticsearch-9.0.1-py3-none-any.whl (905 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m905.5/905.5 kB\u001b[0m \u001b[31m10.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading elastic_transport-8.17.1-py3-none-any.whl (64 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m65.0/65.0 kB\u001b[0m \u001b[31m2.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + 
"\u001b[?25hInstalling collected packages: elastic-transport, elasticsearch\n", + "Successfully installed elastic-transport-8.17.1 elasticsearch-9.0.1\n" + ] + } + ], + "source": [ + "!pip install elasticsearch requests" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OAfRaQwisu9j" + }, + "source": [ + "**Import Required Libraries**\n", + "\n", + "Now import the necessary modules, including `requests` for making HTTP calls, `json` for manipulating JSON payloads, and `getpass` for secure input of username, password and API keys.\n", + "\n", + "**In production you want to use a secure secret management to handle your sensitive data like usernames, paswords and API keys.**\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3ukQv4R1s-dc" + }, + "outputs": [], + "source": [ + "from elasticsearch import Elasticsearch, helpers\n", + "from urllib.request import urlopen\n", + "from getpass import getpass\n", + "import json\n", + "import time\n", + "import requests\n", + "from base64 import b64encode" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9GUxCn0qsglg" + }, + "source": [ + "# Create Elastic client and Inference endpoint" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UtXAKUbxtCB1" + }, + "source": [ + "**Instantiate the Elasticsearch Client**\n", + "\n", + "This section sets up your Elasticsearch client. For demonstration purposes, we're using a local Elasticsearch instance with default credentials. Adjust these settings for your specific environment." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "WQidM7qotF5U" + }, + "outputs": [], + "source": [ + "ELASTIC_USER = getpass(\"ELASTIC USER: \")\n", + "ELASTIC_PASSWORD = getpass(\"ELASTIC PASSWORD: \")\n", + "host = \"\" # use your own host\n", + "\n", + "client = Elasticsearch(\n", + " hosts=[f\"http://{host}/\"],\n", + " basic_auth=(ELASTIC_USER, ELASTIC_PASSWORD),\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fGf_duY5tPi2" + }, + "source": [ + "Confirm the client connected by getting its metadata:\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "-vEgkrzytTy5", + "outputId": "d3b0cf48-315e-4ab7-b33c-1b67372f1ff4" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{'name': 'elastic-search-dev-server', 'cluster_name': 'elasticsearch', 'cluster_uuid': '7VG3VX_6SbWnOx8Q2dEMCw', 'version': {'number': '9.1.0-SNAPSHOT', 'build_flavor': 'default', 'build_type': 'tar', 'build_hash': 'c14e6020c122d595d8952e4b32edc1cce534441a', 'build_date': '2025-05-30T14:39:15.558708831Z', 'build_snapshot': True, 'lucene_version': '10.2.1', 'minimum_wire_compatibility_version': '8.19.0', 'minimum_index_compatibility_version': '8.0.0'}, 'tagline': 'You Know, for Search'}\n" + ] + } + ], + "source": [ + "print(client.info())" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "JZMLEKVjtUkM" + }, + "source": [ + "**Create an Inference Endpoint using Gemini**\n", + "\n", + "In this step we create the Inference endpoint to allow calling for Chat completion tasks.\n", + "\n", + "For this you will need to get the Service account key file from GCP.\n", + "\n", + "\n", + "**Get the service account credentials**\n", + "\n", + "You will need a SA (Service Account) and its credentials so the Elasticsearch server can access the service.\n", + "\n", + "Go to 
https://console.cloud.google.com/iam-admin/serviceaccounts\n",
+ "\n",
+ " 1. Click the Create service account button.\n",
+ " 2. Enter a name that suits you.\n",
+ " 3. Click Create and continue.\n",
+ " 4. Grant the role Vertex AI User.\n",
+ " 5. Click `Add another role` and then grant the role Service account token creator. This role is needed to allow the SA to generate the necessary access tokens.\n",
+ " 6. Click Done.\n",
+ "\n",
+ "After creating the Service account, you need to get its JSON key file:\n",
+ "\n",
+ "Go to https://console.cloud.google.com/iam-admin/serviceaccounts and click on the SA you just created.\n",
+ "\n",
+ "Go to the Keys tab and click Add key -> Create new key -> JSON -> Create.\n",
+ "\n",
+ "If you get the error message *Service account key creation is disabled*, your administrator needs to change the organization policy *iam.disableServiceAccountKeyCreation* or grant an exception.\n",
+ "The service account key should be downloaded to your machine automatically.\n",
+ "\n",
+ "Once you download the JSON file, open it with your favorite editor and copy its contents. Paste the contents when prompted in the step below.\n",
+ "\n",
+ "\n",
+ "---\n",
+ "\n",
+ "**IMPORTANT**\n",
+ "\n",
+ "Note that the use of this service account may have an impact on your GCP billing.\n",
+ "\n",
+ "Service account keys can be vulnerable; remember to always:\n",
+ "\n",
+ "**KEEP SA KEYS SAFE**\n",
+ "\n",
+ "**ENFORCE LEAST PRIVILEGE**\n",
+ "\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "hkM9wey7rKhQ",
+ "outputId": "8f7e53b5-c9f0-4bbb-d2fc-416f51476e73"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Enter Google Service account API key: ··········\n"
+ ]
+ }
+ ],
+ "source": [
+ "GOOGLE_API_KEY = getpass(\"Enter Google Service account API key: \")\n",
+ "inference_id = \"chat_completion-notebook-test1\" # set the inference ID for the endpoint\n",
+ "project_id = \"\" # use your GCP project\n",
+ "location = \"\" # set the location in which Vertex AI models live, e.g. us-central1\n",
+ "\n",
+ "model_id = \"gemini-2.5-flash-preview-05-20\" # choose the model; you can use any model available in your Vertex AI project."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "jlFGs66Fuqth"
+ },
+ "source": [
+ "**Create the Inference Endpoint**\n",
+ "\n",
+ "Use the Elasticsearch client's `inference.put` API to create a `chat_completion` inference endpoint backed by the `googlevertexai` service."
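+ ,
+ "\n",
+ "The `service_account_json` setting expects the full contents of the JSON key file you downloaded above. If you prefer not to paste the key interactively with `getpass`, you could read the file from disk instead. A minimal sketch (the filename is hypothetical):\n",
+ "\n",
+ "```python\n",
+ "from pathlib import Path\n",
+ "\n",
+ "# Hypothetical path to the JSON key file downloaded from GCP\n",
+ "GOOGLE_API_KEY = Path(\"service-account-key.json\").read_text()\n",
+ "```"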
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "rchfBBa-tfHJ", + "outputId": "ed5fae6d-c8b7-40c9-91df-2505e4a25596" + }, + "outputs": [ + { + "data": { + "text/plain": [ + "ObjectApiResponse({'inference_id': 'chat_completion-notebook-test1', 'task_type': 'chat_completion', 'service': 'googlevertexai', 'service_settings': {'project_id': 'lhoet-elastic-demo', 'location': 'us-central1', 'model_id': 'gemini-2.5-flash-preview-05-20', 'rate_limit': {'requests_per_minute': 1000}}})" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "client.inference.put(\n", + " task_type=\"chat_completion\",\n", + " inference_id=inference_id,\n", + " body={\n", + " \"service\": \"googlevertexai\",\n", + " \"service_settings\": {\n", + " \"service_account_json\": GOOGLE_API_KEY,\n", + " \"model_id\": model_id,\n", + " \"location\": location,\n", + " \"project_id\": project_id\n", + " },\n", + " },\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nrlBxff9spM9" + }, + "source": [ + "# Call the Inference API for Chat Completion" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NP92IpAJ0usP" + }, + "outputs": [], + "source": [ + "api_key = b64encode(f\"{ELASTIC_USER}:{ELASTIC_PASSWORD}\".encode())\n", + "\n", + "def extract_content(json_data) -> str:\n", + " try:\n", + " data = json.loads(json_data)\n", + " if \"choices\" in data and len(data[\"choices\"]) > 0:\n", + " choice = data[\"choices\"][0]\n", + " if \"delta\" in choice and \"content\" in choice[\"delta\"]:\n", + " return choice[\"delta\"][\"content\"]\n", + " except:\n", + " pass\n", + " return \"\"\n", + "\n", + "def extract_content_sse(chunk: bytearray):\n", + " chunk_str :str = chunk.decode(\"utf-8\")\n", + " _, data = chunk_str.split(\"data: \")\n", + " return extract_content(data)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "jlfINrhYuteF", + "outputId": "07aa9171-94f5-462e-8282-fbfd01c8bcaa" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "**Elastic** is a company and a suite of software products known primarily for its **Elastic Stack** (formerly known as the ELK Stack), which is a powerful set of open-source (though now with different licensing tiers) tools designed for search, logging, analytics, and security use cases.\n", + "\n", + "At its core, Elastic provides the technology to make vast amounts of data searchable and actionable in near real-time.\n", + "\n", + "Here's a breakdown of what \"Elastic\" refers to:\n", + "\n", + "1. **Elastic (The Company):**\n", + " Elastic is the company that develops and maintains the Elastic Stack. They offer cloud services (Elastic Cloud) and support for their products.\n", + "\n", + "2. **The Elastic Stack (formerly ELK Stack):**\n", + " This is the flagship offering and the primary reason Elastic is so well-known. It comprises four main components:\n", + "\n", + " * **E**lasticsearch: This is the heart of the stack. It's a distributed, RESTful search and analytics engine built on Apache Lucene. It's designed for high scalability and can index and search massive volumes of data quickly. 
It's excellent for full-text search, structured search, analytics, and time-series data.\n", + "\n", + " * **L**ogstash: This is a server-side data processing pipeline that ingests data from multiple sources simultaneously, transforms it, and then sends it to a \"stash\" like Elasticsearch. It's used for collecting, parsing, and enriching logs and other event data.\n", + "\n", + " * **K**ibana: This is a powerful open-source data visualization and management tool for Elasticsearch. It provides a user interface to explore, analyze, and visualize data stored in Elasticsearch indices. You can create dashboards, charts, and reports to gain insights.\n", + "\n", + " * **Beats:** These are lightweight, single-purpose data shippers that send data from hundreds or thousands of machines to Logstash or Elasticsearch. Examples include Filebeat (for log files), Metricbeat (for system metrics), Winlogbeat (for Windows event logs), and Packetbeat (for network data). They are designed to be resource-efficient and easy to deploy.\n", + "\n", + "**Key Use Cases and Solutions Powered by Elastic:**\n", + "\n", + "Elastic's technology is incredibly versatile and is used for a wide range of applications:\n", + "\n", + "1. **Observability:**\n", + " * **Log Management:** Centralized logging for applications, infrastructure, and security events.\n", + " * **Metrics Monitoring:** Collecting and analyzing performance metrics from servers, networks, and applications.\n", + " * **APM (Application Performance Monitoring):** Tracing requests through distributed systems to identify bottlenecks and errors.\n", + " * **Uptime Monitoring:** Monitoring the availability and responsiveness of services.\n", + " * *Goal:* To understand the health and performance of your entire technology stack.\n", + "\n", + "2. **Security:**\n", + " * **SIEM (Security Information and Event Management):** Ingesting, analyzing, and correlating security data to detect threats and respond to incidents.\n", + " * **Endpoint Security:** Protecting endpoints (laptops, servers) from various attacks.\n", + " * *Goal:* To identify and mitigate security risks across your organization.\n", + "\n", + "3. 
**Enterprise Search:**\n", + " * **Website Search:** Powering the search functionality on websites (e.g., e-commerce product search, content search).\n", + " * **Workplace Search:** Providing unified search across internal documents, applications, and knowledge bases for employees.\n", + " * **App Search:** Embedding search capabilities directly into applications.\n", + " * *Goal:* To provide fast, relevant, and powerful search experiences for users, whether internal or external.\n", + "\n", + "**Why is Elastic Popular?**\n", + "\n", + "* **Speed:** It's designed for near real-time search and analytics.\n", + "* **Scalability:** It can easily scale horizontally to handle vast amounts of data and high query loads.\n", + "* **Flexibility:** It can handle various data types (structured, semi-structured, unstructured).\n", + "* **Rich Ecosystem:** The combination of Elasticsearch, Kibana, Logstash, and Beats provides a comprehensive solution for data ingestion, storage, analysis, and visualization.\n", + "* **Powerful Query Language:** Its RESTful API and JSON-based queries are very expressive.\n", + "\n", + "In essence, **Elastic provides the tools and platform to turn raw, diverse data into actionable insights, enabling companies to monitor their systems, secure their environments, and enhance search experiences.**" + ] + } + ], + "source": [ + "url = f\"http://{host}/_inference/chat_completion/{inference_id}/_stream\"\n", + "headers = {\n", + " \"Authorization\": f\"Basic {api_key}\",\n", + " \"Content-Type\": \"application/json\",\n", + "}\n", + "data = {\n", + " \"model\": \"gemini-2.5-flash-preview-05-20\",\n", + " \"messages\": [{\"role\": \"user\", \"content\": \"What is Elastic?\"}],\n", + "}\n", + "\n", + "post_response = requests.post(url, headers=headers, json=data, stream=True)\n", + "\n", + "for chunk in post_response.iter_content(chunk_size=None):\n", + " #extract_content_sse(chunk)\n", + " print(extract_content_sse(chunk), end=\"\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NPQz0Qmnp0qF" + }, + "source": [ + "**Call the Inference using Tools**\n", + "\n", + "You can also include the usage of tools on chat completion inference tasks." 
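+ ,
+ "\n",
+ "When the model decides to use a tool, the streamed response contains a `tool_calls` entry (as in the example output below) instead of plain text. Your application then runs the corresponding function itself and, following the OpenAI-style chat schema this API uses, sends the result back in a follow-up request as a `tool` message so the model can produce the final answer. A hypothetical sketch of such a follow-up message (the weather value is made up):\n",
+ "\n",
+ "```python\n",
+ "# Hypothetical tool-result message to append to `messages` in a follow-up request\n",
+ "tool_result_message = {\n",
+ "    \"role\": \"tool\",\n",
+ "    \"tool_call_id\": \"get_current_weather\",  # the id returned in the streamed tool call\n",
+ "    \"content\": \"18 degrees celsius and sunny\",  # made-up result from your own function\n",
+ "}\n",
+ "```"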
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "i-Oiyak3p6AH", + "outputId": "edffd50f-fd0f-4465-9ea6-60b4ffde079f" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Post inference response:\n", + "event: message\n", + "data: {\"id\":\"W0A3aPaDI6qvgLUP1Lz-8As\",\"choices\":[{\"delta\":{\"role\":\"model\",\"tool_calls\":[{\"index\":0,\"id\":\"get_current_weather\",\"function\":{\"arguments\":\"{\\\"location\\\":\\\"Boston, MA\\\"}\",\"name\":\"get_current_weather\"},\"type\":\"function\"}]},\"finish_reason\":\"STOP\",\"index\":0}],\"model\":\"gemini-2.5-flash-preview-05-20\",\"object\":\"chat.completion.chunk\",\"usage\":{\"completion_tokens\":9,\"prompt_tokens\":45,\"total_tokens\":233}}\n", + "\n", + "event: message\n", + "data: [DONE]\n", + "\n" + ] + } + ], + "source": [ + "url = f\"http://{host}/_inference/chat_completion/{inference_id}/_stream\"\n", + "headers = {\n", + " \"Authorization\": f\"Basic {api_key}\",\n", + " \"Content-Type\": \"application/json\",\n", + "}\n", + "data = {\n", + " \"model\": \"gemini-2.5-flash-preview-05-20\",\n", + " \"messages\": [{\"role\": \"user\", \"content\": \"What is the weather like in Boston today?\"}],\n", + " \"tools\": [\n", + " {\n", + " \"type\": \"function\",\n", + " \"function\": {\n", + " \"name\": \"get_current_weather\",\n", + " \"description\": \"Get the current weather in a given location\",\n", + " \"parameters\": {\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"location\": {\n", + " \"type\": \"string\",\n", + " \"description\": \"The city and state, e.g. San Francisco, CA\"\n", + " },\n", + " \"unit\": {\n", + " \"type\": \"string\",\n", + " \"enum\": [\"celsius\", \"fahrenheit\"]\n", + " }\n", + " },\n", + " \"required\": [\"location\"]\n", + " }\n", + " }\n", + " }\n", + " ],\n", + " \"tool_choice\": \"auto\"\n", + "}\n", + "\n", + "post_response = requests.post(url, headers=headers, json=data, stream=True)\n", + "\n", + "print(\"Post inference response:\")\n", + "for chunk in post_response.iter_content(chunk_size=None):\n", + " print(chunk.decode(\"utf-8\"), end=\"\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Ntn_iCanrbV7" + }, + "source": [ + "**Calling the chat completion inference task with system messages**\n", + "\n", + "System messages can be included on the messages payload to give the agent more context regarding the conversation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "aJoZhHSDrilJ", + "outputId": "a11eb4a0-a3dd-4c3d-b46f-1c16c612ce38" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Post inference response:\n", + "event: message\n", + "data: {\"id\":\"ckA3aJ74EdTGgLUPhZr8mAI\",\"choices\":[{\"delta\":{\"content\":\"The \\\"best\\\" time to visit Japan really depends on what you want to experience! Each season offers a unique charm.\\n\\nHere's a breakdown by season to help you decide:\\n\\n1. 
**Spring (March - May): Generally considered the most popular time.**\\n * **Pros:**\\n \",\"role\":\"model\"},\"index\":0}],\"model\":\"gemini-2.5-flash-preview-05-20\",\"object\":\"chat.completion.chunk\"}\n", + "\n", + "event: message\n", + "data: {\"id\":\"ckA3aJ74EdTGgLUPhZr8mAI\",\"choices\":[{\"delta\":{\"content\":\"* **Cherry Blossoms (Sakura):** This is the main draw, usually peaking late March to early April in Tokyo/Kyoto, moving north. It's an incredibly beautiful and iconic experience.\\n * **Mild Temperatures:** Comfortable for sightseeing, generally warm and sunny.\\n * **Beautiful Sc\",\"role\":\"model\"},\"index\":0}],\"model\":\"gemini-2.5-flash-preview-05-20\",\"object\":\"chat.completion.chunk\"}\n", + "\n", + "event: message\n", + "data: {\"id\":\"ckA3aJ74EdTGgLUPhZr8mAI\",\"choices\":[{\"delta\":{\"content\":\"enery:** Everything is blooming and green.\\n * **Cons:**\\n * **Crowds:** Especially during cherry blossom season, popular spots are extremely crowded.\\n * **Higher Prices:** Flights and accommodation can be significantly more expensive due to demand.\\n * **Golden Week (late\",\"role\":\"model\"},\"index\":0}],\"model\":\"gemini-2.5-flash-preview-05-20\",\"object\":\"chat.completion.chunk\"}\n", + "\n", + "event: message\n", + "data: {\"id\":\"ckA3aJ74EdTGgLUPhZr8mAI\",\"choices\":[{\"delta\":{\"content\":\" April - early May):** A major Japanese holiday period. Travel within Japan is extremely difficult, crowded, and expensive during this time, and many businesses may be closed.\\n\\n2. **Autumn (September - November): Another highly recommended period.**\\n * **Pros:**\\n * **Fall Foliage (Koyo\",\"role\":\"model\"},\"index\":0}],\"model\":\"gemini-2.5-flash-preview-05-20\",\"object\":\"chat.completion.chunk\"}\n", + "\n", + "event: message\n", + "data: {\"id\":\"ckA3aJ74EdTGgLUPhZr8mAI\",\"choices\":[{\"delta\":{\"content\":\"):** Stunning vibrant reds, oranges, and yellows across the country, usually peaking late October to early November in Tokyo/Kyoto.\\n * **Comfortable Temperatures:** Crisp, cool, and pleasant weather, ideal for hiking and exploring.\\n * **Clear Skies:** Often sunny and bright.\\n * **Cons\",\"role\":\"model\"},\"index\":0}],\"model\":\"gemini-2.5-flash-preview-05-20\",\"object\":\"chat.completion.chunk\"}\n", + "\n", + "event: message\n", + "data: {\"id\":\"ckA3aJ74EdTGgLUPhZr8mAI\",\"choices\":[{\"delta\":{\"content\":\":**\\n * **Crowds:** Also very popular, especially for viewing fall foliage, though generally less intense than cherry blossom season.\\n * **Typhoon Season (early September):** The tail end of typhoon season can still bring heavy rains or disruptions, though the risk decreases significantly by mid-September\",\"role\":\"model\"},\"index\":0}],\"model\":\"gemini-2.5-flash-preview-05-20\",\"object\":\"chat.completion.chunk\"}\n", + "\n", + "event: message\n", + "data: {\"id\":\"ckA3aJ74EdTGgLUPhZr8mAI\",\"choices\":[{\"delta\":{\"content\":\".\\n\\n3. 
**Summer (June - August): Hot and humid, but great for specific activities.**\\n * **Pros:**\\n * **Festivals (Matsuri):** Many vibrant local festivals (e.g., Gion Matsuri in Kyoto, Nebuta Matsuri in Aomori) take place during\",\"role\":\"model\"},\"index\":0}],\"model\":\"gemini-2.5-flash-preview-05-20\",\"object\":\"chat.completion.chunk\"}\n", + "\n", + "event: message\n", + "data: {\"id\":\"ckA3aJ74EdTGgLUPhZr8mAI\",\"choices\":[{\"delta\":{\"content\":\" summer.\\n * **Beach/Outdoor Activities:** Good for visiting beaches, hiking in cooler mountain regions (like the Japanese Alps), or exploring Hokkaido (which has milder summers).\\n * **Mount Fuji Climbing Season:** The official climbing season is typically July to early September.\\n * **Cons:**\\n *\",\"role\":\"model\"},\"index\":0}],\"model\":\"gemini-2.5-flash-preview-05-20\",\"object\":\"chat.completion.chunk\"}\n", + "\n", + "event: message\n", + "data: {\"id\":\"ckA3aJ74EdTGgLUPhZr8mAI\",\"choices\":[{\"delta\":{\"content\":\" **High Heat & Humidity:** Especially in Tokyo, Kyoto, and southern Japan, it can be very hot and uncomfortably humid.\\n * **Rainy Season (Tsuyu):** June is often the rainy season, though it tends to be more consistent drizzle than constant downpours.\\n \",\"role\":\"model\"},\"index\":0}],\"model\":\"gemini-2.5-flash-preview-05-20\",\"object\":\"chat.completion.chunk\"}\n", + "\n", + "event: message\n", + "data: {\"id\":\"ckA3aJ74EdTGgLUPhZr8mAI\",\"choices\":[{\"delta\":{\"content\":\"* **Typhoon Season:** August and September are peak typhoon season, which can cause travel disruptions.\\n * **Obon (mid-August):** Another major Japanese holiday similar to Golden Week, causing crowds and price hikes.\\n\\n4. **Winter (December - February): Great for snow\",\"role\":\"model\"},\"index\":0}],\"model\":\"gemini-2.5-flash-preview-05-20\",\"object\":\"chat.completion.chunk\"}\n", + "\n", + "event: message\n", + "data: {\"id\":\"ckA3aJ74EdTGgLUPhZr8mAI\",\"choices\":[{\"delta\":{\"content\":\" sports and fewer crowds (outside of holidays).**\\n * **Pros:**\\n * **Skiing & Snowboarding:** World-class powder snow in regions like Hokkaido (Niseko, Rusutsu) and the Japanese Alps.\\n * **Winter Illuminations:** Cities are beautifully lit up with elaborate\",\"role\":\"model\"},\"index\":0}],\"model\":\"gemini-2.5-flash-preview-05-20\",\"object\":\"chat.completion.chunk\"}\n", + "\n", + "event: message\n", + "data: {\"id\":\"ckA3aJ74EdTGgLUPhZr8mAI\",\"choices\":[{\"delta\":{\"content\":\" light displays.\\n * **Fewer Crowds:** Generally less crowded in major cities compared to spring/autumn (excluding New Year's).\\n * **Onsen (Hot Springs):** Perfect for enjoying a relaxing soak in a hot spring, especially with snow around.\\n * **\",\"role\":\"model\"},\"index\":0}],\"model\":\"gemini-2.5-flash-preview-05-20\",\"object\":\"chat.completion.chunk\"}\n", + "\n", + "event: message\n", + "data: {\"id\":\"ckA3aJ74EdTGgLUPhZr8mAI\",\"choices\":[{\"delta\":{\"content\":\"Sapporo Snow Festival:** A famous snow and ice sculpture festival in February.\\n * **Cons:**\\n * **Cold Temperatures:** Can be very cold, especially in the north and mountainous areas.\\n * **New Year's Holiday (late Dec - early Jan):** Many businesses,\",\"role\":\"model\"},\"index\":0}],\"model\":\"gemini-2.5-flash-preview-05-20\",\"object\":\"chat.completion.chunk\"}\n", + "\n", + "event: message\n", + "data: {\"id\":\"ckA3aJ74EdTGgLUPhZr8mAI\",\"choices\":[{\"delta\":{\"content\":\" shops, and attractions may be closed for several days around 
this time. Travel can also be busy.\\n\\n**In summary:**\\n\\n* **For first-timers and general sightseeing:** **Spring (late March-April for blossoms)** or **Autumn (late Oct-Nov for foliage)** are generally recommended for their beautiful scenery and comfortable weather\",\"role\":\"model\"},\"index\":0}],\"model\":\"gemini-2.5-flash-preview-05-20\",\"object\":\"chat.completion.chunk\"}\n", + "\n", + "event: message\n", + "data: {\"id\":\"ckA3aJ74EdTGgLUPhZr8mAI\",\"choices\":[{\"delta\":{\"content\":\".\\n* **For budget travelers and avoiding crowds (somewhat):** Early December or late January/February (avoiding New Year's).\\n* **For snow sports or specific festivals:** Winter or Summer, respectively.\\n\\nConsider what you want to prioritize for your trip to pick your ideal time!\",\"role\":\"model\"},\"finish_reason\":\"STOP\",\"index\":0}],\"model\":\"gemini-2.5-flash-preview-05-20\",\"object\":\"chat.completion.chunk\",\"usage\":{\"completion_tokens\":979,\"prompt_tokens\":33,\"total_tokens\":1349}}\n", + "\n", + "event: message\n", + "data: [DONE]\n", + "\n" + ] + } + ], + "source": [ + "url = f\"http://{host}/_inference/chat_completion/{inference_id}/_stream\"\n", + "headers = {\n", + " \"Authorization\": f\"Basic {api_key}\",\n", + " \"Content-Type\": \"application/json\",\n", + "}\n", + "data = {\n", + " \"model\": \"gemini-2.5-flash-preview-05-20\",\n", + " \"messages\": [\n", + " {\n", + " \"role\": \"system\",\n", + " \"content\": \"You are an AI travel assistant that can read images, call functions, and interpret structured data. Be helpful and accurate.\"\n", + " },\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": \"When is the best time to visit Japan?\"}\n", + " ],\n", + "}\n", + "\n", + "post_response = requests.post(url, headers=headers, json=data, stream=True)\n", + "\n", + "print(\"Post inference response:\")\n", + "for chunk in post_response.iter_content(chunk_size=None):\n", + " print(chunk.decode(\"utf-8\"), end=\"\")" + ] + } + ], + "metadata": { + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/notebooks/integrations/gemini/google-vertex-ai-completion-notebook.ipynb b/notebooks/integrations/gemini/google-vertex-ai-completion-notebook.ipynb new file mode 100644 index 000000000..5d0258a8e --- /dev/null +++ b/notebooks/integrations/gemini/google-vertex-ai-completion-notebook.ipynb @@ -0,0 +1,598 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "C0Rdc95b07J2" + }, + "source": [ + "# Google Vertex AI Completion with Elastic\n", + "\n", + "This notebook shows how to use Elastic Inference API to interact with Google Vertex AI models to perform completion tasks.\n", + "\n", + "You will need access to a Google Cloud project and enable the Vertex AI APIs, the GCP console will help you do that, follow the instructions. 
Please note that costs could derive from the use of Vertex AI.\n", + "\n", + "For more info please refer to https://cloud.google.com/vertex-ai\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eq9qSQkFsa3H" + }, + "source": [ + "# Install dependencies" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "KROQ28gAsoqa" + }, + "source": [ + "**Install Python dependencies**\n", + "\n", + "\n", + "We will use the `elasticsearch` python library to create the inference endpoint and the `requests` library to make HTTP Calls to the Elastic Stream API.\n", + "\n", + "You may choose a different HTTP library." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Vrs0aI1fstxJ", + "outputId": "6669078a-ddeb-44e3-8fd0-904cb614634d" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: elasticsearch in /usr/local/lib/python3.11/dist-packages (9.0.1)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (2.32.3)\n", + "Requirement already satisfied: elastic-transport<9,>=8.15.1 in /usr/local/lib/python3.11/dist-packages (from elasticsearch) (8.17.1)\n", + "Requirement already satisfied: python-dateutil in /usr/local/lib/python3.11/dist-packages (from elasticsearch) (2.9.0.post0)\n", + "Requirement already satisfied: typing-extensions in /usr/local/lib/python3.11/dist-packages (from elasticsearch) (4.13.2)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests) (3.4.2)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests) (2.4.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests) (2025.4.26)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil->elasticsearch) (1.17.0)\n" + ] + } + ], + "source": [ + "!pip install elasticsearch requests" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "OAfRaQwisu9j" + }, + "source": [ + "**Import Required Libraries**\n", + "\n", + "Now import the necessary modules, including `requests` for making HTTP calls, `json` for manipulating JSON payloads, and `getpass` for secure input of username, password and API keys.\n", + "\n", + "**In production you want to use a secure secret management to handle your sensitive data like usernames, paswords and API keys.**" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "3ukQv4R1s-dc" + }, + "outputs": [], + "source": [ + "from elasticsearch import Elasticsearch, helpers\n", + "from urllib.request import urlopen\n", + "from getpass import getpass\n", + "import json\n", + "import time\n", + "import requests\n", + "from base64 import b64encode" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "9GUxCn0qsglg" + }, + "source": [ + "# Create Elastic client and Inference endpoint" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UtXAKUbxtCB1" + }, + "source": [ + "**Instantiate the Elasticsearch Client**\n", + "\n", + "This section sets up your Elasticsearch client. For demonstration purposes, we're using a local Elasticsearch instance with default credentials. 
Adjust these settings for your specific environment."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "WQidM7qotF5U",
+ "outputId": "65790699-deea-4d59-d1a1-d061fb510a46"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "ELASTIC USER: ··········\n",
+ "ELASTIC PASSWORD: ··········\n"
+ ]
+ }
+ ],
+ "source": [
+ "ELASTIC_USER = getpass(\"ELASTIC USER: \")\n",
+ "ELASTIC_PASSWORD = getpass(\"ELASTIC PASSWORD: \")\n",
+ "host = \"\" # use your Elastic host here\n",
+ "client = Elasticsearch(\n",
+ " hosts=[f\"http://{host}/\"],\n",
+ " basic_auth=(ELASTIC_USER, ELASTIC_PASSWORD),\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "fGf_duY5tPi2"
+ },
+ "source": [
+ "Confirm the Elastic client connected by looking at its metadata:\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {
+ "colab": {
+ "base_uri": "https://localhost:8080/"
+ },
+ "id": "-vEgkrzytTy5",
+ "outputId": "a02be608-0a39-4d68-ad53-3d801fb62c3b"
+ },
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'name': 'elastic-search-dev-server', 'cluster_name': 'elasticsearch', 'cluster_uuid': 'qJgHGKd8RQKAvtruZO2Qzw', 'version': {'number': '9.1.0-SNAPSHOT', 'build_flavor': 'default', 'build_type': 'tar', 'build_hash': '6cf0c0b3ed79c5f371bd9c2ccd78903f33f913b9', 'build_date': '2025-06-04T15:00:33.105576944Z', 'build_snapshot': True, 'lucene_version': '10.2.1', 'minimum_wire_compatibility_version': '8.19.0', 'minimum_index_compatibility_version': '8.0.0'}, 'tagline': 'You Know, for Search'}\n"
+ ]
+ }
+ ],
+ "source": [
+ "print(client.info())"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "id": "JZMLEKVjtUkM"
+ },
+ "source": [
+ "**Create an Inference Endpoint using Gemini**\n",
+ "\n",
+ "In this step we create the inference endpoint used for completion tasks.\n",
+ "\n",
+ "For this you will need to get the Service account key file from GCP.\n",
+ "\n",
+ "\n",
+ "**Get the service account credentials**\n",
+ "\n",
+ "You will need a SA (Service Account) and its credentials so the Elasticsearch server can access the service.\n",
+ "\n",
+ "Go to https://console.cloud.google.com/iam-admin/serviceaccounts\n",
+ "\n",
+ " 1. Click the Create service account button.\n",
+ " 2. Enter a name that suits you.\n",
+ " 3. Click Create and continue.\n",
+ " 4. Grant the role Vertex AI User.\n",
+ " 5. Click `Add another role` and then grant the role Service account token creator. This role is needed to allow the SA to generate the necessary access tokens.\n",
+ " 6. Click Done.\n",
+ "\n",
+ "After creating the Service account, you need to get its JSON key file:\n",
+ "\n",
+ "Go to https://console.cloud.google.com/iam-admin/serviceaccounts and click on the SA you just created.\n",
+ "\n",
+ "Go to the Keys tab and click Add key -> Create new key -> JSON -> Create.\n",
+ "\n",
+ "If you get the error message *Service account key creation is disabled*, your administrator needs to change the organization policy *iam.disableServiceAccountKeyCreation* or grant an exception.\n",
+ "The service account key should be downloaded to your machine automatically.\n",
+ "\n",
+ "Once you download the JSON file, open it with your favorite editor and copy its contents.
Paste the contents when prompted on the step below.\n", + "\n", + "\n", + "---\n", + "\n", + "**IMPORTANT**\n", + "\n", + "Note that the use of this service account may have an impact in the GCP billing.\n", + "\n", + "Service account keys can be vulnerable, remember to always:\n", + "\n", + "**KEEP SA KEYS SAFE**\n", + "\n", + "**ENFORCE LEAST PRIVILEGE**\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "hkM9wey7rKhQ", + "outputId": "5ec4948e-0173-4ad1-a346-2e9f0f4ff1d6" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Enter Google Service account API key: ··········\n" + ] + } + ], + "source": [ + "GOOGLE_API_KEY = getpass(\"Enter Google Service account API key: \")\n", + "inference_id = \"completion-notebook-test1\" # set the inference ID for the endpoint\n", + "project_id = \"\" # use your GCP project\n", + "location = \"\" # set the region in which the model lives e.g us-central1\n", + "\n", + "model_id = \"gemini-2.5-flash-preview-05-20\" # choose the model, you could use any model from your Vertex AI." + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jlFGs66Fuqth" + }, + "source": [ + "**Generate a Completion Inference**\n", + "\n", + "Using the requests library, create a POST request to the Elastic Stream API for Completion inference task." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/", + "height": 356 + }, + "id": "rchfBBa-tfHJ", + "outputId": "d12bcbb7-fe5c-4f45-af4c-bd46f5cb24d1" + }, + "outputs": [ + { + "ename": "BadRequestError", + "evalue": "BadRequestError(400, 'resource_already_exists_exception', 'Inference endpoint [completion-notebook-test1] already exists')", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mBadRequestError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m client.inference.put(\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mtask_type\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"completion\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0minference_id\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0minference_id\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m body={\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\"service\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m\"googlevertexai\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/elasticsearch/_sync/client/utils.py\u001b[0m in \u001b[0;36mwrapped\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 413\u001b[0m \u001b[0;32mpass\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 414\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 415\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mapi\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 416\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 417\u001b[0m \u001b[0;32mreturn\u001b[0m 
\u001b[0mwrapped\u001b[0m \u001b[0;31m# type: ignore[return-value]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/elasticsearch/_sync/client/inference.py\u001b[0m in \u001b[0;36mput\u001b[0;34m(self, inference_id, inference_config, body, task_type, error_trace, filter_path, human, pretty)\u001b[0m\n\u001b[1;32m 419\u001b[0m \u001b[0m__body\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0minference_config\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0minference_config\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0mbody\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 420\u001b[0m \u001b[0m__headers\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m\"accept\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m\"application/json\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"content-type\"\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m\"application/json\"\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 421\u001b[0;31m return self.perform_request( # type: ignore[return-value]\n\u001b[0m\u001b[1;32m 422\u001b[0m \u001b[0;34m\"PUT\"\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 423\u001b[0m \u001b[0m__path\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/elasticsearch/_sync/client/_base.py\u001b[0m in \u001b[0;36mperform_request\u001b[0;34m(self, method, path, params, headers, body, endpoint_id, path_parts)\u001b[0m\n\u001b[1;32m 420\u001b[0m \u001b[0;31m# Use the internal clients .perform_request() implementation\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 421\u001b[0m \u001b[0;31m# so we take advantage of their transport options.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 422\u001b[0;31m return self._client.perform_request(\n\u001b[0m\u001b[1;32m 423\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 424\u001b[0m \u001b[0mpath\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/elasticsearch/_sync/client/_base.py\u001b[0m in \u001b[0;36mperform_request\u001b[0;34m(self, method, path, params, headers, body, endpoint_id, path_parts)\u001b[0m\n\u001b[1;32m 269\u001b[0m \u001b[0mpath_parts\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpath_parts\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 270\u001b[0m ) as otel_span:\n\u001b[0;32m--> 271\u001b[0;31m response = self._perform_request(\n\u001b[0m\u001b[1;32m 272\u001b[0m \u001b[0mmethod\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 273\u001b[0m \u001b[0mpath\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/elasticsearch/_sync/client/_base.py\u001b[0m in \u001b[0;36m_perform_request\u001b[0;34m(self, method, path, params, headers, body, otel_span)\u001b[0m\n\u001b[1;32m 349\u001b[0m \u001b[0;32mpass\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 350\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 
351\u001b[0;31m raise HTTP_EXCEPTIONS.get(meta.status, ApiError)(\n\u001b[0m\u001b[1;32m 352\u001b[0m \u001b[0mmessage\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmessage\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmeta\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mmeta\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbody\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mresp_body\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 353\u001b[0m )\n", + "\u001b[0;31mBadRequestError\u001b[0m: BadRequestError(400, 'resource_already_exists_exception', 'Inference endpoint [completion-notebook-test1] already exists')" + ] + } + ], + "source": [ + "client.inference.put(\n", + " task_type=\"completion\",\n", + " inference_id=inference_id,\n", + " body={\n", + " \"service\": \"googlevertexai\",\n", + " \"service_settings\": {\n", + " \"service_account_json\": GOOGLE_API_KEY,\n", + " \"model_id\": model_id,\n", + " \"location\": location,\n", + " \"project_id\": project_id\n", + " },\n", + " },\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nrlBxff9spM9" + }, + "source": [ + "# Call the Inference API for Completion" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "NP92IpAJ0usP" + }, + "outputs": [], + "source": [ + "api_key = b64encode(f\"{ELASTIC_USER}:{ELASTIC_PASSWORD}\".encode())\n", + "\n", + "def extract_content(json_data) -> str:\n", + " try:\n", + " data = json.loads(json_data)\n", + " if \"choices\" in data and len(data[\"choices\"]) > 0:\n", + " choice = data[\"choices\"][0]\n", + " if \"delta\" in choice and \"content\" in choice[\"delta\"]:\n", + " return choice[\"delta\"][\"content\"]\n", + " except:\n", + " pass\n", + " return \"\"\n", + "\n", + "def extract_content_sse(chunk):\n", + " \"\"\"\n", + " Extracts the 'delta' content from an SSE chunk with the specific\n", + " {\"completion\":[{\"delta\":\"...\"}]} structure.\n", + " Handles 'data: [DONE]' messages.\n", + " \"\"\"\n", + " try:\n", + " chunk_str = chunk.decode('utf-8')\n", + " lines = chunk_str.split('\\n')\n", + " extracted_deltas = []\n", + "\n", + " for line in lines:\n", + " line = line.strip()\n", + " if not line:\n", + " continue\n", + "\n", + " if line.startswith('data:'):\n", + " json_data_str = line[len('data:'):].strip()\n", + " if json_data_str == '[DONE]':\n", + " return \"\"\n", + " try:\n", + " data_obj = json.loads(json_data_str)\n", + " if \"completion\" in data_obj and isinstance(data_obj[\"completion\"], list):\n", + " for item in data_obj[\"completion\"]:\n", + " if \"delta\" in item:\n", + " extracted_deltas.append(item[\"delta\"])\n", + " except json.JSONDecodeError:\n", + " pass\n", + " elif line.startswith('event: message'):\n", + " pass\n", + "\n", + " return \"\".join(extracted_deltas)\n", + "\n", + " except Exception as e:\n", + " return \"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "jlfINrhYuteF", + "outputId": "5d7c3747-4b8e-4e9e-e02d-f4a3bb91a7d5" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Status Code: 200\n", + "Response Body:\n", + "{\n", + " \"completion\": [\n", + " {\n", + " \"result\": \"The word \\\"elastic\\\" has several meanings, depending on the context. Here are the most common ones:\\n\\n1. 
**As a Physical Property/Material (most common usage):**\\n * **Definition:** The ability of a material or object to return to its original shape, size, and form after being stretched, compressed, or deformed by an external force.\\n * **Key Idea:** It can deform but then *recovers*.\\n * **Examples:** A rubber band, a spring, a bungee cord, spandex fabric, and even steel (within its elastic limit) are all considered elastic.\\n * **Contrast:** This is contrasted with **plastic** materials, which deform permanently and do not return to their original shape (like clay or play-doh).\\n\\n2. **In Physics and Material Science (Elasticity):**\\n * **Definition:** The intrinsic property of a material that allows it to undergo elastic deformation. When a stress (force per unit area) is applied to an elastic material, it experiences strain (relative deformation), and upon removal of the stress, the material returns to its initial state.\\n * **Key Concepts:**\\n * **Elastic Limit:** The maximum stress a material can withstand without undergoing permanent deformation. Beyond this point, it becomes plastically deformed.\\n * **Hooke's Law:** For many elastic materials (especially springs), the force required to stretch or compress them is directly proportional to the amount of stretch or compression (F = -kx).\\n * **Modulus of Elasticity (Young's Modulus):** A measure of the stiffness of an elastic material.\\n\\n3. **In Economics (Elasticity):**\\n * **Definition:** A measure of the responsiveness of one economic variable to a change in another. It quantifies how much one factor changes in response to a change in another, usually expressed as a percentage.\\n * **Key Ideas:**\\n * **Price Elasticity of Demand:** How much the quantity demanded of a good changes in response to a change in its price.\\n * **Elastic Demand:** A large change in quantity demanded for a small change in price (e.g., luxury goods).\\n * **Inelastic Demand:** A small change in quantity demanded for a large change in price (e.g., essential medicines, gasoline).\\n * **Price Elasticity of Supply:** How much the quantity supplied of a good changes in response to a change in its price.\\n * **Purpose:** Helps businesses and governments understand market behavior and make pricing or policy decisions.\\n\\n4. **Figurative/General Usage:**\\n * **Meaning:** Flexible, adaptable, resilient, or capable of being extended or expanded easily.\\n * **Examples:**\\n * \\\"An **elastic** schedule\\\" means it's flexible and can be adjusted.\\n * \\\"An **elastic** interpretation of the rules\\\" suggests a broad or adaptable reading.\\n * \\\"An **elastic** waistband\\\" refers to clothing that stretches to fit different sizes.\\n\\n5. **In Computing (referring to \\\"Elastic\\\" technologies):**\\n * While not a direct definition of *elastic* itself, many cloud computing services and software products use \\\"elastic\\\" in their names (e.g., Amazon Web Services' **Elastic** Compute Cloud (EC2), **Elasticsearch**, **Elastic** Kubernetes Service).\\n * **Meaning:** In this context, \\\"elastic\\\" signifies the ability to scale resources (compute power, storage, etc.) up or down quickly and automatically in response to changing demand, providing flexibility and efficiency.\\n\\nIn summary, \\\"elastic\\\" most commonly refers to the **ability to stretch or deform and then return to the original shape**. 
However, depending on the field, it can also describe the **responsiveness of variables** (economics) or the **adaptability and scalability of systems** (computing).\"\n", + " }\n", + " ]\n", + "}\n" + ] + } + ], + "source": [ + "url_completion = f\"http://{host}/_inference/completion/{inference_id}\"\n", + "headers = {\n", + " 'Authorization': f\"Basic {api_key}\",\n", + " 'content-type': 'application/json'\n", + "}\n", + "data_completion = {\n", + " 'input': 'What is elastic?'\n", + "}\n", + "\n", + "try:\n", + " response = requests.post(url_completion, headers=headers, json=data_completion)\n", + " response.raise_for_status()\n", + "\n", + " print(f\"Status Code: {response.status_code}\")\n", + " print(\"Response Body:\")\n", + " print(json.dumps(response.json(), indent=2))\n", + "\n", + "except requests.exceptions.RequestException as e:\n", + " print(f\"Error during regular completion request: {e}\")\n", + " if hasattr(e, 'response') and e.response is not None:\n", + " print(f\"Response content: {e.response.text}\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "NPQz0Qmnp0qF" + }, + "source": [ + "**Call the Inference using Streaming**\n", + "\n", + "The API will stream the LLM response." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "i-Oiyak3p6AH", + "outputId": "4b8d4c79-38f8-4cce-9e7f-8a9734b5a368" + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Status Code (Stream): 200\n", + "Streaming Response:\n", + "Alright, me buckos! Gather 'round, because if you've ever wondered how Mr. Krabs keeps track of every last Krabby Patty, every penny, and every time Plankton tries to steal the secret formula without losing his tiny little mind... well, he probably *should* be using something like **Elastic**!\n", + "\n", + "Imagine **Elastic** not just as a tool, but as the ultimate, super-duper, hyper-organized, all-seeing, all-knowing, digital **Krusty Krab K-S.I.G.H.T. System** (that's Krabs' Super-Insight Generating, High-Tech System, for short!).\n", + "\n", + "Here's how it breaks down, Spongebob style:\n", + "\n", + "---\n", + "\n", + "### **The Problem: Mr. Krabs's Chaotic Krusty Krab**\n", + "\n", + "Mr. Krabs is drowning in data!\n", + "* How many Krabby Patties sold yesterday?\n", + "* Which customers complained about soggy buns?\n", + "* Did Spongebob use too much mustard on batch 37?\n", + "* Is Plankton trying to sneak in *again* through the ventilation shaft, or just the front door?\n", + "* Why is the fryer overheating every Thursday morning?\n", + "\n", + "He's got handwritten notes, scribbled receipts, Spongebob's overly enthusiastic verbal reports, and Squidward's eye-rolls. It's a mess! He can't get any *insights* to make more money or stop Plankton!\n", + "\n", + "### **The Solution: Elastic - The Krusty Krab K-S.I.G.H.T. System!**\n", + "\n", + "Elastic is a company that makes a *suite* of software, like a whole team of specialized helpers for Mr. Krabs. Let's meet the key players:\n", + "\n", + "---\n", + "\n", + "#### 1. **Elasticsearch (The Vault of Infinite Krabby Patty Knowledge)**\n", + "\n", + "* **What it is:** This is the heart of Elastic. It's an incredibly powerful, lightning-fast search and analytics engine. 
It's not just a place to store data; it's a place where you can find *anything* about that data, super quickly.\n", + "* **Spongebob Lore:** Imagine deep within the Krusty Krab, beyond the freezer and the employee break room, is a massive, meticulously organized, digital **Vault of Infinite Krabby Patty Knowledge.** Every single thing that ever happens at the Krusty Krab gets logged and stored here – but not just stored! It's indexed, cross-referenced, and ready to be searched in a blink.\n", + " * **Example:** Mr. Krabs can instantly type in: \"Show me all patties sold with extra pickles on a Tuesday when it rained\" and BAM! The vault immediately spits out the answer. It can handle millions of Krabby Patty transactions and find just the one he's looking for. It even knows if the bun was toasted to perfection!\n", + "\n", + "#### 2. **Kibana (Mr. Krabs's Giant Dashboard & Spyglass)**\n", + "\n", + "* **What it is:** This is the visualization layer. It lets you explore, visualize, and build dashboards on top of the data stored in Elasticsearch. It's how you make sense of all that information.\n", + "* **Spongebob Lore:** This is Mr. Krabs's fancy, glowing **Giant Dashboard** mounted in his office, right next to his safe! Instead of squinting at piles of receipts, he sees beautiful charts and graphs:\n", + " * \"**Krabby Patty Sales Today:** *Live Count!*\"\n", + " * \"**Customer Satisfaction Rating:** *Current Mood of Bikini Bottom!*\"\n", + " * \"**Ingredient Levels:** *Are We Running Low on Pickles?!*\"\n", + " * \"**Plankton Infiltration Attempts:** *Red Alert!*\"\n", + " * Squidward might even use a smaller version to see how many minutes until his next break, updated in real-time. It's his magical spyglass into the workings of the Krusty Krab!\n", + "\n", + "#### 3. **Beats (Spongebob's Little Helpers)**\n", + "\n", + "* **What it is:** These are lightweight data shippers that collect data from various sources and send it to Elasticsearch. They're like tiny, dedicated data collection agents.\n", + "* **Spongebob Lore:** These are like little, highly specialized **Mini-Spongebobs** or **Tiny Krabby Patty Scouts** scurrying around the Krusty Krab, each with a specific job:\n", + " * **Filebeat:** A tiny Spongebob at the grill, logging every time a patty is cooked (grill temperature, time, who cooked it).\n", + " * **Metricbeat:** A tiny Spongebob with a thermometer and pressure gauge, measuring the fryer's heat, the soda machine's water pressure, and how many bubbles are in the air.\n", + " * **Packetbeat:** A tiny Spongebob hidden under the counter, listening to every order placed and every payment made.\n", + " * **Auditbeat:** A security Spongebob, logging every time someone tries to open the safe or peek at the secret formula.\n", + " * They collect their specific \"notes\" and send them straight to the Vault of Infinite Krabby Patty Knowledge (Elasticsearch)!\n", + "\n", + "#### 4. **Logstash (The Krabby Patty Data Prep Station)**\n", + "\n", + "* **What it is:** This is a data processing pipeline. It can collect data from various sources, transform it (clean it up, enrich it), and then send it to Elasticsearch or other destinations.\n", + "* **Spongebob Lore:** This is the **Krabby Patty Data Prep Station** in the back. 
Sometimes, the data from the Little Helpers is a bit messy – Spongebob might scribble \"patty good\" or Squidward might just write \"customer annoying.\"\n", + " * Logstash takes these raw notes, cleans them up (e.g., changes \"patty good\" to \"Krabby Patty quality: Excellent\"), adds more context (e.g., \"customer annoyed: reason - too many bubbles in Kelp Shake\"), and makes sure everything is perfectly formatted before it goes into the super-organized Vault. It's the chef that ensures all data is perfectly spiced and prepared!\n", + "\n", + "---\n", + "\n", + "### **Beyond the Basics: Elastic for Krusty Krab Superpowers!**\n", + "\n", + "* **Security (Mr. Krabs's Plankton Alarm System):** Elastic can act as a **Plankton Infiltration Detection System (PIDS)**. By analyzing all the logs and network activity collected by the Beats, it can spot suspicious patterns – like unusual activity near the secret formula vault, or too many failed login attempts to the register. It then screams \"HOO-HAH! Plankton Alert!\" on Mr. Krabs's Kibana dashboard.\n", + "* **Observability (Keeping the Krusty Krab Running Smoothly):** This is about understanding the *health* and *performance* of the entire Krusty Krab operation.\n", + " * **APM (Application Performance Monitoring):** How long does it take from when a customer orders to when they get their patty? Is the cashier slow? Is Spongebob burning too many patties?\n", + " * **Metrics:** How many patties per hour are we making? What's the average wait time?\n", + " * **Traces:** Following a single customer's journey from them walking in the door, ordering, getting their food, and leaving. If something goes wrong (like a dropped Krabby Patty), Elastic can pinpoint exactly where the problem occurred in that journey.\n", + "\n", + "---\n", + "\n", + "So, in a nutshell, **Elastic is a powerful suite of tools that helps you collect, store, search, analyze, and visualize *all sorts* of data from your systems in real-time.** It helps Mr. Krabs turn chaotic information into actionable insights, so he can sell more Krabby Patties, keep his customers happy, and stop Plankton from ever getting his grubby little hands on that secret formula!\n", + "\n", + "Are ya ready, kids? **AYE AYE, CAPTAIN!** That's Elastic!\n", + "--- End of Stream ---\n" + ] + } + ], + "source": [ + "url_stream_completion = f\"http://{host}/_inference/completion/{inference_id}/_stream\"\n", + "headers_stream = {\n", + " 'Authorization': f\"Basic {api_key}\",\n", + " 'content-type': 'application/json'\n", + "}\n", + "data_stream_completion = {\n", + " 'input': 'What is Elastic? 
(use spongebob lore to explain)'\n", + "}\n", + "\n", + "try:\n", + "\n", + " post_response_stream = requests.post(\n", + " url_stream_completion,\n", + " headers=headers_stream,\n", + " json=data_stream_completion,\n", + " stream=True\n", + " )\n", + " post_response_stream.raise_for_status()\n", + "\n", + " print(f\"Status Code (Stream): {post_response_stream.status_code}\")\n", + " print(\"Streaming Response:\")\n", + "\n", + " # Iterate over the content in chunks\n", + " for chunk in post_response_stream.iter_content(chunk_size=None):\n", + " print(extract_content_sse(chunk), end=\"\")\n", + "\n", + " print(\"\\n--- End of Stream ---\")\n", + "\n", + "except requests.exceptions.RequestException as e:\n", + " print(f\"Error during streaming completion request: {e}\")\n", + " if hasattr(e, 'response') and e.response is not None:\n", + " print(f\"Response content: {e.response.text}\")" + ] + } + ], + "metadata": { + "colab": { + "gpuType": "T4", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +}