From b165eebba393236d02a953377eb417eb16e7aa63 Mon Sep 17 00:00:00 2001 From: Phil Miesle Date: Thu, 3 Oct 2024 21:13:39 +0100 Subject: [PATCH] adding Download functionality, validating on Windows --- README.md | 12 +- .../2024_Comparison/2024_Comparison.ipynb | 211 ++++++++---------- utils/__init__.py | 2 + utils/download.py | 24 ++ 4 files changed, 129 insertions(+), 120 deletions(-) create mode 100644 utils/download.py diff --git a/README.md b/README.md index c08e062..bcc3f92 100644 --- a/README.md +++ b/README.md @@ -7,4 +7,14 @@ To install a dependency from a repository, we can use commands like: ``` %pip install --force-reinstall git+https://github.com/datastax/ragstack-ai.git@starting-points#subdirectory=libs/knowledge-store %pip install --force-reinstall git+https://github.com/bjchambers/langchain.git@graph-vector-stores-visualization#subdirectory=libs/community -``` \ No newline at end of file +``` + +## Installing + +1. Create a `.env` file in the root folder: +```bash +OPENAI_API_KEY= +ASTRA_DB_DATABASE_ID= +ASTRA_DB_APPLICATION_TOKEN= +``` + diff --git a/notebooks/2024_Comparison/2024_Comparison.ipynb b/notebooks/2024_Comparison/2024_Comparison.ipynb index 9f73d0a..d224ff0 100644 --- a/notebooks/2024_Comparison/2024_Comparison.ipynb +++ b/notebooks/2024_Comparison/2024_Comparison.ipynb @@ -12,90 +12,9 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: python-dotenv<2,>=1.0.1 in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from -r ../../common_requirements.txt (line 1)) (1.0.1)\n", - "Requirement already satisfied: langchain-core==0.2.27 in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from -r ../../common_requirements.txt (line 2)) (0.2.27)\n", - "Requirement already satisfied: langchain-community==0.2.11 
in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from -r ../../common_requirements.txt (line 3)) (0.2.11)\n", - "Requirement already satisfied: langchain-openai==0.1.20 in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from -r ../../common_requirements.txt (line 4)) (0.1.20)\n", - "Requirement already satisfied: langchainhub==0.1.21 in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from -r ../../common_requirements.txt (line 5)) (0.1.21)\n", - "Requirement already satisfied: langsmith==0.1.99 in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from -r ../../common_requirements.txt (line 6)) (0.1.99)\n", - "Requirement already satisfied: ragstack-ai-knowledge-store<0.3,>=0.2.1 in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from -r ../../common_requirements.txt (line 7)) (0.2.1)\n", - "Requirement already satisfied: simsimd<6.0.0,>=5.0.0 in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from -r ../../common_requirements.txt (line 8)) (5.4.3)\n", - "Requirement already satisfied: tqdm<4.67,>=4.66.4 in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from -r ../../common_requirements.txt (line 9)) (4.66.5)\n", - "Requirement already satisfied: networkx in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from -r ../../common_requirements.txt (line 10)) (3.3)\n", - "Requirement already satisfied: langchain_experimental in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from -r requirements.txt (line 3)) (0.0.64)\n", - "Collecting langchain_experimental (from -r requirements.txt (line 3))\n", - " Using cached langchain_experimental-0.3.2-py3-none-any.whl.metadata (1.7 
kB)\n", - "Collecting neo4j (from -r requirements.txt (line 4))\n", - " Downloading neo4j-5.25.0-py3-none-any.whl.metadata (5.7 kB)\n", - "Requirement already satisfied: PyYAML>=5.3 in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from langchain-core==0.2.27->-r ../../common_requirements.txt (line 2)) (6.0.2)\n", - "Requirement already satisfied: jsonpatch<2.0,>=1.33 in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from langchain-core==0.2.27->-r ../../common_requirements.txt (line 2)) (1.33)\n", - "Requirement already satisfied: packaging<25,>=23.2 in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from langchain-core==0.2.27->-r ../../common_requirements.txt (line 2)) (24.1)\n", - "Requirement already satisfied: pydantic<3,>=1 in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from langchain-core==0.2.27->-r ../../common_requirements.txt (line 2)) (2.8.2)\n", - "Requirement already satisfied: tenacity!=8.4.0,<9.0.0,>=8.1.0 in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from langchain-core==0.2.27->-r ../../common_requirements.txt (line 2)) (8.5.0)\n", - "Requirement already satisfied: typing-extensions>=4.7 in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from langchain-core==0.2.27->-r ../../common_requirements.txt (line 2)) (4.12.2)\n", - "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from langchain-community==0.2.11->-r ../../common_requirements.txt (line 3)) (2.0.32)\n", - "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from langchain-community==0.2.11->-r ../../common_requirements.txt 
(line 3)) (3.10.3)\n", - "Requirement already satisfied: dataclasses-json<0.7,>=0.5.7 in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from langchain-community==0.2.11->-r ../../common_requirements.txt (line 3)) (0.6.7)\n", - "Requirement already satisfied: langchain<0.3.0,>=0.2.12 in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from langchain-community==0.2.11->-r ../../common_requirements.txt (line 3)) (0.2.12)\n", - "Requirement already satisfied: numpy<2,>=1 in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from langchain-community==0.2.11->-r ../../common_requirements.txt (line 3)) (1.26.4)\n", - "Requirement already satisfied: requests<3,>=2 in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from langchain-community==0.2.11->-r ../../common_requirements.txt (line 3)) (2.32.3)\n", - "Requirement already satisfied: openai<2.0.0,>=1.32.0 in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from langchain-openai==0.1.20->-r ../../common_requirements.txt (line 4)) (1.40.3)\n", - "Requirement already satisfied: tiktoken<1,>=0.7 in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from langchain-openai==0.1.20->-r ../../common_requirements.txt (line 4)) (0.7.0)\n", - "Requirement already satisfied: types-requests<3.0.0.0,>=2.31.0.2 in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from langchainhub==0.1.21->-r ../../common_requirements.txt (line 5)) (2.32.0.20240712)\n", - "Requirement already satisfied: orjson<4.0.0,>=3.9.14 in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from langsmith==0.1.99->-r ../../common_requirements.txt (line 6)) (3.10.7)\n", - "Requirement already satisfied: cassio<0.2.0,>=0.1.7 
in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from ragstack-ai-knowledge-store<0.3,>=0.2.1->-r ../../common_requirements.txt (line 7)) (0.1.8)\n", - "INFO: pip is looking at multiple versions of langchain-experimental to determine which version is compatible with other requirements. This could take a while.\n", - "Collecting langchain_experimental (from -r requirements.txt (line 3))\n", - " Using cached langchain_experimental-0.3.1.post1-py3-none-any.whl.metadata (1.7 kB)\n", - " Using cached langchain_experimental-0.3.1-py3-none-any.whl.metadata (1.7 kB)\n", - " Using cached langchain_experimental-0.3.0-py3-none-any.whl.metadata (1.7 kB)\n", - " Using cached langchain_experimental-0.0.65-py3-none-any.whl.metadata (1.7 kB)\n", - "Collecting pytz (from neo4j->-r requirements.txt (line 4))\n", - " Downloading pytz-2024.2-py2.py3-none-any.whl.metadata (22 kB)\n", - "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community==0.2.11->-r ../../common_requirements.txt (line 3)) (2.3.5)\n", - "Requirement already satisfied: aiosignal>=1.1.2 in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community==0.2.11->-r ../../common_requirements.txt (line 3)) (1.3.1)\n", - "Requirement already satisfied: attrs>=17.3.0 in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community==0.2.11->-r ../../common_requirements.txt (line 3)) (24.2.0)\n", - "Requirement already satisfied: frozenlist>=1.1.1 in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community==0.2.11->-r ../../common_requirements.txt (line 3)) (1.4.1)\n", - "Requirement already satisfied: 
multidict<7.0,>=4.5 in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community==0.2.11->-r ../../common_requirements.txt (line 3)) (6.0.5)\n", - "Requirement already satisfied: yarl<2.0,>=1.0 in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community==0.2.11->-r ../../common_requirements.txt (line 3)) (1.9.4)\n", - "Requirement already satisfied: cassandra-driver<4.0.0,>=3.28.0 in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from cassio<0.2.0,>=0.1.7->ragstack-ai-knowledge-store<0.3,>=0.2.1->-r ../../common_requirements.txt (line 7)) (3.29.1)\n", - "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from dataclasses-json<0.7,>=0.5.7->langchain-community==0.2.11->-r ../../common_requirements.txt (line 3)) (3.21.3)\n", - "Requirement already satisfied: typing-inspect<1,>=0.4.0 in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from dataclasses-json<0.7,>=0.5.7->langchain-community==0.2.11->-r ../../common_requirements.txt (line 3)) (0.9.0)\n", - "Requirement already satisfied: jsonpointer>=1.9 in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from jsonpatch<2.0,>=1.33->langchain-core==0.2.27->-r ../../common_requirements.txt (line 2)) (3.0.0)\n", - "Requirement already satisfied: langchain-text-splitters<0.3.0,>=0.2.0 in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from langchain<0.3.0,>=0.2.12->langchain-community==0.2.11->-r ../../common_requirements.txt (line 3)) (0.2.2)\n", - "Requirement already satisfied: anyio<5,>=3.5.0 in 
/Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from openai<2.0.0,>=1.32.0->langchain-openai==0.1.20->-r ../../common_requirements.txt (line 4)) (4.4.0)\n", - "Requirement already satisfied: distro<2,>=1.7.0 in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from openai<2.0.0,>=1.32.0->langchain-openai==0.1.20->-r ../../common_requirements.txt (line 4)) (1.9.0)\n", - "Requirement already satisfied: httpx<1,>=0.23.0 in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from openai<2.0.0,>=1.32.0->langchain-openai==0.1.20->-r ../../common_requirements.txt (line 4)) (0.27.0)\n", - "Requirement already satisfied: jiter<1,>=0.4.0 in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from openai<2.0.0,>=1.32.0->langchain-openai==0.1.20->-r ../../common_requirements.txt (line 4)) (0.5.0)\n", - "Requirement already satisfied: sniffio in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from openai<2.0.0,>=1.32.0->langchain-openai==0.1.20->-r ../../common_requirements.txt (line 4)) (1.3.1)\n", - "Requirement already satisfied: annotated-types>=0.4.0 in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from pydantic<3,>=1->langchain-core==0.2.27->-r ../../common_requirements.txt (line 2)) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.20.1 in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from pydantic<3,>=1->langchain-core==0.2.27->-r ../../common_requirements.txt (line 2)) (2.20.1)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from requests<3,>=2->langchain-community==0.2.11->-r ../../common_requirements.txt (line 3)) (3.3.2)\n", - "Requirement 
already satisfied: idna<4,>=2.5 in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from requests<3,>=2->langchain-community==0.2.11->-r ../../common_requirements.txt (line 3)) (3.7)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from requests<3,>=2->langchain-community==0.2.11->-r ../../common_requirements.txt (line 3)) (2.2.2)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from requests<3,>=2->langchain-community==0.2.11->-r ../../common_requirements.txt (line 3)) (2024.7.4)\n", - "Requirement already satisfied: regex>=2022.1.18 in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from tiktoken<1,>=0.7->langchain-openai==0.1.20->-r ../../common_requirements.txt (line 4)) (2024.7.24)\n", - "Requirement already satisfied: geomet<0.3,>=0.1 in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from cassandra-driver<4.0.0,>=3.28.0->cassio<0.2.0,>=0.1.7->ragstack-ai-knowledge-store<0.3,>=0.2.1->-r ../../common_requirements.txt (line 7)) (0.2.1.post1)\n", - "Requirement already satisfied: httpcore==1.* in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from httpx<1,>=0.23.0->openai<2.0.0,>=1.32.0->langchain-openai==0.1.20->-r ../../common_requirements.txt (line 4)) (1.0.5)\n", - "Requirement already satisfied: h11<0.15,>=0.13 in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->openai<2.0.0,>=1.32.0->langchain-openai==0.1.20->-r ../../common_requirements.txt (line 4)) (0.14.0)\n", - "Requirement already satisfied: mypy-extensions>=0.3.0 in 
/Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain-community==0.2.11->-r ../../common_requirements.txt (line 3)) (1.0.0)\n", - "Requirement already satisfied: click in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from geomet<0.3,>=0.1->cassandra-driver<4.0.0,>=3.28.0->cassio<0.2.0,>=0.1.7->ragstack-ai-knowledge-store<0.3,>=0.2.1->-r ../../common_requirements.txt (line 7)) (8.1.7)\n", - "Requirement already satisfied: six in /Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages (from geomet<0.3,>=0.1->cassandra-driver<4.0.0,>=3.28.0->cassio<0.2.0,>=0.1.7->ragstack-ai-knowledge-store<0.3,>=0.2.1->-r ../../common_requirements.txt (line 7)) (1.16.0)\n", - "Downloading neo4j-5.25.0-py3-none-any.whl (296 kB)\n", - "Downloading pytz-2024.2-py2.py3-none-any.whl (508 kB)\n", - "Installing collected packages: pytz, neo4j\n", - "Successfully installed neo4j-5.25.0 pytz-2024.2\n", - "Note: you may need to restart the kernel to use updated packages.\n" - ] - } - ], + "outputs": [], "source": [ "#@ Install modules\n", "%pip install -U -r requirements.txt" @@ -126,7 +45,25 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "File '../../datasets/wikimultihop/para_with_hyperlink.zip' already exists, skipping download.\n" + ] + } + ], + "source": [ + "from utils import download_file\n", + "download_file(\"https://www.dropbox.com/s/wlhw26kik59wbh8/para_with_hyperlink.zip?dl=1\", \"../../datasets/wikimultihop/para_with_hyperlink.zip\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -148,15 +85,15 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 4, "metadata": {}, "outputs": [ { 
"name": "stdout", "output_type": "stream", "text": [ - "Loaded (but NOT written) 100 in 405.93s\n", - "OpenAI stats: prompt tokens 116180, completion tokens 27081, total cost 0.9871149999999999\n" + "Loaded (but NOT written) 100 in 477.67s\n", + "OpenAI stats: prompt tokens 116180, completion tokens 26145, total cost 0.0\n" ] } ], @@ -183,16 +120,33 @@ " print(f\"OpenAI stats: prompt tokens {cb.prompt_tokens}, completion tokens {cb.completion_tokens}, total cost {cb.total_cost}\")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Start a Neo4j Docker instance:\n", + "\n", + "### Unix\n", + "```bash\n", + "docker run -d -p=7474:7474 -p=7687:7687 -e NEO4J_AUTH=neo4j/password -e NEO4J_PLUGINS=\\[\\\"apoc\\\"\\] neo4j\n", + "```\n", + "\n", + "### Powershell\n", + "```bash\n", + "docker run -d -p=7474:7474 -p=7687:7687 -e NEO4J_AUTH=neo4j/password -e NEO4J_PLUGINS='\"[\\\"apoc\\\"]\"' neo4j\n", + "```" + ] + }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Written in 2.39s\n" + "Written in 12.77s\n" ] } ], @@ -203,7 +157,7 @@ "from time import perf_counter\n", "start = perf_counter()\n", "\n", - "entity_centric_store = Neo4jGraph(url=\"bolt://localhost:7687\", username=\"neo4j\", password=\"update-boxer-percent-slang-salad-9579\")\n", + "entity_centric_store = Neo4jGraph(url=\"bolt://localhost:7687\", username=\"neo4j\", password=\"password\")\n", "entity_centric_store.add_graph_documents(graph_documents)\n", "\n", "end = perf_counter()\n", @@ -220,7 +174,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -232,18 +186,9 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 1, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Clearing data...\n", - "Done\n" - ] - } - ], + "outputs": [], 
"source": [ "#@ Empty the table (optional)\n", "if input(\"clear data(y/N): \").lower() == \"y\":\n", @@ -260,7 +205,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -271,13 +216,13 @@ "content_centric_store = CassandraGraphVectorStore(\n", " embedding = OpenAIEmbeddings(),\n", " node_table=TABLE_NAME,\n", - " insert_timeout = 1000.0,\n", + " #insert_timeout = 1000.0,\n", ")" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -308,7 +253,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -316,7 +261,7 @@ "output_type": "stream", "text": [ "Loading entity-centric data...\n", - "Loaded (and written) in 1.43s\n" + "Loaded (and written) 100 in 1.89s\n" ] } ], @@ -351,14 +296,14 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "/Users/benjamin.chambers/code/graph-vectorstore-examples/.venv/lib/python3.11/site-packages/langchain/hub.py:86: DeprecationWarning: The `langchainhub sdk` is deprecated.\n", + "c:\\Users\\phil\\git\\graph-vectorstore-examples\\.venv\\Lib\\site-packages\\langchain\\hub.py:86: DeprecationWarning: The `langchainhub sdk` is deprecated.\n", "Please use the `langsmith sdk` instead:\n", " pip install langsmith\n", "Use the `pull_prompt` method.\n", @@ -390,7 +335,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 25, "metadata": {}, "outputs": [], "source": [ @@ -400,7 +345,7 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 26, "metadata": {}, "outputs": [ { @@ -409,23 +354,51 @@ "text": [ "Entity Centric\n", "--------------\n", - "Question 1: When was 'The Circle' released?\n", + "Question 1: When was 'The Circle' released?\n" + ] + }, + { + "name": "stderr", + "output_type": 
"stream", + "text": [ + "Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.UnknownPropertyKeyWarning} {category: UNRECOGNIZED} {title: The provided property key is not in the database} {description: One of the property names in your query is not available in the database, make sure you didn't misspell it or that the label is available when you run this statement in your application (the missing property name is: title)} {position: line: 2, column: 17, offset: 23} for query: \"cypher\\nMATCH (m:Movie {title: 'The Circle'}) RETURN m.releaseDate\\n\"\n", + "Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.UnknownLabelWarning} {category: UNRECOGNIZED} {title: The provided label is not in the database.} {description: One of the labels in your query is not available in the database, make sure you didn't misspell it or that the label is available when you run this statement in your application (the missing label name is: Movie)} {position: line: 2, column: 10, offset: 16} for query: \"cypher\\nMATCH (m:Movie {title: 'The Circle'}) RETURN m.releaseDate\\n\"\n", + "Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.UnknownPropertyKeyWarning} {category: UNRECOGNIZED} {title: The provided property key is not in the database} {description: One of the property names in your query is not available in the database, make sure you didn't misspell it or that the label is available when you run this statement in your application (the missing property name is: releaseDate)} {position: line: 2, column: 48, offset: 54} for query: \"cypher\\nMATCH (m:Movie {title: 'The Circle'}) RETURN m.releaseDate\\n\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ "{'query': \"When was 'The Circle' released?\", 'result': \"I don't know the answer.\"}\n", "\n", - "Question 2: Where is Urup located?\n", + "Question 2: Where is 
Urup located?\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.UnknownPropertyKeyWarning} {category: UNRECOGNIZED} {title: The provided property key is not in the database} {description: One of the property names in your query is not available in the database, make sure you didn't misspell it or that the label is available when you run this statement in your application (the missing property name is: name)} {position: line: 2, column: 11, offset: 17} for query: \"cypher\\nMATCH (n {name: 'Urup'})-[:LOCATED_IN]->(location)\\nRETURN location\\n\"\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ "{'query': 'Where is Urup located?', 'result': \"I don't know the answer.\"}\n", - "Entity Centric Time in 2.51s\n", - "OpenAI stats: prompt tokens 7324, completion tokens 80, total cost 0.03782\n", + "Entity Centric Time in 5.36s\n", + "OpenAI stats: prompt tokens 582, completion tokens 61, total cost 0.0\n", "\n", "Content Centric\n", "---------------\n", "Question 1: When was 'The Circle' released?\n", - "'The Circle' was released in 1988.\n", + "The Circle was released in 1988.\n", "\n", "Question 2: Where is Urup located?\n", "Urup is located in Badakhshan Province in north-eastern Afghanistan.\n", - "Content Centric Time in 1.88s\n", - "OpenAI stats: prompt tokens 450, completion tokens 26, total cost 0.00264\n" + "Content Centric Time in 1.96s\n", + "OpenAI stats: prompt tokens 450, completion tokens 24, total cost 0.0\n" ] } ], @@ -510,7 +483,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.4" + "version": "3.11.9" } }, "nbformat": 4, diff --git a/utils/__init__.py b/utils/__init__.py index 2839798..3c77643 100644 --- a/utils/__init__.py +++ b/utils/__init__.py @@ -1,5 +1,7 @@ from .env import initialize_environment +from .download import download_file __ALL__ = 
[ "initialize_environment", + "download_file", ] \ No newline at end of file
diff --git a/utils/download.py b/utils/download.py
new file mode 100644
index 0000000..0bb1245
--- /dev/null
+++ b/utils/download.py
@@ -0,0 +1,51 @@
+import os
+import requests
+import sys
+
+def download_file(url, destination_file, timeout=60):
+    """Download ``url`` to ``destination_file`` unless the file already exists.
+
+    Args:
+        url: Address passed to ``requests.get``.
+        destination_file: Path to write to; missing parent directories are
+            created.
+        timeout: Seconds ``requests`` waits for the server before giving up.
+
+    Raises:
+        requests.HTTPError: If the server answers with a 4xx/5xx status.
+    """
+    # Check if the file already exists
+    if os.path.exists(destination_file):
+        print(f"File '{destination_file}' already exists, skipping download.")
+        return
+
+    # Ensure the destination directory exists. dirname is "" for a bare
+    # filename, and os.makedirs("") raises FileNotFoundError.
+    destination_dir = os.path.dirname(destination_file)
+    if destination_dir:
+        os.makedirs(destination_dir, exist_ok=True)
+
+    # Write to a temporary sibling and rename on success, so an interrupted
+    # download never leaves a truncated file that later runs would skip.
+    partial_file = f"{destination_file}.part"
+
+    # Download the file from the given URL
+    print(f"Downloading the file from {url}...")
+    try:
+        with requests.get(url, stream=True, timeout=timeout) as response:
+            # Fail on HTTP errors instead of saving the error page
+            response.raise_for_status()
+
+            # Save the file to the destination
+            with open(partial_file, 'wb') as file:
+                for chunk in response.iter_content(chunk_size=8192):
+                    if chunk:
+                        file.write(chunk)
+        os.replace(partial_file, destination_file)
+    finally:
+        # After a successful os.replace there is nothing left to remove
+        if os.path.exists(partial_file):
+            os.remove(partial_file)
+
+    print(f"File downloaded and saved as {destination_file}")
+