From c0986223aaf62c11522e32ba3a0eafe8c0fd42ca Mon Sep 17 00:00:00 2001 From: mhavey Date: Mon, 22 Jan 2024 15:31:54 -0500 Subject: [PATCH 01/25] rebase --- libs/community/tests/unit_tests/graphs/test_imports.py | 1 + libs/community/tests/unit_tests/graphs/test_neptune_graph.py | 1 + 2 files changed, 2 insertions(+) diff --git a/libs/community/tests/unit_tests/graphs/test_imports.py b/libs/community/tests/unit_tests/graphs/test_imports.py index 653d7d540ba5f..202ecefa24997 100644 --- a/libs/community/tests/unit_tests/graphs/test_imports.py +++ b/libs/community/tests/unit_tests/graphs/test_imports.py @@ -6,6 +6,7 @@ "Neo4jGraph", "NebulaGraph", "NeptuneGraph", + "NeptuneRdfGraph", "KuzuGraph", "HugeGraph", "RdfGraph", diff --git a/libs/community/tests/unit_tests/graphs/test_neptune_graph.py b/libs/community/tests/unit_tests/graphs/test_neptune_graph.py index e3d986f2eb04b..3cc961c483727 100644 --- a/libs/community/tests/unit_tests/graphs/test_neptune_graph.py +++ b/libs/community/tests/unit_tests/graphs/test_neptune_graph.py @@ -1,2 +1,3 @@ def test_import() -> None: from langchain_community.graphs import NeptuneGraph # noqa: F401 + from langchain_community.graphs import NeptuneRdfGraph # noqa: F401 From f6c1fba697f4a5c9c0b78e731741e34f9d8474f8 Mon Sep 17 00:00:00 2001 From: mhavey Date: Mon, 22 Jan 2024 15:32:56 -0500 Subject: [PATCH 02/25] rebase --- .../langchain_community/graphs/__init__.py | 2 + .../graphs/neptune_rdf_graph.py | 237 ++++++++++++++++++ 2 files changed, 239 insertions(+) create mode 100644 libs/community/langchain_community/graphs/neptune_rdf_graph.py diff --git a/libs/community/langchain_community/graphs/__init__.py b/libs/community/langchain_community/graphs/__init__.py index bd15f6465d1b4..c1fc640c4b1e7 100644 --- a/libs/community/langchain_community/graphs/__init__.py +++ b/libs/community/langchain_community/graphs/__init__.py @@ -8,6 +8,7 @@ from langchain_community.graphs.nebula_graph import NebulaGraph from langchain_community.graphs.neo4j_graph import Neo4jGraph from langchain_community.graphs.neptune_graph import NeptuneGraph +from langchain_community.graphs.neptune_rdf_graph import NeptuneRdfGraph from langchain_community.graphs.networkx_graph import NetworkxEntityGraph from langchain_community.graphs.ontotext_graphdb_graph import OntotextGraphDBGraph from langchain_community.graphs.rdf_graph import RdfGraph @@ -19,6 +20,7 @@ "Neo4jGraph", "NebulaGraph", "NeptuneGraph", + "NeptuneRdfGraph", "KuzuGraph", "HugeGraph", "RdfGraph", diff --git a/libs/community/langchain_community/graphs/neptune_rdf_graph.py b/libs/community/langchain_community/graphs/neptune_rdf_graph.py new file mode 100644 index 0000000000000..a0a6a472ddc81 --- /dev/null +++ b/libs/community/langchain_community/graphs/neptune_rdf_graph.py @@ -0,0 +1,237 @@ +import boto3 +import json +import requests +import urllib.parse + +from botocore.auth import SigV4Auth +from botocore.awsrequest import AWSRequest +from botocore.credentials import ReadOnlyCredentials +from types import SimpleNamespace + +from typing import ( + TYPE_CHECKING, + List, + Optional, +) + +CLASS_QUERY = """ +SELECT DISTINCT ?elem ?com +WHERE { + ?instance a ?elem . + OPTIONAL { ?instance rdf:type/rdfs:subClassOf* ?elem } . + #FILTER (isIRI(?elem)) . + OPTIONAL { ?elem rdfs:comment ?com filter (lang(?com) = "en")} +} +""" + +REL_QUERY = """ +SELECT DISTINCT ?elem ?com +WHERE { + ?subj ?elem ?obj . + OPTIONAL { + ?elem rdf:type/rdfs:subPropertyOf* ?proptype . + VALUES ?proptype { rdf:Property owl:DatatypeProperty owl:ObjectProperty } . 
+ } . + OPTIONAL { ?elem rdfs:comment ?com filter (lang(?com) = "en")} +} +""" + +DTPROP_QUERY = """ +SELECT DISTINCT ?elem ?com +WHERE { + ?subj ?elem ?obj . + OPTIONAL { + ?elem rdf:type/rdfs:subPropertyOf* ?proptype . + ?proptype a owl:DatatypeProperty . + } . + OPTIONAL { ?elem rdfs:comment ?com filter (lang(?com) = "en")} +} +""" + +OPROP_QUERY = """ +SELECT DISTINCT ?elem ?com +WHERE { + ?subj ?elem ?obj . + OPTIONAL { + ?elem rdf:type/rdfs:subPropertyOf* ?proptype . + ?proptype a owl:ObjectProperty . + } . + OPTIONAL { ?elem rdfs:comment ?com filter (lang(?com) = "en")} +} +""" + +ELEM_TYPES = { + 'classes': CLASS_QUERY, + 'rels': REL_QUERY, + 'dtprops': DTPROP_QUERY, + 'oprops': OPROP_QUERY +} + +class NeptuneRdfGraph: + """Neptune wrapper for RDF graph operations. + + Args: + query_endpoint: SPARQL endpoint for Neptune + use_iam_auth: boolean indicating IAM auth is enabled in Neptune cluster + region_name: AWS region required if use_iam_auth is True, e.g., us-west-2 + hide_comments: whether to include ontology comments in schema for prompt + + Example: + .. code-block:: python + + graph = NeptuneRdfGraph( + query_endpoint='', + use_iam_auth=False + ) + schema = graph.get_schema() + + OR + graph = NeptuneRdfGraph( + query_endpoint='', + use_iam_auth=False + ) + schema_elem = graph.get_schema_elements() + ... change schema_elements ... + graph.load_from_schema_elements(schema_elem) + schema = graph.get_schema() + + *Security note*: Make sure that the database connection uses credentials + that are narrowly-scoped to only include necessary permissions. + Failure to do so may result in data corruption or loss, since the calling + code may attempt commands that would result in deletion, mutation + of data if appropriately prompted or reading sensitive data if such + data is present in the database. + The best way to guard against such negative outcomes is to (as appropriate) + limit the permissions granted to the credentials used with this tool. + + See https://python.langchain.com/docs/security for more information. + """ + def __init__( + self, + query_endpoint: str, + use_iam_auth: bool = False, + region_name: Optional[str] = None, + hide_comments: bool = False # we introspect comments, but they might bloat the prompt + ) -> None: + self.use_iam_auth = use_iam_auth + self.region_name = region_name + self.query_endpoint = query_endpoint + self.hide_comments = hide_comments + + # Set schema + self.schema = "" + self.schema_elements = {} + self.load_schema() + + @property + def get_schema(self) -> str: + """ + Returns the schema of the graph database. + """ + return self.schema + + @property + def get_schema_elements(self): + return self.schema_elements + + ''' + Run Neptune query. 
+ ''' + def query( + self, + query: str, + ): + session = boto3.Session() + request_data = { + "query": query + } + data = request_data + request_hdr = None + + if self.use_iam_auth: + credentials = session.get_credentials() + credentials = credentials.get_frozen_credentials() + access_key = credentials.access_key + secret_key = credentials.secret_key + service = 'neptune-db' + session_token = credentials.token + params=None + creds = SimpleNamespace( + access_key=access_key, secret_key=secret_key, token=session_token, region=self.region_name) + request = AWSRequest(method='POST', url=self.query_endpoint, data=data, params=params) + SigV4Auth(creds, service, self.region_name).add_auth(request) + request.headers['Content-Type']= 'application/x-www-form-urlencoded' + request_hdr = request.headers + else: + request_hdr = {} + request_hdr['Content-Type']= 'application/x-www-form-urlencoded' + + queryres = requests.request(method='POST', url=self.query_endpoint, headers=request_hdr, data=data) + json_resp = json.loads(queryres.text) + return json_resp + + ''' + This is a public method that allows the user to create schema from their own + schema_elements. The anticipated use is that the user prunes the introspected schema. + ''' + def load_from_schema_elements(self, schema_elements): + + elemstr={} + for elem in ELEM_TYPES: + reslist = [] + for elemrec in self.schema_elements[elem]: + uri = elemrec['uri'] + local = elemrec['local'] + str = f"<{uri}> ({local})" + if self.hide_comments is False: + str = str + f", {comment}" + reslist.append(str) + elemstr[elem] = ", ".join(reslist) + + self.schema = "".join([ + f"In the following, each IRI is followed by the local name and ", + f"optionally its description in parentheses. \n", + f"The graph supports the following node types:\n", elemstr['classes'], + f"The graph supports the following relationships:\n", elemstr['rels'], + f"The graph supports the following OWL object properties, ", elemstr['dtprops'], + "The graph supports the following OWL data properties, ", elemstr['oprops'] + ]) + + ''' + Private method split URI into prefix and local + ''' + @staticmethod + def _get_local_name(iri: str): + if "#" in iri: + toks = iri.split("#") + return [f"{toks[0]}#", toks[-1]] + elif "/" in iri: + toks = iri.split("/") + return [f"{'/'.join(toks[0:len(toks)-1])}/", toks[-1]] + else: + raise ValueError(f"Unexpected IRI '{iri}', contains neither '#' nor '/'.") + + ''' + Query Neptune to introspect schema. 
+ ''' + def load_schema(self) -> None: + self.schema_elements['distinct_prefixes'] = {} + + for elem in ELEM_TYPES: + items = self.query(ELEM_TYPES[elem]) + reslist = [] + for r in items['results']['bindings']: + uri = r['elem']['value'] + toks = self._get_local_name(uri) + elem_record = {'uri': uri, 'local': toks[1]} + if self.hide_comments == False: + elem_record['comment'] = r['com']['value'] if 'com' in r else "" + reslist.append(elem_record) + if not(toks[0] in self.schema_elements['distinct_prefixes']): + self.schema_elements['distinct_prefixes'][toks[0]] = "y" + + self.schema_elements[elem] = reslist + + self.load_from_schema_elements(self.schema_elements) + + \ No newline at end of file From 29242b16ea318aead4162f1cb5f6f35fc9d45198 Mon Sep 17 00:00:00 2001 From: mhavey Date: Mon, 22 Jan 2024 15:36:15 -0500 Subject: [PATCH 03/25] rebase --- libs/langchain/tests/unit_tests/graphs/test_imports.py | 1 + 1 file changed, 1 insertion(+) diff --git a/libs/langchain/tests/unit_tests/graphs/test_imports.py b/libs/langchain/tests/unit_tests/graphs/test_imports.py index 5287c42285533..11393b1d25554 100644 --- a/libs/langchain/tests/unit_tests/graphs/test_imports.py +++ b/libs/langchain/tests/unit_tests/graphs/test_imports.py @@ -7,6 +7,7 @@ "Neo4jGraph", "NebulaGraph", "NeptuneGraph", + "NeptuneRdfGraph", "KuzuGraph", "HugeGraph", "RdfGraph", From 4849e094cac556020a841fff019b1d49085be05b Mon Sep 17 00:00:00 2001 From: mhavey Date: Mon, 22 Jan 2024 15:39:05 -0500 Subject: [PATCH 04/25] rebase --- libs/langchain/langchain/graphs/__init__.py | 1 + libs/langchain/langchain/graphs/neptune_rdf_graph.py | 3 +++ 2 files changed, 4 insertions(+) create mode 100644 libs/langchain/langchain/graphs/neptune_rdf_graph.py diff --git a/libs/langchain/langchain/graphs/__init__.py b/libs/langchain/langchain/graphs/__init__.py index 3189534975ee9..327038082e74e 100644 --- a/libs/langchain/langchain/graphs/__init__.py +++ b/libs/langchain/langchain/graphs/__init__.py @@ -33,6 +33,7 @@ def __getattr__(name: str) -> Any: "KuzuGraph", "HugeGraph", "RdfGraph", + "NeptuneRdfGraph", "ArangoGraph", "FalkorDBGraph", ] diff --git a/libs/langchain/langchain/graphs/neptune_rdf_graph.py b/libs/langchain/langchain/graphs/neptune_rdf_graph.py new file mode 100644 index 0000000000000..50e62577cfb65 --- /dev/null +++ b/libs/langchain/langchain/graphs/neptune_rdf_graph.py @@ -0,0 +1,3 @@ +from langchain_community.graphs.neptune_rdf_graph import NeptuneRdfGraph + +__all__ = ["NeptuneRdfGraph"] From 33991bddf078f9e636cf194b714a329b0706641c Mon Sep 17 00:00:00 2001 From: mhavey Date: Mon, 22 Jan 2024 15:56:22 -0500 Subject: [PATCH 05/25] rebase --- .../chains/graph_qa/neptune_sparql.py | 173 ++++++++++++++++++ 1 file changed, 173 insertions(+) create mode 100644 libs/langchain/langchain/chains/graph_qa/neptune_sparql.py diff --git a/libs/langchain/langchain/chains/graph_qa/neptune_sparql.py b/libs/langchain/langchain/chains/graph_qa/neptune_sparql.py new file mode 100644 index 0000000000000..ee481d43d0cf1 --- /dev/null +++ b/libs/langchain/langchain/chains/graph_qa/neptune_sparql.py @@ -0,0 +1,173 @@ +""" +Question answering over an RDF or OWL graph using SPARQL. 
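+This chain targets Amazon Neptune and runs the generated SPARQL through the
+NeptuneRdfGraph wrapper.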
+""" +from __future__ import annotations + +from typing import Any, Dict, List, Optional + +from langchain_core.language_models import BaseLanguageModel +from langchain_core.prompts.base import BasePromptTemplate +from langchain_core.pydantic_v1 import Field + +from langchain.callbacks.manager import CallbackManagerForChainRun +from langchain.chains.base import Chain +from langchain.chains.graph_qa.prompts import ( + SPARQL_GENERATION_SELECT_PROMPT, + SPARQL_QA_PROMPT, +) +from langchain.chains.llm import LLMChain +from langchain_community.graphs import NeptuneRdfGraph + +from langchain_core.prompts.prompt import PromptTemplate + +INTERMEDIATE_STEPS_KEY = "intermediate_steps" + +XSPARQL_GENERATION_SELECT_TEMPLATE = """Task: Generate a SPARQL SELECT statement for querying a graph database. +For instance, to find all email addresses of John Doe, the following query in backticks would be suitable: +``` +PREFIX foaf: +SELECT ?email +WHERE {{ + ?person foaf:name "John Doe" . + ?person foaf:mbox ?email . +}} +``` +Instructions: +Use only the node types and properties provided in the schema. +Do not use any node types and properties that are not explicitly provided. +Include all necessary prefixes. + +Examples: + +Schema: +{schema} +Note: Be as concise as possible. +Do not include any explanations or apologies in your responses. +Do not respond to any questions that ask for anything else than for you to construct a SPARQL query. +Do not include any text except the SPARQL query generated. + +The question is: +{prompt}""" + +XSPARQL_GENERATION_SELECT_PROMPT = PromptTemplate( + input_variables=["schema", "prompt"], template=XSPARQL_GENERATION_SELECT_TEMPLATE +) + +def extract_sparql(query: str) -> str: + query = query.strip() + querytoks = query.split("```") + if len(querytoks) == 3: + query = querytoks[1] + + if query.startswith("sparql"): + query = query[6:] + elif query.startswith("") and query.endswith(""): + query= query[8:-9] + return query + +class NeptuneSparqlQAChain(Chain): + """ + Question-answering against an RDF or OWL graph by generating SPARQL statements. 
+ """ + + graph: NeptuneRdfGraph = Field(exclude=True) + sparql_generation_select_chain: LLMChain + qa_chain: LLMChain + input_key: str = "query" #: :meta private: + output_key: str = "result" #: :meta private: + top_k: int = 10 + return_intermediate_steps: bool = False + """Whether or not to return the intermediate steps along with the final answer.""" + return_direct: bool = False + """Whether or not to return the result of querying the graph directly.""" + extra_instructions: Optional[str] = None + """Extra instructions by the appended to the query generation prompt.""" + + @property + def input_keys(self) -> List[str]: + return [self.input_key] + + @property + def output_keys(self) -> List[str]: + _output_keys = [self.output_key] + return _output_keys + + @classmethod + def from_llm( + cls, + llm: BaseLanguageModel, + *, + qa_prompt: BasePromptTemplate = SPARQL_QA_PROMPT, + sparql_select_prompt: BasePromptTemplate = XSPARQL_GENERATION_SELECT_PROMPT, + examples: Optional[str] = None, + **kwargs: Any, + ) -> NeptuneSparqlQAChain: + """Initialize from LLM.""" + qa_chain = LLMChain(llm=llm, prompt=qa_prompt) + template_to_use = XSPARQL_GENERATION_SELECT_TEMPLATE + if not(examples is None): + template_to_use = template_to_use.replace( + "Examples:", "Examples: " + examples) + sparql_select_prompt = PromptTemplate( + input_variables=["schema", "prompt"], template=template_to_use) + sparql_generation_select_chain = LLMChain(llm=llm, prompt=sparql_select_prompt) + + return cls( + qa_chain=qa_chain, + sparql_generation_select_chain=sparql_generation_select_chain, + examples=examples, + **kwargs, + ) + + def _call( + self, + inputs: Dict[str, Any], + run_manager: Optional[CallbackManagerForChainRun] = None, + ) -> Dict[str, str]: + """ + Generate SPARQL query, use it to retrieve a response from the gdb and answer + the question. 
+ """ + _run_manager = run_manager or CallbackManagerForChainRun.get_noop_manager() + callbacks = _run_manager.get_child() + prompt = inputs[self.input_key] + + intermediate_steps: List = [] + + generated_sparql = self.sparql_generation_select_chain.run( + {"prompt": prompt, "schema": self.graph.get_schema}, callbacks=callbacks + ) + + # Extract SPARQL + generated_sparql = extract_sparql(generated_sparql) + + _run_manager.on_text("Generated SPARQL:", end="\n", verbose=self.verbose) + _run_manager.on_text( + generated_sparql, color="green", end="\n", verbose=self.verbose + ) + + intermediate_steps.append({"query": generated_sparql}) + + context = self.graph.query(generated_sparql) + + if self.return_direct: + final_result = context + else: + _run_manager.on_text("Full Context:", end="\n", verbose=self.verbose) + _run_manager.on_text( + str(context), color="green", end="\n", verbose=self.verbose + ) + + intermediate_steps.append({"context": context}) + + result = self.qa_chain( + {"prompt": prompt, "context": context}, + callbacks=callbacks, + ) + final_result = result[self.qa_chain.output_key] + + chain_result: Dict[str, Any] = {self.output_key: final_result} + if self.return_intermediate_steps: + chain_result[INTERMEDIATE_STEPS_KEY] = intermediate_steps + + return chain_result From dd1bc24103946c08388b6fbe3ae6c053c063b57b Mon Sep 17 00:00:00 2001 From: mhavey Date: Mon, 22 Jan 2024 16:25:16 -0500 Subject: [PATCH 06/25] rebase --- .../use_cases/graph/neptune_sparql_qa.ipynb | 367 ++++++++++++++++++ 1 file changed, 367 insertions(+) create mode 100644 docs/docs/use_cases/graph/neptune_sparql_qa.ipynb diff --git a/docs/docs/use_cases/graph/neptune_sparql_qa.ipynb b/docs/docs/use_cases/graph/neptune_sparql_qa.ipynb new file mode 100644 index 0000000000000..07fd533ae352e --- /dev/null +++ b/docs/docs/use_cases/graph/neptune_sparql_qa.ipynb @@ -0,0 +1,367 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# SPARQL Langchain with Amazon Neptune\n", + "\n", + "This notebook shows use of LLM to query RDF graph in Amazon Neptune. It uses Langchain.\n", + "\n", + "* Create RDFGraph object that connects to Neptune and introspects its schema\n", + "* Create chain using LLM and RDFGraph. LLM used here is Anthropic Claude 2 via Bedrock\n", + "* Ask questions to LLM. LLM consults schema and calls RDFGraph to execute SPARQL query\n", + "\n", + "Requirements:\n", + "- Neptune 1.2.x cluster accessible from this notebook\n", + "- Python 3.9 or higher kernet\n", + "- For Bedrock access, ensure IAM role has\n", + "\n", + "{\n", + " \"Action\": [\n", + " \"bedrock:ListFoundationModels\",\n", + " \"bedrock:InvokeModel\"\n", + " ],\n", + " \"Resource\": \"*\",\n", + " \"Effect\": \"Allow\"\n", + "}\n", + "\n", + "- S3 bucket for staging in same account/region as Neptune" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Seed W3C organizational data\n", + "W3C org ontology plus some instances. \n", + "\n", + "You will need an S3 bucket in the same region and account. Set STAGE_BUCKET to name of that bucket." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "STAGE_BUCKET=''" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%bash -s \"$STAGE_BUCKET\"\n", + "\n", + "rm -rf data\n", + "mkdir -p data\n", + "cd data\n", + "echo getting org ontology and sample org instances\n", + "wget http://www.w3.org/ns/org.ttl \n", + "wget https://raw.githubusercontent.com/aws-samples/amazon-neptune-ontology-example-blog/main/data/example_org.ttl \n", + "\n", + "echo Copying org ttl to S3\n", + "aws s3 cp org.ttl s3://$1/org.ttl\n", + "aws s3 cp example_org.ttl s3://$1/example_org.ttl\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Bulk-load the org ttl - both ontology and instances" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%load -s s3://{STAGE_BUCKET} -f turtle --store-to loadres --run" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%load_status {loadres['payload']['loadId']} --errors --details" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Setup Chain" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "EXAMPLES=\"\"\"\n", + "\n", + "\n", + "Find organizations.\n", + "\n", + "\n", + "\n", + "PREFIX rdf: \n", + "PREFIX rdfs: \n", + "PREFIX org: \n", + "\n", + "select ?org ?orgName where {{\n", + " ?org rdfs:label ?orgName .\n", + "}} \n", + "\n", + "\n", + "\n", + "Find sites of an organization\n", + "\n", + "\n", + "\n", + "PREFIX rdf: \n", + "PREFIX rdfs: \n", + "PREFIX org: \n", + "\n", + "select ?org ?orgName ?siteName where {{\n", + " ?org rdfs:label ?orgName .\n", + " ?org org:hasSite/rdfs:label ?siteName . \n", + "}} \n", + "\n", + "\n", + "\n", + "Find suborganizations of an organization\n", + "\n", + "\n", + "\n", + "PREFIX rdf: \n", + "PREFIX rdfs: \n", + "PREFIX org: \n", + "\n", + "select ?org ?orgName ?subName where {{\n", + " ?org rdfs:label ?orgName .\n", + " ?org org:hasSubOrganization/rdfs:label ?subName .\n", + "}} \n", + "\n", + "\n", + "\n", + "Find organizational units of an organization\n", + "\n", + "\n", + "\n", + "PREFIX rdf: \n", + "PREFIX rdfs: \n", + "PREFIX org: \n", + "\n", + "select ?org ?orgName ?unitName where {{\n", + " ?org rdfs:label ?orgName .\n", + " ?org org:hasUnit/rdfs:label ?unitName . \n", + "}} \n", + "\n", + "\n", + "\n", + "Find members of an organization. Also find their manager, or the member they report to.\n", + "\n", + "\n", + "\n", + "PREFIX org: \n", + "PREFIX foaf: \n", + "\n", + "select * where {{\n", + " ?person rdf:type foaf:Person .\n", + " ?person org:memberOf ?org .\n", + " OPTIONAL {{ ?person foaf:firstName ?firstName . }}\n", + " OPTIONAL {{ ?person foaf:family_name ?lastName . 
}}\n", + " OPTIONAL {{ ?person org:reportsTo ??manager }} .\n", + "}}\n", + "\n", + "\n", + "\n", + "\n", + "Find change events, such as mergers and acquisitions, of an organization\n", + "\n", + "\n", + "\n", + "PREFIX org: \n", + "\n", + "select ?event ?prop ?obj where {{\n", + " ?org rdfs:label ?orgName .\n", + " ?event rdf:type org:ChangeEvent .\n", + " ?event org:originalOrganization ?origOrg .\n", + " ?event org:resultingOrganization ?resultingOrg .\n", + "}}\n", + "\n", + "\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import boto3\n", + "from langchain.chains.graph_qa.neptune_sparql import NeptuneSparqlQAChain\n", + "from langchain_community.graphs import NeptuneRdfGraph\n", + "from langchain.chat_models import BedrockChat\n", + "from langchain.llms import Bedrock\n", + "\n", + "nep_host = \"\"\n", + "nep_port = \"\"\n", + "nep_region = 'us-east-1' # enter your region\n", + "nep_ep = f\"https://{nep_host}:{nep_port}/sparql\"\n", + "\n", + "graph = None\n", + "graph = NeptuneRdfGraph(\n", + " query_endpoint=nep_ep,\n", + " use_iam_auth=True,\n", + " region_name=nep_region,\n", + " hide_comments=True\n", + ")\n", + "\n", + "elems = graph.get_schema_elements\n", + "# change elems ...\n", + "graph.load_from_schema_elements(elems)\n", + "\n", + "bedrock_client = boto3.client('bedrock-runtime')\n", + "llm = BedrockChat(\n", + " model_id = \"anthropic.claude-v2\",\n", + " client = bedrock_client\n", + ")\n", + "\n", + "chain = NeptuneSparqlQAChain.from_llm(\n", + " llm=llm, graph=graph, examples=EXAMPLES, verbose=True, top_K=10, return_intermediate_steps=True, return_direct=False)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "graph.get_schema" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "graph.get_schema_elements" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "chain" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Ask questions\n", + "Depends on the data we ingested above" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "chain.run('''How many organizations are in the graph''')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "chain.run('''Are there any mergers or acquisitions''')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "chain.run('''Find organizations''')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "chain.run('''Find sites of MegaSystems or MegaFinancial''')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "chain.run('''Find a member who is manager of one or more members.''')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "chain.run('''Find five members and who their manager is.''')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "chain.run('''Find org units or suborganizations of The Mega Group. 
What are the sites of those units?''')" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.8" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 8d78d3e2669f4e7360ca9b32540f20f5912a4f54 Mon Sep 17 00:00:00 2001 From: mhavey Date: Fri, 26 Jan 2024 07:02:42 -0500 Subject: [PATCH 07/25] Update neptune_sparql.py updated doc --- .../chains/graph_qa/neptune_sparql.py | 24 +++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/libs/langchain/langchain/chains/graph_qa/neptune_sparql.py b/libs/langchain/langchain/chains/graph_qa/neptune_sparql.py index ee481d43d0cf1..cb17b7f8b67cc 100644 --- a/libs/langchain/langchain/chains/graph_qa/neptune_sparql.py +++ b/libs/langchain/langchain/chains/graph_qa/neptune_sparql.py @@ -66,8 +66,28 @@ def extract_sparql(query: str) -> str: return query class NeptuneSparqlQAChain(Chain): - """ - Question-answering against an RDF or OWL graph by generating SPARQL statements. + """Chain for question-answering against a Neptune graph + by generating SPARQL statements. + + *Security note*: Make sure that the database connection uses credentials + that are narrowly-scoped to only include necessary permissions. + Failure to do so may result in data corruption or loss, since the calling + code may attempt commands that would result in deletion, mutation + of data if appropriately prompted or reading sensitive data if such + data is present in the database. + The best way to guard against such negative outcomes is to (as appropriate) + limit the permissions granted to the credentials used with this tool. + + See https://python.langchain.com/docs/security for more information. + + Example: + .. 
code-block:: python + + chain = NeptuneSparqlQAChain.from_llm( + llm=llm, + graph=graph + ) + response = chain.invoke(query) """ graph: NeptuneRdfGraph = Field(exclude=True) From 442f10b5c167cc0fe1170b1846850fb3113027dc Mon Sep 17 00:00:00 2001 From: mhavey Date: Fri, 26 Jan 2024 07:05:14 -0500 Subject: [PATCH 08/25] Update neptune_sparql_qa.ipynb use chain.invoke --- docs/docs/use_cases/graph/neptune_sparql_qa.ipynb | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/docs/use_cases/graph/neptune_sparql_qa.ipynb b/docs/docs/use_cases/graph/neptune_sparql_qa.ipynb index 07fd533ae352e..1156bd7cdeaa0 100644 --- a/docs/docs/use_cases/graph/neptune_sparql_qa.ipynb +++ b/docs/docs/use_cases/graph/neptune_sparql_qa.ipynb @@ -283,7 +283,7 @@ "metadata": {}, "outputs": [], "source": [ - "chain.run('''How many organizations are in the graph''')" + "chain.invoke('''How many organizations are in the graph''')" ] }, { @@ -292,7 +292,7 @@ "metadata": {}, "outputs": [], "source": [ - "chain.run('''Are there any mergers or acquisitions''')" + "chain.invoke('''Are there any mergers or acquisitions''')" ] }, { @@ -301,7 +301,7 @@ "metadata": {}, "outputs": [], "source": [ - "chain.run('''Find organizations''')" + "chain.invoke('''Find organizations''')" ] }, { @@ -310,7 +310,7 @@ "metadata": {}, "outputs": [], "source": [ - "chain.run('''Find sites of MegaSystems or MegaFinancial''')" + "chain.invoke('''Find sites of MegaSystems or MegaFinancial''')" ] }, { @@ -320,7 +320,7 @@ "outputs": [], "source": [ "%%time\n", - "chain.run('''Find a member who is manager of one or more members.''')" + "chain.invoke('''Find a member who is manager of one or more members.''')" ] }, { @@ -329,7 +329,7 @@ "metadata": {}, "outputs": [], "source": [ - "chain.run('''Find five members and who their manager is.''')" + "chain.invoke('''Find five members and who their manager is.''')" ] }, { @@ -339,7 +339,7 @@ "outputs": [], "source": [ "%%time\n", - "chain.run('''Find org units or suborganizations of The Mega Group. What are the sites of those units?''')" + "chain.invoke('''Find org units or suborganizations of The Mega Group. What are the sites of those units?''')" ] } ], From 63a16c6fbb2006d013e1a64f536c8eda80b7d185 Mon Sep 17 00:00:00 2001 From: Piyush Jain Date: Fri, 26 Jan 2024 14:54:50 -0800 Subject: [PATCH 09/25] Minor update to kickstart CI. --- .../langchain_community/graphs/neptune_rdf_graph.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/libs/community/langchain_community/graphs/neptune_rdf_graph.py b/libs/community/langchain_community/graphs/neptune_rdf_graph.py index a0a6a472ddc81..414a254178761 100644 --- a/libs/community/langchain_community/graphs/neptune_rdf_graph.py +++ b/libs/community/langchain_community/graphs/neptune_rdf_graph.py @@ -170,11 +170,12 @@ def query( json_resp = json.loads(queryres.text) return json_resp - ''' - This is a public method that allows the user to create schema from their own - schema_elements. The anticipated use is that the user prunes the introspected schema. - ''' + def load_from_schema_elements(self, schema_elements): + """ + Create schema from schema_elements. Helpful in cases where + introspected schema needs pruning. + """ elemstr={} for elem in ELEM_TYPES: From 5be36367bfe1319300de1c865dc547878869193f Mon Sep 17 00:00:00 2001 From: Piyush Jain Date: Mon, 29 Jan 2024 13:26:58 -0800 Subject: [PATCH 10/25] Fixed lint errors. 
--- .../graphs/neptune_rdf_graph.py | 125 +++++++++--------- .../unit_tests/graphs/test_neptune_graph.py | 6 +- 2 files changed, 69 insertions(+), 62 deletions(-) diff --git a/libs/community/langchain_community/graphs/neptune_rdf_graph.py b/libs/community/langchain_community/graphs/neptune_rdf_graph.py index 414a254178761..2ea1b673f5fa1 100644 --- a/libs/community/langchain_community/graphs/neptune_rdf_graph.py +++ b/libs/community/langchain_community/graphs/neptune_rdf_graph.py @@ -1,18 +1,11 @@ -import boto3 import json -import requests -import urllib.parse +from types import SimpleNamespace +from typing import Optional +import boto3 +import requests from botocore.auth import SigV4Auth from botocore.awsrequest import AWSRequest -from botocore.credentials import ReadOnlyCredentials -from types import SimpleNamespace - -from typing import ( - TYPE_CHECKING, - List, - Optional, -) CLASS_QUERY = """ SELECT DISTINCT ?elem ?com @@ -61,12 +54,13 @@ """ ELEM_TYPES = { - 'classes': CLASS_QUERY, - 'rels': REL_QUERY, - 'dtprops': DTPROP_QUERY, - 'oprops': OPROP_QUERY + "classes": CLASS_QUERY, + "rels": REL_QUERY, + "dtprops": DTPROP_QUERY, + "oprops": OPROP_QUERY, } + class NeptuneRdfGraph: """Neptune wrapper for RDF graph operations. @@ -91,7 +85,7 @@ class NeptuneRdfGraph: use_iam_auth=False ) schema_elem = graph.get_schema_elements() - ... change schema_elements ... + ... change schema_elements ... graph.load_from_schema_elements(schema_elem) schema = graph.get_schema() @@ -106,12 +100,13 @@ class NeptuneRdfGraph: See https://python.langchain.com/docs/security for more information. """ + def __init__( self, query_endpoint: str, use_iam_auth: bool = False, region_name: Optional[str] = None, - hide_comments: bool = False # we introspect comments, but they might bloat the prompt + hide_comments: bool = False, ) -> None: self.use_iam_auth = use_iam_auth self.region_name = region_name @@ -122,7 +117,7 @@ def __init__( self.schema = "" self.schema_elements = {} self.load_schema() - + @property def get_schema(self) -> str: """ @@ -133,18 +128,17 @@ def get_schema(self) -> str: @property def get_schema_elements(self): return self.schema_elements - - ''' + + """ Run Neptune query. 
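+    Returns the parsed JSON response (SPARQL query results format, with the
+    bindings available under 'results').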
- ''' + """ + def query( self, query: str, ): session = boto3.Session() - request_data = { - "query": query - } + request_data = {"query": query} data = request_data request_hdr = None @@ -153,54 +147,66 @@ def query( credentials = credentials.get_frozen_credentials() access_key = credentials.access_key secret_key = credentials.secret_key - service = 'neptune-db' + service = "neptune-db" session_token = credentials.token - params=None + params = None creds = SimpleNamespace( - access_key=access_key, secret_key=secret_key, token=session_token, region=self.region_name) - request = AWSRequest(method='POST', url=self.query_endpoint, data=data, params=params) + access_key=access_key, + secret_key=secret_key, + token=session_token, + region=self.region_name, + ) + request = AWSRequest( + method="POST", url=self.query_endpoint, data=data, params=params + ) SigV4Auth(creds, service, self.region_name).add_auth(request) - request.headers['Content-Type']= 'application/x-www-form-urlencoded' + request.headers["Content-Type"] = "application/x-www-form-urlencoded" request_hdr = request.headers else: request_hdr = {} - request_hdr['Content-Type']= 'application/x-www-form-urlencoded' + request_hdr["Content-Type"] = "application/x-www-form-urlencoded" - queryres = requests.request(method='POST', url=self.query_endpoint, headers=request_hdr, data=data) + queryres = requests.request( + method="POST", url=self.query_endpoint, headers=request_hdr, data=data + ) json_resp = json.loads(queryres.text) return json_resp - def load_from_schema_elements(self, schema_elements): """ Create schema from schema_elements. Helpful in cases where introspected schema needs pruning. """ - elemstr={} + elemstr = {} for elem in ELEM_TYPES: reslist = [] for elemrec in self.schema_elements[elem]: - uri = elemrec['uri'] - local = elemrec['local'] + uri = elemrec["uri"] + local = elemrec["local"] str = f"<{uri}> ({local})" if self.hide_comments is False: - str = str + f", {comment}" + str = str + f", {elemrec['comment']}" reslist.append(str) elemstr[elem] = ", ".join(reslist) - self.schema = "".join([ - f"In the following, each IRI is followed by the local name and ", - f"optionally its description in parentheses. \n", - f"The graph supports the following node types:\n", elemstr['classes'], - f"The graph supports the following relationships:\n", elemstr['rels'], - f"The graph supports the following OWL object properties, ", elemstr['dtprops'], - "The graph supports the following OWL data properties, ", elemstr['oprops'] - ]) + self.schema = ( + "In the following, each IRI is followed by the local name and " + "optionally its description in parentheses. \n" + "The graph supports the following node types:\n" + f"{elemstr['classes']}" + "The graph supports the following relationships:\n" + f"{elemstr['rels']}" + "The graph supports the following OWL object properties, " + f"{elemstr['dtprops']}" + "The graph supports the following OWL data properties, " + f"{elemstr['oprops']}" + ) - ''' + """ Private method split URI into prefix and local - ''' + """ + @staticmethod def _get_local_name(iri: str): if "#" in iri: @@ -211,28 +217,27 @@ def _get_local_name(iri: str): return [f"{'/'.join(toks[0:len(toks)-1])}/", toks[-1]] else: raise ValueError(f"Unexpected IRI '{iri}', contains neither '#' nor '/'.") - - ''' + + """ Query Neptune to introspect schema. 
- ''' + """ + def load_schema(self) -> None: - self.schema_elements['distinct_prefixes'] = {} + self.schema_elements["distinct_prefixes"] = {} for elem in ELEM_TYPES: items = self.query(ELEM_TYPES[elem]) reslist = [] - for r in items['results']['bindings']: - uri = r['elem']['value'] + for r in items["results"]["bindings"]: + uri = r["elem"]["value"] toks = self._get_local_name(uri) - elem_record = {'uri': uri, 'local': toks[1]} - if self.hide_comments == False: - elem_record['comment'] = r['com']['value'] if 'com' in r else "" + elem_record = {"uri": uri, "local": toks[1]} + if not self.hide_comments: + elem_record["comment"] = r["com"]["value"] if "com" in r else "" reslist.append(elem_record) - if not(toks[0] in self.schema_elements['distinct_prefixes']): - self.schema_elements['distinct_prefixes'][toks[0]] = "y" - + if toks[0] not in self.schema_elements["distinct_prefixes"]: + self.schema_elements["distinct_prefixes"][toks[0]] = "y" + self.schema_elements[elem] = reslist self.load_from_schema_elements(self.schema_elements) - - \ No newline at end of file diff --git a/libs/community/tests/unit_tests/graphs/test_neptune_graph.py b/libs/community/tests/unit_tests/graphs/test_neptune_graph.py index 3cc961c483727..6e714a41665a7 100644 --- a/libs/community/tests/unit_tests/graphs/test_neptune_graph.py +++ b/libs/community/tests/unit_tests/graphs/test_neptune_graph.py @@ -1,3 +1,5 @@ def test_import() -> None: - from langchain_community.graphs import NeptuneGraph # noqa: F401 - from langchain_community.graphs import NeptuneRdfGraph # noqa: F401 + from langchain_community.graphs import ( + NeptuneGraph, # noqa: F401 + NeptuneRdfGraph, # noqa: F401 + ) From 39ede91721ed0e54b046c66af1fef606a8cd7b38 Mon Sep 17 00:00:00 2001 From: mhavey Date: Wed, 31 Jan 2024 09:25:36 -0500 Subject: [PATCH 11/25] initialize session only if IAM auth, and up front --- .../langchain_community/graphs/neptune_rdf_graph.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/libs/community/langchain_community/graphs/neptune_rdf_graph.py b/libs/community/langchain_community/graphs/neptune_rdf_graph.py index 2ea1b673f5fa1..76e3694c181d8 100644 --- a/libs/community/langchain_community/graphs/neptune_rdf_graph.py +++ b/libs/community/langchain_community/graphs/neptune_rdf_graph.py @@ -113,6 +113,8 @@ def __init__( self.query_endpoint = query_endpoint self.hide_comments = hide_comments + self.session = boto3.Session() if self.use_iam_auth else None + # Set schema self.schema = "" self.schema_elements = {} @@ -137,13 +139,12 @@ def query( self, query: str, ): - session = boto3.Session() request_data = {"query": query} data = request_data request_hdr = None if self.use_iam_auth: - credentials = session.get_credentials() + credentials = self.session.get_credentials() credentials = credentials.get_frozen_credentials() access_key = credentials.access_key secret_key = credentials.secret_key From 184cce3b28157c7d6cfdb6e73555144de615816c Mon Sep 17 00:00:00 2001 From: mhavey Date: Wed, 31 Jan 2024 09:29:09 -0500 Subject: [PATCH 12/25] Update __init__.py Include Neptune Sparql chain --- libs/langchain/langchain/chains/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/libs/langchain/langchain/chains/__init__.py b/libs/langchain/langchain/chains/__init__.py index 2b7ba6ac256bb..b20d3fecb444d 100644 --- a/libs/langchain/langchain/chains/__init__.py +++ b/libs/langchain/langchain/chains/__init__.py @@ -41,6 +41,7 @@ from langchain.chains.graph_qa.kuzu import KuzuQAChain from 
langchain.chains.graph_qa.nebulagraph import NebulaGraphQAChain from langchain.chains.graph_qa.neptune_cypher import NeptuneOpenCypherQAChain +from langchain.chains.graph_qa.neptune_sparql import NeptuneSparqlQAChain from langchain.chains.graph_qa.ontotext_graphdb import OntotextGraphDBQAChain from langchain.chains.graph_qa.sparql import GraphSparqlQAChain from langchain.chains.history_aware_retriever import create_history_aware_retriever @@ -116,6 +117,7 @@ "NatBotChain", "NebulaGraphQAChain", "NeptuneOpenCypherQAChain", + "NeptuneSparqlQAChain", "OpenAIModerationChain", "OpenAPIEndpointChain", "QAGenerationChain", From 986ea33d3a98daacadc80feccae41de136fe4d31 Mon Sep 17 00:00:00 2001 From: mhavey Date: Fri, 2 Feb 2024 13:46:36 -0500 Subject: [PATCH 13/25] pr changes --- .../use_cases/graph/graph_sparql_qa.ipynb | 456 ++++++++++-------- 1 file changed, 260 insertions(+), 196 deletions(-) diff --git a/docs/docs/use_cases/graph/graph_sparql_qa.ipynb b/docs/docs/use_cases/graph/graph_sparql_qa.ipynb index b7541f4388ae8..18fd41985b030 100644 --- a/docs/docs/use_cases/graph/graph_sparql_qa.ipynb +++ b/docs/docs/use_cases/graph/graph_sparql_qa.ipynb @@ -2,288 +2,352 @@ "cells": [ { "cell_type": "markdown", - "id": "c94240f5", "metadata": {}, "source": [ - "# GraphSparqlQAChain\n", + "# SPARQL Langchain with Amazon Neptune\n", "\n", - "Graph databases are an excellent choice for applications based on network-like models. To standardize the syntax and semantics of such graphs, the W3C recommends Semantic Web Technologies, cp. [Semantic Web](https://www.w3.org/standards/semanticweb/). [SPARQL](https://www.w3.org/TR/sparql11-query/) serves as a query language analogously to SQL or Cypher for these graphs. This notebook demonstrates the application of LLMs as a natural language interface to a graph database by generating SPARQL.\\\n", - "Disclaimer: To date, SPARQL query generation via LLMs is still a bit unstable. Be especially careful with UPDATE queries, which alter the graph." + "This notebook shows use of LLM to query RDF graph in Amazon Neptune. It uses Langchain.\n", + "\n", + "* Create RDFGraph object that connects to Neptune and introspects its schema\n", + "* Create chain using LLM and RDFGraph. LLM used here is Anthropic Claude 2 via Bedrock\n", + "* Ask questions to LLM. LLM consults schema and calls RDFGraph to execute SPARQL query\n", + "\n", + "Requirements:\n", + "- Neptune 1.2.x cluster accessible from this notebook\n", + "- Python 3.9 or higher kernet\n", + "- For Bedrock access, ensure IAM role has\n", + "\n", + "{\n", + " \"Action\": [\n", + " \"bedrock:ListFoundationModels\",\n", + " \"bedrock:InvokeModel\"\n", + " ],\n", + " \"Resource\": \"*\",\n", + " \"Effect\": \"Allow\"\n", + "}\n", + "\n", + "- S3 bucket for staging in same account/region as Neptune" ] }, { "cell_type": "markdown", - "id": "dbc0ee68", "metadata": {}, "source": [ - "There are several sources you can run queries against, including files on the web, files you have available locally, SPARQL endpoints, e.g., [Wikidata](https://www.wikidata.org/wiki/Wikidata:Main_Page), and [triple stores](https://www.w3.org/wiki/LargeTripleStores)." + "## Seed W3C organizational data\n", + "W3C org ontology plus some instances. \n", + "\n", + "You will need an S3 bucket in the same region and account. Set STAGE_BUCKET to name of that bucket." 
] }, { "cell_type": "code", - "execution_count": 1, - "id": "62812aad", - "metadata": { - "pycharm": { - "is_executing": true - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "from langchain.chains import GraphSparqlQAChain\n", - "from langchain_community.graphs import RdfGraph\n", - "from langchain_openai import ChatOpenAI" + "STAGE_BUCKET=''" ] }, { "cell_type": "code", - "execution_count": 8, - "id": "0928915d", - "metadata": { - "pycharm": { - "is_executing": true - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "graph = RdfGraph(\n", - " source_file=\"http://www.w3.org/People/Berners-Lee/card\",\n", - " standard=\"rdf\",\n", - " local_copy=\"test.ttl\",\n", - ")" + "%%bash -s \"$STAGE_BUCKET\"\n", + "\n", + "rm -rf data\n", + "mkdir -p data\n", + "cd data\n", + "echo getting org ontology and sample org instances\n", + "wget http://www.w3.org/ns/org.ttl \n", + "wget https://raw.githubusercontent.com/aws-samples/amazon-neptune-ontology-example-blog/main/data/example_org.ttl \n", + "\n", + "echo Copying org ttl to S3\n", + "aws s3 cp org.ttl s3://$1/org.ttl\n", + "aws s3 cp example_org.ttl s3://$1/example_org.ttl\n" ] }, { "cell_type": "markdown", - "id": "7af596b5", - "metadata": { - "collapsed": false - }, + "metadata": {}, + "source": [ + "Bulk-load the org ttl - both ontology and instances" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%load -s s3://{STAGE_BUCKET} -f turtle --store-to loadres --run" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ - "Note that providing a `local_file` is necessary for storing changes locally if the source is read-only." + "%load_status {loadres['payload']['loadId']} --errors --details" ] }, { "cell_type": "markdown", - "id": "58c1a8ea", "metadata": {}, "source": [ - "## Refresh graph schema information\n", - "If the schema of the database changes, you can refresh the schema information needed to generate SPARQL queries." + "## Setup Chain" ] }, { "cell_type": "code", - "execution_count": 9, - "id": "4e3de44f", - "metadata": { - "pycharm": { - "is_executing": true - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "graph.load_schema()" + "EXAMPLES=\"\"\"\n", + "\n", + "\n", + "Find organizations.\n", + "\n", + "\n", + "\n", + "PREFIX rdf: \n", + "PREFIX rdfs: \n", + "PREFIX org: \n", + "\n", + "select ?org ?orgName where {{\n", + " ?org rdfs:label ?orgName .\n", + "}} \n", + "\n", + "\n", + "\n", + "Find sites of an organization\n", + "\n", + "\n", + "\n", + "PREFIX rdf: \n", + "PREFIX rdfs: \n", + "PREFIX org: \n", + "\n", + "select ?org ?orgName ?siteName where {{\n", + " ?org rdfs:label ?orgName .\n", + " ?org org:hasSite/rdfs:label ?siteName . \n", + "}} \n", + "\n", + "\n", + "\n", + "Find suborganizations of an organization\n", + "\n", + "\n", + "\n", + "PREFIX rdf: \n", + "PREFIX rdfs: \n", + "PREFIX org: \n", + "\n", + "select ?org ?orgName ?subName where {{\n", + " ?org rdfs:label ?orgName .\n", + " ?org org:hasSubOrganization/rdfs:label ?subName .\n", + "}} \n", + "\n", + "\n", + "\n", + "Find organizational units of an organization\n", + "\n", + "\n", + "\n", + "PREFIX rdf: \n", + "PREFIX rdfs: \n", + "PREFIX org: \n", + "\n", + "select ?org ?orgName ?unitName where {{\n", + " ?org rdfs:label ?orgName .\n", + " ?org org:hasUnit/rdfs:label ?unitName . 
\n", + "}} \n", + "\n", + "\n", + "\n", + "Find members of an organization. Also find their manager, or the member they report to.\n", + "\n", + "\n", + "\n", + "PREFIX org: \n", + "PREFIX foaf: \n", + "\n", + "select * where {{\n", + " ?person rdf:type foaf:Person .\n", + " ?person org:memberOf ?org .\n", + " OPTIONAL {{ ?person foaf:firstName ?firstName . }}\n", + " OPTIONAL {{ ?person foaf:family_name ?lastName . }}\n", + " OPTIONAL {{ ?person org:reportsTo ??manager }} .\n", + "}}\n", + "\n", + "\n", + "\n", + "\n", + "Find change events, such as mergers and acquisitions, of an organization\n", + "\n", + "\n", + "\n", + "PREFIX org: \n", + "\n", + "select ?event ?prop ?obj where {{\n", + " ?org rdfs:label ?orgName .\n", + " ?event rdf:type org:ChangeEvent .\n", + " ?event org:originalOrganization ?origOrg .\n", + " ?event org:resultingOrganization ?resultingOrg .\n", + "}}\n", + "\n", + "\n", + "\"\"\"" ] }, { "cell_type": "code", - "execution_count": 10, - "id": "1fe76ccd", + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "In the following, each IRI is followed by the local name and optionally its description in parentheses. \n", - "The RDF graph supports the following node types:\n", - " (PersonalProfileDocument, None), (RSAPublicKey, None), (Male, None), (Person, None), (Work, None)\n", - "The RDF graph supports the following relationships:\n", - " (seeAlso, None), (title, None), (mbox_sha1sum, None), (maker, None), (oidcIssuer, None), (publicHomePage, None), (openid, None), (storage, None), (name, None), (country, None), (type, None), (profileHighlightColor, None), (preferencesFile, None), (label, None), (modulus, None), (participant, None), (street2, None), (locality, None), (nick, None), (homepage, None), (license, None), (givenname, None), (street-address, None), (postal-code, None), (street, None), (lat, None), (primaryTopic, None), (fn, None), (location, None), (developer, None), (city, None), (region, None), (member, None), (long, None), (address, None), (family_name, None), (account, None), (workplaceHomepage, None), (title, None), (publicTypeIndex, None), (office, None), (homePage, None), (mbox, None), (preferredURI, None), (profileBackgroundColor, None), (owns, None), (based_near, None), (hasAddress, None), (img, None), (assistant, None), (title, None), (key, None), (inbox, None), (editableProfile, None), (postalCode, None), (weblog, None), (exponent, None), (avatar, None)\n", - "\n" - ] - } - ], + "outputs": [], + "source": [ + "import boto3\n", + "from langchain.chains.graph_qa.neptune_sparql import NeptuneSparqlQAChain\n", + "from langchain_community.graphs import NeptuneRdfGraph\n", + "from langchain.chat_models import BedrockChat\n", + "from langchain.llms import Bedrock\n", + "\n", + "nep_host = \"\"\n", + "nep_port = \n", + "nep_region = 'us-east-1' # enter your region\n", + "\n", + "graph = None\n", + "graph = NeptuneRdfGraph(\n", + " host=nep_host,\n", + " port=nep_port,\n", + " use_iam_auth=True,\n", + " region_name=nep_region,\n", + " hide_comments=True\n", + ")\n", + "\n", + "elems = graph.get_schema_elements\n", + "# change elems ...\n", + "graph.load_from_schema_elements(elems)\n", + "\n", + "bedrock_client = boto3.client('bedrock-runtime')\n", + "llm = BedrockChat(\n", + " model_id = \"anthropic.claude-v2\",\n", + " client = bedrock_client\n", + ")\n", + "\n", + "chain = NeptuneSparqlQAChain.from_llm(\n", + " llm=llm, graph=graph, examples=EXAMPLES, verbose=True, top_K=10, 
return_intermediate_steps=True, return_direct=False)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "graph.get_schema" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "graph.get_schema_elements" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "chain" + ] + }, { "cell_type": "markdown", - "id": "68a3c677", "metadata": {}, "source": [ - "## Querying the graph\n", - "\n", - "Now, you can use the graph SPARQL QA chain to ask questions about the graph." + "## Ask questions\n", + "Depends on the data we ingested above" ] }, { "cell_type": "code", - "execution_count": 11, - "id": "7476ce98", - "metadata": { - "pycharm": { - "is_executing": true - } - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ - "chain = GraphSparqlQAChain.from_llm(\n", - " ChatOpenAI(temperature=0), graph=graph, verbose=True\n", - ")" + "chain.run('''How many organizations are in the graph''')" ] }, { "cell_type": "code", - "execution_count": 12, - "id": "ef8ee27b", - "metadata": { - "pycharm": { - "is_executing": true - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n", - "\u001b[1m> Entering new GraphSparqlQAChain chain...\u001b[0m\n", - "Identified intent:\n", - "\u001b[32;1m\u001b[1;3mSELECT\u001b[0m\n", - "Generated SPARQL:\n", - "\u001b[32;1m\u001b[1;3mPREFIX foaf: \n", - "SELECT ?homepage\n", - "WHERE {\n", - " ?person foaf:name \"Tim Berners-Lee\" .\n", - " ?person foaf:workplaceHomepage ?homepage .\n", - "}\u001b[0m\n", - "Full Context:\n", - "\u001b[32;1m\u001b[1;3m[]\u001b[0m\n", - "\n", - "\u001b[1m> Finished chain.\u001b[0m\n" - ] - }, - { - "data": { - "text/plain": [ - "\"Tim Berners-Lee's work homepage is http://www.w3.org/People/Berners-Lee/.\"" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ - "chain.run(\"What is Tim Berners-Lee's work homepage?\")" + "chain.run('''Are there any mergers or acquisitions''')" ] }, { - "cell_type": "markdown", - "id": "af4b3294", + "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ - "## Updating the graph\n", - "\n", - "Analogously, you can update the graph, i.e., insert triples, using natural language." 
+ "chain.run('''Find organizations''')" ] }, { "cell_type": "code", - "execution_count": 14, - "id": "fdf38841", - "metadata": { - "pycharm": { - "is_executing": true - } - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n", - "\u001b[1m> Entering new GraphSparqlQAChain chain...\u001b[0m\n", - "Identified intent:\n", - "\u001b[32;1m\u001b[1;3mUPDATE\u001b[0m\n", - "Generated SPARQL:\n", - "\u001b[32;1m\u001b[1;3mPREFIX foaf: \n", - "INSERT {\n", - " ?person foaf:workplaceHomepage .\n", - "}\n", - "WHERE {\n", - " ?person foaf:name \"Timothy Berners-Lee\" .\n", - "}\u001b[0m\n", - "\n", - "\u001b[1m> Finished chain.\u001b[0m\n" - ] - }, - { - "data": { - "text/plain": [ - "'Successfully inserted triples into the graph.'" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ - "chain.run(\n", - " \"Save that the person with the name 'Timothy Berners-Lee' has a work homepage at 'http://www.w3.org/foo/bar/'\"\n", - ")" + "chain.run('''Find sites of MegaSystems or MegaFinancial''')" ] }, { - "cell_type": "markdown", - "id": "5e0f7fc1", + "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ - "Let's verify the results:" + "%%time\n", + "chain.run('''Find a member who is manager of one or more members.''')" ] }, { "cell_type": "code", - "execution_count": 15, - "id": "f874171b", + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[(rdflib.term.URIRef('https://www.w3.org/'),),\n", - " (rdflib.term.URIRef('http://www.w3.org/foo/bar/'),)]" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "query = (\n", - " \"\"\"PREFIX foaf: \\n\"\"\"\n", - " \"\"\"SELECT ?hp\\n\"\"\"\n", - " \"\"\"WHERE {\\n\"\"\"\n", - " \"\"\" ?person foaf:name \"Timothy Berners-Lee\" . \\n\"\"\"\n", - " \"\"\" ?person foaf:workplaceHomepage ?hp .\\n\"\"\"\n", - " \"\"\"}\"\"\"\n", - ")\n", - "graph.query(query)" + "chain.run('''Find five members and who their manager is.''')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%time\n", + "chain.run('''Find org units or suborganizations of The Mega Group. What are the sites of those units?''')" ] } ], "metadata": { "kernelspec": { - "display_name": "lc", + "display_name": "Python 3", "language": "python", - "name": "lc" + "name": "python3" }, "language_info": { "codemirror_mode": { @@ -295,9 +359,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.4" + "version": "3.10.8" } }, "nbformat": 4, - "nbformat_minor": 5 -} \ No newline at end of file + "nbformat_minor": 2 +} From 884d59beec0e5828546a8667bae17865636dfb63 Mon Sep 17 00:00:00 2001 From: mhavey Date: Fri, 2 Feb 2024 13:47:49 -0500 Subject: [PATCH 14/25] PR changes --- .../graphs/neptune_rdf_graph.py | 47 ++++++++++--------- 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/libs/community/langchain_community/graphs/neptune_rdf_graph.py b/libs/community/langchain_community/graphs/neptune_rdf_graph.py index 76e3694c181d8..b49434630f974 100644 --- a/libs/community/langchain_community/graphs/neptune_rdf_graph.py +++ b/libs/community/langchain_community/graphs/neptune_rdf_graph.py @@ -65,7 +65,8 @@ class NeptuneRdfGraph: """Neptune wrapper for RDF graph operations. 
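+
+    Runs SPARQL queries against the cluster's HTTPS endpoint (signing requests
+    with SigV4 when IAM auth is enabled) and introspects classes, relationships,
+    and properties to build a schema summary suitable for LLM prompts.
+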
Args: - query_endpoint: SPARQL endpoint for Neptune + host: SPARQL endpoint host for Neptune + port: SPARQL endpoint port for Neptune. Defaults 8182. use_iam_auth: boolean indicating IAM auth is enabled in Neptune cluster region_name: AWS region required if use_iam_auth is True, e.g., us-west-2 hide_comments: whether to include ontology comments in schema for prompt @@ -74,14 +75,16 @@ class NeptuneRdfGraph: .. code-block:: python graph = NeptuneRdfGraph( - query_endpoint='', + host=', + port=, use_iam_auth=False ) schema = graph.get_schema() OR graph = NeptuneRdfGraph( - query_endpoint='', + host=', + port=, use_iam_auth=False ) schema_elem = graph.get_schema_elements() @@ -103,22 +106,23 @@ class NeptuneRdfGraph: def __init__( self, - query_endpoint: str, + host: str, + port: int = 8182, use_iam_auth: bool = False, region_name: Optional[str] = None, hide_comments: bool = False, ) -> None: self.use_iam_auth = use_iam_auth self.region_name = region_name - self.query_endpoint = query_endpoint self.hide_comments = hide_comments + self.query_endpoint=f"https://{host}:{port}/sparql" self.session = boto3.Session() if self.use_iam_auth else None # Set schema self.schema = "" self.schema_elements = {} - self.load_schema() + self._refresh_schema() @property def get_schema(self) -> str: @@ -185,10 +189,10 @@ def load_from_schema_elements(self, schema_elements): for elemrec in self.schema_elements[elem]: uri = elemrec["uri"] local = elemrec["local"] - str = f"<{uri}> ({local})" + resstr = f"<{uri}> ({local})" if self.hide_comments is False: - str = str + f", {elemrec['comment']}" - reslist.append(str) + resstr = resstr + f", {elemrec['comment']}" + reslist.append(resstr) elemstr[elem] = ", ".join(reslist) self.schema = ( @@ -204,18 +208,17 @@ def load_from_schema_elements(self, schema_elements): f"{elemstr['oprops']}" ) - """ - Private method split URI into prefix and local - """ - @staticmethod def _get_local_name(iri: str): + """ + Private method split URI into prefix and local + """ if "#" in iri: - toks = iri.split("#") - return [f"{toks[0]}#", toks[-1]] + tokens = iri.split("#") + return [f"{tokens[0]}#", tokens[-1]] elif "/" in iri: - toks = iri.split("/") - return [f"{'/'.join(toks[0:len(toks)-1])}/", toks[-1]] + tokens = iri.split("/") + return [f"{'/'.join(tokens[0:len(tokens)-1])}/", tokens[-1]] else: raise ValueError(f"Unexpected IRI '{iri}', contains neither '#' nor '/'.") @@ -223,7 +226,7 @@ def _get_local_name(iri: str): Query Neptune to introspect schema. 
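+    Populates self.schema_elements (classes, relationships, data/object
+    properties and their distinct prefixes) and rebuilds the prompt-ready
+    schema string.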
""" - def load_schema(self) -> None: + def _refresh_schema(self) -> None: self.schema_elements["distinct_prefixes"] = {} for elem in ELEM_TYPES: @@ -231,13 +234,13 @@ def load_schema(self) -> None: reslist = [] for r in items["results"]["bindings"]: uri = r["elem"]["value"] - toks = self._get_local_name(uri) - elem_record = {"uri": uri, "local": toks[1]} + tokens = self._get_local_name(uri) + elem_record = {"uri": uri, "local": tokens[1]} if not self.hide_comments: elem_record["comment"] = r["com"]["value"] if "com" in r else "" reslist.append(elem_record) - if toks[0] not in self.schema_elements["distinct_prefixes"]: - self.schema_elements["distinct_prefixes"][toks[0]] = "y" + if tokens[0] not in self.schema_elements["distinct_prefixes"]: + self.schema_elements["distinct_prefixes"][tokens[0]] = "y" self.schema_elements[elem] = reslist From 062ac02b212f5488ccbb6a62b0e56441a06e1f92 Mon Sep 17 00:00:00 2001 From: mhavey Date: Mon, 5 Feb 2024 20:09:15 -0500 Subject: [PATCH 15/25] PR comment --- docs/docs/use_cases/graph/graph_sparql_qa.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/docs/use_cases/graph/graph_sparql_qa.ipynb b/docs/docs/use_cases/graph/graph_sparql_qa.ipynb index 18fd41985b030..26a8f2bde5140 100644 --- a/docs/docs/use_cases/graph/graph_sparql_qa.ipynb +++ b/docs/docs/use_cases/graph/graph_sparql_qa.ipynb @@ -230,7 +230,7 @@ "\n", "elems = graph.get_schema_elements\n", "# change elems ...\n", - "graph.load_from_schema_elements(elems)\n", + "graph.load_schema(elems)\n", "\n", "bedrock_client = boto3.client('bedrock-runtime')\n", "llm = BedrockChat(\n", From 1f20e437f02da335e258c235f6ef5da3e0156af2 Mon Sep 17 00:00:00 2001 From: mhavey Date: Mon, 5 Feb 2024 20:10:39 -0500 Subject: [PATCH 16/25] PR comments --- .../chains/graph_qa/neptune_sparql.py | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/libs/langchain/langchain/chains/graph_qa/neptune_sparql.py b/libs/langchain/langchain/chains/graph_qa/neptune_sparql.py index cb17b7f8b67cc..e1f756b90e943 100644 --- a/libs/langchain/langchain/chains/graph_qa/neptune_sparql.py +++ b/libs/langchain/langchain/chains/graph_qa/neptune_sparql.py @@ -22,7 +22,7 @@ INTERMEDIATE_STEPS_KEY = "intermediate_steps" -XSPARQL_GENERATION_SELECT_TEMPLATE = """Task: Generate a SPARQL SELECT statement for querying a graph database. +SPARQL_GENERATION_TEMPLATE = """Task: Generate a SPARQL SELECT statement for querying a graph database. 
For instance, to find all email addresses of John Doe, the following query in backticks would be suitable: ``` PREFIX foaf: @@ -49,8 +49,8 @@ The question is: {prompt}""" -XSPARQL_GENERATION_SELECT_PROMPT = PromptTemplate( - input_variables=["schema", "prompt"], template=XSPARQL_GENERATION_SELECT_TEMPLATE +SPARQL_GENERATION_PROMPT = PromptTemplate( + input_variables=["schema", "prompt"], template=SPARQL_GENERATION_TEMPLATE ) def extract_sparql(query: str) -> str: @@ -91,7 +91,7 @@ class NeptuneSparqlQAChain(Chain): """ graph: NeptuneRdfGraph = Field(exclude=True) - sparql_generation_select_chain: LLMChain + sparql_generation_chain: LLMChain qa_chain: LLMChain input_key: str = "query" #: :meta private: output_key: str = "result" #: :meta private: @@ -118,23 +118,23 @@ def from_llm( llm: BaseLanguageModel, *, qa_prompt: BasePromptTemplate = SPARQL_QA_PROMPT, - sparql_select_prompt: BasePromptTemplate = XSPARQL_GENERATION_SELECT_PROMPT, + sparql_prompt: BasePromptTemplate = SPARQL_GENERATION_PROMPT, examples: Optional[str] = None, **kwargs: Any, ) -> NeptuneSparqlQAChain: """Initialize from LLM.""" qa_chain = LLMChain(llm=llm, prompt=qa_prompt) - template_to_use = XSPARQL_GENERATION_SELECT_TEMPLATE + template_to_use = SPARQL_GENERATION_TEMPLATE if not(examples is None): template_to_use = template_to_use.replace( "Examples:", "Examples: " + examples) - sparql_select_prompt = PromptTemplate( + sparql_prompt = PromptTemplate( input_variables=["schema", "prompt"], template=template_to_use) - sparql_generation_select_chain = LLMChain(llm=llm, prompt=sparql_select_prompt) + sparql_generation_chain = LLMChain(llm=llm, prompt=sparql_prompt) return cls( qa_chain=qa_chain, - sparql_generation_select_chain=sparql_generation_select_chain, + sparql_generation_chain=sparql_generation_chain, examples=examples, **kwargs, ) @@ -154,7 +154,7 @@ def _call( intermediate_steps: List = [] - generated_sparql = self.sparql_generation_select_chain.run( + generated_sparql = self.sparql_generation_chain.run( {"prompt": prompt, "schema": self.graph.get_schema}, callbacks=callbacks ) From 46b6456262f77aaa0b983ba985d80c7b229bf83e Mon Sep 17 00:00:00 2001 From: mhavey Date: Mon, 5 Feb 2024 20:13:16 -0500 Subject: [PATCH 17/25] PR comments --- .../graphs/neptune_rdf_graph.py | 51 +++++++++---------- 1 file changed, 25 insertions(+), 26 deletions(-) diff --git a/libs/community/langchain_community/graphs/neptune_rdf_graph.py b/libs/community/langchain_community/graphs/neptune_rdf_graph.py index b49434630f974..4eb2e0d9c6f83 100644 --- a/libs/community/langchain_community/graphs/neptune_rdf_graph.py +++ b/libs/community/langchain_community/graphs/neptune_rdf_graph.py @@ -89,7 +89,7 @@ class NeptuneRdfGraph: ) schema_elem = graph.get_schema_elements() ... change schema_elements ... - graph.load_from_schema_elements(schema_elem) + graph.load_schema(schema_elem) schema = graph.get_schema() *Security note*: Make sure that the database connection uses credentials @@ -135,14 +135,14 @@ def get_schema(self) -> str: def get_schema_elements(self): return self.schema_elements - """ - Run Neptune query. - """ def query( self, query: str, ): + """ + Run Neptune query. + """ request_data = {"query": query} data = request_data request_hdr = None @@ -177,41 +177,40 @@ def query( json_resp = json.loads(queryres.text) return json_resp - def load_from_schema_elements(self, schema_elements): + def load_schema(self, schema_elements:dict): """ - Create schema from schema_elements. Helpful in cases where - introspected schema needs pruning. 
+ Generates and sets schema from schema_elements. Helpful in + cases where introspected schema needs pruning. """ - elemstr = {} + elem_str = {} for elem in ELEM_TYPES: - reslist = [] - for elemrec in self.schema_elements[elem]: - uri = elemrec["uri"] - local = elemrec["local"] - resstr = f"<{uri}> ({local})" + res_list = [] + for elem_rec in self.schema_elements[elem]: + uri = elem_rec["uri"] + local = elem_rec["local"] + res_str = f"<{uri}> ({local})" if self.hide_comments is False: - resstr = resstr + f", {elemrec['comment']}" - reslist.append(resstr) - elemstr[elem] = ", ".join(reslist) + res_str = res_str + f", {elem_rec['comment']}" + res_list.append(res_str) + elem_str[elem] = ", ".join(res_list) self.schema = ( "In the following, each IRI is followed by the local name and " "optionally its description in parentheses. \n" "The graph supports the following node types:\n" - f"{elemstr['classes']}" + f"{elem_str['classes']}" "The graph supports the following relationships:\n" - f"{elemstr['rels']}" + f"{elem_str['rels']}" "The graph supports the following OWL object properties, " - f"{elemstr['dtprops']}" + f"{elem_str['dtprops']}" "The graph supports the following OWL data properties, " - f"{elemstr['oprops']}" + f"{elem_str['oprops']}" ) - @staticmethod - def _get_local_name(iri: str): + def _get_local_name(self, iri: str): """ - Private method split URI into prefix and local + Split IRI into prefix and local """ if "#" in iri: tokens = iri.split("#") @@ -222,11 +221,11 @@ def _get_local_name(iri: str): else: raise ValueError(f"Unexpected IRI '{iri}', contains neither '#' nor '/'.") - """ - Query Neptune to introspect schema. - """ def _refresh_schema(self) -> None: + """ + Query Neptune to introspect schema. + """ self.schema_elements["distinct_prefixes"] = {} for elem in ELEM_TYPES: From 7fcc1110dccb48653baf9087ccaebea50a1e23e3 Mon Sep 17 00:00:00 2001 From: mhavey Date: Mon, 5 Feb 2024 20:17:57 -0500 Subject: [PATCH 18/25] PR comments --- libs/community/langchain_community/graphs/neptune_rdf_graph.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/community/langchain_community/graphs/neptune_rdf_graph.py b/libs/community/langchain_community/graphs/neptune_rdf_graph.py index 4eb2e0d9c6f83..739c4d1590596 100644 --- a/libs/community/langchain_community/graphs/neptune_rdf_graph.py +++ b/libs/community/langchain_community/graphs/neptune_rdf_graph.py @@ -243,4 +243,4 @@ def _refresh_schema(self) -> None: self.schema_elements[elem] = reslist - self.load_from_schema_elements(self.schema_elements) + self.load_schema(self.schema_elements) From 7778f0685ab32c6df5b9ab001ac3f643816d8698 Mon Sep 17 00:00:00 2001 From: mhavey Date: Tue, 6 Feb 2024 05:42:20 -0500 Subject: [PATCH 19/25] lint --- .../langchain_community/graphs/neptune_rdf_graph.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/libs/community/langchain_community/graphs/neptune_rdf_graph.py b/libs/community/langchain_community/graphs/neptune_rdf_graph.py index 739c4d1590596..1df43a93469c0 100644 --- a/libs/community/langchain_community/graphs/neptune_rdf_graph.py +++ b/libs/community/langchain_community/graphs/neptune_rdf_graph.py @@ -115,7 +115,7 @@ def __init__( self.use_iam_auth = use_iam_auth self.region_name = region_name self.hide_comments = hide_comments - self.query_endpoint=f"https://{host}:{port}/sparql" + self.query_endpoint = f"https://{host}:{port}/sparql" self.session = boto3.Session() if self.use_iam_auth else None @@ -135,7 +135,6 @@ def get_schema(self) -> str: def 
get_schema_elements(self): return self.schema_elements - def query( self, query: str, @@ -177,9 +176,9 @@ def query( json_resp = json.loads(queryres.text) return json_resp - def load_schema(self, schema_elements:dict): + def load_schema(self, schema_elements: dict): """ - Generates and sets schema from schema_elements. Helpful in + Generates and sets schema from schema_elements. Helpful in cases where introspected schema needs pruning. """ @@ -221,7 +220,6 @@ def _get_local_name(self, iri: str): else: raise ValueError(f"Unexpected IRI '{iri}', contains neither '#' nor '/'.") - def _refresh_schema(self) -> None: """ Query Neptune to introspect schema. From f5700c98dd5e3393e9c987823a1459c63bfac8d5 Mon Sep 17 00:00:00 2001 From: Piyush Jain Date: Tue, 6 Feb 2024 11:58:07 -0800 Subject: [PATCH 20/25] Corrected sample notebook, fixed linting. --- .../use_cases/graph/graph_sparql_qa.ipynb | 456 ++++++++---------- .../use_cases/graph/neptune_sparql_qa.ipynb | 106 ++-- 2 files changed, 234 insertions(+), 328 deletions(-) diff --git a/docs/docs/use_cases/graph/graph_sparql_qa.ipynb b/docs/docs/use_cases/graph/graph_sparql_qa.ipynb index 26a8f2bde5140..b7541f4388ae8 100644 --- a/docs/docs/use_cases/graph/graph_sparql_qa.ipynb +++ b/docs/docs/use_cases/graph/graph_sparql_qa.ipynb @@ -2,352 +2,288 @@ "cells": [ { "cell_type": "markdown", + "id": "c94240f5", "metadata": {}, "source": [ - "# SPARQL Langchain with Amazon Neptune\n", + "# GraphSparqlQAChain\n", "\n", - "This notebook shows use of LLM to query RDF graph in Amazon Neptune. It uses Langchain.\n", - "\n", - "* Create RDFGraph object that connects to Neptune and introspects its schema\n", - "* Create chain using LLM and RDFGraph. LLM used here is Anthropic Claude 2 via Bedrock\n", - "* Ask questions to LLM. LLM consults schema and calls RDFGraph to execute SPARQL query\n", - "\n", - "Requirements:\n", - "- Neptune 1.2.x cluster accessible from this notebook\n", - "- Python 3.9 or higher kernet\n", - "- For Bedrock access, ensure IAM role has\n", - "\n", - "{\n", - " \"Action\": [\n", - " \"bedrock:ListFoundationModels\",\n", - " \"bedrock:InvokeModel\"\n", - " ],\n", - " \"Resource\": \"*\",\n", - " \"Effect\": \"Allow\"\n", - "}\n", - "\n", - "- S3 bucket for staging in same account/region as Neptune" + "Graph databases are an excellent choice for applications based on network-like models. To standardize the syntax and semantics of such graphs, the W3C recommends Semantic Web Technologies, cp. [Semantic Web](https://www.w3.org/standards/semanticweb/). [SPARQL](https://www.w3.org/TR/sparql11-query/) serves as a query language analogously to SQL or Cypher for these graphs. This notebook demonstrates the application of LLMs as a natural language interface to a graph database by generating SPARQL.\\\n", + "Disclaimer: To date, SPARQL query generation via LLMs is still a bit unstable. Be especially careful with UPDATE queries, which alter the graph." ] }, { "cell_type": "markdown", + "id": "dbc0ee68", "metadata": {}, "source": [ - "## Seed W3C organizational data\n", - "W3C org ontology plus some instances. \n", - "\n", - "You will need an S3 bucket in the same region and account. Set STAGE_BUCKET to name of that bucket." + "There are several sources you can run queries against, including files on the web, files you have available locally, SPARQL endpoints, e.g., [Wikidata](https://www.wikidata.org/wiki/Wikidata:Main_Page), and [triple stores](https://www.w3.org/wiki/LargeTripleStores)." 
] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 1, + "id": "62812aad", + "metadata": { + "pycharm": { + "is_executing": true + } + }, "outputs": [], "source": [ - "STAGE_BUCKET=''" + "from langchain.chains import GraphSparqlQAChain\n", + "from langchain_community.graphs import RdfGraph\n", + "from langchain_openai import ChatOpenAI" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 8, + "id": "0928915d", + "metadata": { + "pycharm": { + "is_executing": true + } + }, "outputs": [], "source": [ - "%%bash -s \"$STAGE_BUCKET\"\n", - "\n", - "rm -rf data\n", - "mkdir -p data\n", - "cd data\n", - "echo getting org ontology and sample org instances\n", - "wget http://www.w3.org/ns/org.ttl \n", - "wget https://raw.githubusercontent.com/aws-samples/amazon-neptune-ontology-example-blog/main/data/example_org.ttl \n", - "\n", - "echo Copying org ttl to S3\n", - "aws s3 cp org.ttl s3://$1/org.ttl\n", - "aws s3 cp example_org.ttl s3://$1/example_org.ttl\n" + "graph = RdfGraph(\n", + " source_file=\"http://www.w3.org/People/Berners-Lee/card\",\n", + " standard=\"rdf\",\n", + " local_copy=\"test.ttl\",\n", + ")" ] }, { "cell_type": "markdown", - "metadata": {}, - "source": [ - "Bulk-load the org ttl - both ontology and instances" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%load -s s3://{STAGE_BUCKET} -f turtle --store-to loadres --run" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "id": "7af596b5", + "metadata": { + "collapsed": false + }, "source": [ - "%load_status {loadres['payload']['loadId']} --errors --details" + "Note that providing a `local_file` is necessary for storing changes locally if the source is read-only." ] }, { "cell_type": "markdown", + "id": "58c1a8ea", "metadata": {}, "source": [ - "## Setup Chain" + "## Refresh graph schema information\n", + "If the schema of the database changes, you can refresh the schema information needed to generate SPARQL queries." ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "EXAMPLES=\"\"\"\n", - "\n", - "\n", - "Find organizations.\n", - "\n", - "\n", - "\n", - "PREFIX rdf: \n", - "PREFIX rdfs: \n", - "PREFIX org: \n", - "\n", - "select ?org ?orgName where {{\n", - " ?org rdfs:label ?orgName .\n", - "}} \n", - "\n", - "\n", - "\n", - "Find sites of an organization\n", - "\n", - "\n", - "\n", - "PREFIX rdf: \n", - "PREFIX rdfs: \n", - "PREFIX org: \n", - "\n", - "select ?org ?orgName ?siteName where {{\n", - " ?org rdfs:label ?orgName .\n", - " ?org org:hasSite/rdfs:label ?siteName . \n", - "}} \n", - "\n", - "\n", - "\n", - "Find suborganizations of an organization\n", - "\n", - "\n", - "\n", - "PREFIX rdf: \n", - "PREFIX rdfs: \n", - "PREFIX org: \n", - "\n", - "select ?org ?orgName ?subName where {{\n", - " ?org rdfs:label ?orgName .\n", - " ?org org:hasSubOrganization/rdfs:label ?subName .\n", - "}} \n", - "\n", - "\n", - "\n", - "Find organizational units of an organization\n", - "\n", - "\n", - "\n", - "PREFIX rdf: \n", - "PREFIX rdfs: \n", - "PREFIX org: \n", - "\n", - "select ?org ?orgName ?unitName where {{\n", - " ?org rdfs:label ?orgName .\n", - " ?org org:hasUnit/rdfs:label ?unitName . \n", - "}} \n", - "\n", - "\n", - "\n", - "Find members of an organization. 
Also find their manager, or the member they report to.\n", - "\n", - "\n", - "\n", - "PREFIX org: \n", - "PREFIX foaf: \n", - "\n", - "select * where {{\n", - " ?person rdf:type foaf:Person .\n", - " ?person org:memberOf ?org .\n", - " OPTIONAL {{ ?person foaf:firstName ?firstName . }}\n", - " OPTIONAL {{ ?person foaf:family_name ?lastName . }}\n", - " OPTIONAL {{ ?person org:reportsTo ??manager }} .\n", - "}}\n", - "\n", - "\n", - "\n", - "\n", - "Find change events, such as mergers and acquisitions, of an organization\n", - "\n", - "\n", - "\n", - "PREFIX org: \n", - "\n", - "select ?event ?prop ?obj where {{\n", - " ?org rdfs:label ?orgName .\n", - " ?event rdf:type org:ChangeEvent .\n", - " ?event org:originalOrganization ?origOrg .\n", - " ?event org:resultingOrganization ?resultingOrg .\n", - "}}\n", - "\n", - "\n", - "\"\"\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 9, + "id": "4e3de44f", + "metadata": { + "pycharm": { + "is_executing": true + } + }, "outputs": [], "source": [ - "import boto3\n", - "from langchain.chains.graph_qa.neptune_sparql import NeptuneSparqlQAChain\n", - "from langchain_community.graphs import NeptuneRdfGraph\n", - "from langchain.chat_models import BedrockChat\n", - "from langchain.llms import Bedrock\n", - "\n", - "nep_host = \"\"\n", - "nep_port = \n", - "nep_region = 'us-east-1' # enter your region\n", - "\n", - "graph = None\n", - "graph = NeptuneRdfGraph(\n", - " host=nep_host,\n", - " port=nep_port,\n", - " use_iam_auth=True,\n", - " region_name=nep_region,\n", - " hide_comments=True\n", - ")\n", - "\n", - "elems = graph.get_schema_elements\n", - "# change elems ...\n", - "graph.load_schema(elems)\n", - "\n", - "bedrock_client = boto3.client('bedrock-runtime')\n", - "llm = BedrockChat(\n", - " model_id = \"anthropic.claude-v2\",\n", - " client = bedrock_client\n", - ")\n", - "\n", - "chain = NeptuneSparqlQAChain.from_llm(\n", - " llm=llm, graph=graph, examples=EXAMPLES, verbose=True, top_K=10, return_intermediate_steps=True, return_direct=False)\n" + "graph.load_schema()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, + "id": "1fe76ccd", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "In the following, each IRI is followed by the local name and optionally its description in parentheses. 
\n", + "The RDF graph supports the following node types:\n", + " (PersonalProfileDocument, None), (RSAPublicKey, None), (Male, None), (Person, None), (Work, None)\n", + "The RDF graph supports the following relationships:\n", + " (seeAlso, None), (title, None), (mbox_sha1sum, None), (maker, None), (oidcIssuer, None), (publicHomePage, None), (openid, None), (storage, None), (name, None), (country, None), (type, None), (profileHighlightColor, None), (preferencesFile, None), (label, None), (modulus, None), (participant, None), (street2, None), (locality, None), (nick, None), (homepage, None), (license, None), (givenname, None), (street-address, None), (postal-code, None), (street, None), (lat, None), (primaryTopic, None), (fn, None), (location, None), (developer, None), (city, None), (region, None), (member, None), (long, None), (address, None), (family_name, None), (account, None), (workplaceHomepage, None), (title, None), (publicTypeIndex, None), (office, None), (homePage, None), (mbox, None), (preferredURI, None), (profileBackgroundColor, None), (owns, None), (based_near, None), (hasAddress, None), (img, None), (assistant, None), (title, None), (key, None), (inbox, None), (editableProfile, None), (postalCode, None), (weblog, None), (exponent, None), (avatar, None)\n", + "\n" + ] + } + ], "source": [ "graph.get_schema" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "graph.get_schema_elements" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "chain" - ] - }, { "cell_type": "markdown", + "id": "68a3c677", "metadata": {}, "source": [ - "## Ask questions\n", - "Depends on the data we ingested above" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "chain.run('''How many organizations are in the graph''')" + "## Querying the graph\n", + "\n", + "Now, you can use the graph SPARQL QA chain to ask questions about the graph." 
] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, + "execution_count": 11, + "id": "7476ce98", + "metadata": { + "pycharm": { + "is_executing": true + } + }, "outputs": [], "source": [ - "chain.run('''Are there any mergers or acquisitions''')" + "chain = GraphSparqlQAChain.from_llm(\n", + " ChatOpenAI(temperature=0), graph=graph, verbose=True\n", + ")" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 12, + "id": "ef8ee27b", + "metadata": { + "pycharm": { + "is_executing": true + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new GraphSparqlQAChain chain...\u001b[0m\n", + "Identified intent:\n", + "\u001b[32;1m\u001b[1;3mSELECT\u001b[0m\n", + "Generated SPARQL:\n", + "\u001b[32;1m\u001b[1;3mPREFIX foaf: \n", + "SELECT ?homepage\n", + "WHERE {\n", + " ?person foaf:name \"Tim Berners-Lee\" .\n", + " ?person foaf:workplaceHomepage ?homepage .\n", + "}\u001b[0m\n", + "Full Context:\n", + "\u001b[32;1m\u001b[1;3m[]\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "\"Tim Berners-Lee's work homepage is http://www.w3.org/People/Berners-Lee/.\"" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "chain.run('''Find organizations''')" + "chain.run(\"What is Tim Berners-Lee's work homepage?\")" ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", + "id": "af4b3294", "metadata": {}, - "outputs": [], "source": [ - "chain.run('''Find sites of MegaSystems or MegaFinancial''')" + "## Updating the graph\n", + "\n", + "Analogously, you can update the graph, i.e., insert triples, using natural language." 
] }, { "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], + "execution_count": 14, + "id": "fdf38841", + "metadata": { + "pycharm": { + "is_executing": true + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new GraphSparqlQAChain chain...\u001b[0m\n", + "Identified intent:\n", + "\u001b[32;1m\u001b[1;3mUPDATE\u001b[0m\n", + "Generated SPARQL:\n", + "\u001b[32;1m\u001b[1;3mPREFIX foaf: \n", + "INSERT {\n", + " ?person foaf:workplaceHomepage .\n", + "}\n", + "WHERE {\n", + " ?person foaf:name \"Timothy Berners-Lee\" .\n", + "}\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "data": { + "text/plain": [ + "'Successfully inserted triples into the graph.'" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "%%time\n", - "chain.run('''Find a member who is manager of one or more members.''')" + "chain.run(\n", + " \"Save that the person with the name 'Timothy Berners-Lee' has a work homepage at 'http://www.w3.org/foo/bar/'\"\n", + ")" ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "markdown", + "id": "5e0f7fc1", "metadata": {}, - "outputs": [], "source": [ - "chain.run('''Find five members and who their manager is.''')" + "Let's verify the results:" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, + "id": "f874171b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "[(rdflib.term.URIRef('https://www.w3.org/'),),\n", + " (rdflib.term.URIRef('http://www.w3.org/foo/bar/'),)]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "%%time\n", - "chain.run('''Find org units or suborganizations of The Mega Group. What are the sites of those units?''')" + "query = (\n", + " \"\"\"PREFIX foaf: \\n\"\"\"\n", + " \"\"\"SELECT ?hp\\n\"\"\"\n", + " \"\"\"WHERE {\\n\"\"\"\n", + " \"\"\" ?person foaf:name \"Timothy Berners-Lee\" . \\n\"\"\"\n", + " \"\"\" ?person foaf:workplaceHomepage ?hp .\\n\"\"\"\n", + " \"\"\"}\"\"\"\n", + ")\n", + "graph.query(query)" ] } ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "lc", "language": "python", - "name": "python3" + "name": "lc" }, "language_info": { "codemirror_mode": { @@ -359,9 +295,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.8" + "version": "3.11.4" } }, "nbformat": 4, - "nbformat_minor": 2 -} + "nbformat_minor": 5 +} \ No newline at end of file diff --git a/docs/docs/use_cases/graph/neptune_sparql_qa.ipynb b/docs/docs/use_cases/graph/neptune_sparql_qa.ipynb index 1156bd7cdeaa0..4c464e840d522 100644 --- a/docs/docs/use_cases/graph/neptune_sparql_qa.ipynb +++ b/docs/docs/use_cases/graph/neptune_sparql_qa.ipynb @@ -4,19 +4,16 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# SPARQL Langchain with Amazon Neptune\n", + "# Neptune SPARQL QA Chain\n", "\n", - "This notebook shows use of LLM to query RDF graph in Amazon Neptune. It uses Langchain.\n", + "This notebook shows use of LLM to query RDF graph in Amazon Neptune. This code uses a `NeptuneRdfGraph` class that connects with the Neptune database and loads it's schema. 
The `NeptuneSparqlQAChain` is used to connect the graph and LLM to ask natural language questions.\n", "\n", - "* Create RDFGraph object that connects to Neptune and introspects its schema\n", - "* Create chain using LLM and RDFGraph. LLM used here is Anthropic Claude 2 via Bedrock\n", - "* Ask questions to LLM. LLM consults schema and calls RDFGraph to execute SPARQL query\n", - "\n", - "Requirements:\n", + "Requirements for running this notebook:\n", "- Neptune 1.2.x cluster accessible from this notebook\n", - "- Python 3.9 or higher kernet\n", - "- For Bedrock access, ensure IAM role has\n", + "- Kernel with Python 3.9 or higher\n", + "- For Bedrock access, ensure IAM role has this policy\n", "\n", + "```json\n", "{\n", " \"Action\": [\n", " \"bedrock:ListFoundationModels\",\n", @@ -25,8 +22,9 @@ " \"Resource\": \"*\",\n", " \"Effect\": \"Allow\"\n", "}\n", + "```\n", "\n", - "- S3 bucket for staging in same account/region as Neptune" + "- S3 bucket for staging sample data, bucket should be in same account/region as Neptune." ] }, { @@ -45,7 +43,7 @@ "metadata": {}, "outputs": [], "source": [ - "STAGE_BUCKET=''" + "STAGE_BUCKET = \"\"" ] }, { @@ -106,7 +104,7 @@ "metadata": {}, "outputs": [], "source": [ - "EXAMPLES=\"\"\"\n", + "EXAMPLES = \"\"\"\n", "\n", "\n", "Find organizations.\n", @@ -211,62 +209,34 @@ "source": [ "import boto3\n", "from langchain.chains.graph_qa.neptune_sparql import NeptuneSparqlQAChain\n", - "from langchain_community.graphs import NeptuneRdfGraph\n", "from langchain.chat_models import BedrockChat\n", - "from langchain.llms import Bedrock\n", + "from langchain_community.graphs import NeptuneRdfGraph\n", "\n", - "nep_host = \"\"\n", - "nep_port = \"\"\n", - "nep_region = 'us-east-1' # enter your region\n", - "nep_ep = f\"https://{nep_host}:{nep_port}/sparql\"\n", + "host = \"\"\n", + "port = \"\"\n", + "region = \"us-east-1\" # specify region\n", "\n", - "graph = None\n", "graph = NeptuneRdfGraph(\n", - " query_endpoint=nep_ep,\n", - " use_iam_auth=True,\n", - " region_name=nep_region,\n", - " hide_comments=True\n", + " host=host, port=port, use_iam_auth=True, region_name=region, hide_comments=True\n", ")\n", "\n", - "elems = graph.get_schema_elements\n", - "# change elems ...\n", - "graph.load_from_schema_elements(elems)\n", + "schema_elements = graph.get_schema_elements\n", + "# Optionally, you can update the schema_elements, and\n", + "# load the schema from the pruned elements.\n", + "graph.load_from_schema_elements(schema_elements)\n", "\n", - "bedrock_client = boto3.client('bedrock-runtime')\n", - "llm = BedrockChat(\n", - " model_id = \"anthropic.claude-v2\",\n", - " client = bedrock_client\n", - ")\n", + "bedrock_client = boto3.client(\"bedrock-runtime\")\n", + "llm = BedrockChat(model_id=\"anthropic.claude-v2\", client=bedrock_client)\n", "\n", "chain = NeptuneSparqlQAChain.from_llm(\n", - " llm=llm, graph=graph, examples=EXAMPLES, verbose=True, top_K=10, return_intermediate_steps=True, return_direct=False)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "graph.get_schema" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "graph.get_schema_elements" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "chain" + " llm=llm,\n", + " graph=graph,\n", + " examples=EXAMPLES,\n", + " verbose=True,\n", + " top_K=10,\n", + " return_intermediate_steps=True,\n", + " 
return_direct=False,\n", + ")" ] }, { @@ -283,7 +253,7 @@ "metadata": {}, "outputs": [], "source": [ - "chain.invoke('''How many organizations are in the graph''')" + "chain.invoke(\"\"\"How many organizations are in the graph\"\"\")" ] }, { @@ -292,7 +262,7 @@ "metadata": {}, "outputs": [], "source": [ - "chain.invoke('''Are there any mergers or acquisitions''')" + "chain.invoke(\"\"\"Are there any mergers or acquisitions\"\"\")" ] }, { @@ -301,7 +271,7 @@ "metadata": {}, "outputs": [], "source": [ - "chain.invoke('''Find organizations''')" + "chain.invoke(\"\"\"Find organizations\"\"\")" ] }, { @@ -310,7 +280,7 @@ "metadata": {}, "outputs": [], "source": [ - "chain.invoke('''Find sites of MegaSystems or MegaFinancial''')" + "chain.invoke(\"\"\"Find sites of MegaSystems or MegaFinancial\"\"\")" ] }, { @@ -319,8 +289,7 @@ "metadata": {}, "outputs": [], "source": [ - "%%time\n", - "chain.invoke('''Find a member who is manager of one or more members.''')" + "chain.invoke(\"\"\"Find a member who is manager of one or more members.\"\"\")" ] }, { @@ -329,7 +298,7 @@ "metadata": {}, "outputs": [], "source": [ - "chain.invoke('''Find five members and who their manager is.''')" + "chain.invoke(\"\"\"Find five members and who their manager is.\"\"\")" ] }, { @@ -338,8 +307,9 @@ "metadata": {}, "outputs": [], "source": [ - "%%time\n", - "chain.invoke('''Find org units or suborganizations of The Mega Group. What are the sites of those units?''')" + "chain.invoke(\n", + " \"\"\"Find org units or suborganizations of The Mega Group. What are the sites of those units?\"\"\"\n", + ")" ] } ], From a4c2727a7f2c7b8c10aaacbd0f15639eb4ad354a Mon Sep 17 00:00:00 2001 From: Piyush Jain Date: Tue, 6 Feb 2024 12:11:41 -0800 Subject: [PATCH 21/25] Fixed linting. --- .../langchain_community/graphs/neptune_rdf_graph.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/libs/community/langchain_community/graphs/neptune_rdf_graph.py b/libs/community/langchain_community/graphs/neptune_rdf_graph.py index 1df43a93469c0..ed8f0cc65e815 100644 --- a/libs/community/langchain_community/graphs/neptune_rdf_graph.py +++ b/libs/community/langchain_community/graphs/neptune_rdf_graph.py @@ -1,6 +1,6 @@ import json from types import SimpleNamespace -from typing import Optional +from typing import Any, Dict, Optional, Sequence import boto3 import requests @@ -121,7 +121,7 @@ def __init__( # Set schema self.schema = "" - self.schema_elements = {} + self.schema_elements: Dict[str, Any] = {} self._refresh_schema() @property @@ -132,13 +132,13 @@ def get_schema(self) -> str: return self.schema @property - def get_schema_elements(self): + def get_schema_elements(self) -> Dict[str, Any]: return self.schema_elements def query( self, query: str, - ): + ) -> Dict[str, Any]: """ Run Neptune query. """ @@ -176,7 +176,7 @@ def query( json_resp = json.loads(queryres.text) return json_resp - def load_schema(self, schema_elements: dict): + def load_schema(self, schema_elements: Dict[str, Any]) -> None: """ Generates and sets schema from schema_elements. Helpful in cases where introspected schema needs pruning. @@ -207,7 +207,7 @@ def load_schema(self, schema_elements: dict): f"{elem_str['oprops']}" ) - def _get_local_name(self, iri: str): + def _get_local_name(self, iri: str) -> Sequence[str]: """ Split IRI into prefix and local """ From c81df71e79b11790d384fd78e45866e51b4ee017 Mon Sep 17 00:00:00 2001 From: Piyush Jain Date: Tue, 6 Feb 2024 12:17:48 -0800 Subject: [PATCH 22/25] Fixed linting errors. 
--- .../chains/graph_qa/neptune_sparql.py | 35 ++++++++++--------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/libs/langchain/langchain/chains/graph_qa/neptune_sparql.py b/libs/langchain/langchain/chains/graph_qa/neptune_sparql.py index e1f756b90e943..08a1cc249beed 100644 --- a/libs/langchain/langchain/chains/graph_qa/neptune_sparql.py +++ b/libs/langchain/langchain/chains/graph_qa/neptune_sparql.py @@ -5,25 +5,23 @@ from typing import Any, Dict, List, Optional +from langchain_community.graphs import NeptuneRdfGraph from langchain_core.language_models import BaseLanguageModel from langchain_core.prompts.base import BasePromptTemplate +from langchain_core.prompts.prompt import PromptTemplate from langchain_core.pydantic_v1 import Field from langchain.callbacks.manager import CallbackManagerForChainRun from langchain.chains.base import Chain -from langchain.chains.graph_qa.prompts import ( - SPARQL_GENERATION_SELECT_PROMPT, - SPARQL_QA_PROMPT, -) +from langchain.chains.graph_qa.prompts import SPARQL_QA_PROMPT from langchain.chains.llm import LLMChain -from langchain_community.graphs import NeptuneRdfGraph - -from langchain_core.prompts.prompt import PromptTemplate INTERMEDIATE_STEPS_KEY = "intermediate_steps" -SPARQL_GENERATION_TEMPLATE = """Task: Generate a SPARQL SELECT statement for querying a graph database. -For instance, to find all email addresses of John Doe, the following query in backticks would be suitable: +SPARQL_GENERATION_TEMPLATE = """ +Task: Generate a SPARQL SELECT statement for querying a graph database. +For instance, to find all email addresses of John Doe, the following +query in backticks would be suitable: ``` PREFIX foaf: SELECT ?email @@ -43,7 +41,8 @@ {schema} Note: Be as concise as possible. Do not include any explanations or apologies in your responses. -Do not respond to any questions that ask for anything else than for you to construct a SPARQL query. +Do not respond to any questions that ask for anything else than +for you to construct a SPARQL query. Do not include any text except the SPARQL query generated. The question is: @@ -53,18 +52,20 @@ input_variables=["schema", "prompt"], template=SPARQL_GENERATION_TEMPLATE ) + def extract_sparql(query: str) -> str: query = query.strip() querytoks = query.split("```") if len(querytoks) == 3: query = querytoks[1] - + if query.startswith("sparql"): query = query[6:] elif query.startswith("") and query.endswith(""): - query= query[8:-9] + query = query[8:-9] return query + class NeptuneSparqlQAChain(Chain): """Chain for question-answering against a Neptune graph by generating SPARQL statements. 
@@ -125,11 +126,13 @@ def from_llm( """Initialize from LLM.""" qa_chain = LLMChain(llm=llm, prompt=qa_prompt) template_to_use = SPARQL_GENERATION_TEMPLATE - if not(examples is None): + if examples: template_to_use = template_to_use.replace( - "Examples:", "Examples: " + examples) + "Examples:", "Examples: " + examples + ) sparql_prompt = PromptTemplate( - input_variables=["schema", "prompt"], template=template_to_use) + input_variables=["schema", "prompt"], template=template_to_use + ) sparql_generation_chain = LLMChain(llm=llm, prompt=sparql_prompt) return cls( @@ -165,7 +168,7 @@ def _call( _run_manager.on_text( generated_sparql, color="green", end="\n", verbose=self.verbose ) - + intermediate_steps.append({"query": generated_sparql}) context = self.graph.query(generated_sparql) From 4c9e985c092b8139158745c7ac5b9d381a111d36 Mon Sep 17 00:00:00 2001 From: Piyush Jain Date: Tue, 6 Feb 2024 12:31:20 -0800 Subject: [PATCH 23/25] Fixed imports for unit tests. --- .../graphs/neptune_rdf_graph.py | 20 +++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/libs/community/langchain_community/graphs/neptune_rdf_graph.py b/libs/community/langchain_community/graphs/neptune_rdf_graph.py index ed8f0cc65e815..b9e0074a366ee 100644 --- a/libs/community/langchain_community/graphs/neptune_rdf_graph.py +++ b/libs/community/langchain_community/graphs/neptune_rdf_graph.py @@ -2,10 +2,7 @@ from types import SimpleNamespace from typing import Any, Dict, Optional, Sequence -import boto3 import requests -from botocore.auth import SigV4Auth -from botocore.awsrequest import AWSRequest CLASS_QUERY = """ SELECT DISTINCT ?elem ?com @@ -117,7 +114,18 @@ def __init__( self.hide_comments = hide_comments self.query_endpoint = f"https://{host}:{port}/sparql" - self.session = boto3.Session() if self.use_iam_auth else None + if self.use_iam_auth: + try: + import boto3 + + self.session = boto3.Session() + except ImportError: + raise ImportError( + "Could not import boto3 python package. " + "Please install it with `pip install boto3`." + ) + else: + self.session = None # Set schema self.schema = "" @@ -160,9 +168,13 @@ def query( token=session_token, region=self.region_name, ) + from botocore.awsrequest import AWSRequest + request = AWSRequest( method="POST", url=self.query_endpoint, data=data, params=params ) + from botocore.auth import SigV4Auth + SigV4Auth(creds, service, self.region_name).add_auth(request) request.headers["Content-Type"] = "application/x-www-form-urlencoded" request_hdr = request.headers From 71c44d9e1a78957f08a6bf0e0b9e3ed655d3ffe5 Mon Sep 17 00:00:00 2001 From: Piyush Jain Date: Tue, 6 Feb 2024 12:37:52 -0800 Subject: [PATCH 24/25] Fixed import test. 
--- libs/langchain/tests/unit_tests/chains/test_imports.py | 1 + 1 file changed, 1 insertion(+) diff --git a/libs/langchain/tests/unit_tests/chains/test_imports.py b/libs/langchain/tests/unit_tests/chains/test_imports.py index 8317dd62ea983..cf76a851b8c79 100644 --- a/libs/langchain/tests/unit_tests/chains/test_imports.py +++ b/libs/langchain/tests/unit_tests/chains/test_imports.py @@ -33,6 +33,7 @@ "NatBotChain", "NebulaGraphQAChain", "NeptuneOpenCypherQAChain", + "NeptuneSparqlQAChain", "OpenAIModerationChain", "OpenAPIEndpointChain", "QAGenerationChain", From d0b68e09073f13fec2e606df28e4cbbb9ec4ea4e Mon Sep 17 00:00:00 2001 From: Bagatur Date: Mon, 12 Feb 2024 21:17:35 -0800 Subject: [PATCH 25/25] fmt --- libs/langchain/langchain/graphs/__init__.py | 1 - libs/langchain/langchain/graphs/neptune_rdf_graph.py | 3 --- libs/langchain/tests/unit_tests/graphs/test_imports.py | 1 - 3 files changed, 5 deletions(-) delete mode 100644 libs/langchain/langchain/graphs/neptune_rdf_graph.py diff --git a/libs/langchain/langchain/graphs/__init__.py b/libs/langchain/langchain/graphs/__init__.py index 327038082e74e..3189534975ee9 100644 --- a/libs/langchain/langchain/graphs/__init__.py +++ b/libs/langchain/langchain/graphs/__init__.py @@ -33,7 +33,6 @@ def __getattr__(name: str) -> Any: "KuzuGraph", "HugeGraph", "RdfGraph", - "NeptuneRdfGraph", "ArangoGraph", "FalkorDBGraph", ] diff --git a/libs/langchain/langchain/graphs/neptune_rdf_graph.py b/libs/langchain/langchain/graphs/neptune_rdf_graph.py deleted file mode 100644 index 50e62577cfb65..0000000000000 --- a/libs/langchain/langchain/graphs/neptune_rdf_graph.py +++ /dev/null @@ -1,3 +0,0 @@ -from langchain_community.graphs.neptune_rdf_graph import NeptuneRdfGraph - -__all__ = ["NeptuneRdfGraph"] diff --git a/libs/langchain/tests/unit_tests/graphs/test_imports.py b/libs/langchain/tests/unit_tests/graphs/test_imports.py index 11393b1d25554..5287c42285533 100644 --- a/libs/langchain/tests/unit_tests/graphs/test_imports.py +++ b/libs/langchain/tests/unit_tests/graphs/test_imports.py @@ -7,7 +7,6 @@ "Neo4jGraph", "NebulaGraph", "NeptuneGraph", - "NeptuneRdfGraph", "KuzuGraph", "HugeGraph", "RdfGraph",
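
Taken together, the classes added across this patch series are wired up as in the minimal sketch below, condensed from the sample notebook in patch 20. The Neptune endpoint, port, and region shown are placeholder values (not part of any patch), and the sketch assumes IAM auth is enabled on the cluster and that the caller has Bedrock access.

import boto3
from langchain.chains.graph_qa.neptune_sparql import NeptuneSparqlQAChain
from langchain.chat_models import BedrockChat
from langchain_community.graphs import NeptuneRdfGraph

# Placeholder connection details -- substitute your own Neptune cluster endpoint,
# port, and region. Constructing the graph introspects the RDF schema (classes,
# relationships, OWL data/object properties) via SPARQL.
graph = NeptuneRdfGraph(
    host="<neptune-cluster-endpoint>",
    port=8182,
    use_iam_auth=True,
    region_name="us-east-1",
    hide_comments=True,
)

# Claude 2 via Bedrock generates SPARQL from the introspected schema.
llm = BedrockChat(
    model_id="anthropic.claude-v2",
    client=boto3.client("bedrock-runtime"),
)

chain = NeptuneSparqlQAChain.from_llm(
    llm=llm,
    graph=graph,
    verbose=True,
    return_intermediate_steps=True,
)

# Natural-language question -> generated SPARQL -> query Neptune -> QA answer.
print(chain.invoke("How many organizations are in the graph"))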