Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add graph-rag notebook #22

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,353 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "273a70e6-344f-4cac-8c95-b89f5976680b",
"metadata": {},
"outputs": [],
"source": [
"import oracledb\n",
"\n",
"connection = oracledb.connect(\n",
" user=\"<your_user>\", \n",
" password='<your_password>',\n",
" dsn=\"<your_connection_string>\"\n",
")\n",
"\n",
"cursor = connection.cursor()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "76a0b0c4-a248-48e5-958a-1b4bac7d33ed",
"metadata": {},
"outputs": [],
"source": [
"# create tables\n",
"\n",
"create_movie_table = \"\"\"\n",
" CREATE TABLE MOVIES (\n",
" MOVIE_ID NUMBER,\n",
" TITLE VARCHAR2(400),\n",
" GENRES JSON,\n",
" SUMMARY VARCHAR2(16000)\n",
" )\n",
"\"\"\"\n",
"\n",
"create_customer_table = \"\"\"\n",
" CREATE TABLE MOVIES_CUSTOMER (\n",
" CUST_ID NUMBER,\n",
" FIRSTNAME VARCHAR(200),\n",
" LASTNAME VARCHAR(200)\n",
" )\n",
"\"\"\"\n",
"\n",
"create_watched_table = \"\"\"\n",
" CREATE TABLE WATCHED_MOVIE (\n",
" DAY_ID TIMESTAMP(6), \n",
" MOVIE_ID NUMBER,\n",
" PROMO_CUST_ID NUMBER\n",
" )\n",
"\"\"\"\n",
"\n",
"cursor.execute(create_movie_table)\n",
"cursor.execute(create_customer_table)\n",
"cursor.execute(create_watched_table)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "6a50b3bd-5d73-4b78-9757-15c10e5d3e12",
"metadata": {},
"outputs": [],
"source": [
"cursor.execute(\"\"\"\n",
" INSERT INTO MOVIES (MOVIE_ID, TITLE, GENRES, SUMMARY) VALUES\n",
" (1, 'Inception', '{\"Action\": \"Sci-Fi\"}', 'A thief who steals corporate secrets through the use of dream-sharing technology is given the inverse task of planting an idea into the mind of a C.E.O.'),\n",
" (2, 'The Matrix', '{\"Action\": \"Sci-Fi\"}', 'A computer hacker learns from mysterious rebels about the true nature of his reality and his role in the war against its controllers.'),\n",
" (3, 'The Godfather', '{\"Drama\": \"Crime\"}', 'The aging patriarch of an organized crime dynasty transfers control of his clandestine empire to his reluctant son.'),\n",
" (4, 'Titanic', '{\"Romance\": \"Drama\"}', 'A seventeen-year-old aristocrat falls in love with a kind but poor artist aboard the luxurious, ill-fated R.M.S. Titanic.'),\n",
" (5, 'Toy Story', '{\"Animation\": \"Adventure\"}', 'A cowboy doll is profoundly threatened and jealous when a new spaceman figure supplants him as top toy in a boy''s room.')\n",
"\"\"\")\n",
"\n",
"cursor.execute(\"\"\"\n",
" INSERT INTO MOVIES_CUSTOMER (CUST_ID, FIRSTNAME, LASTNAME) VALUES\n",
" (101, 'John', 'Doe'),\n",
" (102, 'Jane', 'Smith'),\n",
" (103, 'Sam', 'Wilson'),\n",
" (104, 'Emily', 'Clark'),\n",
" (105, 'Michael', 'Johnson')\n",
"\"\"\")\n",
"\n",
"cursor.execute(\"\"\"\n",
" INSERT INTO WATCHED_MOVIE (DAY_ID, MOVIE_ID, PROMO_CUST_ID) VALUES\n",
" (TO_TIMESTAMP('2024-10-30 12:34:56.123456', 'YYYY-MM-DD HH24:MI:SS.FF'), 1, 101),\n",
" (TO_TIMESTAMP('2024-10-31 12:34:56.123456', 'YYYY-MM-DD HH24:MI:SS.FF'), 2, 101),\n",
" (TO_TIMESTAMP('2024-09-30 12:34:56.123456', 'YYYY-MM-DD HH24:MI:SS.FF'), 3, 101),\n",
" (TO_TIMESTAMP('2024-10-31 09:15:23.654321', 'YYYY-MM-DD HH24:MI:SS.FF'), 2, 102),\n",
" (TO_TIMESTAMP('2024-11-01 16:45:12.987654', 'YYYY-MM-DD HH24:MI:SS.FF'), 3, 103),\n",
" (TO_TIMESTAMP('2024-11-02 18:22:43.123456', 'YYYY-MM-DD HH24:MI:SS.FF'), 4, 104),\n",
" (TO_TIMESTAMP('2024-11-03 20:01:00.000000', 'YYYY-MM-DD HH24:MI:SS.FF'), 5, 105)\n",
"\"\"\")"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "9535cdfe-d375-4fb7-a592-2ccd08b76307",
"metadata": {},
"outputs": [],
"source": [
"cpg = \"\"\"\n",
" CREATE OR REPLACE PROPERTY GRAPH CUSTOMER_WATCHED_MOVIES\n",
" VERTEX TABLES (\n",
" MOVIES_CUSTOMER AS CUSTOMER\n",
" KEY(CUST_ID),\n",
" MOVIES AS MOVIE\n",
" KEY(MOVIE_ID)\n",
" )\n",
" EDGE TABLES(\n",
" WATCHED_MOVIE AS WATCHED\n",
" KEY(DAY_ID, MOVIE_ID, PROMO_CUST_ID)\n",
" SOURCE KEY (PROMO_CUST_ID) REFERENCES CUSTOMER(CUST_ID)\n",
" DESTINATION KEY (MOVIE_ID) REFERENCES MOVIE(MOVIE_ID)\n",
" )\n",
"\"\"\"\n",
"cursor.execute(cpg)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "c5036544-787c-46cf-b9a5-5aaef8958748",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"('John', 'Doe', 'Inception', datetime.datetime(2024, 10, 30, 12, 34, 56, 123456))\n",
"('John', 'Doe', 'The Matrix', datetime.datetime(2024, 10, 31, 12, 34, 56, 123456))\n",
"('John', 'Doe', 'The Godfather', datetime.datetime(2024, 9, 30, 12, 34, 56, 123456))\n",
"('Jane', 'Smith', 'The Matrix', datetime.datetime(2024, 10, 31, 9, 15, 23, 654321))\n",
"('Sam', 'Wilson', 'The Godfather', datetime.datetime(2024, 11, 1, 16, 45, 12, 987654))\n",
"('Emily', 'Clark', 'Titanic', datetime.datetime(2024, 11, 2, 18, 22, 43, 123456))\n",
"('Michael', 'Johnson', 'Toy Story', datetime.datetime(2024, 11, 3, 20, 1))\n"
]
}
],
"source": [
"sample_graph_query = \"\"\"\n",
" SELECT * FROM GRAPH_TABLE(CUSTOMER_WATCHED_MOVIES\n",
" MATCH (c IS CUSTOMER) -[w IS WATCHED]-> (m IS MOVIE)\n",
" COLUMNS(c.FIRSTNAME AS FIRSTNAME, c.LASTNAME AS LASTNAME, m.TITLE AS MOVIE_TITLE, w.DAY_ID as DAY_WATCHED)\n",
" )\n",
"\"\"\"\n",
"for row in cursor.execute(sample_graph_query):\n",
" print(row)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "153c6cf6-a986-45f4-ad2b-6ddd7a54c092",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"WARNING: The step will fail if the API key is not present or is incorrect.\n",
"Please update the OpenAI_API_key before calling the llm the next step.\n",
"The LLM model you will use is OpenAI ChatGPT 3.5\n"
]
}
],
"source": [
"from langchain_openai import ChatOpenAI\n",
"\n",
"\n",
"# Set up the OpenAI LLM\n",
"print (\"WARNING: The step will fail if the API key is not present or is incorrect.\")\n",
"print (\"Please update the OpenAI_API_key before calling the llm the next step.\")\n",
"\n",
"# set the LLM to get response\n",
"llm = ChatOpenAI(\n",
" model_name='gpt-3.5-turbo-16k',\n",
" temperature = 0.1,\n",
" openai_api_key=\"<your open AI API key>\",\n",
" max_tokens=2000\n",
")\n",
"print(\"The LLM model you will use is OpenAI ChatGPT 3.5\")"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "f478ae55-9af3-40c8-a763-9ec8aacb2a18",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The genre of Inception based on this summary is science fiction, specifically a heist thriller.\n",
"The genre of The Godfather based on this summary is crime/drama.\n",
"The genre of Toy Story based on this summary is animated comedy.\n",
"The genre of The Matrix based on this summary is science fiction.\n",
"The genre of Titanic based on this summary is romance.\n"
]
}
],
"source": [
"from langchain_core.prompts import PromptTemplate\n",
"\n",
"# Create a prompt template\n",
"template = \"What is the genre of {movie} based on this summary: {summary}?\"\n",
"prompt = PromptTemplate.from_template(template)\n",
"\n",
"# Create a chain with the prompt and LLM\n",
"chain = prompt | llm\n",
"\n",
"# Define your SQL query\n",
"sql = \"\"\"\n",
" SELECT DISTINCT MOVIE_TITLE, MOVIE_SUMMARY\n",
" FROM GRAPH_TABLE( CUSTOMER_WATCHED_MOVIES\n",
" MATCH (c1 IS CUSTOMER)-[e1 IS WATCHED]->(m IS MOVIE)\n",
" COLUMNS (m.title as MOVIE_TITLE, m.summary as MOVIE_SUMMARY) \n",
" )\n",
"\"\"\"\n",
"\n",
"# Execute the SQL query\n",
"cursor.execute(sql)\n",
"\n",
"# Fetch all rows from the executed query\n",
"rows = cursor.fetchall()\n",
"\n",
"# Ask the LLM for the genre of each movie returned by the query\n",
"for row in rows:\n",
" # Run the chain and print the output\n",
" result = chain.invoke({'movie': row[0], 'summary': row[1]})\n",
" print(result.content)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "465ef6d1-77a4-46e8-8485-91ccf45214ae",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Based on the dataset provided, the movie watching preferences of this user can be described as follows:\n",
"\n",
"1. Genre Preference: The user seems to have a preference for movies that fall under the genres of science fiction, action, and crime. This is evident from their choice of movies like \"Inception,\" \"The Matrix,\" and \"The Godfather,\" which all belong to these genres.\n",
"\n",
"2. Complex and Intriguing Plots: The user appears to enjoy movies with complex and thought-provoking storylines. Both \"Inception\" and \"The Matrix\" involve intricate concepts and explore the nature of reality, while \"The Godfather\" delves into the complexities of organized crime and family dynamics.\n",
"\n",
"3. Character-Driven Narratives: The user seems to appreciate movies that focus on well-developed characters and their personal journeys. \"The Godfather\" specifically revolves around the character development of the reluctant son as he takes over the family business.\n",
"\n",
"4. Blend of Action and Drama: The user's movie choices indicate a preference for films that combine elements of action and drama. \"Inception\" and \"The Matrix\" are known for their intense action sequences, while \"The Godfather\" balances crime-related drama with moments of tension and conflict.\n",
"\n",
"Overall, this user appears to enjoy intellectually stimulating movies with intricate plots, strong character development, and a mix of action and drama.\n"
]
}
],
"source": [
"# Create a prompt template\n",
"template = \"Based on this dataset of movies a customer has watched, containing movie titles, genres and summaries, how would you describe the movie watching preferences of this user? {data}\"\n",
"prompt = PromptTemplate.from_template(template)\n",
"\n",
"chain = prompt | llm\n",
"\n",
"# Define your SQL query\n",
"sql = \"\"\"\n",
" SELECT DISTINCT MOVIE_TITLE, MOVIE_SUMMARY\n",
" FROM GRAPH_TABLE( CUSTOMER_WATCHED_MOVIES\n",
" MATCH (c1 IS CUSTOMER)-[e1 IS WATCHED]->(m IS MOVIE)\n",
" WHERE c1.CUST_ID = 101\n",
" COLUMNS (m.title as MOVIE_TITLE, m.summary as MOVIE_SUMMARY) \n",
" )\n",
"\"\"\"\n",
"\n",
"# Execute the SQL query\n",
"cursor.execute(sql)\n",
"\n",
"# Fetch all rows from the executed query\n",
"rows = cursor.fetchall()\n",
"# print(rows)\n",
"\n",
"result = chain.invoke({'data': rows})\n",
"print(result.content)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "971a1413-5d6e-4fe1-a014-088138d268e9",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Based on the given dataset, customer 101 has watched the most movies.\n"
]
}
],
"source": [
"# Create a prompt template\n",
"template = \"Based on this dataset of movies that customers have watched, containing customer IDs, movie IDs and date watched, which customer has watched the most movies? {data}\"\n",
"prompt = PromptTemplate.from_template(template)\n",
"\n",
"chain = prompt | llm\n",
"\n",
"# Define your SQL query\n",
"sql = \"\"\"\n",
" SELECT *\n",
" FROM GRAPH_TABLE( CUSTOMER_WATCHED_MOVIES\n",
" MATCH (c1 IS CUSTOMER)-[e1 IS WATCHED]->(m IS MOVIE)\n",
" COLUMNS (c1.CUST_ID as customer_id, m.MOVIE_ID as movie_id, e1.DAY_ID as date_watched) \n",
" )\n",
"\"\"\"\n",
"\n",
"# Execute the SQL query\n",
"cursor.execute(sql)\n",
"\n",
"# Fetch all rows from the executed query\n",
"rows = cursor.fetchall()\n",
"# print(rows)\n",
"\n",
"result = chain.invoke({'data': rows})\n",
"print(result.content)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Loading