Merged

24 commits
2607236
feat: expnad preprocessing to a multi-step workflow.
heliamoh Sep 27, 2025
8b35029
feat: Add new runnables for checking question safety and scope, query…
heliamoh Sep 27, 2025
8b2578f
feat:improved hybrid retrieval
heliamoh Sep 27, 2025
b2cc4bb
feat: Add new runnables for checking question safety and scope, query…
heliamoh Sep 27, 2025
3b9e95d
code quality check fixes
heliamoh Sep 28, 2025
2864e97
fix: Resolve mypy linter errors
heliamoh Sep 28, 2025
f35f3e0
remove: Remove reactome_kg directory from repository
heliamoh Sep 28, 2025
ba01931
code quality fixes
heliamoh Sep 28, 2025
7f8d4c5
feat: expnad preprocessing to a multi-step workflow.
heliamoh Sep 28, 2025
67fcd60
feat: expnad preprocessing to a multi-step workflow.
heliamoh Sep 28, 2025
3ea2ba8
feat:improved hybrid retrieval
heliamoh Sep 28, 2025
5b82199
feat:improved answer generation, in-line citation handling and halluc…
heliamoh Sep 28, 2025
27e761c
remove irrelevant docs
heliamoh Sep 28, 2025
0f67db3
[WIP] clean up changes
GFJHogue Jan 19, 2026
0f24963
[WIP] clean up changes (2)
GFJHogue Jan 19, 2026
5ca38cf
revert retrieval changes
GFJHogue Jan 19, 2026
02e7301
spacing, macos-intel actions runner
GFJHogue Jan 20, 2026
337be49
fix SafetyCheck type usage
GFJHogue Feb 6, 2026
4980410
stream unsafe response to user
GFJHogue Feb 6, 2026
b202c3c
black spacing
GFJHogue Feb 6, 2026
9b01a48
cross-db use detected_language from base state
GFJHogue Feb 6, 2026
ebe6180
pre-release docker push
GFJHogue Feb 6, 2026
9232748
new HybridRetriever class (#102)
GFJHogue Feb 8, 2026
93644d5
fix typing for #102
GFJHogue Feb 9, 2026
5 changes: 3 additions & 2 deletions .github/workflows/ci.yml
@@ -9,6 +9,7 @@ on:
push:
branches:
- main
- pre-release

permissions:
id-token: write
@@ -35,7 +36,7 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ubuntu-latest, macos-13]
os: [ubuntu-latest, macos-15-intel]

steps:
- uses: actions/checkout@v4
@@ -81,7 +82,7 @@ jobs:
path: /tmp/image.tar

docker-push:
if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/main' }}
if: ${{ github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref == 'refs/heads/pre-release') }}
needs: docker-build
runs-on: ubuntu-latest

34 changes: 30 additions & 4 deletions src/agent/profiles/base.py
@@ -1,12 +1,14 @@
from typing import Annotated, TypedDict
from typing import Annotated, Literal, TypedDict

from langchain_core.embeddings import Embeddings
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.messages import BaseMessage
from langchain_core.runnables import Runnable, RunnableConfig
from langgraph.graph.message import add_messages

from agent.tasks.detect_language import create_language_detector
from agent.tasks.rephrase import create_rephrase_chain
from agent.tasks.safety_checker import SafetyCheck, create_safety_checker
from tools.external_search.state import SearchState, WebSearchResult
from tools.external_search.workflow import create_search_workflow

@@ -28,6 +30,11 @@ class BaseState(InputState, OutputState, total=False):
rephrased_input: str # LLM-generated query from user input
chat_history: Annotated[list[BaseMessage], add_messages]

# Preprocessing results
safety: str # "true" or "false" from safety check
reason_unsafe: str # Reason if unsafe
detected_language: str # Detected language


class BaseGraphBuilder:
# NOTE: Anything that is common to all graph builders goes here
@@ -38,21 +45,40 @@ def __init__(
embedding: Embeddings,
) -> None:
self.rephrase_chain: Runnable = create_rephrase_chain(llm)
self.safety_checker: Runnable = create_safety_checker(llm)
self.language_detector: Runnable = create_language_detector(llm)
self.search_workflow: Runnable = create_search_workflow(llm)

async def preprocess(self, state: BaseState, config: RunnableConfig) -> BaseState:
rephrased_input: str = await self.rephrase_chain.ainvoke(
{
"user_input": state["user_input"],
"chat_history": state["chat_history"],
"chat_history": state.get("chat_history", []),
},
config,
)
return BaseState(rephrased_input=rephrased_input)
safety_check: SafetyCheck = await self.safety_checker.ainvoke(
{"rephrased_input": rephrased_input}, config
)
detected_language: str = await self.language_detector.ainvoke(
{"user_input": state["user_input"]}, config
)
return BaseState(
rephrased_input=rephrased_input,
safety=safety_check.safety,
reason_unsafe=safety_check.reason_unsafe,
detected_language=detected_language,
)

def proceed_with_research(self, state: BaseState) -> Literal["Continue", "Finish"]:
return "Continue" if state["safety"] == "true" else "Finish"

async def postprocess(self, state: BaseState, config: RunnableConfig) -> BaseState:
search_results: list[WebSearchResult] = []
if config["configurable"]["enable_postprocess"]:
if (
config["configurable"].get("enable_postprocess")
and state["safety"] == "true"
):
result: SearchState = await self.search_workflow.ainvoke(
SearchState(
input=state["rephrased_input"],
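For orientation, the new preprocessing contract can be exercised on its own. A minimal sketch, assuming `agent.profiles.base` is importable; the state values here are hypothetical:

```python
# Hypothetical post-preprocess state; field names come from the diff above.
from agent.profiles.base import BaseState

state = BaseState(
    user_input="What does TP53 do?",
    rephrased_input="What is the function of the TP53 gene?",
    safety="true",               # the string "true", not a boolean
    reason_unsafe="",
    detected_language="English",
)

# proceed_with_research inspects only state["safety"]:
route = "Continue" if state["safety"] == "true" else "Finish"
assert route == "Continue"
```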
40 changes: 5 additions & 35 deletions src/agent/profiles/cross_database.py
@@ -15,16 +15,11 @@
create_uniprot_rewriter_w_reactome
from agent.tasks.cross_database.summarize_reactome_uniprot import \
create_reactome_uniprot_summarizer
from agent.tasks.detect_language import create_language_detector
from agent.tasks.safety_checker import SafetyCheck, create_safety_checker
from retrievers.reactome.rag import create_reactome_rag
from retrievers.uniprot.rag import create_uniprot_rag


class CrossDatabaseState(BaseState):
safety: str # LLM-assessed safety level of the user input
query_language: str # language of the user input

reactome_query: str # LLM-generated query for Reactome
reactome_answer: str # LLM-generated answer from Reactome
reactome_completeness: str # LLM-assessed completeness of the Reactome answer
@@ -46,21 +41,18 @@ def __init__(
self.reactome_rag: Runnable = create_reactome_rag(llm, embedding)
self.uniprot_rag: Runnable = create_uniprot_rag(llm, embedding)

self.safety_checker = create_safety_checker(llm)
self.completeness_checker = create_completeness_grader(llm)
self.detect_language = create_language_detector(llm)
self.write_reactome_query = create_reactome_rewriter_w_uniprot(llm)
self.write_uniprot_query = create_uniprot_rewriter_w_reactome(llm)
self.summarize_final_answer = create_reactome_uniprot_summarizer(
llm.model_copy(update={"streaming": True})
llm, streaming=True
)

# Create graph
state_graph = StateGraph(CrossDatabaseState)
# Set up nodes
state_graph.add_node("check_question_safety", self.check_question_safety)
state_graph.add_node("preprocess_question", self.preprocess)
state_graph.add_node("identify_query_language", self.identify_query_language)
state_graph.add_node("conduct_research", self.conduct_research)
state_graph.add_node("generate_reactome_answer", self.generate_reactome_answer)
state_graph.add_node("rewrite_reactome_query", self.rewrite_reactome_query)
@@ -74,7 +66,6 @@
state_graph.add_node("postprocess", self.postprocess)
# Set up edges
state_graph.set_entry_point("preprocess_question")
state_graph.add_edge("preprocess_question", "identify_query_language")
state_graph.add_edge("preprocess_question", "check_question_safety")
state_graph.add_conditional_edges(
"check_question_safety",
@@ -104,39 +95,18 @@

self.uncompiled_graph: StateGraph = state_graph

async def check_question_safety(
def check_question_safety(
self, state: CrossDatabaseState, config: RunnableConfig
) -> CrossDatabaseState:
result: SafetyCheck = await self.safety_checker.ainvoke(
{"input": state["rephrased_input"]},
config,
)
if result.binary_score == "No":
if state["safety"] != "true":
inappropriate_input = f"This is the user's question and it is NOT appropriate for you to answer: {state["user_input"]}. \n\n explain that you are unable to answer the question but you can answer questions about topics related to the Reactome Pathway Knowledgebase or UniProt Knowledgebas."
return CrossDatabaseState(
safety=result.binary_score,
user_input=inappropriate_input,
reactome_answer="",
uniprot_answer="",
)
else:
return CrossDatabaseState(safety=result.binary_score)

async def proceed_with_research(
self, state: CrossDatabaseState
) -> Literal["Continue", "Finish"]:
if state["safety"] == "Yes":
return "Continue"
else:
return "Finish"

async def identify_query_language(
self, state: CrossDatabaseState, config: RunnableConfig
) -> CrossDatabaseState:
query_language: str = await self.detect_language.ainvoke(
{"user_input": state["user_input"]}, config
)
return CrossDatabaseState(query_language=query_language)
return CrossDatabaseState()

async def conduct_research(
self, state: CrossDatabaseState, config: RunnableConfig
@@ -256,7 +226,7 @@ async def generate_final_response(
final_response: str = await self.summarize_final_answer.ainvoke(
{
"input": state["rephrased_input"],
"query_language": state["query_language"],
"detected_language": state["detected_language"],
"reactome_answer": state["reactome_answer"],
"uniprot_answer": state["uniprot_answer"],
},
31 changes: 30 additions & 1 deletion src/agent/profiles/react_to_me.py
@@ -7,6 +7,7 @@
from langgraph.graph.state import StateGraph

from agent.profiles.base import BaseGraphBuilder, BaseState
from agent.tasks.unsafe_question import create_unsafe_answer_generator
from retrievers.reactome.rag import create_reactome_rag


@@ -23,6 +24,9 @@ def __init__(
super().__init__(llm, embedding)

# Create runnables (tasks & tools)
self.unsafe_answer_generator: Runnable = create_unsafe_answer_generator(
llm, streaming=True
)
self.reactome_rag: Runnable = create_reactome_rag(
llm, embedding, streaming=True
)
@@ -32,15 +36,40 @@
# Set up nodes
state_graph.add_node("preprocess", self.preprocess)
state_graph.add_node("model", self.call_model)
state_graph.add_node("generate_unsafe_response", self.generate_unsafe_response)
state_graph.add_node("postprocess", self.postprocess)
# Set up edges
state_graph.set_entry_point("preprocess")
state_graph.add_edge("preprocess", "model")
state_graph.add_conditional_edges(
"preprocess",
self.proceed_with_research,
{"Continue": "model", "Finish": "generate_unsafe_response"},
)
state_graph.add_edge("model", "postprocess")
state_graph.add_edge("generate_unsafe_response", "postprocess")
state_graph.set_finish_point("postprocess")

self.uncompiled_graph: StateGraph = state_graph

async def generate_unsafe_response(
self, state: ReactToMeState, config: RunnableConfig
) -> ReactToMeState:
answer: str = await self.unsafe_answer_generator.ainvoke(
{
"language": state["detected_language"],
"user_input": state["rephrased_input"],
"reason_unsafe": state["reason_unsafe"],
},
config,
)
return ReactToMeState(
chat_history=[
HumanMessage(state["user_input"]),
AIMessage(answer),
],
answer=answer,
)

async def call_model(
self, state: ReactToMeState, config: RunnableConfig
) -> ReactToMeState:
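For readers new to langgraph, the gate added above follows the standard conditional-edge pattern: the router runs on the source node's output state, and the returned label picks the next node. A self-contained toy sketch of the same pattern (toy state and node bodies, not the repository's classes):

```python
from typing import Literal, TypedDict

from langgraph.graph import END, StateGraph


class ToyState(TypedDict, total=False):
    safety: str
    answer: str


def preprocess(state: ToyState) -> ToyState:
    return {"safety": "false"}  # pretend the safety checker flagged the question


def route(state: ToyState) -> Literal["Continue", "Finish"]:
    return "Continue" if state["safety"] == "true" else "Finish"


def model(state: ToyState) -> ToyState:
    return {"answer": "normal RAG answer"}


def refuse(state: ToyState) -> ToyState:
    return {"answer": "polite refusal"}


graph = StateGraph(ToyState)
graph.add_node("preprocess", preprocess)
graph.add_node("model", model)
graph.add_node("refuse", refuse)
graph.set_entry_point("preprocess")
graph.add_conditional_edges(
    "preprocess", route, {"Continue": "model", "Finish": "refuse"}
)
graph.add_edge("model", END)
graph.add_edge("refuse", END)

print(graph.compile().invoke({})["answer"])  # -> "polite refusal"
```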
8 changes: 6 additions & 2 deletions src/agent/tasks/cross_database/summarize_reactome_uniprot.py
@@ -36,13 +36,17 @@
("system", summarization_message),
(
"human",
"User question: {input} \n\n Language: {query_language} \n\n Reactome-drived information: \n {reactome_answer} \n\n UniProt-drived infromation: \n {uniprot_answer}.",
"User question: {input} \n\n Language: {detected_language} \n\n Reactome-drived information: \n {reactome_answer} \n\n UniProt-drived infromation: \n {uniprot_answer}.",
),
]
)


def create_reactome_uniprot_summarizer(llm: BaseChatModel) -> Runnable:
def create_reactome_uniprot_summarizer(
llm: BaseChatModel, streaming: bool = False
) -> Runnable:
if streaming:
llm = llm.model_copy(update={"streaming": True})
return (summarizer_prompt | llm | StrOutputParser()).with_config(
run_name="summarize_answer"
)
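A usage sketch of the new signature, with hypothetical inputs; `llm` is assumed to be an already-configured chat model:

```python
# The streaming flag now lives inside the factory, so call sites no
# longer need to copy the model themselves.
summarizer = create_reactome_uniprot_summarizer(llm, streaming=True)
final_answer = summarizer.invoke(
    {
        "input": "How does TP53 regulate apoptosis?",
        "detected_language": "English",
        "reactome_answer": "...",  # output of the Reactome RAG step
        "uniprot_answer": "...",   # output of the UniProt RAG step
    }
)
```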
53 changes: 45 additions & 8 deletions src/agent/tasks/safety_checker.py
@@ -4,25 +4,62 @@
from pydantic import BaseModel, Field

safety_check_message = """
You are an expert grader with a strong understanding of ethical guidelines and responsible AI usage.
You are an expert scientific assistant. You have advanced training in scientific ethics, dual-use assessment, and responsible AI.

Your task is to determine whether a user-submitted question is appropriate and not harmful.
Provide a binary output as either:
Yes: The question is appropriate and not harmful.
No: The question is harmful, offensive, inappropriate, or unethical.
Tasks:

1. Safety & Appropriateness Check
- Assess the question and determine if it is safe and appropriate for responsible AI use.
- The question and any plausible answer to it must NOT be harmful, offensive, unethical, or violate scientific or ethical guidelines.
- The question or its likely answers must NOT request, describe, or explore how to carry out harmful, illegal, dangerous, or dual-use activities (such as modifying pathogens, increasing infectivity, or other forms of misuse)—even if the scenario is hypothetical, fictional, or indirect.
- The question must NOT request personal, medical, legal, or other types of advice outside the scope of scientific knowledge.
- If a question is ambiguous or could plausibly be used for harm, treat it as unsafe.
- Treat hypothetical, fictional, or made-up scenarios with the same level of scrutiny as real-world questions.

2. Reactome Relevance Check
- Determine if the question is relevant to biology, life sciences, molecular biology, or related topics.
- Mark questions as not relevant if they are about unrelated topics (such as programming, math, history, trivia, etc.).

IMPORTANT:
- If the standalone question is unsafe or not relevant, return "safety": "false".
- If the standalone question is both safe **and** relevant, return "safety": "true".

Return only a JSON object in the following format:
{
  "safety": "true" or "false",  // use a string, not a boolean; "false" if unsafe OR not relevant
  "reason_unsafe": "..."        // if "safety" is "false", briefly state the reason
}

Examples:
1. Q: Describe methods for bypassing cellular immune responses to viral infection
"safety": "false",
"reason_unsafe": "Could be used for harmful or unethical manipulation of immune evasion."

2. Q: What are common side effects of statins in the treatment of high cholesterol?
"safety": "false",
"reason_unsafe": "Requests medical advice, which is out of scientific scope."

3. Q: How can gene drives be used to eliminate invasive species in an ecosystem?
"safety": "false",
"reason_unsafe": "Enables the use of gene drives, which have significant ecological and dual-use risks."

4. Q: What is the role of the immune system in the treatment of cancer?
"safety": "true",
"reason_unsafe": ""
"""

safety_check_prompt = ChatPromptTemplate.from_messages(
[
("system", safety_check_message),
("human", "User question: \n\n {input}"),
("human", "User question: \n\n {rephrased_input}"),
]
)


class SafetyCheck(BaseModel):
binary_score: str = Field(
description="Indicates whether the question is appropriate and related to molecular biology. Expected values: 'Yes' or 'No'."
safety: str = Field(
description="Indicates whether the question is appropriate and related to molecular biology. Expected values: 'true' or 'false'."
)
reason_unsafe: str = Field(
description="If 'safety' is false, briefly state the reason; if 'safety' is true, leave this field empty."
)


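The body of `create_safety_checker` sits outside this hunk, so the wiring below is an assumption; it shows how the `SafetyCheck` schema is typically bound to a prompt with langchain's structured output:

```python
# Assumed wiring -- create_safety_checker itself is not shown in this diff.
checker = safety_check_prompt | llm.with_structured_output(SafetyCheck)

result = checker.invoke({"rephrased_input": "What does TP53 do?"})
print(result.safety)         # expected: "true"
print(result.reason_unsafe)  # expected: ""
```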
45 changes: 45 additions & 0 deletions src/agent/tasks/unsafe_question.py
@@ -0,0 +1,45 @@
from langchain_core.language_models.chat_models import BaseChatModel
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import Runnable

safety_check_message = """
You are an expert scientific assistant operating under the React-to-Me platform. React-to-Me helps both experts and non-experts explore molecular biology using trusted data from the Reactome database.

You have advanced training in scientific ethics, dual-use research concerns, and responsible AI use.

You will receive three inputs:
1. The user's question.
2. A system-generated variable called `reason_unsafe`, which explains why the question cannot be answered.
3. The user's preferred language (as a language code or name).

Your task is to clearly, respectfully, and firmly explain to the user *why* their question cannot be answered, based solely on the `reason_unsafe` input. Do **not** attempt to answer, rephrase, or guide the user toward answering the original question.

You must:
- Respond in the user’s preferred language.
- Politely explain the refusal, grounded in the `reason_unsafe`.
- Emphasize React-to-Me’s mission: to support responsible exploration of molecular biology through trusted databases.
- Suggest examples of appropriate topics (e.g., protein function, pathways, gene interactions using Reactome/UniProt).

You must not provide any workaround, implicit answer, or redirection toward unsafe content.
"""

safety_check_prompt = ChatPromptTemplate.from_messages(
[
("system", safety_check_message),
(
"user",
"Language:{language}\n\nQuestion:{user_input}\n\n Reason for unsafe or out of scope: {reason_unsafe}",
),
]
)


def create_unsafe_answer_generator(
llm: BaseChatModel, streaming: bool = False
) -> Runnable:
if streaming:
llm = llm.model_copy(update={"streaming": True})
return (safety_check_prompt | llm | StrOutputParser()).with_config(
run_name="unsafe_answer_generator"
)
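A usage sketch with hypothetical inputs, mirroring how `generate_unsafe_response` in react_to_me.py invokes this chain; `llm` is assumed to be configured:

```python
generator = create_unsafe_answer_generator(llm, streaming=True)
refusal = generator.invoke(
    {
        "language": "French",
        "user_input": "How can a virus be made more transmissible?",
        "reason_unsafe": "Dual-use request: enhancing pathogen transmissibility.",
    }
)
# `refusal` is a plain string (StrOutputParser), written in French.
```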