Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
61 commits
Select commit Hold shift + click to select a range
587e5a7
feat: update memos headers
fridayL Nov 19, 2025
5637c9d
feat: headers add
fridayL Nov 19, 2025
68831c0
feat: update search agent
fridayL Nov 20, 2025
58c512d
feat: update mem story
fridayL Nov 21, 2025
a497d46
feat: update mem scheduler
fridayL Nov 21, 2025
bd72e9b
feat: update deepsearch mem code
fridayL Nov 21, 2025
91664dc
Merge branch 'dev_new' into feat/deep-search
fridayL Nov 22, 2025
f332ef2
feat: update deepsearch agent
fridayL Nov 22, 2025
c21fc58
feat: update test code
fridayL Nov 22, 2025
fca3776
Merge branch 'dev_new' into feat/deep-search
fridayL Nov 24, 2025
0f62af8
fix: remove dup config
fridayL Nov 24, 2025
5f0a97c
Merge branch 'dev' into feat/deep-search
fridayL Nov 24, 2025
dac3394
feat: dock search pipeline
fridayL Nov 25, 2025
f38115c
Merge branch 'feat/deep-search' of https://github.com/fridayL/MemOS i…
fridayL Nov 25, 2025
696692d
Merge branch 'dev_new' into feat/deep-search
fridayL Nov 25, 2025
9489d54
fix: code test
fridayL Nov 25, 2025
e43e5db
feat: add test scripts
fridayL Nov 25, 2025
ecd4508
feat: add test
fridayL Nov 25, 2025
6e21032
feat: update need_raw process
fridayL Nov 25, 2025
fac355d
Merge branch 'dev_new' into feat/deep-search
fridayL Nov 25, 2025
592f637
fix: add initter
fridayL Nov 25, 2025
df4a66f
Merge branch 'dev_new' into feat/deep-search
fridayL Nov 25, 2025
fbdd07a
Merge branch 'dev_new' into feat/deep-search
fridayL Nov 27, 2025
ad99745
fix: change agent search func name
fridayL Nov 27, 2025
e203755
Merge branch 'dev' into feat/deep-search
fridayL Nov 27, 2025
ca780ea
Merge branch 'dev_new' into feat/deep-search
fridayL Nov 27, 2025
1b67652
Merge branch 'feat/deep-search' of https://github.com/fridayL/MemOS i…
fridayL Nov 27, 2025
94dba83
feat: update logs and defined
fridayL Nov 28, 2025
64414ea
Merge branch 'dev' into feat/deep-search
fridayL Nov 28, 2025
34e9ea4
Merge branch 'dev_new' into feat/deep-search
fridayL Nov 28, 2025
f361d1f
Merge branch 'feat/deep-search' of https://github.com/fridayL/MemOS i…
fridayL Nov 28, 2025
b3acc98
Merge branch 'dev_new' into feat/deep-search
fridayL Dec 1, 2025
953872e
feat: update full text mem search
fridayL Dec 1, 2025
20438e9
Merge branch 'dev_new' into feat/deep-search
fridayL Dec 1, 2025
2591c10
feat: cp plugin to dev
fridayL Dec 1, 2025
4836670
Merge branch 'dev_new' into feat/deep-search
fridayL Dec 1, 2025
383eaaa
feat: add one recall for fulltext retrieval
fridayL Dec 1, 2025
502e15e
fix: set default for fulltext search
fridayL Dec 2, 2025
f33aa47
Merge branch 'dev_new' into feat/deep-search
fridayL Dec 2, 2025
861e489
feat: add langchain chunk
fridayL Dec 2, 2025
10293bf
Merge branch 'dev_new' into feat/deep-search
fridayL Dec 2, 2025
0af35f3
Merge branch 'dev_new' into feat/deep-search
fridayL Dec 3, 2025
b8a953a
feat: fix playground for query
fridayL Dec 3, 2025
3f73bee
Merge branch 'dev_new' into feat/deep-search
fridayL Dec 3, 2025
78c1582
feat: update file content memory extract
fridayL Dec 4, 2025
64383fb
Merge branch 'dev_new' into feat/deep-search
fridayL Dec 4, 2025
ef236cb
feat: update code
fridayL Dec 4, 2025
905b1e2
feat: update import
fridayL Dec 4, 2025
eb09595
code: reformat suffix
fridayL Dec 4, 2025
1fdbff9
Merge branch 'dev_new' into feat/deep-search
fridayL Dec 4, 2025
f51ee8a
Merge branch 'dev' into feat/deep-search
CaralHsi Dec 4, 2025
a93fc0f
feat: update file_id
fridayL Dec 4, 2025
dff374a
Merge branch 'feat/deep-search' of https://github.com/fridayL/MemOS i…
fridayL Dec 4, 2025
54f4784
Merge branch 'dev_new' into feat/deep-search
fridayL Dec 4, 2025
0350135
remove langchain-text-splitters==1.0.0
fridayL Dec 4, 2025
bd9e0ee
Merge branch 'dev_new' into feat/deep-search
fridayL Dec 4, 2025
ecce0f1
feat: add requirement
fridayL Dec 4, 2025
4f53e29
feat: make test
fridayL Dec 4, 2025
8e3063a
feat: fix markdown
fridayL Dec 4, 2025
b022b04
feat: fix simple chunker
fridayL Dec 4, 2025
f41942d
Merge branch 'dev' into feat/deep-search
fridayL Dec 4, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions src/memos/chunkers/charactertext_chunker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
from memos.configs.chunker import MarkdownChunkerConfig
from memos.dependency import require_python_package
from memos.log import get_logger

from .base import BaseChunker, Chunk


logger = get_logger(__name__)


class CharacterTextChunker(BaseChunker):
    """Recursive character-based chunker backed by langchain's
    ``RecursiveCharacterTextSplitter``.

    Splits plain text into overlapping chunks, preferring paragraph,
    newline, and sentence-punctuation boundaries before falling back to
    arbitrary character positions.
    """

    @require_python_package(
        import_name="langchain_text_splitters",
        install_command="pip install langchain_text_splitters==1.0.0",
        install_link="https://github.com/langchain-ai/langchain-text-splitters",
    )
    def __init__(
        self,
        config: MarkdownChunkerConfig | None = None,
        chunk_size: int = 1000,
        chunk_overlap: int = 200,
    ):
        """Build the underlying recursive splitter.

        Args:
            config: Optional chunker config; when provided, its
                ``chunk_size`` / ``chunk_overlap`` take precedence over the
                keyword defaults. NOTE(review): annotated as
                ``MarkdownChunkerConfig`` even though this is a character
                chunker — confirm the intended config type.
            chunk_size: Fallback maximum chunk length, in characters.
            chunk_overlap: Fallback overlap between consecutive chunks.
        """
        from langchain_text_splitters import RecursiveCharacterTextSplitter

        self.config = config
        # Values from the config object win over the plain keyword arguments.
        effective_size = config.chunk_size if config else chunk_size
        effective_overlap = config.chunk_overlap if config else chunk_overlap
        self.chunker = RecursiveCharacterTextSplitter(
            chunk_size=effective_size,
            chunk_overlap=effective_overlap,
            length_function=len,
            separators=["\n\n", "\n", "。", "!", "?", ". ", "! ", "? ", " ", ""],
        )

    def chunk(self, text: str, **kwargs) -> list[str] | list[Chunk]:
        """Split ``text`` into character-bounded chunks.

        Extra keyword arguments are accepted for interface compatibility
        and ignored.
        """
        chunks = self.chunker.split_text(text)
        logger.debug(f"Generated {len(chunks)} chunks from input text")
        return chunks
23 changes: 16 additions & 7 deletions src/memos/chunkers/markdown_chunker.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,26 +16,35 @@ class MarkdownChunker(BaseChunker):
install_command="pip install langchain_text_splitters==1.0.0",
install_link="https://github.com/langchain-ai/langchain-text-splitters",
)
def __init__(self, config: MarkdownChunkerConfig):
def __init__(
self,
config: MarkdownChunkerConfig | None = None,
chunk_size: int = 1000,
chunk_overlap: int = 200,
recursive: bool = False,
):
from langchain_text_splitters import (
MarkdownHeaderTextSplitter,
RecursiveCharacterTextSplitter,
)

self.config = config
self.chunker = MarkdownHeaderTextSplitter(
headers_to_split_on=config.headers_to_split_on,
strip_headers=config.strip_headers,
headers_to_split_on=config.headers_to_split_on
if config
else [("#", "Header 1"), ("##", "Header 2"), ("###", "Header 3")],
strip_headers=config.strip_headers if config else False,
)
self.chunker_recursive = None
logger.info(f"Initialized MarkdownHeaderTextSplitter with config: {config}")
if config.recursive:
if (config and config.recursive) or recursive:
self.chunker_recursive = RecursiveCharacterTextSplitter(
chunk_size=config.chunk_size,
chunk_overlap=config.chunk_overlap,
chunk_size=config.chunk_size if config else chunk_size,
chunk_overlap=config.chunk_overlap if config else chunk_overlap,
length_function=len,
)

def chunk(self, text: str) -> list[str] | list[Chunk]:
def chunk(self, text: str, **kwargs) -> list[str] | list[Chunk]:
"""Chunk the given text into smaller chunks based on sentences."""
md_header_splits = self.chunker.split_text(text)
chunks = []
Expand Down
50 changes: 50 additions & 0 deletions src/memos/chunkers/simple_chunker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
class SimpleTextSplitter:
    """Dependency-free text splitter used as a fallback when langchain
    is unavailable.

    Produces chunks of at most ``chunk_size`` characters, preferring to
    break at paragraph, newline, sentence-punctuation, or space
    boundaries, with up to ``chunk_overlap`` characters of overlap
    between consecutive chunks.
    """

    def __init__(self, chunk_size: int, chunk_overlap: int):
        # Maximum chunk length and desired inter-chunk overlap, in characters.
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap

    def chunk(self, text: str, **kwargs) -> list[str]:
        """Split ``text`` into chunks.

        Extra keyword arguments are accepted for interface compatibility
        with the langchain-backed chunkers and ignored.
        """
        return self._simple_split_text(text, self.chunk_size, self.chunk_overlap)

    def _simple_split_text(self, text: str, chunk_size: int, chunk_overlap: int) -> list[str]:
        """
        Simple text splitter as fallback when langchain is not available.

        Args:
            text: Text to split
            chunk_size: Maximum size of chunks
            chunk_overlap: Overlap between chunks

        Returns:
            List of stripped, non-empty text chunks
        """
        if not text or len(text) <= chunk_size:
            return [text] if text.strip() else []

        chunks = []
        start = 0
        text_len = len(text)

        while start < text_len:
            # Calculate end position
            end = min(start + chunk_size, text_len)

            # If not the last chunk, try to break at a good position
            if end < text_len:
                # Try to break at newline, sentence end, or space
                for separator in ["\n\n", "\n", "。", "!", "?", ". ", "! ", "? ", " "]:
                    last_sep = text.rfind(separator, start, end)
                    if last_sep != -1:
                        end = last_sep + len(separator)
                        break

            chunk = text[start:end].strip()
            if chunk:
                chunks.append(chunk)

            # BUGFIX: once a chunk reaches the end of the text, stop.
            # Previously the overlap step below moved `start` back into
            # text already covered by the final chunk, re-emitting
            # redundant tail fragments (e.g. trailing "no", "o" chunks).
            if end == text_len:
                break

            # Move start position with overlap; always advance at least one
            # character so the loop is guaranteed to terminate.
            start = max(start + 1, end - chunk_overlap)

        return chunks
4 changes: 2 additions & 2 deletions src/memos/mem_reader/read_multi_modal/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,7 +226,7 @@ def parse(
else:
raise ValueError(f"Unknown mode: {mode}. Must be 'fast' or 'fine'")

def _split_text(self, text: str) -> list[str]:
def _split_text(self, text: str, is_markdown: bool = False) -> list[str]:
"""
Split text into chunks using text splitter from utils.

Expand All @@ -245,7 +245,7 @@ def _split_text(self, text: str) -> list[str]:
return [text] if text.strip() else []

try:
chunks = splitter.split_text(text)
chunks = splitter.chunk(text)
logger.debug(f"[FileContentParser] Split text into {len(chunks)} chunks")
return chunks
except Exception as e:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -506,7 +506,7 @@ def parse_fine(
memory_type = "LongTermMemory"

# Split parsed text into chunks
content_chunks = self._split_text(parsed_text)
content_chunks = self._split_text(parsed_text, is_markdown)

# Filter out empty chunks and create indexed list
valid_chunks = [
Expand Down
119 changes: 21 additions & 98 deletions src/memos/mem_reader/read_multi_modal/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,48 +111,6 @@ def _cheap_close(t: str) -> str:
DEFAULT_CHUNK_OVERLAP = int(os.getenv("FILE_PARSER_CHUNK_OVERLAP", "200"))


def _simple_split_text(text: str, chunk_size: int, chunk_overlap: int) -> list[str]:
"""
Simple text splitter as fallback when langchain is not available.

Args:
text: Text to split
chunk_size: Maximum size of chunks
chunk_overlap: Overlap between chunks

Returns:
List of text chunks
"""
if not text or len(text) <= chunk_size:
return [text] if text.strip() else []

chunks = []
start = 0
text_len = len(text)

while start < text_len:
# Calculate end position
end = min(start + chunk_size, text_len)

# If not the last chunk, try to break at a good position
if end < text_len:
# Try to break at newline, sentence end, or space
for separator in ["\n\n", "\n", "。", "!", "?", ". ", "! ", "? ", " "]:
last_sep = text.rfind(separator, start, end)
if last_sep != -1:
end = last_sep + len(separator)
break

chunk = text[start:end].strip()
if chunk:
chunks.append(chunk)

# Move start position with overlap
start = max(start + 1, end - chunk_overlap)

return chunks


# Initialize parser instance
file_parser = None
try:
Expand All @@ -163,51 +121,27 @@ def _simple_split_text(text: str, chunk_size: int, chunk_overlap: int) -> list[s
logger.error(f"[FileContentParser] Failed to create parser: {e}")
file_parser = None

# Initialize text splitter instance
text_splitter = None
_use_simple_splitter = False
markdown_text_splitter = None

try:
try:
from langchain.text_splitter import RecursiveCharacterTextSplitter
except ImportError:
try:
from langchain_text_splitters import (
MarkdownHeaderTextSplitter,
RecursiveCharacterTextSplitter,
)
except ImportError:
logger.error(
"langchain not available. Install with: pip install langchain or pip install langchain-text-splitters"
)

text_splitter = RecursiveCharacterTextSplitter(
chunk_size=DEFAULT_CHUNK_SIZE,
chunk_overlap=DEFAULT_CHUNK_OVERLAP,
length_function=len,
separators=["\n\n", "\n", "。", "!", "?", ". ", "! ", "? ", " ", ""],
)
markdown_text_splitter = MarkdownHeaderTextSplitter(
headers_to_split_on=[("#", "Header 1"), ("##", "Header 2"), ("###", "Header 3")],
strip_headers=False,
)
logger.debug(
f"[FileContentParser] Initialized langchain text splitter with chunk_size={DEFAULT_CHUNK_SIZE}, "
f"chunk_overlap={DEFAULT_CHUNK_OVERLAP}"
from memos.chunkers.charactertext_chunker import CharacterTextChunker
from memos.chunkers.markdown_chunker import MarkdownChunker

markdown_text_splitter = MarkdownChunker(
chunk_size=DEFAULT_CHUNK_SIZE, chunk_overlap=DEFAULT_CHUNK_OVERLAP, recursive=True
)
except ImportError as e:
logger.warning(
f"[FileContentParser] langchain not available, using simple text splitter as fallback: {e}. "
"Install with: pip install langchain or pip install langchain-text-splitters"
text_splitter = CharacterTextChunker(
chunk_size=DEFAULT_CHUNK_SIZE, chunk_overlap=DEFAULT_CHUNK_OVERLAP
)
text_splitter = None
_use_simple_splitter = True
logger.info("[FileContentParser] Initialized text splitter instances by lancga")
except Exception as e:
logger.error(
f"[FileContentParser] Failed to initialize text splitter: {e}, using simple splitter as fallback"
logger.warning(
f"[FileContentParser] Failed to create text splitter: {e} will use simple splitter fallback"
)
from memos.chunkers.simple_chunker import SimpleTextSplitter

markdown_text_splitter = None
text_splitter = None
_use_simple_splitter = True


def get_parser() -> Any:
Expand All @@ -220,7 +154,9 @@ def get_parser() -> Any:
return file_parser


def get_text_splitter(chunk_size: int | None = None, chunk_overlap: int | None = None) -> Any:
def get_text_splitter(
chunk_size: int | None = None, chunk_overlap: int | None = None, is_markdown: bool = False
) -> Any:
"""
Get text splitter instance or a callable that uses simple splitter.

Expand All @@ -231,28 +167,15 @@ def get_text_splitter(chunk_size: int | None = None, chunk_overlap: int | None =
Returns:
Text splitter instance (RecursiveCharacterTextSplitter) or a callable wrapper for simple splitter
"""
if text_splitter is not None:
if is_markdown and markdown_text_splitter is not None:
return markdown_text_splitter
elif text_splitter is not None:
return text_splitter

# Return a callable wrapper that uses simple splitter
if _use_simple_splitter:
else:
actual_chunk_size = chunk_size or DEFAULT_CHUNK_SIZE
actual_chunk_overlap = chunk_overlap or DEFAULT_CHUNK_OVERLAP

class SimpleTextSplitter:
"""Simple text splitter wrapper."""

def __init__(self, chunk_size: int, chunk_overlap: int):
self.chunk_size = chunk_size
self.chunk_overlap = chunk_overlap

def split_text(self, text: str) -> list[str]:
return _simple_split_text(text, self.chunk_size, self.chunk_overlap)

return SimpleTextSplitter(actual_chunk_size, actual_chunk_overlap)

return None


def extract_role(message: dict[str, Any]) -> str:
"""Extract role from message."""
Expand Down
Loading