In [196]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
# Build a recursive splitter: separators are tried in the order listed, and a
# later separator is only used on pieces that are still longer than chunk_size.
splitter = RecursiveCharacterTextSplitter(
    # Sentence-ending punctuation first, then commas, then spaces. The trailing
    # "" is the character-level fallback; without it, a run of text containing
    # none of the other separators could come back longer than chunk_size.
    separators=[".", "?", "!", ",", " ", ""],
    keep_separator="end",  # keep each separator attached to the end of the piece before it
    chunk_size=100,  # upper bound on chunk length (100 characters)
    chunk_overlap=10,  # allow up to 10 characters to be repeated between adjacent chunks
)

# Example text: three sentences spread over two lines, with a trailing space.
text = """The quick brown fox jumps over the lazy dog. The sun sets in the west, casting a golden glow across the horizon.
Birds chirp in the distance, and a gentle breeze rustles the leaves. """

# Split the text into chunks
chunks = splitter.split_text(text)
In [197]:
# Print every chunk on its own line, numbered from 1.
for position, piece in enumerate(chunks, start=1):
    print(f"Chunk {position}: {piece}")
Chunk 1: The quick brown fox jumps over the lazy dog. Chunk 2: The sun sets in the west, casting a golden glow across the horizon. Chunk 3: Birds chirp in the distance, and a gentle breeze rustles the leaves.