In [196]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Build a recursive splitter that prefers to break on sentence boundaries.
# Separators are tried in order: sentence-ending punctuation first, then
# commas, then spaces, and finally "" (individual characters) as a last
# resort so that an unbroken run longer than chunk_size can still be split.
splitter = RecursiveCharacterTextSplitter(
    separators=[".", "?", "!", ",", " ", ""],
    keep_separator="end",  # keep each separator attached to the end of the piece before it
    chunk_size=100,        # maximum chunk length, in characters
    chunk_overlap=10,      # characters allowed to repeat between neighbouring chunks
)

# A longer example text to split. The trailing spaces and the embedded newline
# are part of the sample input, so they are spelled out explicitly here.
text = (
    "The quick brown fox jumps over the lazy dog. The sun sets in the west, "
    "casting a golden glow across the horizon. \n"
    "Birds chirp in the distance, and a gentle breeze rustles the leaves. "
)

# Split the text into chunks (a list of strings)
chunks = splitter.split_text(text)
In [197]:
# Print every chunk on its own line, numbered starting from 1
for position, piece in enumerate(chunks, start=1):
    print(f"Chunk {position}: {piece}")
Chunk 1: The quick brown fox jumps over the lazy dog.
Chunk 2: The sun sets in the west, casting a golden glow across the horizon.
Chunk 3: Birds chirp in the distance, and a gentle breeze rustles the leaves.