Spaces:
Runtime error
Runtime error
| import spacy_transformers # needed by SpacyTextSplitter when using the en_core_web_trf pipeline | |
| import spacy | |
| from typing import Iterable, Iterator | |
| from langchain.docstore.document import Document | |
| from langchain.text_splitter import SpacyTextSplitter | |
class SpacySplitter:
    """Chunk LangChain documents using a spaCy-backed text splitter.

    Thin wrapper around ``SpacyTextSplitter``. The GPU is selected once, in
    the constructor and *before* the spaCy pipeline is loaded, because spaCy
    only places a pipeline on the GPU when the device was activated prior to
    loading it.

    Args:
        chunk_size: Maximum chunk size forwarded to ``SpacyTextSplitter``.
        pipeline: Name of the spaCy pipeline used for sentence segmentation.
        gpu_id: Index of the GPU to prefer. Defaults to 1 to keep the
            previous behaviour; NOTE(review): single-GPU hosts only expose
            device 0, so confirm this default for the deployment target.
    """

    def __init__(
        self,
        chunk_size: int = 1000,
        pipeline: str = "en_core_web_trf",
        gpu_id: int = 1,
    ):
        # Must run before the pipeline is loaded below; calling it afterwards
        # (as the old code did on every split) leaves the already-loaded
        # pipeline on the CPU.
        self.gpu_enabled = spacy.prefer_gpu(gpu_id=gpu_id)
        self.splitter = SpacyTextSplitter(chunk_size=chunk_size, pipeline=pipeline)

    def split_documents(self, docs: Iterable[Document]) -> Iterable[Document]:
        """Split ``docs`` into smaller chunk documents.

        Args:
            docs: Documents to split.

        Returns:
            The chunk documents exactly as produced by the underlying
            splitter's ``split_documents`` (presumably a list -- verify
            against the installed LangChain version).
        """
        return self.splitter.split_documents(docs)