slide-deck-ai / tests /unit /test_file_manager.py
barunsaha's picture
Add test cases for the other modules
690eb5c
raw
history blame
4.11 kB
"""
Unit tests for the file manager module.
"""
import io
from typing import Any
import pytest
from slidedeckai.helpers import file_manager
class _FakePage:
    """Minimal stand-in for a PDF page object.

    Stores a fixed string and hands it back from extract_text(), which is
    the only page method the tests in this module rely on.
    """

    def __init__(self, text: str) -> None:
        """Remember the text this page should report."""
        self._text = text

    def extract_text(self) -> str:
        """Return the text this fake page was created with."""
        return self._text
class _FakePdf:
    """Minimal stand-in for a PDF reader instance.

    Exposes a ``pages`` list holding one _FakePage per entry of
    ``pages_text``, in the same order.
    """

    def __init__(self, pages_text: list[str]) -> None:
        """Build one fake page for each string in ``pages_text``."""
        self.pages = [_FakePage(t) for t in pages_text]
def _make_fake_pdf_reader(pages_text: list[str]) -> Any:
    """Return a callable that behaves like PdfReader when called with a file.

    The returned object will have a .pages attribute with page objects that
    implement extract_text(). This lets tests avoid creating real PDF
    binaries and keeps tests deterministic.

    Args:
        pages_text: The text each fake page should return, in page order.

    Returns:
        A one-argument callable; the argument (the file object) is ignored
        and a _FakePdf built from ``pages_text`` is returned.
    """

    def _reader(_fileobj: Any) -> _FakePdf:
        # The file object is deliberately unused: content comes from the
        # closure, not from any bytes passed in.
        return _FakePdf(pages_text)

    return _reader
def test_get_pdf_contents_single_page(monkeypatch: pytest.MonkeyPatch) -> None:
    """get_pdf_contents should return the text for a single-page PDF when
    page_range end is None.
    """
    fake_texts = ['Page one text']
    # Replace the PdfReader name inside file_manager with the in-memory fake
    # so the dummy bytes below are never parsed as a real PDF.
    monkeypatch.setattr(
        file_manager, 'PdfReader', _make_fake_pdf_reader(fake_texts)
    )

    # When start == end, validate_page_range returns (start, None) — emulate
    # that contract here and exercise get_pdf_contents handling of end=None.
    result = file_manager.get_pdf_contents(
        pdf_file=io.BytesIO(b'pdf'),
        page_range=(1, None)
    )

    assert result == 'Page one text'
def test_get_pdf_contents_multi_page_range(monkeypatch: pytest.MonkeyPatch) -> None:
    """get_pdf_contents should concatenate text from multiple pages in the
    provided range.
    """
    fake_texts = ['First', 'Second', 'Third']
    # Replace the PdfReader name inside file_manager with the in-memory fake.
    monkeypatch.setattr(
        file_manager, 'PdfReader', _make_fake_pdf_reader(fake_texts)
    )

    # Request pages 1..2 (1-based, inclusive): the result is expected to be
    # the first two page texts joined with no separator, and the third page
    # must not appear.
    result = file_manager.get_pdf_contents(
        pdf_file=io.BytesIO(b'pdf'),
        page_range=(1, 2)
    )

    assert result == 'FirstSecond'
@pytest.mark.parametrize(
    'start,end,expected',
    [
        (0, 5, (1, 3)),  # start too small -> clamped to 1; end clamped to n_pages
        (2, 2, (2, None)),  # equal start & end -> end is None
        (10, 1, (1, None)),  # start > end -> start reset to 1
        (1, 100, (1, 3)),  # end too large -> clamped to n_pages
    ],
)
def test_validate_page_range_various(
        monkeypatch: pytest.MonkeyPatch, start: int, end: int, expected: tuple[int, Any]
) -> None:
    """validate_page_range should correctly normalize start/end values and
    return (start, None) when the constrained range is a single page.

    Every case runs against a fake three-page PDF, so n_pages is 3 in the
    per-case comments on the parameter list.
    """
    fake_texts = ['A', 'B', 'C']
    # Replace the PdfReader name inside file_manager with the in-memory fake.
    monkeypatch.setattr(
        file_manager, 'PdfReader', _make_fake_pdf_reader(fake_texts)
    )

    result = file_manager.validate_page_range(
        pdf_file=io.BytesIO(b'pdf'),
        start=start,
        end=end
    )

    assert result == expected
def test_validate_page_range_two_page_return(monkeypatch: pytest.MonkeyPatch) -> None:
    """When the validated range spans multiple pages, validate_page_range
    should return the clamped (start, end) pair with end not None.
    """
    fake_texts = ['A', 'B', 'C', 'D']
    # Replace the PdfReader name inside file_manager with the in-memory fake.
    monkeypatch.setattr(
        file_manager, 'PdfReader', _make_fake_pdf_reader(fake_texts)
    )

    # start=2 end=3 should be unchanged and returned as (2, 3): both values
    # already lie inside the fake four-page document.
    result = file_manager.validate_page_range(
        pdf_file=io.BytesIO(b'pdf'),
        start=2,
        end=3
    )

    assert result == (2, 3)
def test_get_pdf_contents_handles_empty_page_text(
        monkeypatch: pytest.MonkeyPatch
) -> None:
    """Pages may return empty strings; get_pdf_contents should concatenate
    them without failing.
    """
    fake_texts = ['', 'Line two', '']
    # Replace the PdfReader name inside file_manager with the in-memory fake.
    monkeypatch.setattr(
        file_manager, 'PdfReader', _make_fake_pdf_reader(fake_texts)
    )

    # All three pages are requested; the empty first and last pages should
    # contribute nothing, leaving only the middle page's text.
    result = file_manager.get_pdf_contents(
        pdf_file=io.BytesIO(b'pdf'),
        page_range=(1, 3)
    )

    assert result == 'Line two'