improve readme
- lec2note/ingestion/__pycache__/whisper_runner.cpython-310.pyc +0 -0
- lec2note/ingestion/whisper_runner.py +1 -1
- lec2note/processing/__pycache__/processor.cpython-310.pyc +0 -0
- lec2note/processing/processor.py +33 -25
- lec2note/synthesis/__pycache__/assembler.cpython-310.pyc +0 -0
- lec2note/synthesis/assembler.py +12 -10
lec2note/ingestion/__pycache__/whisper_runner.cpython-310.pyc
CHANGED

Binary files a/lec2note/ingestion/__pycache__/whisper_runner.cpython-310.pyc and b/lec2note/ingestion/__pycache__/whisper_runner.cpython-310.pyc differ
lec2note/ingestion/whisper_runner.py
CHANGED

@@ -20,7 +20,7 @@ class WhisperRunner: # noqa: D101
     model_name: str = "large-v3"
 
     @classmethod
-    def transcribe(cls, audio_fp: str | Path, lang: str =
+    def transcribe(cls, audio_fp: str | Path, lang: str = "auto") -> List[Dict[str, Any]]:
         """Transcribe ``audio_fp`` and return list with start/end/text.
 
         Notes
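For reference, a minimal sketch of calling the updated signature, assuming the segment dicts carry the `start`/`end`/`text` keys named in the docstring (the audio path is hypothetical):

```python
from lec2note.ingestion.whisper_runner import WhisperRunner

# lang now defaults to "auto", so no explicit language argument is needed.
segments = WhisperRunner.transcribe("lecture_01.mp3")  # hypothetical path
for seg in segments:
    # each segment dict holds start/end timestamps and the transcribed text
    print(f"[{seg['start']:.1f}s -> {seg['end']:.1f}s] {seg['text']}")
```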
lec2note/processing/__pycache__/processor.cpython-310.pyc
CHANGED

Binary files a/lec2note/processing/__pycache__/processor.cpython-310.pyc and b/lec2note/processing/__pycache__/processor.cpython-310.pyc differ
lec2note/processing/processor.py
CHANGED

@@ -23,9 +23,16 @@ class Processor: # noqa: D101
 
     @staticmethod
     def _img_to_data_uri(img_path: Path) -> str:
+        """Convert image to data URI; return empty string if path invalid."""
+        if not img_path or not Path(img_path).is_file():
+            return ""
         img_path = Path(img_path)
         mime, _ = mimetypes.guess_type(img_path)
-        b64 = base64.b64encode(img_path.read_bytes()).decode()
+        try:
+            b64 = base64.b64encode(img_path.read_bytes()).decode()
+        except Exception as exc:  # noqa: BLE001
+            logger.warning("[Processor] failed reading image %s: %s", img_path, exc)
+            return ""
         return f"data:{mime};base64,{b64}"
 
     @staticmethod

@@ -59,34 +66,35 @@ class Processor: # noqa: D101
 
         # Prompt with explicit mapping guidance
         prompt_text = (
-            "
-            "**
-            "**
+            "Let's think step by step.\n\n"
+            "**Role**: You are an expert academic assistant processing one segment of a longer lecture. Your output for this segment will be merged with others later by a senior editor.\n\n"
+            "**Primary Objective**: Act as a **high-fidelity recorder of information**. Your goal is to generate a comprehensive and detailed note segment in Markdown, capturing the full context of this specific segment. **Do NOT add broad introductions or conclusions** that might be redundant in the final merged document.\n\n"
+            "**Return Format**: Only return the **note segment** in Markdown format, focusing exclusively on the materials provided.\n\n"
             "**Key Instructions**:\n\n"
-            "1. **
-            "2. **
+            "1. **Identify the Segment's Core Purpose**: Analyze the materials to determine the main purpose of this specific segment (e.g., 'Is it defining a term?', 'Is it presenting a problem?', 'Is it proposing a solution?'). Structure your note around this local purpose.\n\n"
+            "2. **Transcribe Visuals with Precision**: You MUST analyze the accompanying images. If an image contains crucial information like **formulas, equations, tables, code snippets, or algorithms**, you must accurately transcribe it into the Markdown note to support the text. Follow these formats:\n"
             " - For **formulas and equations**, use LaTeX notation (e.g., enclose with `$` or `$$`).\n"
             " - For **tables**, recreate them using Markdown table syntax.\n"
-            " - For **code**, use Markdown code blocks with appropriate language identifiers.\n\n"
-            "3. **
-            "4. **
-            "5. **
-            "---BEGIN LECTURE
-            f"**Subtitles (placeholders
-            "---END LECTURE
+            " - For **algorithms or code**, use Markdown code blocks with appropriate language identifiers (e.g., pseudocode, python).\n\n"
+            "3. **Capture Emphasized Verbal Points**: From the subtitles, identify and highlight key points, definitions, and direct statements of importance (e.g., 'the key is...', 'the problem is...').\n\n"
+            "4. **Do Not Reference Images Directly**: Integrate the visual information seamlessly. Instead of writing 'As shown in IMG1', transcribe the formula from IMG1 directly into the text where it is discussed.\n\n"
+            "5. **Language**: The notes' language should align with the subtitles.\n\n"
+            "---BEGIN LECTURE SEGMENT---\n"
+            f"**Subtitles (with image placeholders)**:\n{placeholder_subs}\n"
+            "---END LECTURE SEGMENT---"
         )
 
-        parts: List[Dict[str, Any]] = [
-
-
-
-
-
-
-            "
-            "detail": f"IMG{idx}",
-            }
-
+        parts: List[Dict[str, Any]] = [{"type": "text", "text": prompt_text}]
+        for idx, visual_chunk in enumerate(chunk.visual_chunks, start=1):
+            data_uri = cls._img_to_data_uri(visual_chunk.image_path)
+            if not data_uri:
+                continue  # skip invalid image
+            parts.append(
+                {
+                    "type": "image_url",
+                    "image_url": {"url": data_uri, "detail": f"IMG{idx}"},
+                }
+            )
 
         return [{"role": "user", "content": parts}]

@@ -96,5 +104,5 @@ class Processor: # noqa: D101
         # collect text for this chunk
         messages = cls._build_messages(chunk)
         note = cls._call_llm(messages)
-        images=[
+        images=[vc.image_path for vc in chunk.visual_chunks if vc.image_path and Path(vc.image_path).is_file()]
        return NoteChunk(note=note, images=images)
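To make the new message-building flow concrete, here is the shape of the payload `_build_messages` returns, sketched with hypothetical values. One caveat worth flagging: in OpenAI-compatible APIs the `detail` field of an `image_url` part normally takes `"low"`/`"high"`/`"auto"`, whereas this code repurposes it to carry the `IMG{idx}` placeholder tag:

```python
# Illustrative output of Processor._build_messages (values are hypothetical).
messages = [
    {
        "role": "user",
        "content": [
            # first part: the full prompt text, with IMG placeholders in the subtitles
            {"type": "text", "text": "Let's think step by step.\n\n..."},
            # one part per valid image; invalid paths are skipped upstream
            {
                "type": "image_url",
                "image_url": {
                    "url": "data:image/png;base64,iVBORw0KGgo...",  # from _img_to_data_uri
                    "detail": "IMG1",
                },
            },
        ],
    }
]
```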
lec2note/synthesis/__pycache__/assembler.cpython-310.pyc
CHANGED

Binary files a/lec2note/synthesis/__pycache__/assembler.cpython-310.pyc and b/lec2note/synthesis/__pycache__/assembler.cpython-310.pyc differ
lec2note/synthesis/assembler.py
CHANGED

@@ -45,18 +45,20 @@ class Assembler: # noqa: D101
             {
                 "role": "user",
                 "content": (
-                    "
-                    "**
-                    "**
-                    "**
-                    "
-                    "
-                    "
-                    "
-                    "**
+                    "Let's think step by step.\n\n"
+                    "**Role**: You are an **expert educator and storyteller**, tasked with synthesizing fragmented notes into a master study guide.\n\n"
+                    "**Context**: These notes were generated from sequential segments of a single video lecture. They are detailed but contain significant repetition and lack a coherent, overarching narrative.\n\n"
+                    "**Primary Goal**: Create a definitive, standalone educational document that not only summarizes the content but also explains the underlying **narrative and logic** of the lecture. The final note should be good enough for someone to learn the topic from scratch.\n\n"
+                    "**Key Instructions**:\n\n"
+                    "1. **Establish a Logical Narrative (Explain the 'Why')**: This is your most important task. Do NOT simply list topics. Find the story. Ask yourself: 'What problem is being introduced first? What solution is proposed next? How does each concept build upon the previous one?' Structure the final note to reflect this **problem-solution arc** or logical progression. For example, when introducing 'Actor-Critic', explicitly state that it is a solution to the 'high variance' problem in earlier methods.\n\n"
+                    "2. **De-duplicate and Consolidate**: Identify all repetitive definitions (e.g., 'State vs. Observation' is likely defined multiple times). Merge them into a single, comprehensive section for each core concept. The first mention is usually the most important.\n\n"
+                    "3. **Highlight Key Trade-offs**: Based on the content, identify and explicitly state any important trade-offs discussed (e.g., 'Bias vs. Variance' in estimation methods, 'On-Policy vs. Off-Policy' sample efficiency). Use a table or bullet points to make these comparisons clear and easy to understand.\n\n"
+                    "4. **Synthesize and Enhance**: Where different fragments explain the same concept with slightly different examples (e.g., one note uses a 'cheetah' example, another uses a 'robot'), synthesize these details to create a richer, more complete explanation under a single heading.\n\n"
+                    "5. **Structure and Polish**: Create a clean, hierarchical structure with a main title, sections (e.g., `## 1. The Problem of X`), and subsections. Ensure the final text is grammatically correct and flows naturally.\n\n"
+                    "**Constraint**: The goal is to lose redundancy, not information. Ensure all unique concepts, formulas, and key details from the original fragments are preserved in the final, structured document.\n\n"
                     "Here are the fragmented notes to process:\n\n"
                     f"{raw_md}"
-                )
+                )
             }
         ],
     )
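Finally, a sketch of the single synthesis message the Assembler now sends. How `raw_md` is produced is not shown in this diff, so the concatenation below is an assumption for illustration:

```python
# Hypothetical upstream step: merge per-segment notes into one Markdown blob.
raw_md = "\n\n---\n\n".join(nc.note for nc in note_chunks)  # note_chunks: list of NoteChunk

messages = [
    {
        "role": "user",
        "content": (
            "Let's think step by step.\n\n"
            # ...Role / Context / Primary Goal / Key Instructions as in the diff above...
            "Here are the fragmented notes to process:\n\n"
            f"{raw_md}"
        ),
    }
]
```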
|