LRU1 committed
Commit cef411e · 1 Parent(s): 34365ef

improve readme

lec2note/ingestion/__pycache__/whisper_runner.cpython-310.pyc CHANGED
Binary files a/lec2note/ingestion/__pycache__/whisper_runner.cpython-310.pyc and b/lec2note/ingestion/__pycache__/whisper_runner.cpython-310.pyc differ
 
lec2note/ingestion/whisper_runner.py CHANGED
@@ -20,7 +20,7 @@ class WhisperRunner: # noqa: D101
     model_name: str = "large-v3"

     @classmethod
-    def transcribe(cls, audio_fp: str | Path, lang: str = None) -> List[Dict[str, Any]]:
+    def transcribe(cls, audio_fp: str | Path, lang: str = "auto") -> List[Dict[str, Any]]:
        """Transcribe ``audio_fp`` and return list with start/end/text.

        Notes
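Side note on the `lang` default change above: here is a minimal sketch of how an `"auto"` default is typically interpreted with an openai-whisper backend. The backend choice and the helper below are assumptions for illustration, not this module's actual implementation.

```python
from pathlib import Path
from typing import Any, Dict, List

import whisper  # assumption: the openai-whisper package backs WhisperRunner


def transcribe_sketch(audio_fp: str | Path, lang: str = "auto") -> List[Dict[str, Any]]:
    """Illustrative only: lang="auto" maps to language=None, letting Whisper detect the language."""
    model = whisper.load_model("large-v3")
    language = None if lang == "auto" else lang
    result = model.transcribe(str(audio_fp), language=language)
    return [
        {"start": seg["start"], "end": seg["end"], "text": seg["text"]}
        for seg in result["segments"]
    ]
```

Under this convention, callers can omit `lang` on multilingual lectures and still get detection, while an explicit code such as `"en"` still forces a language.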
lec2note/processing/__pycache__/processor.cpython-310.pyc CHANGED
Binary files a/lec2note/processing/__pycache__/processor.cpython-310.pyc and b/lec2note/processing/__pycache__/processor.cpython-310.pyc differ
 
lec2note/processing/processor.py CHANGED
@@ -23,9 +23,16 @@ class Processor: # noqa: D101

     @staticmethod
     def _img_to_data_uri(img_path: Path) -> str:
+        """Convert image to data URI; return empty string if path invalid."""
+        if not img_path or not Path(img_path).is_file():
+            return ""
         img_path = Path(img_path)
         mime, _ = mimetypes.guess_type(img_path)
-        b64 = base64.b64encode(img_path.read_bytes()).decode()
+        try:
+            b64 = base64.b64encode(img_path.read_bytes()).decode()
+        except Exception as exc:  # noqa: BLE001
+            logger.warning("[Processor] failed reading image %s: %s", img_path, exc)
+            return ""
         return f"data:{mime};base64,{b64}"

     @staticmethod
@@ -59,34 +66,35 @@ class Processor: # noqa: D101

        # Prompt with explicit mapping guidance
        prompt_text = (
-            "**Role**: You are an expert academic assistant tasked with taking notes from a part of a lecture which will be ultimately merged into a comprehensive and detailed note.\n\n"
-            "**Primary Objective**: Generate a **comprehensive and detailed** note segment in Markdown. Do not omit details or simplify concepts excessively. Your goal is to capture the full context of the lecture segment.\n\n"
-            "**Return Format**: Only return the **note** in Markdown format.\n\n"
+            "Let's think step by step.\n\n"
+            "**Role**: You are an expert academic assistant processing one segment of a longer lecture. Your output for this segment will be merged with others later by a senior editor.\n\n"
+            "**Primary Objective**: Act as a **high-fidelity recorder of information**. Your goal is to generate a comprehensive and detailed note segment in Markdown, capturing the full context of this specific segment. **Do NOT add broad introductions or conclusions** that might be redundant in the final merged document.\n\n"
+            "**Return Format**: Only return the **note segment** in Markdown format, focusing exclusively on the materials provided.\n\n"
            "**Key Instructions**:\n\n"
-            "1. **Capture Emphasized Points**: Pay close attention to the subtitles. Identify and highlight key points that the speaker seems to emphasize, such as repeated phrases, direct statements of importance (e.g., 'the key is...', 'remember that...'), and core definitions.\n\n"
-            "2. **Integrate Visuals (Formulas & Tables)**: You MUST analyze the accompanying images. If an image contains crucial information like **formulas, equations, tables, code snippets, or important diagrams**, you must accurately transcribe it into the Markdown note to support the text. Follow these formats:\n"
+            "1. **Identify the Segment's Core Purpose**: Analyze the materials to determine the main purpose of this specific segment (e.g., 'Is it defining a term?', 'Is it presenting a problem?', 'Is it proposing a solution?'). Structure your note around this local purpose.\n\n"
+            "2. **Transcribe Visuals with Precision**: You MUST analyze the accompanying images. If an image contains crucial information like **formulas, equations, tables, code snippets, or algorithms**, you must accurately transcribe it into the Markdown note to support the text. Follow these formats:\n"
            " - For **formulas and equations**, use LaTeX notation (e.g., enclose with `$` or `$$`).\n"
            " - For **tables**, recreate them using Markdown table syntax.\n"
-            " - For **code**, use Markdown code blocks with appropriate language identifiers.\n\n"
-            "3. **Structure and Format**: Organize the notes logically. Use headings, subheadings, lists, and bold text to create a clear, readable, and well-structured document.\n\n"
-            "4. **Language**: The notes'language should align with the subtitles!!!.\n\n"
-            "5. **Image Mapping**: Stop referencing the images and try to use formulas, tables, code snippets, or important diagrams to describe the images.\n\n"
-            "---BEGIN LECTURE MATERIALS---\n"
-            f"**Subtitles (placeholders inserted)**:\n{placeholder_subs}"
-            "---END LECTURE MATERIALS---\n"
+            " - For **algorithms or code**, use Markdown code blocks with appropriate language identifiers (e.g., pseudocode, python).\n\n"
+            "3. **Capture Emphasized Verbal Points**: From the subtitles, identify and highlight key points, definitions, and direct statements of importance (e.g., 'the key is...', 'the problem is...').\n\n"
+            "4. **Do Not Reference Images Directly**: Integrate the visual information seamlessly. Instead of writing 'As shown in IMG1', transcribe the formula from IMG1 directly into the text where it is discussed.\n\n"
+            "5. **Language**: The notes' language should align with the subtitles.\n\n"
+            "---BEGIN LECTURE SEGMENT---\n"
+            f"**Subtitles (with image placeholders)**:\n{placeholder_subs}\n"
+            "---END LECTURE SEGMENT---"
        )

-        parts: List[Dict[str, Any]] = [
-            {"type": "text", "text": prompt_text}
-        ]
-        for idx,visual_chunk in enumerate(chunk.visual_chunks, start=1): # Limit to 6 images
-            parts.append({
-                "type": "image_url",
-                "image_url": {
-                    "url": cls._img_to_data_uri(visual_chunk.image_path),
-                    "detail": f"IMG{idx}",
-                },
-            })
+        parts: List[Dict[str, Any]] = [{"type": "text", "text": prompt_text}]
+        for idx, visual_chunk in enumerate(chunk.visual_chunks, start=1):
+            data_uri = cls._img_to_data_uri(visual_chunk.image_path)
+            if not data_uri:
+                continue  # skip invalid image
+            parts.append(
+                {
+                    "type": "image_url",
+                    "image_url": {"url": data_uri, "detail": f"IMG{idx}"},
+                }
+            )

        return [{"role": "user", "content": parts}]

@@ -96,5 +104,5 @@ class Processor: # noqa: D101
        # collect text for this chunk
        messages = cls._build_messages(chunk)
        note = cls._call_llm(messages)
-        images=[visual_chunk.image_path for visual_chunk in chunk.visual_chunks]
+        images=[vc.image_path for vc in chunk.visual_chunks if vc.image_path and Path(vc.image_path).is_file()]
        return NoteChunk(note=note, images=images)
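The net effect of the `_img_to_data_uri` hardening is that unreadable images degrade to an empty string and are skipped when building message parts, instead of raising mid-pipeline. A self-contained sketch of that behavior (a standalone illustration that mirrors the diff, not the module itself; the example paths are hypothetical):

```python
import base64
import logging
import mimetypes
from pathlib import Path

logger = logging.getLogger(__name__)


def img_to_data_uri(img_path: str | Path | None) -> str:
    """Return a data: URI for an image, or "" if the path is missing or unreadable."""
    if not img_path or not Path(img_path).is_file():
        return ""
    path = Path(img_path)
    mime, _ = mimetypes.guess_type(path)
    mime = mime or "application/octet-stream"  # sketch-level fallback for unknown types
    try:
        b64 = base64.b64encode(path.read_bytes()).decode()
    except OSError as exc:
        logger.warning("failed reading image %s: %s", path, exc)
        return ""
    return f"data:{mime};base64,{b64}"


# Only valid images become image_url parts; broken paths are skipped, matching the diff's loop.
image_paths = ["slide_01.png", "missing.png"]  # hypothetical inputs
parts = []
for idx, p in enumerate(image_paths, start=1):
    uri = img_to_data_uri(p)
    if not uri:
        continue  # skip unreadable images rather than failing the whole chunk
    parts.append({"type": "image_url", "image_url": {"url": uri, "detail": f"IMG{idx}"}})
```

One design note: the diff reuses the `detail` field to carry an `IMG{idx}` label; assuming an OpenAI-style vision API, the documented values for `detail` are `"low"`, `"high"`, and `"auto"`, so this labelling relies on the backend tolerating non-standard values.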
lec2note/synthesis/__pycache__/assembler.cpython-310.pyc CHANGED
Binary files a/lec2note/synthesis/__pycache__/assembler.cpython-310.pyc and b/lec2note/synthesis/__pycache__/assembler.cpython-310.pyc differ
 
lec2note/synthesis/assembler.py CHANGED
@@ -45,18 +45,20 @@ class Assembler: # noqa: D101
            {
                "role": "user",
                "content": (
-                    "You are an expert academic editor and content synthesizer. Your task is to transform a collection of fragmented lecture notes into a single, coherent, detailed and logically structured study note.\n\n"
-                    "**Context:** These notes were generated by summarizing different segments of a single video lecture. As a result, they are chronologically ordered but probably contain significant overlap and redundancy.\n\n"
-                    "**Primary Goal:** Create a comprehensive and well-organized final document from the provided segements.\n\n"
-                    "**Key Instructions:**\n"
-                    "1. **De-duplicate and Consolidate:** Identify all repetitive definitions and explanations. Merge them into a single, comprehensive section for each core concept. \n"
-                    "2. **Synthesize and Enhance:** Where different fragments explain the same concept with slightly different examples or details (e.g., one note uses a 'cheetah' example, another uses a 'robot'), synthesize these details to create a richer, more complete explanation under a single heading.\n"
-                    "3. **Polish and Format:** Ensure the final text is grammatically correct, flows naturally, and uses consistent, clean Markdown formatting (e.g., for tables, code blocks, and mathematical notation).\n\n"
-                    "4. **Language:** The notes' language should align with the subtitles!!!.\n\n"
-                    "**Constraint:** Ensure all unique concepts and key details from the original notes are preserved in the final document. The goal is to lose redundancy, not information.\n\n"
+                    "Let's think step by step.\n\n"
+                    "**Role**: You are an **expert educator and storyteller**, tasked with synthesizing fragmented notes into a master study guide.\n\n"
+                    "**Context**: These notes were generated from sequential segments of a single video lecture. They are detailed but contain significant repetition and lack a coherent, overarching narrative.\n\n"
+                    "**Primary Goal**: Create a definitive, standalone educational document that not only summarizes the content but also explains the underlying **narrative and logic** of the lecture. The final note should be good enough for someone to learn the topic from scratch.\n\n"
+                    "**Key Instructions**:\n\n"
+                    "1. **Establish a Logical Narrative (Explain the 'Why')**: This is your most important task. Do NOT simply list topics. Find the story. Ask yourself: 'What problem is being introduced first? What solution is proposed next? How does each concept build upon the previous one?' Structure the final note to reflect this **problem-solution arc** or logical progression. For example, when introducing 'Actor-Critic', explicitly state that it is a solution to the 'high variance' problem in earlier methods.\n\n"
+                    "2. **De-duplicate and Consolidate**: Identify all repetitive definitions (e.g., 'State vs. Observation' is likely defined multiple times). Merge them into a single, comprehensive section for each core concept. The first mention is usually the most important.\n\n"
+                    "3. **Highlight Key Trade-offs**: Based on the content, identify and explicitly state any important trade-offs discussed (e.g., 'Bias vs. Variance' in estimation methods, 'On-Policy vs. Off-Policy' sample efficiency). Use a table or bullet points to make these comparisons clear and easy to understand.\n\n"
+                    "4. **Synthesize and Enhance**: Where different fragments explain the same concept with slightly different examples (e.g., one note uses a 'cheetah' example, another uses a 'robot'), synthesize these details to create a richer, more complete explanation under a single heading.\n\n"
+                    "5. **Structure and Polish**: Create a clean, hierarchical structure with a main title, sections (e.g., `## 1. The Problem of X`), and subsections. Ensure the final text is grammatically correct and flows naturally.\n\n"
+                    "**Constraint**: The goal is to lose redundancy, not information. Ensure all unique concepts, formulas, and key details from the original fragments are preserved in the final, structured document.\n\n"
                    "Here are the fragmented notes to process:\n\n"
                    f"{raw_md}"
-                ),
+                )
            }
        ],
    )
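For orientation, the `messages` list assembled above is the standard chat-completions payload shape; a minimal sketch of sending it with an OpenAI-compatible client follows. The client setup, model name, and prompt placeholder are assumptions for illustration, not taken from this diff.

```python
from openai import OpenAI  # assumption: an OpenAI-compatible chat client is used

client = OpenAI()  # reads OPENAI_API_KEY from the environment

SYNTHESIS_PROMPT = "…the synthesis instructions shown in the diff above…"  # stand-in text


def synthesize_notes(raw_md: str, model: str = "gpt-4o") -> str:  # model name is illustrative
    """Send the synthesis prompt plus the fragmented notes and return the merged Markdown."""
    response = client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "user",
                "content": (
                    f"{SYNTHESIS_PROMPT}\n\n"
                    "Here are the fragmented notes to process:\n\n"
                    f"{raw_md}"
                ),
            }
        ],
    )
    return response.choices[0].message.content
```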