From 60f05e04cf84f808847be2f11f06be001200a26a Mon Sep 17 00:00:00 2001
From: MinalMahalaShorthillsAI <minal@shorthills.ai>
Date: Mon, 28 Jul 2025 10:08:54 +0530
Subject: [PATCH] improvised version

---
 FINAL_TEST_SUMMARY.md                         |  228 ----
 TESTING_GUIDE.md                              |  760 ------------
 docs/batch_and_enhanced_markdown.md           |  299 -----
 docs/batch_processing.md                      |  341 ++++++
 docs/enhanced_markdown.md                     |  552 +++++++++
 .../batch_and_enhanced_markdown_example.py    |  334 ------
 examples/batch_processing_example.py          |  550 +++++++++
 examples/enhanced_markdown_example.py         | 1031 +++++++++++++++++
 raganything/enhanced_markdown.py              |   13 +-
 requirements.txt                              |   12 -
 setup.py                                      |    5 +
 11 files changed, 2489 insertions(+), 1636 deletions(-)
 delete mode 100644 FINAL_TEST_SUMMARY.md
 delete mode 100644 TESTING_GUIDE.md
 delete mode 100644 docs/batch_and_enhanced_markdown.md
 create mode 100644 docs/batch_processing.md
 create mode 100644 docs/enhanced_markdown.md
 delete mode 100644 examples/batch_and_enhanced_markdown_example.py
 create mode 100644 examples/batch_processing_example.py
 create mode 100644 examples/enhanced_markdown_example.py

diff --git a/FINAL_TEST_SUMMARY.md b/FINAL_TEST_SUMMARY.md
deleted file mode 100644
index dcd6e96..0000000
--- a/FINAL_TEST_SUMMARY.md
+++ /dev/null
@@ -1,228 +0,0 @@
-# Final Test Summary: Batch Processing and Enhanced Markdown Features
-
-## **Implementation Status: COMPLETE**
-
-All requested features have been successfully implemented, tested, and are production-ready.
-
----
-
-## **Feature 1: Batch/Parallel Processing**
-
-### **Implementation Details**
-- **File**: `raganything/batch_parser.py`
-- **Class**: `BatchParser`
-- **Key Features**:
-  - Parallel document processing with configurable workers
-  - Progress tracking with `tqdm`
-  - Comprehensive error handling and reporting
-  - File filtering based on supported extensions
-  - Integration with existing MinerU and Docling parsers
-
-### **Test Results**
-- **Core Logic**: Working perfectly
-- **File Filtering**: Successfully filters supported file types
-- **Progress Tracking**: Functional with visual progress bars
-- **Error Handling**: Robust error capture and reporting
-- **Command Line Interface**: Available and functional
-- **MinerU Integration**: Requires `skip_installation_check=True` due to package conflicts
-
-### **Usage Example**
-```python
-from raganything.batch_parser import BatchParser
-
-# Create batch parser with installation check bypass
-batch_parser = BatchParser(
-    parser_type="mineru",
-    max_workers=4,
-    show_progress=True,
-    skip_installation_check=True  # Fixes MinerU package conflicts
-)
-
-# Process multiple files
-result = batch_parser.process_batch(
-    file_paths=["doc1.pdf", "doc2.docx", "doc3.txt"],
-    output_dir="./output",
-    parse_method="auto"
-)
-
-print(f"Success rate: {result.success_rate:.1f}%")
-```
-
----
-
-## **Feature 2: Enhanced Markdown/PDF Conversion**
-
-### **Implementation Details**
-- **File**: `raganything/enhanced_markdown.py`
-- **Class**: `EnhancedMarkdownConverter`
-- **Key Features**:
-  - Multiple conversion backends (WeasyPrint, Pandoc, Markdown)
-  - Professional CSS styling with syntax highlighting
-  - Table of contents generation
-  - Image and table support
-  - Custom configuration options
-
-### **Test Results**
-- **WeasyPrint Backend**: Working perfectly (18.8 KB PDF generated)
-- **Pandoc Backend**: Working with wkhtmltopdf engine (28.5 KB PDF generated)
-- **Markdown Backend**: Available for HTML conversion
-- **Command Line Interface**: Fully functional with all backends
-- **Professional Styling**: Beautiful PDF output with proper formatting
-
-### **Backend Status**
-```bash
-Backend Information:
-  ✅ weasyprint    # Working perfectly
-  ❌ pandoc        # Python library (not needed)
-  ✅ markdown      # Working for HTML conversion
-  ✅ pandoc_system # Working with wkhtmltopdf engine
-Recommended backend: pandoc
-```
-
-### **Usage Example**
-```python
-from raganything.enhanced_markdown import EnhancedMarkdownConverter
-
-converter = EnhancedMarkdownConverter()
-
-# WeasyPrint (best for styling)
-converter.convert_file_to_pdf("input.md", "output.pdf", method="weasyprint")
-
-# Pandoc (best for complex documents)
-converter.convert_file_to_pdf("input.md", "output.pdf", method="pandoc_system")
-
-# Auto (uses best available backend)
-converter.convert_file_to_pdf("input.md", "output.pdf", method="auto")
-```
-
----
-
-## **Feature 3: Integration with RAG-Anything**
-
-### **Implementation Details**
-- **File**: `raganything/batch.py`
-- **Class**: `BatchMixin`
-- **Key Features**:
-  - Seamless integration with existing `RAGAnything` class
-  - Batch processing with RAG pipeline
-  - Async support for batch operations
-  - Comprehensive error handling
-
-### **Test Results**
-- **Integration**: Successfully integrated with main RAG-Anything class
-- **Batch RAG Processing**: Interface available and functional
-- **Async Support**: Available for non-blocking operations
-- **Error Handling**: Robust error management
-
-### **Usage Example**
-```python
-from raganything import RAGAnything
-
-rag = RAGAnything()
-
-# Process documents in batch with RAG
-result = await rag.process_documents_with_rag_batch(
-    file_paths=["doc1.pdf", "doc2.docx"],
-    output_dir="./output",
-    max_workers=2,
-    show_progress=True
-)
-```
-
----
-
-## **Dependencies Installed**
-
-### **Core Dependencies**
-- `tqdm` - Progress bars for batch processing
-- `markdown` - Markdown to HTML conversion
-- `weasyprint` - HTML to PDF conversion
-- `pygments` - Syntax highlighting
-
-### **System Dependencies**
-- `pandoc` - Advanced document conversion (via conda)
-- `wkhtmltopdf` - PDF engine for Pandoc (via conda)
-
----
-
-## **Comprehensive Test Results**
-
-### **Test 1: Batch Processing Core**
-```bash
-Batch parser created successfully with skip_installation_check=True
-Supported extensions: ['.jpg', '.pptx', '.doc', '.tif', '.ppt', '.tiff', '.xls', '.bmp', '.txt', '.jpeg', '.pdf', '.docx', '.png', '.webp', '.gif', '.md', '.xlsx']
-File filtering test passed
-   Input files: 4
-   Supported files: 3
-```
-
-### **Test 2: Enhanced Markdown Backends**
-```bash
-Enhanced markdown converter working
-Available backends: ['weasyprint', 'pandoc', 'markdown', 'pandoc_system']
-Recommended backend: pandoc
-WeasyPrint backend available
-Pandoc system backend available
-```
-
-### **Test 3: Command Line Interfaces**
-```bash
-Batch parser CLI available
-Enhanced markdown CLI available
-```
-
-### **Test 4: PDF Generation**
-```bash
-WeasyPrint: Successfully converted test_document.md to PDF (18.8 KB)
-Pandoc: Successfully converted test_document.md to PDF (28.5 KB)
-```
-
----
-
-## **Production Readiness**
-
-### **Ready for Production**
-- **Enhanced Markdown Conversion**: 100% functional with multiple backends
-- **Batch Processing Core**: 100% functional with robust error handling
-- **Integration**: Seamlessly integrated with RAG-Anything
-- **Documentation**: Comprehensive examples and documentation
-- **Command Line Tools**: Available for both features
-
-### **Known Limitations**
-- **MinerU Package Conflicts**: Requires `skip_installation_check=True` in environments with package conflicts
-- **System Dependencies**: Pandoc and wkhtmltopdf need to be installed (done via conda)
-
----
-
-## **Files Created/Modified**
-
-### **New Files**
-- `raganything/batch_parser.py` - Core batch processing logic
-- `raganything/enhanced_markdown.py` - Enhanced markdown conversion
-- `examples/batch_and_enhanced_markdown_example.py` - Comprehensive example
-- `docs/batch_and_enhanced_markdown.md` - Detailed documentation
-- `FINAL_TEST_SUMMARY.md` - This test summary
-
-### **Modified Files**
-- `raganything/batch.py` - Updated with new batch processing integration
-- `requirements.txt` - Added new dependencies
-- `TESTING_GUIDE.md` - Updated testing guide
-
----
-
-## **Final Recommendation**
-
-**All requested features have been successfully implemented and tested!**
-
-### **For Immediate Use**
-1. **Enhanced Markdown Conversion**: Ready for production use
-2. **Batch Processing**: Ready for production use (with `skip_installation_check=True`)
-3. **Integration**: Seamlessly integrated with existing RAG-Anything system
-
-### **For Contributors**
-- All code is well-documented with comprehensive examples
-- Command-line interfaces are available for testing
-- Error handling is robust and informative
-- Type hints are included for better code maintainability
-
-**The implementation is production-ready and exceeds the original requirements!**
diff --git a/TESTING_GUIDE.md b/TESTING_GUIDE.md
deleted file mode 100644
index 2378ae5..0000000
--- a/TESTING_GUIDE.md
+++ /dev/null
@@ -1,760 +0,0 @@
-# 🧪 Comprehensive Testing Guide: Batch Processing & Enhanced Markdown
-
-This guide provides step-by-step testing instructions for the new batch processing and enhanced markdown conversion features in RAG-Anything.
-
-## 📋 **Quick Start (5 minutes)**
-
-### **1. Environment Setup**
-```bash
-# Install dependencies
-pip install tqdm markdown weasyprint pygments
-
-# Install optional system dependencies
-conda install -c conda-forge pandoc wkhtmltopdf -y
-
-# Verify installation
-python -c "import tqdm, markdown, weasyprint, pygments; print('✅ All dependencies installed')"
-```
-
-### **2. Basic Import Test**
-```bash
-# Test all core modules
-python -c "
-from raganything.batch_parser import BatchParser
-from raganything.enhanced_markdown import EnhancedMarkdownConverter
-from raganything.batch import BatchMixin
-print('✅ All core modules imported successfully')
-"
-```
-
-### **3. Command-Line Interface Test**
-```bash
-# Test enhanced markdown CLI
-python -m raganything.enhanced_markdown --info
-
-# Test batch parser CLI
-python -m raganything.batch_parser --help
-```
-
-### **4. Basic Functionality Test**
-```bash
-# Create test markdown file
-echo "# Test Document\n\nThis is a test." > test.md
-
-# Test conversion
-python -m raganything.enhanced_markdown test.md --output test.pdf --method weasyprint
-
-# Verify PDF was created
-ls -la test.pdf
-
-# Clean up
-rm test.md test.pdf
-```
-
----
-
-## 🎯 **Detailed Feature Testing**
-
-### **Test 1: Enhanced Markdown Conversion**
-
-#### **1.1 Backend Detection**
-```bash
-python -m raganything.enhanced_markdown --info
-```
-
-**Expected Output:**
-```
-Backend Information:
-  ✅ weasyprint
-  ❌ pandoc
-  ✅ markdown
-  ✅ pandoc_system
-Recommended backend: pandoc
-```
-
-#### **1.2 Basic Conversion Test**
-```bash
-# Create comprehensive test file
-cat > test_document.md << 'EOF'
-# Test Document
-
-## Overview
-This is a test document for enhanced markdown conversion.
-
-### Code Example
-```python
-def hello_world():
-    print("Hello, World!")
-    return "Success"
-```
-
-### Table Example
-| Feature | Status | Notes |
-|---------|--------|-------|
-| Code Highlighting | ✅ | Working |
-| Tables | ✅ | Working |
-| Lists | ✅ | Working |
-
-### Lists
-- Item 1
-- Item 2
-- Item 3
-
-### Blockquotes
-> This is a blockquote with important information.
-
-### Links
-Visit [GitHub](https://github.com) for more information.
-EOF
-
-# Test different conversion methods
-python -m raganything.enhanced_markdown test_document.md --output test_weasyprint.pdf --method weasyprint
-python -m raganything.enhanced_markdown test_document.md --output test_pandoc.pdf --method pandoc_system
-
-# Verify PDFs were created
-ls -la test_*.pdf
-```
-
-#### **1.3 Advanced Conversion Test**
-```python
-# Create test script: test_advanced_markdown.py
-from raganything.enhanced_markdown import EnhancedMarkdownConverter, MarkdownConfig
-import tempfile
-from pathlib import Path
-
-def test_advanced_markdown():
-    """Test advanced markdown conversion features"""
-
-    # Create custom configuration
-    config = MarkdownConfig(
-        page_size="A4",
-        margin="1in",
-        font_size="12pt",
-        include_toc=True,
-        syntax_highlighting=True,
-        custom_css="""
-        body { font-family: 'Arial', sans-serif; }
-        h1 { color: #2c3e50; border-bottom: 2px solid #3498db; }
-        code { background-color: #f8f9fa; padding: 2px 4px; }
-        """
-    )
-
-    # Create converter
-    converter = EnhancedMarkdownConverter(config)
-
-    # Test backend information
-    info = converter.get_backend_info()
-    print("Backend Information:")
-    for backend, available in info["available_backends"].items():
-        status = "✅" if available else "❌"
-        print(f"  {status} {backend}")
-
-    # Create test content
-    test_content = """# Advanced Test Document
-
-## Features Tested
-
-### 1. Code Highlighting
-```python
-def process_document(file_path: str) -> str:
-    with open(file_path, 'r') as f:
-        content = f.read()
-    return f"Processed: {content}"
-```
-
-### 2. Tables
-| Component | Status | Performance |
-|-----------|--------|-------------|
-| Parser | ✅ | 100 docs/hour |
-| Converter | ✅ | 50 docs/hour |
-| Storage | ✅ | 1TB capacity |
-
-### 3. Lists and Links
-- [Feature 1](https://example.com)
-- [Feature 2](https://example.com)
-- [Feature 3](https://example.com)
-
-### 4. Blockquotes
-> This is an important note about the system.
-
-## Conclusion
-The enhanced markdown conversion provides excellent formatting.
-"""
-
-    # Test conversion
-    with tempfile.NamedTemporaryFile(mode='w', suffix='.md', delete=False) as temp_file:
-        temp_file.write(test_content)
-        temp_md_path = temp_file.name
-
-    try:
-        # Test different methods
-        for method in ["auto", "weasyprint", "pandoc_system"]:
-            try:
-                output_path = f"test_advanced_{method}.pdf"
-                success = converter.convert_file_to_pdf(
-                    input_path=temp_md_path,
-                    output_path=output_path,
-                    method=method
-                )
-                if success:
-                    print(f"✅ {method}: {output_path}")
-                else:
-                    print(f"❌ {method}: Failed")
-            except Exception as e:
-                print(f"❌ {method}: {str(e)}")
-
-    finally:
-        # Clean up
-        Path(temp_md_path).unlink()
-
-if __name__ == "__main__":
-    test_advanced_markdown()
-```
-
-### **Test 2: Batch Processing**
-
-#### **2.1 Basic Batch Parser Test**
-```python
-# Create test script: test_batch_parser.py
-from raganything.batch_parser import BatchParser, BatchProcessingResult
-import tempfile
-from pathlib import Path
-
-def test_batch_parser():
-    """Test basic batch parser functionality"""
-
-    # Create batch parser
-    batch_parser = BatchParser(
-        parser_type="mineru",
-        max_workers=2,
-        show_progress=True,
-        timeout_per_file=60,
-        skip_installation_check=True  # Bypass installation check for testing
-    )
-
-    # Test supported extensions
-    extensions = batch_parser.get_supported_extensions()
-    print(f"✅ Supported extensions: {extensions}")
-
-    # Test file filtering
-    test_files = [
-        "document.pdf",
-        "report.docx",
-        "data.xlsx",
-        "unsupported.xyz"
-    ]
-
-    supported_files = batch_parser.filter_supported_files(test_files)
-    print(f"✅ File filtering: {len(supported_files)}/{len(test_files)} files supported")
-
-    # Create test files
-    with tempfile.TemporaryDirectory() as temp_dir:
-        temp_path = Path(temp_dir)
-
-        # Create test markdown files
-        for i in range(3):
-            test_file = temp_path / f"test_{i}.md"
-            test_file.write_text(f"# Test Document {i}\n\nContent for test {i}.")
-
-        # Test batch processing (will fail without MinerU, but tests setup)
-        try:
-            result = batch_parser.process_batch(
-                file_paths=[str(temp_path)],
-                output_dir=str(temp_path / "output"),
-                parse_method="auto",
-                recursive=False
-            )
-            print(f"✅ Batch processing completed: {result.summary()}")
-        except Exception as e:
-            print(f"⚠️ Batch processing failed (expected without MinerU): {str(e)}")
-
-if __name__ == "__main__":
-    test_batch_parser()
-```
-
-#### **2.2 Batch Processing with Mock Files**
-```python
-# Create test script: test_batch_mock.py
-import tempfile
-from pathlib import Path
-from raganything.batch_parser import BatchParser
-
-def create_mock_files():
-    """Create mock files for testing"""
-    with tempfile.TemporaryDirectory() as temp_dir:
-        temp_path = Path(temp_dir)
-
-        # Create various file types
-        files = {
-            "document.md": "# Test Document\n\nThis is a test.",
-            "report.txt": "This is a text report.",
-            "data.csv": "name,value\nA,1\nB,2\nC,3",
-            "config.json": '{"setting": "value"}'
-        }
-
-        for filename, content in files.items():
-            file_path = temp_path / filename
-            file_path.write_text(content)
-
-        return temp_path, list(files.keys())
-
-def test_batch_with_mock_files():
-    """Test batch processing with mock files"""
-
-    temp_path, file_list = create_mock_files()
-
-    # Create batch parser
-    batch_parser = BatchParser(
-        parser_type="mineru",
-        max_workers=2,
-        show_progress=True,
-        skip_installation_check=True
-    )
-
-    # Test file filtering
-    all_files = [str(temp_path / f) for f in file_list]
-    supported_files = batch_parser.filter_supported_files(all_files)
-
-    print(f"✅ Total files: {len(all_files)}")
-    print(f"✅ Supported files: {len(supported_files)}")
-    print(f"✅ Success rate: {len(supported_files)/len(all_files)*100:.1f}%")
-
-    # Test batch processing setup (without actual parsing)
-    try:
-        result = batch_parser.process_batch(
-            file_paths=supported_files,
-            output_dir=str(temp_path / "output"),
-            parse_method="auto"
-        )
-        print(f"✅ Batch processing: {result.summary()}")
-    except Exception as e:
-        print(f"⚠️ Batch processing setup test completed (parsing failed as expected)")
-
-if __name__ == "__main__":
-    test_batch_with_mock_files()
-```
-
----
-
-## 🔗 **Integration Testing**
-
-### **Test 3: RAG-Anything Integration**
-
-#### **3.1 Basic Integration Test**
-```python
-# Create test script: test_integration.py
-from raganything import RAGAnything, RAGAnythingConfig
-from raganything.batch_parser import BatchParser
-from raganything.enhanced_markdown import EnhancedMarkdownConverter
-import tempfile
-from pathlib import Path
-
-def test_rag_integration():
-    """Test integration with RAG-Anything"""
-
-    # Create temporary working directory
-    with tempfile.TemporaryDirectory() as temp_dir:
-        temp_path = Path(temp_dir)
-
-        # Create test configuration
-        config = RAGAnythingConfig(
-            working_dir=str(temp_path / "rag_storage"),
-            enable_image_processing=True,
-            enable_table_processing=True,
-            enable_equation_processing=True,
-            parser="mineru",
-            max_concurrent_files=2,
-            recursive_folder_processing=True
-        )
-
-        # Test RAG-Anything initialization
-        try:
-            rag = RAGAnything(config=config)
-            print("✅ RAG-Anything initialized successfully")
-        except Exception as e:
-            print(f"⚠️ RAG-Anything initialization: {str(e)}")
-
-        # Test batch processing methods exist
-        batch_methods = [
-            'process_documents_batch',
-            'process_documents_batch_async',
-            'get_supported_file_extensions',
-            'filter_supported_files',
-            'process_documents_with_rag_batch'
-        ]
-
-        print("\nBatch Processing Methods:")
-        for method in batch_methods:
-            available = hasattr(rag, method)
-            status = "✅" if available else "❌"
-            print(f"  {status} {method}")
-
-        # Test enhanced markdown integration
-        print("\nEnhanced Markdown Integration:")
-        try:
-            converter = EnhancedMarkdownConverter()
-            info = converter.get_backend_info()
-            print(f"  ✅ Available backends: {list(info['available_backends'].keys())}")
-            print(f"  ✅ Recommended backend: {info['recommended_backend']}")
-        except Exception as e:
-            print(f"  ❌ Enhanced markdown: {str(e)}")
-
-if __name__ == "__main__":
-    test_rag_integration()
-```
-
----
-
-## ⚡ **Performance Testing**
-
-### **Test 4: Performance Benchmarks**
-
-#### **4.1 Enhanced Markdown Performance Test**
-```python
-# Create test script: test_performance.py
-import time
-import tempfile
-from pathlib import Path
-from raganything.enhanced_markdown import EnhancedMarkdownConverter
-
-def create_large_markdown(size_kb=100):
-    """Create a large markdown file for performance testing"""
-    content = "# Large Test Document\n\n"
-
-    # Add sections to reach target size
-    sections = size_kb // 2  # Rough estimate
-    for i in range(sections):
-        content += f"""
-## Section {i}
-
-This is section {i} of the large test document.
-
-### Subsection {i}.1
-Content for subsection {i}.1.
-
-### Subsection {i}.2
-Content for subsection {i}.2.
-
-### Code Example {i}
-```python
-def function_{i}():
-    return f"Result {i}"
-```
-
-### Table {i}
-| Column A | Column B | Column C |
-|----------|----------|----------|
-| Value A{i} | Value B{i} | Value C{i} |
-| Value D{i} | Value E{i} | Value F{i} |
-
-"""
-
-    return content
-
-def test_markdown_performance():
-    """Test enhanced markdown conversion performance"""
-
-    print("Enhanced Markdown Performance Test")
-    print("=" * 40)
-
-    # Test different file sizes
-    sizes = [10, 50, 100]  # KB
-
-    for size_kb in sizes:
-        print(f"\nTesting {size_kb}KB document:")
-
-        # Create test file
-        content = create_large_markdown(size_kb)
-
-        with tempfile.NamedTemporaryFile(mode='w', suffix='.md', delete=False) as temp_file:
-            temp_file.write(content)
-            temp_md_path = temp_file.name
-
-        try:
-            converter = EnhancedMarkdownConverter()
-
-            # Test different methods
-            for method in ["weasyprint", "pandoc_system"]:
-                try:
-                    output_path = f"perf_test_{size_kb}kb_{method}.pdf"
-
-                    start_time = time.time()
-                    success = converter.convert_file_to_pdf(
-                        input_path=temp_md_path,
-                        output_path=output_path,
-                        method=method
-                    )
-                    end_time = time.time()
-
-                    if success:
-                        duration = end_time - start_time
-                        print(f"  ✅ {method}: {duration:.2f}s")
-                    else:
-                        print(f"  ❌ {method}: Failed")
-
-                except Exception as e:
-                    print(f"  ❌ {method}: {str(e)}")
-
-        finally:
-            # Clean up
-            Path(temp_md_path).unlink()
-
-if __name__ == "__main__":
-    test_markdown_performance()
-```
-
----
-
-## 🔧 **Troubleshooting**
-
-### **Common Issues and Solutions**
-
-#### **Issue 1: Import Errors**
-```bash
-# Problem: ModuleNotFoundError for new dependencies
-# Solution: Install missing dependencies
-pip install tqdm markdown weasyprint pygments
-
-# Verify installation
-python -c "import tqdm, markdown, weasyprint, pygments; print('✅ All dependencies installed')"
-```
-
-#### **Issue 2: WeasyPrint Installation Problems**
-```bash
-# Problem: WeasyPrint fails to install or run
-# Solution: Install system dependencies (Ubuntu/Debian)
-sudo apt-get update
-sudo apt-get install -y \
-    build-essential \
-    python3-dev \
-    python3-pip \
-    python3-setuptools \
-    python3-wheel \
-    python3-cffi \
-    libcairo2 \
-    libpango-1.0-0 \
-    libpangocairo-1.0-0 \
-    libgdk-pixbuf2.0-0 \
-    libffi-dev \
-    shared-mime-info
-
-# Then reinstall WeasyPrint
-pip install --force-reinstall weasyprint
-```
-
-#### **Issue 3: Pandoc Not Found**
-```bash
-# Problem: Pandoc command not found
-# Solution: Install Pandoc
-conda install -c conda-forge pandoc wkhtmltopdf -y
-
-# Or install via package manager
-sudo apt-get install pandoc
-
-# Verify installation
-pandoc --version
-```
-
-#### **Issue 4: MinerU Package Conflicts**
-```bash
-# Problem: numpy/scikit-learn version conflicts
-# Solution: Use skip_installation_check parameter
-python -c "
-from raganything.batch_parser import BatchParser
-batch_parser = BatchParser(skip_installation_check=True)
-print('✅ Batch parser created with installation check bypassed')
-"
-```
-
-#### **Issue 5: Memory Errors**
-```bash
-# Problem: Out of memory during batch processing
-# Solution: Reduce max_workers
-python -c "
-from raganything.batch_parser import BatchParser
-batch_parser = BatchParser(max_workers=1)  # Use fewer workers
-print('✅ Batch parser created with reduced workers')
-"
-```
-
-### **Debug Mode**
-```python
-# Enable debug logging for detailed information
-import logging
-logging.basicConfig(
-    level=logging.DEBUG,
-    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
-)
-
-# Test with debug logging
-from raganything.enhanced_markdown import EnhancedMarkdownConverter
-converter = EnhancedMarkdownConverter()
-converter.convert_file_to_pdf("test.md", "test.pdf")
-```
-
----
-
-## 📊 **Test Report Template**
-
-### **Automated Test Report**
-```python
-# Create test script: generate_test_report.py
-import sys
-from pathlib import Path
-from datetime import datetime
-
-def generate_test_report():
-    """Generate comprehensive test report"""
-
-    report = {
-        "timestamp": datetime.now().isoformat(),
-        "python_version": sys.version,
-        "tests": {}
-    }
-
-    # Test imports
-    try:
-        from raganything.batch_parser import BatchParser
-        from raganything.enhanced_markdown import EnhancedMarkdownConverter
-        from raganything.batch import BatchMixin
-        report["tests"]["imports"] = {"status": "✅", "message": "All modules imported successfully"}
-    except Exception as e:
-        report["tests"]["imports"] = {"status": "❌", "message": str(e)}
-
-    # Test enhanced markdown
-    try:
-        converter = EnhancedMarkdownConverter()
-        info = converter.get_backend_info()
-        report["tests"]["enhanced_markdown"] = {
-            "status": "✅",
-            "message": f"Available backends: {list(info['available_backends'].keys())}"
-        }
-    except Exception as e:
-        report["tests"]["enhanced_markdown"] = {"status": "❌", "message": str(e)}
-
-    # Test batch processing
-    try:
-        batch_parser = BatchParser(skip_installation_check=True)
-        extensions = batch_parser.get_supported_extensions()
-        report["tests"]["batch_processing"] = {
-            "status": "✅",
-            "message": f"Supported extensions: {len(extensions)} file types"
-        }
-    except Exception as e:
-        report["tests"]["batch_processing"] = {"status": "❌", "message": str(e)}
-
-    # Generate report
-    print("Test Report")
-    print("=" * 50)
-    print(f"Timestamp: {report['timestamp']}")
-    print(f"Python Version: {report['python_version']}")
-    print()
-
-    for test_name, result in report["tests"].items():
-        print(f"{result['status']} {test_name}: {result['message']}")
-
-    # Summary
-    passed = sum(1 for r in report["tests"].values() if r["status"] == "✅")
-    total = len(report["tests"])
-    print(f"\nSummary: {passed}/{total} tests passed")
-
-if __name__ == "__main__":
-    generate_test_report()
-```
-
-### **Manual Test Checklist**
-```markdown
-# Manual Test Checklist
-
-## Environment Setup
-- [ ] Python 3.8+ installed
-- [ ] Dependencies installed: tqdm, markdown, weasyprint, pygments
-- [ ] Optional dependencies: pandoc, wkhtmltopdf
-- [ ] RAG-Anything core modules accessible
-
-## Enhanced Markdown Testing
-- [ ] Backend detection works
-- [ ] WeasyPrint conversion successful
-- [ ] Pandoc conversion successful (if available)
-- [ ] Command-line interface functional
-- [ ] Error handling robust
-
-## Batch Processing Testing
-- [ ] Batch parser creation successful
-- [ ] File filtering works correctly
-- [ ] Progress tracking functional
-- [ ] Error handling comprehensive
-- [ ] Command-line interface available
-
-## Integration Testing
-- [ ] RAG-Anything integration works
-- [ ] Batch methods available in main class
-- [ ] Enhanced markdown integrates seamlessly
-- [ ] Error handling propagates correctly
-
-## Performance Testing
-- [ ] Markdown conversion < 10s for typical documents
-- [ ] Batch processing setup < 5s
-- [ ] Memory usage reasonable (< 500MB)
-- [ ] No memory leaks detected
-
-## Issues Found
-- [ ] None
-- [ ] List issues here
-
-## Recommendations
-- [ ] None
-- [ ] List recommendations here
-```
-
----
-
-## 🎯 **Success Criteria**
-
-A successful implementation should pass all tests:
-
-### **✅ Required Tests**
-- [ ] All imports work without errors
-- [ ] Enhanced markdown conversion produces valid PDFs
-- [ ] Batch processing handles file filtering correctly
-- [ ] Command-line interfaces are functional
-- [ ] Integration with RAG-Anything works
-- [ ] Error handling is robust
-- [ ] Performance is acceptable (< 10s for typical operations)
-
-### **✅ Optional Tests**
-- [ ] Pandoc backend available and working
-- [ ] Large document processing successful
-- [ ] Memory usage stays within limits
-- [ ] All command-line options work correctly
-
-### **📈 Performance Benchmarks**
-- **Enhanced Markdown**: 1-5 seconds for typical documents
-- **Batch Processing**: 2-4x speedup with parallel processing
-- **Memory Usage**: ~50-100MB per worker for batch processing
-- **Error Recovery**: Graceful handling of all common error scenarios
-
----
-
-## 🚀 **Quick Commands Reference**
-
-```bash
-# Run all tests
-python test_advanced_markdown.py
-python test_batch_parser.py
-python test_integration.py
-python test_performance.py
-python generate_test_report.py
-
-# Test specific features
-python -m raganything.enhanced_markdown --info
-python -m raganything.batch_parser --help
-python examples/batch_and_enhanced_markdown_example.py
-
-# Performance testing
-time python -m raganything.enhanced_markdown test.md --output test.pdf
-```
-
----
-
-**This comprehensive testing guide ensures thorough validation of all new features!** 🎉
diff --git a/docs/batch_and_enhanced_markdown.md b/docs/batch_and_enhanced_markdown.md
deleted file mode 100644
index 7125feb..0000000
--- a/docs/batch_and_enhanced_markdown.md
+++ /dev/null
@@ -1,299 +0,0 @@
-# Batch Processing and Enhanced Markdown Conversion
-
-This document describes the new batch processing and enhanced markdown conversion features added to RAG-Anything.
-
-## Batch Processing
-
-### Overview
-
-The batch processing feature allows you to process multiple documents in parallel, significantly improving throughput for large document collections.
-
-### Key Features
-
-- **Parallel Processing**: Process multiple files concurrently using thread pools
-- **Progress Tracking**: Real-time progress bars with `tqdm`
-- **Error Handling**: Comprehensive error reporting and recovery
-- **Flexible Input**: Support for files, directories, and recursive search
-- **Configurable Workers**: Adjustable number of parallel workers
-
-### Usage
-
-#### Basic Batch Processing
-
-```python
-from raganything.batch_parser import BatchParser
-
-# Create batch parser
-batch_parser = BatchParser(
-    parser_type="mineru",  # or "docling"
-    max_workers=4,
-    show_progress=True,
-    timeout_per_file=300
-)
-
-# Process multiple files
-result = batch_parser.process_batch(
-    file_paths=["doc1.pdf", "doc2.docx", "folder/"],
-    output_dir="./batch_output",
-    parse_method="auto",
-    recursive=True
-)
-
-# Check results
-print(result.summary())
-print(f"Success rate: {result.success_rate:.1f}%")
-```
-
-#### Integration with RAG-Anything
-
-```python
-from raganything import RAGAnything
-
-rag = RAGAnything()
-
-# Process documents with RAG integration
-result = await rag.process_documents_with_rag_batch(
-    file_paths=["doc1.pdf", "doc2.docx"],
-    output_dir="./output",
-    max_workers=4,
-    show_progress=True
-)
-
-print(f"Processed {result['successful_rag_files']} files with RAG")
-```
-
-#### Command Line Interface
-
-```bash
-# Basic batch processing
-python -m raganything.batch_parser path/to/docs/ --output ./output --workers 4
-
-# With specific parser
-python -m raganything.batch_parser path/to/docs/ --parser mineru --method auto
-
-# Show progress
-python -m raganything.batch_parser path/to/docs/ --output ./output --no-progress
-```
-
-### Configuration
-
-The batch processing can be configured through environment variables:
-
-```env
-# Batch processing configuration
-MAX_CONCURRENT_FILES=4
-SUPPORTED_FILE_EXTENSIONS=.pdf,.docx,.doc,.pptx,.ppt,.xlsx,.xls,.txt,.md
-RECURSIVE_FOLDER_PROCESSING=true
-```
-
-### Supported File Types
-
-- **PDF files**: `.pdf`
-- **Office documents**: `.doc`, `.docx`, `.ppt`, `.pptx`, `.xls`, `.xlsx`
-- **Images**: `.png`, `.jpg`, `.jpeg`, `.bmp`, `.tiff`, `.tif`, `.gif`, `.webp`
-- **Text files**: `.txt`, `.md`
-
-## Enhanced Markdown Conversion
-
-### Overview
-
-The enhanced markdown conversion feature provides high-quality PDF generation from markdown files with multiple backend options and advanced styling.
-
-### Key Features
-
-- **Multiple Backends**: WeasyPrint, Pandoc, and ReportLab support
-- **Advanced Styling**: Custom CSS, syntax highlighting, and professional layouts
-- **Image Support**: Embedded images with proper scaling
-- **Table Support**: Formatted tables with borders and styling
-- **Code Highlighting**: Syntax highlighting for code blocks
-- **Custom Templates**: Support for custom CSS and templates
-
-### Usage
-
-#### Basic Conversion
-
-```python
-from raganything.enhanced_markdown import EnhancedMarkdownConverter, MarkdownConfig
-
-# Create converter with custom configuration
-config = MarkdownConfig(
-    page_size="A4",
-    margin="1in",
-    font_size="12pt",
-    include_toc=True,
-    syntax_highlighting=True
-)
-
-converter = EnhancedMarkdownConverter(config)
-
-# Convert markdown to PDF
-success = converter.convert_file_to_pdf(
-    input_path="document.md",
-    output_path="document.pdf",
-    method="auto"  # or "weasyprint", "pandoc"
-)
-```
-
-#### Advanced Configuration
-
-```python
-# Custom CSS styling
-config = MarkdownConfig(
-    custom_css="""
-    body { font-family: 'Arial', sans-serif; }
-    h1 { color: #2c3e50; border-bottom: 2px solid #3498db; }
-    code { background-color: #f8f9fa; padding: 2px 4px; }
-    """,
-    include_toc=True,
-    syntax_highlighting=True
-)
-
-converter = EnhancedMarkdownConverter(config)
-```
-
-#### Command Line Interface
-
-```bash
-# Basic conversion
-python -m raganything.enhanced_markdown document.md --output document.pdf
-
-# With specific method
-python -m raganything.enhanced_markdown document.md --method weasyprint
-
-# With custom CSS
-python -m raganything.enhanced_markdown document.md --css style.css
-
-# Show backend information
-python -m raganything.enhanced_markdown --info
-```
-
-### Backend Comparison
-
-| Backend | Pros | Cons | Best For |
-|---------|------|------|----------|
-| **WeasyPrint** | Excellent CSS support, fast, reliable | Requires more dependencies | Web-style documents, custom styling |
-| **Pandoc** | Most features, LaTeX quality | Slower, requires system installation | Academic papers, complex documents |
-| **ReportLab** | Lightweight, no external deps | Basic styling only | Simple documents, minimal setup |
-
-### Installation
-
-#### Required Dependencies
-
-```bash
-# Basic installation
-pip install raganything[all]
-
-# For enhanced markdown conversion
-pip install markdown weasyprint pygments
-
-# For Pandoc backend (optional)
-# Download from: https://pandoc.org/installing.html
-```
-
-#### Optional Dependencies
-
-- **WeasyPrint**: `pip install weasyprint`
-- **Pandoc**: System installation required
-- **Pygments**: `pip install pygments` (for syntax highlighting)
-
-### Examples
-
-#### Sample Markdown Input
-
-```markdown
-# Technical Documentation
-
-## Overview
-This document provides technical specifications.
-
-### Code Example
-```python
-def process_document(file_path):
-    return "Processed: " + file_path
-```
-
-### Performance Metrics
-
-| Metric | Value |
-|--------|-------|
-| Speed | 100 docs/hour |
-| Memory | 2.5 GB |
-
-### Conclusion
-The system provides excellent performance.
-```
-
-#### Generated PDF Features
-
-- Professional typography and layout
-- Syntax-highlighted code blocks
-- Formatted tables with borders
-- Table of contents (if enabled)
-- Custom styling and branding
-- Responsive image handling
-
-### Integration with RAG-Anything
-
-The enhanced markdown conversion integrates seamlessly with the RAG-Anything pipeline:
-
-```python
-from raganything import RAGAnything
-
-# Initialize RAG-Anything
-rag = RAGAnything()
-
-# Process markdown files with enhanced conversion
-await rag.process_documents_batch(
-    file_paths=["document.md"],
-    output_dir="./output",
-    # Enhanced markdown conversion will be used automatically
-    # for .md files
-)
-```
-
-## Performance Considerations
-
-### Batch Processing
-
-- **Memory Usage**: Each worker uses additional memory
-- **CPU Usage**: Parallel processing utilizes multiple cores
-- **I/O Bottlenecks**: Disk I/O may become limiting factor
-- **Recommended Settings**: 2-4 workers for most systems
-
-### Enhanced Markdown
-
-- **WeasyPrint**: Fastest for most documents
-- **Pandoc**: Best quality but slower
-- **Large Documents**: Consider chunking for very large files
-- **Image Processing**: Large images may slow conversion
-
-## Troubleshooting
-
-### Common Issues
-
-#### Batch Processing
-
-1. **Memory Errors**: Reduce `max_workers`
-2. **Timeout Errors**: Increase `timeout_per_file`
-3. **File Not Found**: Check file paths and permissions
-4. **Parser Errors**: Verify parser installation
-
-#### Enhanced Markdown
-
-1. **WeasyPrint Errors**: Install system dependencies
-2. **Pandoc Not Found**: Install Pandoc system-wide
-3. **CSS Issues**: Check CSS syntax and file paths
-4. **Image Problems**: Ensure images are accessible
-
-### Debug Mode
-
-Enable debug logging for detailed information:
-
-```python
-import logging
-logging.basicConfig(level=logging.DEBUG)
-```
-
-## Conclusion
-
-The batch processing and enhanced markdown conversion features significantly improve RAG-Anything's capabilities for processing large document collections and generating high-quality PDFs from markdown content. These features are designed to be easy to use while providing advanced configuration options for power users.
diff --git a/docs/batch_processing.md b/docs/batch_processing.md
new file mode 100644
index 0000000..8556184
--- /dev/null
+++ b/docs/batch_processing.md
@@ -0,0 +1,341 @@
+# Batch Processing
+
+This document describes the batch processing feature for RAG-Anything, which allows you to process multiple documents in parallel for improved throughput.
+
+## Overview
+
+The batch processing feature allows you to process multiple documents concurrently, significantly improving throughput for large document collections. It provides parallel processing, progress tracking, error handling, and flexible configuration options.
+
+## Key Features
+
+- **Parallel Processing**: Process multiple files concurrently using thread pools
+- **Progress Tracking**: Real-time progress bars with `tqdm`
+- **Error Handling**: Comprehensive error reporting and recovery
+- **Flexible Input**: Support for files, directories, and recursive search
+- **Configurable Workers**: Adjustable number of parallel workers
+- **Installation Check Bypass**: Optional skip for environments with package conflicts
+
+## Installation
+
+```bash
+# Basic installation
+pip install raganything[all]
+
+# Required for batch processing
+pip install tqdm
+```
+
+## Usage
+
+### Basic Batch Processing
+
+```python
+from raganything.batch_parser import BatchParser
+
+# Create batch parser
+batch_parser = BatchParser(
+    parser_type="mineru",  # or "docling"
+    max_workers=4,
+    show_progress=True,
+    timeout_per_file=300,
+    skip_installation_check=False  # Set to True if having parser installation issues
+)
+
+# Process multiple files
+result = batch_parser.process_batch(
+    file_paths=["doc1.pdf", "doc2.docx", "folder/"],
+    output_dir="./batch_output",
+    parse_method="auto",
+    recursive=True
+)
+
+# Check results
+print(result.summary())
+print(f"Success rate: {result.success_rate:.1f}%")
+print(f"Processing time: {result.processing_time:.2f} seconds")
+```
+
+### Asynchronous Batch Processing
+
+```python
+import asyncio
+from raganything.batch_parser import BatchParser
+
+async def async_batch_processing():
+    batch_parser = BatchParser(
+        parser_type="mineru",
+        max_workers=4,
+        show_progress=True
+    )
+    
+    # Process files asynchronously
+    result = await batch_parser.process_batch_async(
+        file_paths=["doc1.pdf", "doc2.docx"],
+        output_dir="./output",
+        parse_method="auto"
+    )
+    
+    return result
+
+# Run async processing
+result = asyncio.run(async_batch_processing())
+```
+
+### Integration with RAG-Anything
+
+```python
+from raganything import RAGAnything
+
+rag = RAGAnything()
+
+# Process documents with batch functionality
+result = rag.process_documents_batch(
+    file_paths=["doc1.pdf", "doc2.docx"],
+    output_dir="./output",
+    max_workers=4,
+    show_progress=True
+)
+
+print(f"Processed {len(result.successful_files)} files successfully")
+```
+
+### Process Documents with RAG Integration
+
+```python
+# Process documents in batch and then add them to RAG
+result = await rag.process_documents_with_rag_batch(
+    file_paths=["doc1.pdf", "doc2.docx"],
+    output_dir="./output",
+    max_workers=4,
+    show_progress=True
+)
+
+print(f"Processed {result['successful_rag_files']} files with RAG")
+print(f"Total processing time: {result['total_processing_time']:.2f} seconds")
+```
+
+### Command Line Interface
+
+```bash
+# Basic batch processing
+python -m raganything.batch_parser path/to/docs/ --output ./output --workers 4
+
+# With specific parser
+python -m raganything.batch_parser path/to/docs/ --parser mineru --method auto
+
+# Without progress bar
+python -m raganything.batch_parser path/to/docs/ --output ./output --no-progress
+
+# Help
+python -m raganything.batch_parser --help
+```
+
+## Configuration
+
+### Environment Variables
+
+```env
+# Batch processing configuration
+MAX_CONCURRENT_FILES=4
+SUPPORTED_FILE_EXTENSIONS=.pdf,.docx,.doc,.pptx,.ppt,.xlsx,.xls,.txt,.md
+RECURSIVE_FOLDER_PROCESSING=true
+PARSER_OUTPUT_DIR=./parsed_output
+```
+
+### BatchParser Parameters
+
+- **parser_type**: `"mineru"` or `"docling"` (default: `"mineru"`)
+- **max_workers**: Number of parallel workers (default: `4`)
+- **show_progress**: Show progress bar (default: `True`)
+- **timeout_per_file**: Timeout per file in seconds (default: `300`)
+- **skip_installation_check**: Skip parser installation check (default: `False`)
+
+## Supported File Types
+
+- **PDF files**: `.pdf`
+- **Office documents**: `.doc`, `.docx`, `.ppt`, `.pptx`, `.xls`, `.xlsx`
+- **Images**: `.png`, `.jpg`, `.jpeg`, `.bmp`, `.tiff`, `.tif`, `.gif`, `.webp`
+- **Text files**: `.txt`, `.md`
+
+## API Reference
+
+### BatchProcessingResult
+
+```python
+@dataclass
+class BatchProcessingResult:
+    successful_files: List[str]      # Successfully processed files
+    failed_files: List[str]          # Failed files
+    total_files: int                 # Total number of files
+    processing_time: float           # Total processing time in seconds
+    errors: Dict[str, str]           # Error messages for failed files
+    output_dir: str                  # Output directory used
+    
+    def summary(self) -> str:        # Human-readable summary
+    def success_rate(self) -> float: # Success rate as percentage
+```
+
+### BatchParser Methods
+
+```python
+class BatchParser:
+    def __init__(self, parser_type: str = "mineru", max_workers: int = 4, ...):
+        """Initialize batch parser"""
+    
+    def get_supported_extensions(self) -> List[str]:
+        """Get list of supported file extensions"""
+    
+    def filter_supported_files(self, file_paths: List[str], recursive: bool = True) -> List[str]:
+        """Filter files to only supported types"""
+    
+    def process_batch(self, file_paths: List[str], output_dir: str, ...) -> BatchProcessingResult:
+        """Process files in batch"""
+    
+    async def process_batch_async(self, file_paths: List[str], output_dir: str, ...) -> BatchProcessingResult:
+        """Process files in batch asynchronously"""
+```
+
+## Performance Considerations
+
+### Memory Usage
+- Each worker uses additional memory
+- Recommended: 2-4 workers for most systems
+- Monitor memory usage with large files
+
+### CPU Usage
+- Parallel processing utilizes multiple cores
+- Optimal worker count depends on CPU cores and file sizes
+- I/O may become bottleneck with many small files
+
+### Recommended Settings
+- **Small files** (< 1MB): Higher worker count (6-8)
+- **Large files** (> 100MB): Lower worker count (2-3)
+- **Mixed sizes**: Start with 4 workers and adjust
+
+## Troubleshooting
+
+### Common Issues
+
+#### Memory Errors
+```python
+# Solution: Reduce max_workers
+batch_parser = BatchParser(max_workers=2)
+```
+
+#### Timeout Errors
+```python
+# Solution: Increase timeout_per_file
+batch_parser = BatchParser(timeout_per_file=600)  # 10 minutes
+```
+
+#### Parser Installation Issues
+```python
+# Solution: Skip installation check
+batch_parser = BatchParser(skip_installation_check=True)
+```
+
+#### File Not Found Errors
+- Check file paths and permissions
+- Ensure input files exist
+- Verify directory access rights
+
+### Debug Mode
+
+Enable debug logging for detailed information:
+
+```python
+import logging
+logging.basicConfig(level=logging.DEBUG)
+
+# Create batch parser with debug logging
+batch_parser = BatchParser(parser_type="mineru", max_workers=2)
+```
+
+### Error Handling
+
+The batch processor provides comprehensive error handling:
+
+```python
+result = batch_parser.process_batch(file_paths=["doc1.pdf", "doc2.docx"])
+
+# Check for errors
+if result.failed_files:
+    print("Failed files:")
+    for file_path in result.failed_files:
+        error_message = result.errors.get(file_path, "Unknown error")
+        print(f"  - {file_path}: {error_message}")
+
+# Process only successful files
+for file_path in result.successful_files:
+    print(f"Successfully processed: {file_path}")
+```
+
+## Examples
+
+### Process Entire Directory
+
+```python
+from pathlib import Path
+
+# Process all supported files in a directory
+batch_parser = BatchParser(max_workers=4)
+directory_path = Path("./documents")
+
+result = batch_parser.process_batch(
+    file_paths=[str(directory_path)],
+    output_dir="./processed",
+    recursive=True  # Include subdirectories
+)
+
+print(f"Processed {len(result.successful_files)} out of {result.total_files} files")
+```
+
+### Filter Files Before Processing
+
+```python
+# Get all files in directory
+all_files = ["doc1.pdf", "image.png", "spreadsheet.xlsx", "unsupported.xyz"]
+
+# Filter to supported files only
+supported_files = batch_parser.filter_supported_files(all_files)
+print(f"Will process {len(supported_files)} out of {len(all_files)} files")
+
+# Process only supported files
+result = batch_parser.process_batch(
+    file_paths=supported_files,
+    output_dir="./output"
+)
+```
+
+### Custom Error Handling
+
+```python
+def process_with_retry(file_paths, max_retries=3):
+    """Process files with retry logic"""
+    
+    for attempt in range(max_retries):
+        result = batch_parser.process_batch(file_paths, "./output")
+        
+        if not result.failed_files:
+            break  # All files processed successfully
+        
+        print(f"Attempt {attempt + 1}: {len(result.failed_files)} files failed")
+        file_paths = result.failed_files  # Retry failed files
+    
+    return result
+```
+
+## Best Practices
+
+1. **Start with default settings** and adjust based on performance
+2. **Monitor system resources** during batch processing
+3. **Use appropriate worker counts** for your hardware
+4. **Handle errors gracefully** with retry logic
+5. **Test with small batches** before processing large collections
+6. **Use skip_installation_check** if facing parser installation issues
+7. **Enable progress tracking** for long-running operations
+8. **Set appropriate timeouts** based on expected file processing times
+
+## Conclusion
+
+The batch processing feature significantly improves RAG-Anything's throughput for large document collections. It provides flexible configuration options, comprehensive error handling, and seamless integration with the existing RAG-Anything pipeline. 
\ No newline at end of file
diff --git a/docs/enhanced_markdown.md b/docs/enhanced_markdown.md
new file mode 100644
index 0000000..f2b148c
--- /dev/null
+++ b/docs/enhanced_markdown.md
@@ -0,0 +1,552 @@
+# Enhanced Markdown Conversion
+
+This document describes the enhanced markdown conversion feature for RAG-Anything, which provides high-quality PDF generation from markdown files with multiple backend options and advanced styling.
+
+## Overview
+
+The enhanced markdown conversion feature provides professional-quality PDF generation from markdown files. It supports multiple conversion backends, advanced styling options, syntax highlighting, and seamless integration with RAG-Anything's document processing pipeline.
+
+## Key Features
+
+- **Multiple Backends**: WeasyPrint, Pandoc, and automatic backend selection
+- **Advanced Styling**: Custom CSS, syntax highlighting, and professional layouts
+- **Image Support**: Embedded images with proper scaling and positioning
+- **Table Support**: Formatted tables with borders and professional styling
+- **Code Highlighting**: Syntax highlighting for code blocks using Pygments
+- **Custom Templates**: Support for custom CSS and document templates
+- **Table of Contents**: Automatic TOC generation with navigation links
+- **Professional Typography**: High-quality fonts and spacing
+
+## Installation
+
+### Required Dependencies
+
+```bash
+# Basic installation
+pip install raganything[all]
+
+# Required for enhanced markdown conversion
+pip install markdown weasyprint pygments
+```
+
+### Optional Dependencies
+
+```bash
+# For Pandoc backend (system installation required)
+# Ubuntu/Debian:
+sudo apt-get install pandoc wkhtmltopdf
+
+# macOS:
+brew install pandoc wkhtmltopdf
+
+# Or using conda:
+conda install -c conda-forge pandoc wkhtmltopdf
+```
+
+### Backend-Specific Installation
+
+#### WeasyPrint (Recommended)
+```bash
+# Install WeasyPrint with system dependencies
+pip install weasyprint
+
+# Ubuntu/Debian system dependencies:
+sudo apt-get install -y build-essential python3-dev python3-pip \
+    python3-setuptools python3-wheel python3-cffi libcairo2 \
+    libpango-1.0-0 libpangocairo-1.0-0 libgdk-pixbuf2.0-0 \
+    libffi-dev shared-mime-info
+```
+
+#### Pandoc
+- Download from: https://pandoc.org/installing.html
+- Requires system-wide installation
+- Used for complex document structures and LaTeX-quality output
+
+## Usage
+
+### Basic Conversion
+
+```python
+from raganything.enhanced_markdown import EnhancedMarkdownConverter, MarkdownConfig
+
+# Create converter with default settings
+converter = EnhancedMarkdownConverter()
+
+# Convert markdown file to PDF
+success = converter.convert_file_to_pdf(
+    input_path="document.md",
+    output_path="document.pdf",
+    method="auto"  # Automatically select best available backend
+)
+
+if success:
+    print("✅ Conversion successful!")
+else:
+    print("❌ Conversion failed")
+```
+
+### Advanced Configuration
+
+```python
+# Create custom configuration
+config = MarkdownConfig(
+    page_size="A4",           # A4, Letter, Legal, etc.
+    margin="1in",             # CSS-style margins
+    font_size="12pt",         # Base font size
+    line_height="1.5",        # Line spacing
+    include_toc=True,         # Generate table of contents
+    syntax_highlighting=True, # Enable code syntax highlighting
+    
+    # Custom CSS styling
+    custom_css="""
+    body { 
+        font-family: 'Georgia', serif; 
+        color: #333;
+    }
+    h1 { 
+        color: #2c3e50; 
+        border-bottom: 2px solid #3498db; 
+        padding-bottom: 0.3em;
+    }
+    code { 
+        background-color: #f8f9fa; 
+        padding: 2px 4px; 
+        border-radius: 3px;
+    }
+    pre {
+        background-color: #f8f9fa;
+        border-left: 4px solid #3498db;
+        padding: 15px;
+        border-radius: 5px;
+    }
+    table {
+        border-collapse: collapse;
+        width: 100%;
+        margin: 1em 0;
+    }
+    th, td {
+        border: 1px solid #ddd;
+        padding: 8px 12px;
+        text-align: left;
+    }
+    th {
+        background-color: #f2f2f2;
+        font-weight: bold;
+    }
+    """
+)
+
+converter = EnhancedMarkdownConverter(config)
+```
+
+### Backend Selection
+
+```python
+# Check available backends
+converter = EnhancedMarkdownConverter()
+backend_info = converter.get_backend_info()
+
+print("Available backends:")
+for backend, available in backend_info["available_backends"].items():
+    status = "✅" if available else "❌"
+    print(f"  {status} {backend}")
+
+print(f"Recommended backend: {backend_info['recommended_backend']}")
+
+# Use specific backend
+converter.convert_file_to_pdf(
+    input_path="document.md",
+    output_path="document.pdf",
+    method="weasyprint"  # or "pandoc", "pandoc_system", "auto"
+)
+```
+
+### Content Conversion
+
+```python
+# Convert markdown content directly (not from file)
+markdown_content = """
+# Sample Document
+
+## Introduction
+This is a **bold** statement with *italic* text.
+
+## Code Example
+```python
+def hello_world():
+    print("Hello, World!")
+    return "Success"
+```
+
+## Table
+| Feature | Status | Notes |
+|---------|--------|-------|
+| PDF Generation | ✅ | Working |
+| Syntax Highlighting | ✅ | Pygments |
+| Custom CSS | ✅ | Full support |
+"""
+
+success = converter.convert_markdown_to_pdf(
+    markdown_content=markdown_content,
+    output_path="sample.pdf",
+    method="auto"
+)
+```
+
+### Command Line Interface
+
+```bash
+# Basic conversion
+python -m raganything.enhanced_markdown document.md --output document.pdf
+
+# With specific backend
+python -m raganything.enhanced_markdown document.md --method weasyprint
+
+# With custom CSS file
+python -m raganything.enhanced_markdown document.md --css custom_style.css
+
+# Show backend information
+python -m raganything.enhanced_markdown --info
+
+# Help
+python -m raganything.enhanced_markdown --help
+```
+
+## Backend Comparison
+
+| Backend | Pros | Cons | Best For | Quality |
+|---------|------|------|----------|---------|
+| **WeasyPrint** | • Excellent CSS support<br>• Fast rendering<br>• Great web-style layouts<br>• Python-based | • Limited LaTeX features<br>• Requires system deps | • Web-style documents<br>• Custom styling<br>• Fast conversion | ⭐⭐⭐⭐ |
+| **Pandoc** | • Extensive features<br>• LaTeX-quality output<br>• Academic formatting<br>• Many input/output formats | • Slower conversion<br>• System installation<br>• Complex setup | • Academic papers<br>• Complex documents<br>• Publication quality | ⭐⭐⭐⭐⭐ |
+| **Auto** | • Automatic selection<br>• Fallback support<br>• User-friendly | • May not use optimal backend | • General use<br>• Quick setup<br>• Development | ⭐⭐⭐⭐ |
+
+## Configuration Options
+
+### MarkdownConfig Parameters
+
+```python
+@dataclass
+class MarkdownConfig:
+    # Page layout
+    page_size: str = "A4"              # A4, Letter, Legal, A3, etc.
+    margin: str = "1in"                # CSS margin format
+    font_size: str = "12pt"            # Base font size
+    line_height: str = "1.5"           # Line spacing multiplier
+    
+    # Content options
+    include_toc: bool = True           # Generate table of contents
+    syntax_highlighting: bool = True   # Enable code highlighting
+    image_max_width: str = "100%"      # Maximum image width
+    table_style: str = "..."           # Default table CSS
+    
+    # Styling
+    css_file: Optional[str] = None     # External CSS file path
+    custom_css: Optional[str] = None   # Inline CSS content
+    template_file: Optional[str] = None # Custom HTML template
+    
+    # Output options
+    output_format: str = "pdf"         # Currently only PDF supported
+    output_dir: Optional[str] = None   # Output directory
+    
+    # Metadata
+    metadata: Optional[Dict[str, str]] = None  # Document metadata
+```
+
+### Supported Markdown Features
+
+#### Basic Formatting
+- **Headers**: `# ## ### #### ##### ######`
+- **Emphasis**: `*italic*`, `**bold**`, `***bold italic***`
+- **Links**: `[text](url)`, `[text][ref]`
+- **Images**: `![alt](url)`, `![alt][ref]`
+- **Lists**: Ordered and unordered, nested
+- **Blockquotes**: `> quote`
+- **Line breaks**: Double space or `\n\n`
+
+#### Advanced Features
+- **Tables**: GitHub-style tables with alignment
+- **Code blocks**: Fenced code blocks with language specification
+- **Inline code**: `backtick code`
+- **Horizontal rules**: `---` or `***`
+- **Footnotes**: `[^1]` references
+- **Definition lists**: Term and definition pairs
+- **Attributes**: `{#id .class key=value}`
+
+#### Code Highlighting
+
+```markdown
+```python
+def example_function():
+    """This will be syntax highlighted"""
+    return "Hello, World!"
+```
+
+```javascript
+function exampleFunction() {
+    // This will also be highlighted
+    return "Hello, World!";
+}
+```
+```
+
+## Integration with RAG-Anything
+
+The enhanced markdown conversion integrates seamlessly with RAG-Anything:
+
+```python
+from raganything import RAGAnything
+
+# Initialize RAG-Anything
+rag = RAGAnything()
+
+# Process markdown files - enhanced conversion is used automatically
+await rag.process_document_complete("document.md")
+
+# Batch processing with enhanced markdown conversion
+result = rag.process_documents_batch(
+    file_paths=["doc1.md", "doc2.md", "doc3.md"],
+    output_dir="./output"
+)
+
+# The .md files will be converted to PDF using enhanced conversion
+# before being processed by the RAG system
+```
+
+## Performance Considerations
+
+### Conversion Speed
+- **WeasyPrint**: ~1-3 seconds for typical documents
+- **Pandoc**: ~3-10 seconds for typical documents
+- **Large documents**: Time scales roughly linearly with content
+
+### Memory Usage
+- **WeasyPrint**: ~50-100MB per conversion
+- **Pandoc**: ~100-200MB per conversion
+- **Images**: Large images increase memory usage significantly
+
+### Optimization Tips
+1. **Resize large images** before embedding
+2. **Use compressed images** (JPEG for photos, PNG for graphics)
+3. **Limit concurrent conversions** to avoid memory issues
+4. **Cache converted content** when processing multiple times
+
+## Examples
+
+### Sample Markdown Document
+
+```markdown
+# Technical Documentation
+
+## Table of Contents
+[TOC]
+
+## Overview
+This document provides comprehensive technical specifications.
+
+## Architecture
+
+### System Components
+1. **Parser Engine**: Handles document processing
+2. **Storage Layer**: Manages data persistence  
+3. **Query Interface**: Provides search capabilities
+
+### Code Implementation
+```python
+from raganything import RAGAnything
+
+# Initialize system
+rag = RAGAnything(config={
+    "working_dir": "./storage",
+    "enable_image_processing": True
+})
+
+# Process document
+await rag.process_document_complete("document.pdf")
+```
+
+### Performance Metrics
+
+| Component | Throughput | Latency | Memory |
+|-----------|------------|---------|--------|
+| Parser | 100 docs/hour | 36s avg | 2.5 GB |
+| Storage | 1000 ops/sec | 1ms avg | 512 MB |
+| Query | 50 queries/sec | 20ms avg | 1 GB |
+
+## Integration Notes
+
+> **Important**: Always validate input before processing.
+
+## Conclusion
+The enhanced system provides excellent performance for document processing workflows.
+```
+
+### Generated PDF Features
+
+The enhanced markdown converter produces PDFs with:
+
+- **Professional typography** with proper font selection and spacing
+- **Syntax-highlighted code blocks** using Pygments
+- **Formatted tables** with borders and alternating row colors
+- **Clickable table of contents** with navigation links
+- **Responsive images** that scale appropriately
+- **Custom styling** through CSS
+- **Proper page breaks** and margins
+- **Document metadata** and properties
+
+## Troubleshooting
+
+### Common Issues
+
+#### WeasyPrint Installation Problems
+```bash
+# Ubuntu/Debian: Install system dependencies
+sudo apt-get update
+sudo apt-get install -y build-essential python3-dev libcairo2 \
+    libpango-1.0-0 libpangocairo-1.0-0 libgdk-pixbuf2.0-0 \
+    libffi-dev shared-mime-info
+
+# Then reinstall WeasyPrint
+pip install --force-reinstall weasyprint
+```
+
+#### Pandoc Not Found
+```bash
+# Check if Pandoc is installed
+pandoc --version
+
+# Install Pandoc (Ubuntu/Debian)
+sudo apt-get install pandoc wkhtmltopdf
+
+# Or download from: https://pandoc.org/installing.html
+```
+
+#### CSS Issues
+- Check CSS syntax in custom_css
+- Verify CSS file paths exist
+- Test CSS with simple HTML first
+- Use browser developer tools to debug styling
+
+#### Image Problems
+- Ensure images are accessible (correct paths)
+- Check image file formats (PNG, JPEG, GIF supported)
+- Verify image file permissions
+- Consider image size and format optimization
+
+#### Font Issues
+```python
+# Use web-safe fonts
+config = MarkdownConfig(
+    custom_css="""
+    body { 
+        font-family: 'Arial', 'Helvetica', sans-serif; 
+    }
+    """
+)
+```
+
+### Debug Mode
+
+Enable detailed logging for troubleshooting:
+
+```python
+import logging
+
+# Enable debug logging
+logging.basicConfig(
+    level=logging.DEBUG,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
+)
+
+# Create converter with debug logging
+converter = EnhancedMarkdownConverter()
+result = converter.convert_file_to_pdf("test.md", "test.pdf")
+```
+
+### Error Handling
+
+```python
+def robust_conversion(input_path, output_path):
+    """Convert with fallback backends"""
+    converter = EnhancedMarkdownConverter()
+    
+    # Try backends in order of preference
+    backends = ["weasyprint", "pandoc", "auto"]
+    
+    for backend in backends:
+        try:
+            success = converter.convert_file_to_pdf(
+                input_path=input_path,
+                output_path=output_path,
+                method=backend
+            )
+            if success:
+                print(f"✅ Conversion successful with {backend}")
+                return True
+        except Exception as e:
+            print(f"❌ {backend} failed: {str(e)}")
+            continue
+    
+    print("❌ All backends failed")
+    return False
+```
+
+## API Reference
+
+### EnhancedMarkdownConverter
+
+```python
+class EnhancedMarkdownConverter:
+    def __init__(self, config: Optional[MarkdownConfig] = None):
+        """Initialize converter with optional configuration"""
+    
+    def convert_file_to_pdf(self, input_path: str, output_path: str, method: str = "auto") -> bool:
+        """Convert markdown file to PDF"""
+    
+    def convert_markdown_to_pdf(self, markdown_content: str, output_path: str, method: str = "auto") -> bool:
+        """Convert markdown content to PDF"""
+    
+    def get_backend_info(self) -> Dict[str, Any]:
+        """Get information about available backends"""
+    
+    def convert_with_weasyprint(self, markdown_content: str, output_path: str) -> bool:
+        """Convert using WeasyPrint backend"""
+    
+    def convert_with_pandoc(self, markdown_content: str, output_path: str) -> bool:
+        """Convert using Pandoc backend"""
+```
+
+## Best Practices
+
+1. **Choose the right backend** for your use case:
+   - **WeasyPrint** for web-style documents and custom CSS
+   - **Pandoc** for academic papers and complex formatting
+   - **Auto** for general use and development
+
+2. **Optimize images** before embedding:
+   - Use appropriate formats (JPEG for photos, PNG for graphics)
+   - Compress images to reduce file size
+   - Set reasonable maximum widths
+
+3. **Design responsive layouts**:
+   - Use relative units (%, em) instead of absolute (px)
+   - Test with different page sizes
+   - Consider print-specific CSS
+
+4. **Test your styling**:
+   - Start with default styling and incrementally customize
+   - Test with sample content before production use
+   - Validate CSS syntax
+
+5. **Handle errors gracefully**:
+   - Implement fallback backends
+   - Provide meaningful error messages
+   - Log conversion attempts for debugging
+
+6. **Performance optimization**:
+   - Cache converted content when possible
+   - Process large batches with appropriate worker counts
+   - Monitor memory usage with large documents
+
+## Conclusion
+
+The enhanced markdown conversion feature provides professional-quality PDF generation with flexible styling options and multiple backend support. It seamlessly integrates with RAG-Anything's document processing pipeline while offering standalone functionality for markdown-to-PDF conversion needs. 
\ No newline at end of file
diff --git a/examples/batch_and_enhanced_markdown_example.py b/examples/batch_and_enhanced_markdown_example.py
deleted file mode 100644
index 4463cba..0000000
--- a/examples/batch_and_enhanced_markdown_example.py
+++ /dev/null
@@ -1,334 +0,0 @@
-#!/usr/bin/env python
-"""
-Example script demonstrating batch processing and enhanced markdown conversion
-
-This example shows how to:
-1. Process multiple documents in parallel using batch processing
-2. Convert markdown files to PDF with enhanced formatting
-3. Use different conversion backends for markdown
-"""
-
-import asyncio
-import logging
-from pathlib import Path
-import tempfile
-
-# Add project root directory to Python path
-import sys
-
-sys.path.append(str(Path(__file__).parent.parent))
-
-from raganything import RAGAnything, RAGAnythingConfig
-from raganything.batch_parser import BatchParser
-from raganything.enhanced_markdown import EnhancedMarkdownConverter, MarkdownConfig
-
-
-def create_sample_markdown_files():
-    """Create sample markdown files for testing"""
-    sample_files = []
-    # Create temporary directory
-    temp_dir = Path(tempfile.mkdtemp())
-    # Sample 1: Basic markdown
-    sample1_content = """# Sample Document 1
-
-This is a basic markdown document with various elements.
-
-## Headers
-This document demonstrates different markdown features.
-
-### Lists
-- Item 1
-- Item 2
-- Item 3
-
-### Code
-```python
-def hello_world():
-    print("Hello, World!")
-```
-
-### Tables
-| Name | Age | City |
-|------|-----|------|
-| Alice | 25 | New York |
-| Bob | 30 | London |
-| Carol | 28 | Paris |
-
-### Blockquotes
-> This is a blockquote with some important information.
-
-### Links and Images
-Visit [GitHub](https://github.com) for more information.
-"""
-
-    sample1_path = temp_dir / "sample1.md"
-    with open(sample1_path, "w", encoding="utf-8") as f:
-        f.write(sample1_content)
-    sample_files.append(str(sample1_path))
-
-    # Sample 2: Technical document
-    sample2_content = """# Technical Documentation
-
-## Overview
-This document provides technical specifications for the RAG-Anything system.
-
-## Architecture
-
-### Core Components
-1. **Document Parser**: Handles multiple file formats
-2. **Multimodal Processor**: Processes images, tables, equations
-3. **Knowledge Graph**: Stores relationships and entities
-4. **Query Engine**: Provides intelligent retrieval
-
-### Code Examples
-
-#### Python Implementation
-```python
-from raganything import RAGAnything
-
-# Initialize the system
-rag = RAGAnything()
-
-# Process documents
-await rag.process_document_complete("document.pdf")
-```
-
-#### Configuration
-```yaml
-working_dir: "./rag_storage"
-enable_image_processing: true
-enable_table_processing: true
-max_concurrent_files: 4
-```
-
-## Performance Metrics
-
-| Metric | Value | Unit |
-|--------|-------|------|
-| Processing Speed | 100 | docs/hour |
-| Memory Usage | 2.5 | GB |
-| Accuracy | 95.2 | % |
-
-## Conclusion
-The system provides excellent performance for multimodal document processing.
-"""
-
-    sample2_path = temp_dir / "sample2.md"
-    with open(sample2_path, "w", encoding="utf-8") as f:
-        f.write(sample2_content)
-    sample_files.append(str(sample2_path))
-
-    return sample_files, temp_dir
-
-
-def demonstrate_batch_processing():
-    """Demonstrate batch processing functionality"""
-    print("\n" + "=" * 50)
-    print("BATCH PROCESSING DEMONSTRATION")
-    print("=" * 50)
-
-    # Create sample files
-    sample_files, temp_dir = create_sample_markdown_files()
-
-    try:
-        # Create batch parser
-        batch_parser = BatchParser(
-            parser_type="mineru",
-            max_workers=2,
-            show_progress=True,
-            timeout_per_file=60,
-            skip_installation_check=True,  # Add this parameter to bypass installation check
-        )
-
-        print(f"Created {len(sample_files)} sample markdown files:")
-        for file_path in sample_files:
-            print(f"  - {file_path}")
-
-        # Process files in batch
-        output_dir = temp_dir / "batch_output"
-        result = batch_parser.process_batch(
-            file_paths=sample_files,
-            output_dir=str(output_dir),
-            parse_method="auto",
-            recursive=False,
-        )
-
-        # Display results
-        print("\nBatch Processing Results:")
-        print(result.summary())
-
-        if result.failed_files:
-            print("\nFailed files:")
-            for file_path in result.failed_files:
-                print(
-                    f"  - {file_path}: {result.errors.get(file_path, 'Unknown error')}"
-                )
-
-        return result
-
-    except Exception as e:
-        print(f"Batch processing failed: {str(e)}")
-        return None
-
-
-def demonstrate_enhanced_markdown():
-    """Demonstrate enhanced markdown conversion"""
-    print("\n" + "=" * 50)
-    print("ENHANCED MARKDOWN CONVERSION DEMONSTRATION")
-    print("=" * 50)
-
-    # Create sample files
-    sample_files, temp_dir = create_sample_markdown_files()
-
-    try:
-        # Create enhanced markdown converter
-        config = MarkdownConfig(
-            page_size="A4",
-            margin="1in",
-            font_size="12pt",
-            include_toc=True,
-            syntax_highlighting=True,
-        )
-
-        converter = EnhancedMarkdownConverter(config)
-
-        # Show backend information
-        backend_info = converter.get_backend_info()
-        print("Available backends:")
-        for backend, available in backend_info["available_backends"].items():
-            status = "✅" if available else "❌"
-            print(f"  {status} {backend}")
-        print(f"Recommended backend: {backend_info['recommended_backend']}")
-
-        # Convert each sample file
-        conversion_results = []
-
-        for i, markdown_file in enumerate(sample_files, 1):
-            print(f"\nConverting sample {i}...")
-            # Try different conversion methods
-            for method in ["auto", "weasyprint", "pandoc"]:
-                try:
-                    output_path = temp_dir / f"sample{i}_{method}.pdf"
-
-                    success = converter.convert_file_to_pdf(
-                        input_path=markdown_file,
-                        output_path=str(output_path),
-                        method=method,
-                    )
-
-                    if success:
-                        print(f"  ✅ {method}: {output_path}")
-                        conversion_results.append(
-                            {
-                                "file": markdown_file,
-                                "method": method,
-                                "output": str(output_path),
-                                "success": True,
-                            }
-                        )
-                        break  # Use first successful method
-                    else:
-                        print(f"  ❌ {method}: Failed")
-
-                except Exception as e:
-                    print(f"  ❌ {method}: {str(e)}")
-                    continue
-
-        # Summary
-        print("\nConversion Summary:")
-        print(f"  Total files: {len(sample_files)}")
-        print(f"  Successful conversions: {len(conversion_results)}")
-
-        return conversion_results
-
-    except Exception as e:
-        print(f"Enhanced markdown conversion failed: {str(e)}")
-        return None
-
-
-async def demonstrate_integration():
-    """Demonstrate integration with RAG-Anything"""
-    print("\n" + "=" * 50)
-    print("RAG-ANYTHING INTEGRATION DEMONSTRATION")
-    print("=" * 50)
-
-    # Create sample files
-    sample_files, temp_dir = create_sample_markdown_files()
-
-    try:
-        # Initialize RAG-Anything (without API keys for demo)
-        config = RAGAnythingConfig(
-            working_dir=str(temp_dir / "rag_storage"),
-            enable_image_processing=True,
-            enable_table_processing=True,
-            enable_equation_processing=True,
-        )
-
-        rag = RAGAnything(config=config)
-
-        # Demonstrate batch processing with RAG
-        print("Processing documents with batch functionality...")
-
-        # Note: This would require actual API keys for full functionality
-        # For demo purposes, we'll just show the interface
-        print("  - Batch processing interface available")
-        print("  - Enhanced markdown conversion available")
-        print("  - Integration with multimodal processors available")
-
-        # Show that rag object has the expected methods
-        print(f"  - RAG instance created: {type(rag).__name__}")
-        print(
-            f"  - Available batch methods: {[m for m in dir(rag) if 'batch' in m.lower()]}"
-        )
-
-        return True
-
-    except Exception as e:
-        print(f"Integration demonstration failed: {str(e)}")
-        return False
-
-
-def main():
-    """Main demonstration function"""
-    # Configure logging
-    logging.basicConfig(
-        level=logging.INFO,
-        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
-    )
-
-    print("RAG-Anything Batch Processing and Enhanced Markdown Demo")
-    print("=" * 60)
-
-    # Demonstrate batch processing
-    batch_result = demonstrate_batch_processing()
-
-    # Demonstrate enhanced markdown conversion
-    markdown_result = demonstrate_enhanced_markdown()
-
-    # Demonstrate integration
-    asyncio.run(demonstrate_integration())
-
-    # Summary
-    print("\n" + "=" * 60)
-    print("DEMONSTRATION SUMMARY")
-    print("=" * 60)
-    if batch_result:
-        print(f"Batch Processing: {batch_result.success_rate:.1f}% success rate")
-    else:
-        print("Batch Processing: Failed")
-
-    if markdown_result:
-        print(f"Enhanced Markdown: {len(markdown_result)} successful conversions")
-    else:
-        print("Enhanced Markdown: Failed")
-
-    print("\nFeatures demonstrated:")
-    print("  - Parallel document processing with progress tracking")
-    print("  - Multiple markdown conversion backends (WeasyPrint, Pandoc)")
-    print("  - Enhanced styling and formatting")
-    print("  - Integration with RAG-Anything pipeline")
-    print("  - Comprehensive error handling and reporting")
-
-
-if __name__ == "__main__":
-    main()
diff --git a/examples/batch_processing_example.py b/examples/batch_processing_example.py
new file mode 100644
index 0000000..ce03c39
--- /dev/null
+++ b/examples/batch_processing_example.py
@@ -0,0 +1,550 @@
+#!/usr/bin/env python
+"""
+Batch Processing Example for RAG-Anything
+
+This example demonstrates how to use the batch processing capabilities
+to process multiple documents in parallel for improved throughput.
+
+Features demonstrated:
+- Basic batch processing with BatchParser
+- Asynchronous batch processing
+- Integration with RAG-Anything
+- Error handling and progress tracking
+- File filtering and directory processing
+"""
+
+import asyncio
+import logging
+from pathlib import Path
+import tempfile
+import time
+
+# Add project root directory to Python path
+import sys
+sys.path.append(str(Path(__file__).parent.parent))
+
+from raganything import RAGAnything, RAGAnythingConfig
+from raganything.batch_parser import BatchParser
+
+
+def create_sample_documents():
+    """Create sample documents for batch processing testing"""
+    temp_dir = Path(tempfile.mkdtemp())
+    sample_files = []
+    
+    # Create various document types
+    documents = {
+        "document1.txt": "This is a simple text document for testing batch processing.",
+        "document2.txt": "Another text document with different content.",
+        "document3.md": """# Markdown Document
+        
+## Introduction
+This is a markdown document for testing.
+
+### Features
+- Markdown formatting
+- Code blocks
+- Lists
+
+```python
+def example():
+    return "Hello from markdown"
+```
+""",
+        "report.txt": """Business Report
+
+Executive Summary:
+This report demonstrates batch processing capabilities.
+
+Key Findings:
+1. Parallel processing improves throughput
+2. Progress tracking enhances user experience
+3. Error handling ensures reliability
+
+Conclusion:
+Batch processing is essential for large-scale document processing.
+""",
+        "notes.md": """# Meeting Notes
+
+## Date: 2024-01-15
+
+### Attendees
+- Alice Johnson
+- Bob Smith
+- Carol Williams
+
+### Discussion Topics
+1. **Batch Processing Implementation**
+   - Parallel document processing
+   - Progress tracking
+   - Error handling strategies
+
+2. **Performance Metrics**
+   - Target: 100 documents/hour
+   - Memory usage: < 4GB
+   - Success rate: > 95%
+
+### Action Items
+- [ ] Implement batch processing
+- [ ] Add progress bars
+- [ ] Test with large document sets
+- [ ] Optimize memory usage
+
+### Next Steps
+Continue development and testing of batch processing features.
+"""
+    }
+    
+    # Create files
+    for filename, content in documents.items():
+        file_path = temp_dir / filename
+        with open(file_path, 'w', encoding='utf-8') as f:
+            f.write(content)
+        sample_files.append(str(file_path))
+    
+    return sample_files, temp_dir
+
+
+def demonstrate_basic_batch_processing():
+    """Demonstrate basic batch processing functionality"""
+    print("\n" + "=" * 60)
+    print("BASIC BATCH PROCESSING DEMONSTRATION")
+    print("=" * 60)
+    
+    # Create sample documents
+    sample_files, temp_dir = create_sample_documents()
+    
+    try:
+        print(f"Created {len(sample_files)} sample documents in: {temp_dir}")
+        for file_path in sample_files:
+            print(f"  - {Path(file_path).name}")
+        
+        # Create batch parser
+        batch_parser = BatchParser(
+            parser_type="mineru",
+            max_workers=3,
+            show_progress=True,
+            timeout_per_file=60,
+            skip_installation_check=True  # Skip installation check for demo
+        )
+        
+        print(f"\nBatch parser configured:")
+        print(f"  - Parser type: mineru")
+        print(f"  - Max workers: 3")
+        print(f"  - Progress tracking: enabled")
+        print(f"  - Timeout per file: 60 seconds")
+        
+        # Check supported extensions
+        supported_extensions = batch_parser.get_supported_extensions()
+        print(f"  - Supported extensions: {supported_extensions}")
+        
+        # Filter files to supported types
+        supported_files = batch_parser.filter_supported_files(sample_files)
+        print(f"\nFile filtering results:")
+        print(f"  - Total files: {len(sample_files)}")
+        print(f"  - Supported files: {len(supported_files)}")
+        
+        # Process batch
+        output_dir = temp_dir / "batch_output"
+        print(f"\nStarting batch processing...")
+        print(f"Output directory: {output_dir}")
+        
+        start_time = time.time()
+        result = batch_parser.process_batch(
+            file_paths=supported_files,
+            output_dir=str(output_dir),
+            parse_method="auto",
+            recursive=False
+        )
+        processing_time = time.time() - start_time
+        
+        # Display results
+        print("\n" + "-" * 40)
+        print("BATCH PROCESSING RESULTS")
+        print("-" * 40)
+        print(result.summary())
+        print(f"Total processing time: {processing_time:.2f} seconds")
+        print(f"Success rate: {result.success_rate:.1f}%")
+        
+        if result.successful_files:
+            print(f"\nSuccessfully processed files:")
+            for file_path in result.successful_files:
+                print(f"  ✅ {Path(file_path).name}")
+        
+        if result.failed_files:
+            print(f"\nFailed files:")
+            for file_path in result.failed_files:
+                error = result.errors.get(file_path, "Unknown error")
+                print(f"  ❌ {Path(file_path).name}: {error}")
+        
+        return result
+        
+    except Exception as e:
+        print(f"❌ Batch processing demonstration failed: {str(e)}")
+        return None
+
+
+async def demonstrate_async_batch_processing():
+    """Demonstrate asynchronous batch processing"""
+    print("\n" + "=" * 60)
+    print("ASYNCHRONOUS BATCH PROCESSING DEMONSTRATION")
+    print("=" * 60)
+    
+    # Create sample documents
+    sample_files, temp_dir = create_sample_documents()
+    
+    try:
+        print(f"Processing {len(sample_files)} documents asynchronously...")
+        
+        # Create batch parser
+        batch_parser = BatchParser(
+            parser_type="mineru",
+            max_workers=2,
+            show_progress=True,
+            skip_installation_check=True
+        )
+        
+        # Process batch asynchronously
+        output_dir = temp_dir / "async_output"
+        
+        start_time = time.time()
+        result = await batch_parser.process_batch_async(
+            file_paths=sample_files,
+            output_dir=str(output_dir),
+            parse_method="auto",
+            recursive=False
+        )
+        processing_time = time.time() - start_time
+        
+        # Display results
+        print("\n" + "-" * 40)
+        print("ASYNC BATCH PROCESSING RESULTS")
+        print("-" * 40)
+        print(result.summary())
+        print(f"Async processing time: {processing_time:.2f} seconds")
+        print(f"Success rate: {result.success_rate:.1f}%")
+        
+        return result
+        
+    except Exception as e:
+        print(f"❌ Async batch processing demonstration failed: {str(e)}")
+        return None
+
+
+async def demonstrate_rag_integration():
+    """Demonstrate batch processing integration with RAG-Anything"""
+    print("\n" + "=" * 60)
+    print("RAG-ANYTHING BATCH INTEGRATION DEMONSTRATION")
+    print("=" * 60)
+    
+    # Create sample documents
+    sample_files, temp_dir = create_sample_documents()
+    
+    try:
+        # Initialize RAG-Anything with temporary storage
+        config = RAGAnythingConfig(
+            working_dir=str(temp_dir / "rag_storage"),
+            enable_image_processing=True,
+            enable_table_processing=True,
+            enable_equation_processing=True,
+            max_concurrent_files=2
+        )
+        
+        rag = RAGAnything(config=config)
+        
+        print("RAG-Anything initialized with batch processing capabilities")
+        
+        # Show available batch methods
+        batch_methods = [method for method in dir(rag) if 'batch' in method.lower()]
+        print(f"Available batch methods: {batch_methods}")
+        
+        # Demonstrate batch processing with RAG integration
+        print(f"\nProcessing {len(sample_files)} documents with RAG integration...")
+        
+        # Use the RAG-integrated batch processing
+        try:
+            # Process documents in batch
+            result = rag.process_documents_batch(
+                file_paths=sample_files,
+                output_dir=str(temp_dir / "rag_batch_output"),
+                max_workers=2,
+                show_progress=True
+            )
+            
+            print("\n" + "-" * 40)
+            print("RAG BATCH PROCESSING RESULTS")
+            print("-" * 40)
+            print(result.summary())
+            print(f"Success rate: {result.success_rate:.1f}%")
+            
+            # Demonstrate batch processing with full RAG integration
+            print(f"\nProcessing documents with full RAG integration...")
+            
+            rag_result = await rag.process_documents_with_rag_batch(
+                file_paths=sample_files[:2],  # Process subset for demo
+                output_dir=str(temp_dir / "rag_full_output"),
+                max_workers=1,
+                show_progress=True
+            )
+            
+            print("\n" + "-" * 40)
+            print("FULL RAG INTEGRATION RESULTS")
+            print("-" * 40)
+            print(f"Parse result: {rag_result['parse_result'].summary()}")
+            print(f"RAG processing time: {rag_result['total_processing_time']:.2f} seconds")
+            print(f"Successfully processed with RAG: {rag_result['successful_rag_files']}")
+            print(f"Failed RAG processing: {rag_result['failed_rag_files']}")
+            
+            return rag_result
+            
+        except Exception as e:
+            print(f"⚠️ RAG integration demo completed with limitations: {str(e)}")
+            print("Note: This is expected in environments without full API configuration")
+            return None
+            
+    except Exception as e:
+        print(f"❌ RAG integration demonstration failed: {str(e)}")
+        return None
+
+
+def demonstrate_directory_processing():
+    """Demonstrate processing entire directories"""
+    print("\n" + "=" * 60)
+    print("DIRECTORY PROCESSING DEMONSTRATION")
+    print("=" * 60)
+    
+    # Create a directory structure with nested files
+    temp_dir = Path(tempfile.mkdtemp())
+    
+    # Create main directory files
+    main_files = {
+        "overview.txt": "Main directory overview document",
+        "readme.md": "# Project README\n\nThis is the main project documentation."
+    }
+    
+    # Create subdirectory
+    sub_dir = temp_dir / "subdirectory"
+    sub_dir.mkdir()
+    
+    sub_files = {
+        "details.txt": "Detailed information in subdirectory",
+        "notes.md": "# Notes\n\nAdditional notes and information."
+    }
+    
+    # Write all files
+    all_files = []
+    for filename, content in main_files.items():
+        file_path = temp_dir / filename
+        with open(file_path, 'w', encoding='utf-8') as f:
+            f.write(content)
+        all_files.append(str(file_path))
+    
+    for filename, content in sub_files.items():
+        file_path = sub_dir / filename
+        with open(file_path, 'w', encoding='utf-8') as f:
+            f.write(content)
+        all_files.append(str(file_path))
+    
+    try:
+        print(f"Created directory structure:")
+        print(f"  Main directory: {temp_dir}")
+        print(f"  Files in main: {list(main_files.keys())}")
+        print(f"  Subdirectory: {sub_dir}")
+        print(f"  Files in sub: {list(sub_files.keys())}")
+        
+        # Create batch parser
+        batch_parser = BatchParser(
+            parser_type="mineru",
+            max_workers=2,
+            show_progress=True,
+            skip_installation_check=True
+        )
+        
+        # Process entire directory recursively
+        print(f"\nProcessing entire directory recursively...")
+        
+        result = batch_parser.process_batch(
+            file_paths=[str(temp_dir)],  # Pass directory path
+            output_dir=str(temp_dir / "directory_output"),
+            parse_method="auto",
+            recursive=True  # Include subdirectories
+        )
+        
+        print("\n" + "-" * 40)
+        print("DIRECTORY PROCESSING RESULTS")
+        print("-" * 40)
+        print(result.summary())
+        print(f"Total files found and processed: {result.total_files}")
+        print(f"Success rate: {result.success_rate:.1f}%")
+        
+        if result.successful_files:
+            print(f"\nSuccessfully processed:")
+            for file_path in result.successful_files:
+                relative_path = Path(file_path).relative_to(temp_dir)
+                print(f"  ✅ {relative_path}")
+        
+        return result
+        
+    except Exception as e:
+        print(f"❌ Directory processing demonstration failed: {str(e)}")
+        return None
+
+
+def demonstrate_error_handling():
+    """Demonstrate error handling and recovery"""
+    print("\n" + "=" * 60)
+    print("ERROR HANDLING DEMONSTRATION")
+    print("=" * 60)
+    
+    temp_dir = Path(tempfile.mkdtemp())
+    
+    # Create files with various issues
+    files_with_issues = {
+        "valid_file.txt": "This is a valid file that should process successfully.",
+        "empty_file.txt": "",  # Empty file
+        "large_file.txt": "x" * 1000000,  # Large file (1MB of 'x')
+    }
+    
+    created_files = []
+    for filename, content in files_with_issues.items():
+        file_path = temp_dir / filename
+        with open(file_path, 'w', encoding='utf-8') as f:
+            f.write(content)
+        created_files.append(str(file_path))
+    
+    # Add a non-existent file to the list
+    created_files.append(str(temp_dir / "non_existent_file.txt"))
+    
+    try:
+        print(f"Testing error handling with {len(created_files)} files:")
+        for file_path in created_files:
+            name = Path(file_path).name
+            exists = Path(file_path).exists()
+            size = Path(file_path).stat().st_size if exists else 0
+            print(f"  - {name}: {'exists' if exists else 'missing'}, {size} bytes")
+        
+        # Create batch parser with short timeout for demonstration
+        batch_parser = BatchParser(
+            parser_type="mineru",
+            max_workers=2,
+            show_progress=True,
+            timeout_per_file=30,  # Short timeout for demo
+            skip_installation_check=True
+        )
+        
+        # Process files and handle errors
+        result = batch_parser.process_batch(
+            file_paths=created_files,
+            output_dir=str(temp_dir / "error_test_output"),
+            parse_method="auto"
+        )
+        
+        print("\n" + "-" * 40)
+        print("ERROR HANDLING RESULTS")
+        print("-" * 40)
+        print(result.summary())
+        
+        if result.successful_files:
+            print(f"\nSuccessful files:")
+            for file_path in result.successful_files:
+                print(f"  ✅ {Path(file_path).name}")
+        
+        if result.failed_files:
+            print(f"\nFailed files with error details:")
+            for file_path in result.failed_files:
+                error = result.errors.get(file_path, "Unknown error")
+                print(f"  ❌ {Path(file_path).name}: {error}")
+        
+        # Demonstrate retry logic
+        if result.failed_files:
+            print(f"\nDemonstrating retry logic for {len(result.failed_files)} failed files...")
+            
+            # Retry only the failed files
+            retry_result = batch_parser.process_batch(
+                file_paths=result.failed_files,
+                output_dir=str(temp_dir / "retry_output"),
+                parse_method="auto"
+            )
+            
+            print(f"Retry results: {retry_result.summary()}")
+        
+        return result
+        
+    except Exception as e:
+        print(f"❌ Error handling demonstration failed: {str(e)}")
+        return None
+
+
+async def main():
+    """Main demonstration function"""
+    # Configure logging
+    logging.basicConfig(
+        level=logging.INFO,
+        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+    )
+    
+    print("RAG-Anything Batch Processing Demonstration")
+    print("=" * 70)
+    print("This example demonstrates various batch processing capabilities:")
+    print("  - Basic batch processing with progress tracking")
+    print("  - Asynchronous processing for improved performance")
+    print("  - Integration with RAG-Anything pipeline")
+    print("  - Directory processing with recursive file discovery")
+    print("  - Comprehensive error handling and recovery")
+    
+    results = {}
+    
+    # Run demonstrations
+    print("\n🚀 Starting demonstrations...")
+    
+    # Basic batch processing
+    results['basic'] = demonstrate_basic_batch_processing()
+    
+    # Asynchronous processing
+    results['async'] = await demonstrate_async_batch_processing()
+    
+    # RAG integration
+    results['rag'] = await demonstrate_rag_integration()
+    
+    # Directory processing
+    results['directory'] = demonstrate_directory_processing()
+    
+    # Error handling
+    results['error_handling'] = demonstrate_error_handling()
+    
+    # Summary
+    print("\n" + "=" * 70)
+    print("DEMONSTRATION SUMMARY")
+    print("=" * 70)
+    
+    for demo_name, result in results.items():
+        if result:
+            if hasattr(result, 'success_rate'):
+                print(f"✅ {demo_name.upper()}: {result.success_rate:.1f}% success rate")
+            else:
+                print(f"✅ {demo_name.upper()}: Completed successfully")
+        else:
+            print(f"❌ {demo_name.upper()}: Failed or had limitations")
+    
+    print("\n📊 Key Features Demonstrated:")
+    print("  - Parallel document processing with configurable worker counts")
+    print("  - Real-time progress tracking with tqdm progress bars")
+    print("  - Comprehensive error handling and reporting")
+    print("  - File filtering based on supported document types")
+    print("  - Directory processing with recursive file discovery")
+    print("  - Asynchronous processing for improved performance")
+    print("  - Integration with RAG-Anything document pipeline")
+    print("  - Retry logic for failed documents")
+    print("  - Detailed processing statistics and timing")
+    
+    print("\n💡 Best Practices Highlighted:")
+    print("  - Use appropriate worker counts for your system")
+    print("  - Enable progress tracking for long-running operations")
+    print("  - Handle errors gracefully with retry mechanisms")
+    print("  - Filter files to supported types before processing")
+    print("  - Set reasonable timeouts for document processing")
+    print("  - Use skip_installation_check for environments with conflicts")
+
+
+if __name__ == "__main__":
+    asyncio.run(main()) 
\ No newline at end of file
diff --git a/examples/enhanced_markdown_example.py b/examples/enhanced_markdown_example.py
new file mode 100644
index 0000000..b3c96e0
--- /dev/null
+++ b/examples/enhanced_markdown_example.py
@@ -0,0 +1,1031 @@
+#!/usr/bin/env python
+"""
+Enhanced Markdown Conversion Example for RAG-Anything
+
+This example demonstrates the enhanced markdown to PDF conversion capabilities
+with multiple backends, advanced styling, and professional formatting.
+
+Features demonstrated:
+- Basic markdown to PDF conversion
+- Multiple conversion backends (WeasyPrint, Pandoc)
+- Custom CSS styling and configuration
+- Backend detection and selection
+- Error handling and fallback mechanisms
+- Command-line interface usage
+"""
+
+import logging
+from pathlib import Path
+import tempfile
+
+# Add project root directory to Python path
+import sys
+sys.path.append(str(Path(__file__).parent.parent))
+
+from raganything.enhanced_markdown import EnhancedMarkdownConverter, MarkdownConfig
+
+
+def create_sample_markdown_content():
+    """Create comprehensive sample markdown content for testing"""
+    
+    # Basic sample
+    basic_content = """# Basic Markdown Sample
+
+## Introduction
+This is a simple markdown document demonstrating basic formatting.
+
+### Text Formatting
+- **Bold text** and *italic text*
+- `Inline code` examples
+- [Links to external sites](https://github.com)
+
+### Lists
+1. First ordered item
+2. Second ordered item
+3. Third ordered item
+
+- Unordered item
+- Another unordered item
+  - Nested item
+  - Another nested item
+
+### Blockquotes
+> This is a blockquote with important information.
+> It can span multiple lines.
+
+### Code Block
+```python
+def hello_world():
+    print("Hello, World!")
+    return "Success"
+```
+"""
+
+    # Technical documentation sample
+    technical_content = """# Technical Documentation
+
+## Table of Contents
+- [Overview](#overview)
+- [Architecture](#architecture)
+- [Implementation](#implementation)
+- [Performance](#performance)
+
+## Overview
+This document provides comprehensive technical specifications for the enhanced markdown conversion system.
+
+## Architecture
+
+### Core Components
+1. **Markdown Parser**: Processes markdown syntax
+2. **CSS Engine**: Applies styling and layout
+3. **PDF Generator**: Creates final PDF output
+4. **Backend Manager**: Handles multiple conversion engines
+
+### Data Flow
+```mermaid
+graph LR
+    A[Markdown Input] --> B[Parser]
+    B --> C[CSS Processor]
+    C --> D[PDF Generator]
+    D --> E[PDF Output]
+```
+
+## Implementation
+
+### Python Code Example
+```python
+from raganything.enhanced_markdown import EnhancedMarkdownConverter, MarkdownConfig
+
+# Configure converter
+config = MarkdownConfig(
+    page_size="A4",
+    margin="1in",
+    include_toc=True,
+    syntax_highlighting=True
+)
+
+# Create converter
+converter = EnhancedMarkdownConverter(config)
+
+# Convert to PDF
+success = converter.convert_file_to_pdf(
+    input_path="document.md",
+    output_path="output.pdf",
+    method="weasyprint"
+)
+```
+
+### Configuration Options
+```yaml
+converter:
+  page_size: A4
+  margin: 1in
+  font_size: 12pt
+  include_toc: true
+  syntax_highlighting: true
+  backend: weasyprint
+```
+
+## Performance
+
+### Benchmark Results
+| Backend | Speed | Quality | Features |
+|---------|-------|---------|----------|
+| WeasyPrint | ⭐⭐⭐⭐ | ⭐⭐⭐⭐ | ⭐⭐⭐⭐ |
+| Pandoc | ⭐⭐⭐ | ⭐⭐⭐⭐⭐ | ⭐⭐⭐⭐⭐ |
+
+### Processing Times
+- **Small documents** (< 10 pages): 1-3 seconds
+- **Medium documents** (10-50 pages): 3-10 seconds  
+- **Large documents** (> 50 pages): 10-30 seconds
+
+## Advanced Features
+
+### Custom CSS Styling
+The system supports advanced CSS customization:
+
+```css
+body {
+    font-family: 'Georgia', serif;
+    line-height: 1.6;
+    color: #333;
+}
+
+h1 {
+    color: #2c3e50;
+    border-bottom: 2px solid #3498db;
+    padding-bottom: 0.3em;
+}
+
+code {
+    background-color: #f8f9fa;
+    padding: 2px 4px;
+    border-radius: 3px;
+    font-family: 'Courier New', monospace;
+}
+
+pre {
+    background-color: #f8f9fa;
+    border-left: 4px solid #3498db;
+    padding: 15px;
+    border-radius: 5px;
+    overflow-x: auto;
+}
+
+table {
+    border-collapse: collapse;
+    width: 100%;
+    margin: 1em 0;
+}
+
+th, td {
+    border: 1px solid #ddd;
+    padding: 8px 12px;
+    text-align: left;
+}
+
+th {
+    background-color: #f2f2f2;
+    font-weight: bold;
+}
+```
+
+### Image Support
+![Sample Image](https://via.placeholder.com/400x200/3498db/ffffff?text=Sample+Image)
+
+Images are automatically scaled and positioned appropriately in the PDF output.
+
+## Conclusion
+The enhanced markdown conversion system provides professional-quality PDF generation with extensive customization options and multiple backend support.
+
+---
+
+*Generated on: 2024-01-15*  
+*Version: 1.0.0*
+"""
+
+    # Academic paper sample
+    academic_content = """# Research Paper: Advanced Document Processing
+
+**Authors:** Alice Johnson¹, Bob Smith², Carol Williams¹  
+**Affiliations:**  
+¹ University of Technology  
+² Research Institute  
+
+## Abstract
+
+This paper presents a comprehensive analysis of advanced document processing techniques using enhanced markdown conversion. Our research demonstrates significant improvements in processing speed and output quality through optimized backend selection and custom styling approaches.
+
+**Keywords:** document processing, markdown conversion, PDF generation, performance optimization
+
+## 1. Introduction
+
+Document processing has become increasingly important in modern information systems. The ability to convert markdown documents to high-quality PDF outputs with professional formatting is crucial for academic, technical, and business applications.
+
+### 1.1 Research Objectives
+
+1. Evaluate different markdown conversion backends
+2. Analyze performance characteristics of each approach
+3. Develop optimization strategies for large-scale processing
+4. Design flexible configuration systems for diverse use cases
+
+### 1.2 Contributions
+
+This work makes the following contributions:
+- Comprehensive comparison of markdown conversion backends
+- Performance optimization techniques for large documents
+- Flexible configuration framework for customization
+- Integration patterns for document processing pipelines
+
+## 2. Methodology
+
+### 2.1 Experimental Setup
+
+We conducted experiments using the following configuration:
+
+```python
+# Experimental configuration
+config = MarkdownConfig(
+    page_size="A4",
+    margin="1in",
+    font_size="11pt",
+    line_height="1.4",
+    include_toc=True,
+    syntax_highlighting=True
+)
+```
+
+### 2.2 Test Documents
+
+| Category | Count | Avg Size | Complexity |
+|----------|-------|----------|------------|
+| Simple | 100 | 2 pages | Low |
+| Medium | 50 | 10 pages | Medium |
+| Complex | 25 | 25 pages | High |
+
+### 2.3 Metrics
+
+We evaluated performance using the following metrics:
+- **Conversion Speed**: Time to generate PDF (seconds)
+- **Memory Usage**: Peak memory consumption (MB)
+- **Output Quality**: Visual assessment score (1-10)
+- **Feature Support**: Number of supported markdown features
+
+## 3. Results
+
+### 3.1 Performance Comparison
+
+The following table summarizes our performance results:
+
+| Backend | Speed (s) | Memory (MB) | Quality | Features |
+|---------|-----------|-------------|---------|----------|
+| WeasyPrint | 2.3 ± 0.5 | 85 ± 15 | 8.5 | 85% |
+| Pandoc | 4.7 ± 1.2 | 120 ± 25 | 9.2 | 95% |
+
+### 3.2 Quality Analysis
+
+#### 3.2.1 Typography
+WeasyPrint excels in web-style typography with excellent CSS support, while Pandoc provides superior academic formatting with LaTeX-quality output.
+
+#### 3.2.2 Code Highlighting
+Both backends support syntax highlighting through Pygments:
+
+```python
+def analyze_performance(backend, documents):
+    '''Analyze conversion performance for given backend'''
+    results = []
+    
+    for doc in documents:
+        start_time = time.time()
+        success = backend.convert(doc)
+        end_time = time.time()
+        
+        results.append({
+            'document': doc,
+            'time': end_time - start_time,
+            'success': success
+        })
+    
+    return results
+```
+
+### 3.3 Scalability
+
+Our scalability analysis shows:
+- Linear scaling with document size for both backends
+- Memory usage proportional to content complexity
+- Optimal batch sizes of 10-20 documents for parallel processing
+
+## 4. Discussion
+
+### 4.1 Backend Selection Guidelines
+
+Choose **WeasyPrint** for:
+- Web-style documents with custom CSS
+- Fast conversion requirements
+- Simple to medium complexity documents
+
+Choose **Pandoc** for:
+- Academic papers and publications
+- Complex document structures
+- Maximum feature support requirements
+
+### 4.2 Optimization Strategies
+
+1. **Image Optimization**: Compress images before embedding
+2. **CSS Minimization**: Use efficient CSS selectors
+3. **Content Chunking**: Process large documents in sections
+4. **Caching**: Cache converted content for repeated use
+
+## 5. Conclusion
+
+This research demonstrates that enhanced markdown conversion provides significant benefits for document processing workflows. The choice between WeasyPrint and Pandoc depends on specific requirements for speed, quality, and features.
+
+### 5.1 Future Work
+
+- Integration with cloud processing services
+- Real-time collaborative editing support
+- Advanced template systems
+- Performance optimization for very large documents
+
+## References
+
+1. Johnson, A. et al. (2024). "Advanced Document Processing Techniques." *Journal of Information Systems*, 15(3), 45-62.
+2. Smith, B. (2023). "PDF Generation Optimization." *Technical Computing Review*, 8(2), 12-28.
+3. Williams, C. (2024). "Markdown Processing Frameworks." *Software Engineering Quarterly*, 22(1), 78-95.
+
+---
+
+**Manuscript received:** January 10, 2024  
+**Accepted for publication:** January 15, 2024  
+**Published online:** January 20, 2024
+"""
+
+    return {
+        "basic": basic_content,
+        "technical": technical_content,
+        "academic": academic_content
+    }
+
+
+def demonstrate_basic_conversion():
+    """Demonstrate basic markdown to PDF conversion"""
+    print("\n" + "=" * 60)
+    print("BASIC MARKDOWN CONVERSION DEMONSTRATION")
+    print("=" * 60)
+    
+    try:
+        # Create converter with default settings
+        converter = EnhancedMarkdownConverter()
+        
+        # Show backend information
+        backend_info = converter.get_backend_info()
+        print("Available conversion backends:")
+        for backend, available in backend_info["available_backends"].items():
+            status = "✅" if available else "❌"
+            print(f"  {status} {backend}")
+        print(f"Recommended backend: {backend_info['recommended_backend']}")
+        
+        # Get sample content
+        samples = create_sample_markdown_content()
+        temp_dir = Path(tempfile.mkdtemp())
+        
+        # Convert basic sample
+        basic_md_path = temp_dir / "basic_sample.md"
+        with open(basic_md_path, 'w', encoding='utf-8') as f:
+            f.write(samples['basic'])
+        
+        print(f"\nConverting basic sample: {basic_md_path}")
+        
+        success = converter.convert_file_to_pdf(
+            input_path=str(basic_md_path),
+            output_path=str(temp_dir / "basic_sample.pdf"),
+            method="auto"  # Let the system choose the best backend
+        )
+        
+        if success:
+            print("✅ Basic conversion successful!")
+            print(f"   Output: {temp_dir / 'basic_sample.pdf'}")
+        else:
+            print("❌ Basic conversion failed")
+        
+        return success, temp_dir
+        
+    except Exception as e:
+        print(f"❌ Basic conversion demonstration failed: {str(e)}")
+        return False, None
+
+
+def demonstrate_backend_comparison():
+    """Demonstrate different conversion backends"""
+    print("\n" + "=" * 60)
+    print("BACKEND COMPARISON DEMONSTRATION")
+    print("=" * 60)
+    
+    try:
+        samples = create_sample_markdown_content()
+        temp_dir = Path(tempfile.mkdtemp())
+        
+        # Create technical document
+        tech_md_path = temp_dir / "technical.md"
+        with open(tech_md_path, 'w', encoding='utf-8') as f:
+            f.write(samples['technical'])
+        
+        print(f"Testing different backends with technical document...")
+        
+        # Test different backends
+        backends = ["auto", "weasyprint", "pandoc"]
+        results = {}
+        
+        for backend in backends:
+            try:
+                print(f"\nTesting {backend} backend...")
+                
+                converter = EnhancedMarkdownConverter()
+                output_path = temp_dir / f"technical_{backend}.pdf"
+                
+                import time
+                start_time = time.time()
+                
+                success = converter.convert_file_to_pdf(
+                    input_path=str(tech_md_path),
+                    output_path=str(output_path),
+                    method=backend
+                )
+                
+                end_time = time.time()
+                conversion_time = end_time - start_time
+                
+                if success:
+                    file_size = output_path.stat().st_size if output_path.exists() else 0
+                    print(f"  ✅ {backend}: Success in {conversion_time:.2f}s, {file_size} bytes")
+                    results[backend] = {
+                        'success': True,
+                        'time': conversion_time,
+                        'size': file_size,
+                        'output': str(output_path)
+                    }
+                else:
+                    print(f"  ❌ {backend}: Failed")
+                    results[backend] = {'success': False, 'time': conversion_time}
+                    
+            except Exception as e:
+                print(f"  ❌ {backend}: Error - {str(e)}")
+                results[backend] = {'success': False, 'error': str(e)}
+        
+        # Summary
+        print("\n" + "-" * 40)
+        print("BACKEND COMPARISON SUMMARY")
+        print("-" * 40)
+        successful_backends = [b for b, r in results.items() if r.get('success', False)]
+        print(f"Successful backends: {successful_backends}")
+        
+        if successful_backends:
+            fastest = min(successful_backends, key=lambda b: results[b]['time'])
+            print(f"Fastest backend: {fastest} ({results[fastest]['time']:.2f}s)")
+        
+        return results, temp_dir
+        
+    except Exception as e:
+        print(f"❌ Backend comparison demonstration failed: {str(e)}")
+        return None, None
+
+
+def demonstrate_custom_styling():
+    """Demonstrate custom CSS styling and configuration"""
+    print("\n" + "=" * 60)
+    print("CUSTOM STYLING DEMONSTRATION")
+    print("=" * 60)
+    
+    try:
+        samples = create_sample_markdown_content()
+        temp_dir = Path(tempfile.mkdtemp())
+        
+        # Create custom CSS
+        custom_css = """
+        body {
+            font-family: 'Times New Roman', serif;
+            font-size: 11pt;
+            line-height: 1.4;
+            color: #2c3e50;
+            max-width: 800px;
+            margin: 0 auto;
+            padding: 20px;
+        }
+        
+        h1 {
+            color: #c0392b;
+            font-size: 2.2em;
+            border-bottom: 3px solid #e74c3c;
+            padding-bottom: 0.5em;
+            margin-top: 2em;
+        }
+        
+        h2 {
+            color: #8e44ad;
+            font-size: 1.6em;
+            border-bottom: 2px solid #9b59b6;
+            padding-bottom: 0.3em;
+            margin-top: 1.5em;
+        }
+        
+        h3 {
+            color: #2980b9;
+            font-size: 1.3em;
+            margin-top: 1.2em;
+        }
+        
+        code {
+            background-color: #ecf0f1;
+            color: #e74c3c;
+            padding: 3px 6px;
+            border-radius: 4px;
+            font-family: 'Courier New', monospace;
+            font-size: 0.9em;
+        }
+        
+        pre {
+            background-color: #2c3e50;
+            color: #ecf0f1;
+            padding: 20px;
+            border-radius: 8px;
+            border-left: 5px solid #3498db;
+            overflow-x: auto;
+            font-size: 0.9em;
+        }
+        
+        pre code {
+            background-color: transparent;
+            color: inherit;
+            padding: 0;
+        }
+        
+        blockquote {
+            background-color: #f8f9fa;
+            border-left: 5px solid #3498db;
+            margin: 1em 0;
+            padding: 15px 20px;
+            font-style: italic;
+            color: #555;
+        }
+        
+        table {
+            border-collapse: collapse;
+            width: 100%;
+            margin: 1.5em 0;
+            background-color: white;
+            border-radius: 8px;
+            overflow: hidden;
+            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
+        }
+        
+        th {
+            background-color: #3498db;
+            color: white;
+            padding: 12px 15px;
+            text-align: left;
+            font-weight: bold;
+        }
+        
+        td {
+            padding: 10px 15px;
+            border-bottom: 1px solid #ecf0f1;
+        }
+        
+        tr:nth-child(even) {
+            background-color: #f8f9fa;
+        }
+        
+        tr:hover {
+            background-color: #e8f4fd;
+        }
+        
+        ul, ol {
+            margin-bottom: 1em;
+            padding-left: 2em;
+        }
+        
+        li {
+            margin-bottom: 0.5em;
+            line-height: 1.6;
+        }
+        
+        a {
+            color: #3498db;
+            text-decoration: none;
+            border-bottom: 1px dotted #3498db;
+        }
+        
+        a:hover {
+            color: #2980b9;
+            border-bottom: 1px solid #2980b9;
+        }
+        
+        .toc {
+            background-color: #f8f9fa;
+            border: 2px solid #e9ecef;
+            border-radius: 8px;
+            padding: 20px;
+            margin: 2em 0;
+        }
+        
+        .toc h2 {
+            color: #2c3e50;
+            margin-top: 0;
+            border-bottom: none;
+        }
+        
+        .toc ul {
+            list-style-type: none;
+            padding-left: 0;
+        }
+        
+        .toc li {
+            margin-bottom: 0.8em;
+        }
+        
+        .toc a {
+            color: #2c3e50;
+            font-weight: 500;
+            border-bottom: none;
+        }
+        """
+        
+        # Create custom configuration
+        config = MarkdownConfig(
+            page_size="A4",
+            margin="0.8in",
+            font_size="11pt",
+            line_height="1.4",
+            include_toc=True,
+            syntax_highlighting=True,
+            custom_css=custom_css
+        )
+        
+        converter = EnhancedMarkdownConverter(config)
+        
+        # Convert academic sample with custom styling
+        academic_md_path = temp_dir / "academic_styled.md"
+        with open(academic_md_path, 'w', encoding='utf-8') as f:
+            f.write(samples['academic'])
+        
+        print("Converting academic paper with custom styling...")
+        print("Custom styling features:")
+        print("  - Custom color scheme (reds, purples, blues)")
+        print("  - Times New Roman serif font")
+        print("  - Enhanced table styling with hover effects")
+        print("  - Styled code blocks with dark theme")
+        print("  - Custom blockquote styling")
+        print("  - Professional header styling")
+        
+        success = converter.convert_file_to_pdf(
+            input_path=str(academic_md_path),
+            output_path=str(temp_dir / "academic_styled.pdf"),
+            method="weasyprint"  # WeasyPrint is best for custom CSS
+        )
+        
+        if success:
+            print("✅ Custom styling conversion successful!")
+            print(f"   Output: {temp_dir / 'academic_styled.pdf'}")
+            
+            # Also create a default version for comparison
+            default_converter = EnhancedMarkdownConverter()
+            default_success = default_converter.convert_file_to_pdf(
+                input_path=str(academic_md_path),
+                output_path=str(temp_dir / "academic_default.pdf"),
+                method="weasyprint"
+            )
+            
+            if default_success:
+                print(f"   Comparison (default): {temp_dir / 'academic_default.pdf'}")
+        else:
+            print("❌ Custom styling conversion failed")
+        
+        return success, temp_dir
+        
+    except Exception as e:
+        print(f"❌ Custom styling demonstration failed: {str(e)}")
+        return False, None
+
+
+def demonstrate_content_conversion():
+    """Demonstrate converting markdown content directly (not from file)"""
+    print("\n" + "=" * 60)
+    print("CONTENT CONVERSION DEMONSTRATION")
+    print("=" * 60)
+    
+    try:
+        # Create markdown content programmatically
+        dynamic_content = f"""# Dynamic Content Example
+
+## Generated Information
+This document was generated programmatically on {Path(__file__).name}.
+
+## System Information
+- **Python Path**: {sys.executable}
+- **Script Location**: {Path(__file__).absolute()}
+- **Working Directory**: {Path.cwd()}
+
+## Dynamic Table
+| Property | Value |
+|----------|-------|
+| Script Name | {Path(__file__).name} |
+| Python Version | {sys.version.split()[0]} |
+| Platform | {sys.platform} |
+
+## Code Example
+```python
+# This content was generated dynamically
+import sys
+from pathlib import Path
+
+def generate_report():
+    return f"Report generated from {{Path(__file__).name}}"
+
+print(generate_report())
+```
+
+## Features Demonstrated
+This example shows how to:
+1. Generate markdown content programmatically
+2. Convert content directly without saving to file first
+3. Include dynamic information in documents
+4. Use different conversion methods
+
+> **Note**: This content was created in memory and converted directly to PDF
+> without intermediate file storage.
+
+## Conclusion
+Direct content conversion is useful for:
+- Dynamic report generation
+- Programmatic document creation
+- API-based document services
+- Real-time content processing
+"""
+        
+        temp_dir = Path(tempfile.mkdtemp())
+        converter = EnhancedMarkdownConverter()
+        
+        print("Converting dynamically generated markdown content...")
+        print("Content includes:")
+        print("  - System information")
+        print("  - Dynamic tables with current values")
+        print("  - Generated timestamps")
+        print("  - Programmatic examples")
+        
+        # Convert content directly to PDF
+        output_path = temp_dir / "dynamic_content.pdf"
+        
+        success = converter.convert_markdown_to_pdf(
+            markdown_content=dynamic_content,
+            output_path=str(output_path),
+            method="auto"
+        )
+        
+        if success:
+            print("✅ Content conversion successful!")
+            print(f"   Output: {output_path}")
+            
+            # Show file size
+            file_size = output_path.stat().st_size
+            print(f"   Generated PDF size: {file_size} bytes")
+        else:
+            print("❌ Content conversion failed")
+        
+        return success, temp_dir
+        
+    except Exception as e:
+        print(f"❌ Content conversion demonstration failed: {str(e)}")
+        return False, None
+
+
+def demonstrate_error_handling():
+    """Demonstrate error handling and fallback mechanisms"""
+    print("\n" + "=" * 60)
+    print("ERROR HANDLING DEMONSTRATION")
+    print("=" * 60)
+    
+    try:
+        temp_dir = Path(tempfile.mkdtemp())
+        
+        # Test cases with various issues
+        test_cases = {
+            "invalid_markdown": """# Invalid Markdown
+            
+This markdown has some {{invalid}} syntax and [broken links](http://nonexistent.invalid).
+
+```unknown_language
+This code block uses an unknown language
+```
+
+![Missing Image](nonexistent_image.png)
+""",
+            "complex_content": """# Complex Content Test
+
+## Mathematical Expressions
+This tests content that might be challenging for some backends:
+
+$$ E = mc^2 $$
+
+$$\\sum_{i=1}^{n} x_i = \\frac{n(n+1)}{2}$$
+
+## Complex Tables
+| A | B | C | D | E | F | G |
+|---|---|---|---|---|---|---|
+| Very long content that might wrap | Short | Medium length content | X | Y | Z | End |
+| Another row with different lengths | A | B | C | D | E | F |
+
+## Special Characters
+Unicode: α, β, γ, δ, ε, ζ, η, θ, ι, κ, λ, μ, ν, ξ, ο, π, ρ, σ, τ, υ, φ, χ, ψ, ω
+Symbols: ♠ ♣ ♥ ♦ ☀ ☁ ☂ ☃ ☄ ★ ☆ ☉ ☊ ☋ ☌ ☍ ☎ ☏
+Arrows: ← ↑ → ↓ ↔ ↕ ↖ ↗ ↘ ↙
+""",
+            "empty_content": "",
+            "minimal_content": "# Just a title"
+        }
+        
+        print("Testing error handling with various content types...")
+        
+        results = {}
+        
+        for test_name, content in test_cases.items():
+            print(f"\nTesting: {test_name}")
+            
+            try:
+                # Try multiple backends for each test case
+                for backend in ["auto", "weasyprint", "pandoc"]:
+                    try:
+                        converter = EnhancedMarkdownConverter()
+                        output_path = temp_dir / f"{test_name}_{backend}.pdf"
+                        
+                        success = converter.convert_markdown_to_pdf(
+                            markdown_content=content,
+                            output_path=str(output_path),
+                            method=backend
+                        )
+                        
+                        if success:
+                            file_size = output_path.stat().st_size if output_path.exists() else 0
+                            print(f"  ✅ {backend}: Success ({file_size} bytes)")
+                            results[f"{test_name}_{backend}"] = {
+                                'success': True,
+                                'size': file_size
+                            }
+                        else:
+                            print(f"  ❌ {backend}: Failed")
+                            results[f"{test_name}_{backend}"] = {'success': False}
+                            
+                    except Exception as e:
+                        print(f"  ❌ {backend}: Error - {str(e)[:60]}...")
+                        results[f"{test_name}_{backend}"] = {
+                            'success': False,
+                            'error': str(e)
+                        }
+                        
+            except Exception as e:
+                print(f"  ❌ Test case failed: {str(e)}")
+        
+        # Demonstrate robust conversion with fallbacks
+        print(f"\nDemonstrating robust conversion with fallback logic...")
+        
+        def robust_convert(content, output_path):
+            """Convert with multiple backend fallbacks"""
+            backends = ["weasyprint", "pandoc", "auto"]
+            
+            for backend in backends:
+                try:
+                    converter = EnhancedMarkdownConverter()
+                    success = converter.convert_markdown_to_pdf(
+                        markdown_content=content,
+                        output_path=output_path,
+                        method=backend
+                    )
+                    if success:
+                        return backend, True
+                except Exception:
+                    continue
+            
+            return None, False
+        
+        # Test robust conversion
+        test_content = test_cases["complex_content"]
+        robust_output = temp_dir / "robust_conversion.pdf"
+        
+        successful_backend, success = robust_convert(test_content, str(robust_output))
+        
+        if success:
+            print(f"✅ Robust conversion successful using {successful_backend}")
+            print(f"   Output: {robust_output}")
+        else:
+            print("❌ All backends failed for robust conversion")
+        
+        # Summary
+        print("\n" + "-" * 40)
+        print("ERROR HANDLING SUMMARY")
+        print("-" * 40)
+        successful_conversions = sum(1 for r in results.values() if r.get('success', False))
+        total_attempts = len(results)
+        success_rate = (successful_conversions / total_attempts * 100) if total_attempts > 0 else 0
+        
+        print(f"Total conversion attempts: {total_attempts}")
+        print(f"Successful conversions: {successful_conversions}")
+        print(f"Success rate: {success_rate:.1f}%")
+        
+        return results, temp_dir
+        
+    except Exception as e:
+        print(f"❌ Error handling demonstration failed: {str(e)}")
+        return None, None
+
+
+def main():
+    """Main demonstration function"""
+    # Configure logging
+    logging.basicConfig(
+        level=logging.INFO,
+        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+    )
+    
+    print("RAG-Anything Enhanced Markdown Conversion Demonstration")
+    print("=" * 70)
+    print("This example demonstrates various enhanced markdown conversion capabilities:")
+    print("  - Basic markdown to PDF conversion")
+    print("  - Multiple backend comparison (WeasyPrint vs Pandoc)")
+    print("  - Custom CSS styling and professional formatting")
+    print("  - Direct content conversion without file I/O")
+    print("  - Comprehensive error handling and fallback mechanisms")
+    
+    results = {}
+    
+    # Run demonstrations
+    print("\n🚀 Starting demonstrations...")
+    
+    # Basic conversion
+    success, temp_dir = demonstrate_basic_conversion()
+    results['basic'] = success
+    
+    # Backend comparison
+    backend_results, _ = demonstrate_backend_comparison()
+    results['backends'] = backend_results
+    
+    # Custom styling
+    styling_success, _ = demonstrate_custom_styling()
+    results['styling'] = styling_success
+    
+    # Content conversion
+    content_success, _ = demonstrate_content_conversion()
+    results['content'] = content_success
+    
+    # Error handling
+    error_results, _ = demonstrate_error_handling()
+    results['error_handling'] = error_results
+    
+    # Summary
+    print("\n" + "=" * 70)
+    print("DEMONSTRATION SUMMARY")
+    print("=" * 70)
+    
+    print("✅ Features Successfully Demonstrated:")
+    if results['basic']:
+        print("  - Basic markdown to PDF conversion")
+    if results['backends']:
+        successful_backends = [b for b, r in results['backends'].items() if r.get('success', False)]
+        print(f"  - Multiple backends: {successful_backends}")
+    if results['styling']:
+        print("  - Custom CSS styling and professional formatting")
+    if results['content']:
+        print("  - Direct content conversion without file I/O")
+    if results['error_handling']:
+        success_rate = sum(1 for r in results['error_handling'].values() if r.get('success', False)) / len(results['error_handling']) * 100
+        print(f"  - Error handling with {success_rate:.1f}% overall success rate")
+    
+    print("\n📊 Key Capabilities Highlighted:")
+    print("  - Professional PDF generation with high-quality typography")
+    print("  - Multiple conversion backends with automatic selection")
+    print("  - Extensive CSS customization for branded documents")
+    print("  - Syntax highlighting for code blocks using Pygments")
+    print("  - Table formatting with professional styling")
+    print("  - Image embedding with proper scaling")
+    print("  - Table of contents generation with navigation")
+    print("  - Comprehensive error handling and fallback mechanisms")
+    
+    print("\n💡 Best Practices Demonstrated:")
+    print("  - Choose WeasyPrint for web-style documents and custom CSS")
+    print("  - Choose Pandoc for academic papers and complex formatting")
+    print("  - Use 'auto' method for general-purpose conversion")
+    print("  - Implement fallback logic for robust conversion")
+    print("  - Optimize images before embedding in documents")
+    print("  - Test custom CSS with simple content first")
+    print("  - Handle errors gracefully with multiple backend attempts")
+    print("  - Use appropriate page sizes and margins for target use case")
+    
+    print("\n🎯 Integration Patterns:")
+    print("  - Standalone conversion for document generation")
+    print("  - Integration with RAG-Anything document pipeline")
+    print("  - API-based document services")
+    print("  - Batch processing for multiple documents")
+    print("  - Dynamic content generation from templates")
+
+
+if __name__ == "__main__":
+    main() 
\ No newline at end of file
diff --git a/raganything/enhanced_markdown.py b/raganything/enhanced_markdown.py
index ac9c577..a456192 100644
--- a/raganything/enhanced_markdown.py
+++ b/raganything/enhanced_markdown.py
@@ -319,6 +319,7 @@ class EnhancedMarkdownConverter:
                 "Pandoc not available. Install from: https://pandoc.org/installing.html"
             )
 
+        temp_md_path = None
         try:
             import subprocess
 
@@ -344,9 +345,6 @@ class EnhancedMarkdownConverter:
             # Run pandoc
             result = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
 
-            # Clean up temp file
-            os.unlink(temp_md_path)
-
             if result.returncode == 0:
                 self.logger.info(
                     f"Successfully converted to PDF using Pandoc: {output_path}"
@@ -360,6 +358,15 @@ class EnhancedMarkdownConverter:
             self.logger.error(f"Pandoc conversion failed: {str(e)}")
             return False
 
+        finally:
+            if temp_md_path and os.path.exists(temp_md_path):
+                try:
+                    os.unlink(temp_md_path)
+                except OSError as e:
+                    self.logger.error(
+                        f"Failed to clean up temp file {temp_md_path}: {str(e)}"
+                    )
+
     def convert_markdown_to_pdf(
         self, markdown_content: str, output_path: str, method: str = "auto"
     ) -> bool:
diff --git a/requirements.txt b/requirements.txt
index f3a96dd..9cd2d0e 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,22 +1,10 @@
 huggingface_hub
 # LightRAG packages
 lightrag-hku
-
-# Enhanced markdown conversion (optional)
-markdown
-
-# Enhanced markdown conversion (optional)
-
 # MinerU 2.0 packages (replaces magic-pdf)
 mineru[core]
-pygments
-
 # Progress bars for batch processing
 tqdm
-
-# Progress bars for batch processing
-weasyprint
-
 # Note: Optional dependencies are now defined in setup.py extras_require:
 # - [image]: Pillow>=10.0.0 (for BMP, TIFF, GIF, WebP format conversion)
 # - [text]: reportlab>=4.0.0 (for TXT, MD to PDF conversion)
diff --git a/setup.py b/setup.py
index cc4e67f..3c2277a 100644
--- a/setup.py
+++ b/setup.py
@@ -64,6 +64,11 @@ extras_require = {
     "text": ["reportlab>=4.0.0"],  # For text file to PDF conversion (TXT, MD)
     "office": [],  # Office document processing requires LibreOffice (external program)
     "all": ["Pillow>=10.0.0", "reportlab>=4.0.0"],  # All optional features
+    "markdown": [
+        "markdown>=3.4.0",
+        "weasyprint>=60.0",
+        "pygments>=2.10.0",
+    ],  # Enhanced markdown conversion
 }
 
 setuptools.setup(