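Fix lint: strip trailing whitespace, normalize string quotes to double quotes, add trailing commas, drop the f-prefix from strings without placeholders, and wrap overlong lines in docs and examples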

2025-08-20 19:01:34 +03:00 · 2025-07-29 17:07:15 +08:00
parent 4f900db761
commit 5e56140300
5 changed files with 365 additions and 330 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -76,4 +76,4 @@ memory-bank/
 # AI
 .claude/
 .cursor/
-CLAUDE.md
+CLAUDE.md
--- a/docs/batch_processing.md
+++ b/docs/batch_processing.md
@@ -67,14 +67,14 @@ async def async_batch_processing():
        max_workers=4,
        show_progress=True
    )
-    
+
    # Process files asynchronously
    result = await batch_parser.process_batch_async(
        file_paths=["doc1.pdf", "doc2.docx"],
        output_dir="./output",
        parse_method="auto"
    )
-    
+
    return result

 # Run async processing
@@ -170,7 +170,7 @@ class BatchProcessingResult:
    processing_time: float           # Total processing time in seconds
    errors: Dict[str, str]           # Error messages for failed files
    output_dir: str                  # Output directory used
-    
+
    def summary(self) -> str:        # Human-readable summary
    def success_rate(self) -> float: # Success rate as percentage
 ```
@@ -181,16 +181,16 @@ class BatchProcessingResult:
 class BatchParser:
    def __init__(self, parser_type: str = "mineru", max_workers: int = 4, ...):
        """Initialize batch parser"""
-    
+
    def get_supported_extensions(self) -> List[str]:
        """Get list of supported file extensions"""
-    
+
    def filter_supported_files(self, file_paths: List[str], recursive: bool = True) -> List[str]:
        """Filter files to only supported types"""
-    
+
    def process_batch(self, file_paths: List[str], output_dir: str, ...) -> BatchProcessingResult:
        """Process files in batch"""
-    
+
    async def process_batch_async(self, file_paths: List[str], output_dir: str, ...) -> BatchProcessingResult:
        """Process files in batch asynchronously"""
 ```
@@ -312,16 +312,16 @@ result = batch_parser.process_batch(
 ```python
 def process_with_retry(file_paths, max_retries=3):
    """Process files with retry logic"""
-    
+
    for attempt in range(max_retries):
        result = batch_parser.process_batch(file_paths, "./output")
-        
+
        if not result.failed_files:
            break  # All files processed successfully
-        
+
        print(f"Attempt {attempt + 1}: {len(result.failed_files)} files failed")
        file_paths = result.failed_files  # Retry failed files
-    
+
    return result
 ```

@@ -338,4 +338,4 @@ def process_with_retry(file_paths, max_retries=3):

 ## Conclusion

-The batch processing feature significantly improves RAG-Anything's throughput for large document collections. It provides flexible configuration options, comprehensive error handling, and seamless integration with the existing RAG-Anything pipeline. 
+The batch processing feature significantly improves RAG-Anything's throughput for large document collections. It provides flexible configuration options, comprehensive error handling, and seamless integration with the existing RAG-Anything pipeline.
--- a/docs/enhanced_markdown.md
+++ b/docs/enhanced_markdown.md
@@ -96,21 +96,21 @@ config = MarkdownConfig(
    line_height="1.5",        # Line spacing
    include_toc=True,         # Generate table of contents
    syntax_highlighting=True, # Enable code syntax highlighting
-    
+
    # Custom CSS styling
    custom_css="""
-    body { 
-        font-family: 'Georgia', serif; 
+    body {
+        font-family: 'Georgia', serif;
        color: #333;
    }
-    h1 { 
-        color: #2c3e50; 
-        border-bottom: 2px solid #3498db; 
+    h1 {
+        color: #2c3e50;
+        border-bottom: 2px solid #3498db;
        padding-bottom: 0.3em;
    }
-    code { 
-        background-color: #f8f9fa; 
-        padding: 2px 4px; 
+    code {
+        background-color: #f8f9fa;
+        padding: 2px 4px;
        border-radius: 3px;
    }
    pre {
@@ -232,22 +232,22 @@ class MarkdownConfig:
    margin: str = "1in"                # CSS margin format
    font_size: str = "12pt"            # Base font size
    line_height: str = "1.5"           # Line spacing multiplier
-    
+
    # Content options
    include_toc: bool = True           # Generate table of contents
    syntax_highlighting: bool = True   # Enable code highlighting
    image_max_width: str = "100%"      # Maximum image width
    table_style: str = "..."           # Default table CSS
-    
+
    # Styling
    css_file: Optional[str] = None     # External CSS file path
    custom_css: Optional[str] = None   # Inline CSS content
    template_file: Optional[str] = None # Custom HTML template
-    
+
    # Output options
    output_format: str = "pdf"         # Currently only PDF supported
    output_dir: Optional[str] = None   # Output directory
-    
+
    # Metadata
    metadata: Optional[Dict[str, str]] = None  # Document metadata
 ```
@@ -347,7 +347,7 @@ This document provides comprehensive technical specifications.

 ### System Components
 1. **Parser Engine**: Handles document processing
-2. **Storage Layer**: Manages data persistence  
+2. **Storage Layer**: Manages data persistence
 3. **Query Interface**: Provides search capabilities

 ### Code Implementation
@@ -437,8 +437,8 @@ sudo apt-get install pandoc wkhtmltopdf
 # Use web-safe fonts
 config = MarkdownConfig(
    custom_css="""
-    body { 
-        font-family: 'Arial', 'Helvetica', sans-serif; 
+    body {
+        font-family: 'Arial', 'Helvetica', sans-serif;
    }
    """
 )
@@ -468,10 +468,10 @@ result = converter.convert_file_to_pdf("test.md", "test.pdf")
 def robust_conversion(input_path, output_path):
    """Convert with fallback backends"""
    converter = EnhancedMarkdownConverter()
-    
+
    # Try backends in order of preference
    backends = ["weasyprint", "pandoc", "auto"]
-    
+
    for backend in backends:
        try:
            success = converter.convert_file_to_pdf(
@@ -485,7 +485,7 @@ def robust_conversion(input_path, output_path):
        except Exception as e:
            print(f"❌ {backend} failed: {str(e)}")
            continue
-    
+
    print("❌ All backends failed")
    return False
 ```
@@ -498,19 +498,19 @@ def robust_conversion(input_path, output_path):
 class EnhancedMarkdownConverter:
    def __init__(self, config: Optional[MarkdownConfig] = None):
        """Initialize converter with optional configuration"""
-    
+
    def convert_file_to_pdf(self, input_path: str, output_path: str, method: str = "auto") -> bool:
        """Convert markdown file to PDF"""
-    
+
    def convert_markdown_to_pdf(self, markdown_content: str, output_path: str, method: str = "auto") -> bool:
        """Convert markdown content to PDF"""
-    
+
    def get_backend_info(self) -> Dict[str, Any]:
        """Get information about available backends"""
-    
+
    def convert_with_weasyprint(self, markdown_content: str, output_path: str) -> bool:
        """Convert using WeasyPrint backend"""
-    
+
    def convert_with_pandoc(self, markdown_content: str, output_path: str) -> bool:
        """Convert using Pandoc backend"""
 ```
@@ -549,4 +549,4 @@ class EnhancedMarkdownConverter:

 ## Conclusion

-The enhanced markdown conversion feature provides professional-quality PDF generation with flexible styling options and multiple backend support. It seamlessly integrates with RAG-Anything's document processing pipeline while offering standalone functionality for markdown-to-PDF conversion needs. 
+The enhanced markdown conversion feature provides professional-quality PDF generation with flexible styling options and multiple backend support. It seamlessly integrates with RAG-Anything's document processing pipeline while offering standalone functionality for markdown-to-PDF conversion needs.
--- a/examples/batch_processing_example.py
+++ b/examples/batch_processing_example.py
@@ -21,6 +21,7 @@ import time

 # Add project root directory to Python path
 import sys
+
 sys.path.append(str(Path(__file__).parent.parent))

 from raganything import RAGAnything, RAGAnythingConfig
@@ -31,13 +32,13 @@ def create_sample_documents():
    """Create sample documents for batch processing testing"""
    temp_dir = Path(tempfile.mkdtemp())
    sample_files = []
-    
+
    # Create various document types
    documents = {
        "document1.txt": "This is a simple text document for testing batch processing.",
        "document2.txt": "Another text document with different content.",
        "document3.md": """# Markdown Document
-        
+
 ## Introduction
 This is a markdown document for testing.

@@ -92,16 +93,16 @@ Batch processing is essential for large-scale document processing.

 ### Next Steps
 Continue development and testing of batch processing features.
-"""
+""",
    }
-    
+
    # Create files
    for filename, content in documents.items():
        file_path = temp_dir / filename
-        with open(file_path, 'w', encoding='utf-8') as f:
+        with open(file_path, "w", encoding="utf-8") as f:
            f.write(content)
        sample_files.append(str(file_path))
-    
+
    return sample_files, temp_dir


@@ -110,54 +111,54 @@ def demonstrate_basic_batch_processing():
    print("\n" + "=" * 60)
    print("BASIC BATCH PROCESSING DEMONSTRATION")
    print("=" * 60)
-    
+
    # Create sample documents
    sample_files, temp_dir = create_sample_documents()
-    
+
    try:
        print(f"Created {len(sample_files)} sample documents in: {temp_dir}")
        for file_path in sample_files:
            print(f"  - {Path(file_path).name}")
-        
+
        # Create batch parser
        batch_parser = BatchParser(
            parser_type="mineru",
            max_workers=3,
            show_progress=True,
            timeout_per_file=60,
-            skip_installation_check=True  # Skip installation check for demo
+            skip_installation_check=True,  # Skip installation check for demo
        )
-        
-        print(f"\nBatch parser configured:")
-        print(f"  - Parser type: mineru")
-        print(f"  - Max workers: 3")
-        print(f"  - Progress tracking: enabled")
-        print(f"  - Timeout per file: 60 seconds")
-        
+
+        print("\nBatch parser configured:")
+        print("  - Parser type: mineru")
+        print("  - Max workers: 3")
+        print("  - Progress tracking: enabled")
+        print("  - Timeout per file: 60 seconds")
+
        # Check supported extensions
        supported_extensions = batch_parser.get_supported_extensions()
        print(f"  - Supported extensions: {supported_extensions}")
-        
+
        # Filter files to supported types
        supported_files = batch_parser.filter_supported_files(sample_files)
-        print(f"\nFile filtering results:")
+        print("\nFile filtering results:")
        print(f"  - Total files: {len(sample_files)}")
        print(f"  - Supported files: {len(supported_files)}")
-        
+
        # Process batch
        output_dir = temp_dir / "batch_output"
-        print(f"\nStarting batch processing...")
+        print("\nStarting batch processing...")
        print(f"Output directory: {output_dir}")
-        
+
        start_time = time.time()
        result = batch_parser.process_batch(
            file_paths=supported_files,
            output_dir=str(output_dir),
            parse_method="auto",
-            recursive=False
+            recursive=False,
        )
        processing_time = time.time() - start_time
-        
+
        # Display results
        print("\n" + "-" * 40)
        print("BATCH PROCESSING RESULTS")
@@ -165,20 +166,20 @@ def demonstrate_basic_batch_processing():
        print(result.summary())
        print(f"Total processing time: {processing_time:.2f} seconds")
        print(f"Success rate: {result.success_rate:.1f}%")
-        
+
        if result.successful_files:
-            print(f"\nSuccessfully processed files:")
+            print("\nSuccessfully processed files:")
            for file_path in result.successful_files:
                print(f"  ✅ {Path(file_path).name}")
-        
+
        if result.failed_files:
-            print(f"\nFailed files:")
+            print("\nFailed files:")
            for file_path in result.failed_files:
                error = result.errors.get(file_path, "Unknown error")
                print(f"  ❌ {Path(file_path).name}: {error}")
-        
+
        return result
-        
+
    except Exception as e:
        print(f"❌ Batch processing demonstration failed: {str(e)}")
        return None
@@ -189,33 +190,33 @@ async def demonstrate_async_batch_processing():
    print("\n" + "=" * 60)
    print("ASYNCHRONOUS BATCH PROCESSING DEMONSTRATION")
    print("=" * 60)
-    
+
    # Create sample documents
    sample_files, temp_dir = create_sample_documents()
-    
+
    try:
        print(f"Processing {len(sample_files)} documents asynchronously...")
-        
+
        # Create batch parser
        batch_parser = BatchParser(
            parser_type="mineru",
            max_workers=2,
            show_progress=True,
-            skip_installation_check=True
+            skip_installation_check=True,
        )
-        
+
        # Process batch asynchronously
        output_dir = temp_dir / "async_output"
-        
+
        start_time = time.time()
        result = await batch_parser.process_batch_async(
            file_paths=sample_files,
            output_dir=str(output_dir),
            parse_method="auto",
-            recursive=False
+            recursive=False,
        )
        processing_time = time.time() - start_time
-        
+
        # Display results
        print("\n" + "-" * 40)
        print("ASYNC BATCH PROCESSING RESULTS")
@@ -223,9 +224,9 @@ async def demonstrate_async_batch_processing():
        print(result.summary())
        print(f"Async processing time: {processing_time:.2f} seconds")
        print(f"Success rate: {result.success_rate:.1f}%")
-        
+
        return result
-        
+
    except Exception as e:
        print(f"❌ Async batch processing demonstration failed: {str(e)}")
        return None
@@ -236,10 +237,10 @@ async def demonstrate_rag_integration():
    print("\n" + "=" * 60)
    print("RAG-ANYTHING BATCH INTEGRATION DEMONSTRATION")
    print("=" * 60)
-    
+
    # Create sample documents
    sample_files, temp_dir = create_sample_documents()
-    
+
    try:
        # Initialize RAG-Anything with temporary storage
        config = RAGAnythingConfig(
@@ -247,20 +248,20 @@ async def demonstrate_rag_integration():
            enable_image_processing=True,
            enable_table_processing=True,
            enable_equation_processing=True,
-            max_concurrent_files=2
+            max_concurrent_files=2,
        )
-        
+
        rag = RAGAnything(config=config)
-        
+
        print("RAG-Anything initialized with batch processing capabilities")
-        
+
        # Show available batch methods
-        batch_methods = [method for method in dir(rag) if 'batch' in method.lower()]
+        batch_methods = [method for method in dir(rag) if "batch" in method.lower()]
        print(f"Available batch methods: {batch_methods}")
-        
+
        # Demonstrate batch processing with RAG integration
        print(f"\nProcessing {len(sample_files)} documents with RAG integration...")
-        
+
        # Use the RAG-integrated batch processing
        try:
            # Process documents in batch
@@ -268,40 +269,46 @@ async def demonstrate_rag_integration():
                file_paths=sample_files,
                output_dir=str(temp_dir / "rag_batch_output"),
                max_workers=2,
-                show_progress=True
+                show_progress=True,
            )
-            
+
            print("\n" + "-" * 40)
            print("RAG BATCH PROCESSING RESULTS")
            print("-" * 40)
            print(result.summary())
            print(f"Success rate: {result.success_rate:.1f}%")
-            
+
            # Demonstrate batch processing with full RAG integration
-            print(f"\nProcessing documents with full RAG integration...")
-            
+            print("\nProcessing documents with full RAG integration...")
+
            rag_result = await rag.process_documents_with_rag_batch(
                file_paths=sample_files[:2],  # Process subset for demo
                output_dir=str(temp_dir / "rag_full_output"),
                max_workers=1,
-                show_progress=True
+                show_progress=True,
            )
-            
+
            print("\n" + "-" * 40)
            print("FULL RAG INTEGRATION RESULTS")
            print("-" * 40)
            print(f"Parse result: {rag_result['parse_result'].summary()}")
-            print(f"RAG processing time: {rag_result['total_processing_time']:.2f} seconds")
-            print(f"Successfully processed with RAG: {rag_result['successful_rag_files']}")
+            print(
+                f"RAG processing time: {rag_result['total_processing_time']:.2f} seconds"
+            )
+            print(
+                f"Successfully processed with RAG: {rag_result['successful_rag_files']}"
+            )
            print(f"Failed RAG processing: {rag_result['failed_rag_files']}")
-            
+
            return rag_result
-            
+
        except Exception as e:
            print(f"⚠️ RAG integration demo completed with limitations: {str(e)}")
-            print("Note: This is expected in environments without full API configuration")
+            print(
+                "Note: This is expected in environments without full API configuration"
+            )
            return None
-            
+
    except Exception as e:
        print(f"❌ RAG integration demonstration failed: {str(e)}")
        return None
@@ -312,79 +319,79 @@ def demonstrate_directory_processing():
    print("\n" + "=" * 60)
    print("DIRECTORY PROCESSING DEMONSTRATION")
    print("=" * 60)
-    
+
    # Create a directory structure with nested files
    temp_dir = Path(tempfile.mkdtemp())
-    
+
    # Create main directory files
    main_files = {
        "overview.txt": "Main directory overview document",
-        "readme.md": "# Project README\n\nThis is the main project documentation."
+        "readme.md": "# Project README\n\nThis is the main project documentation.",
    }
-    
+
    # Create subdirectory
    sub_dir = temp_dir / "subdirectory"
    sub_dir.mkdir()
-    
+
    sub_files = {
        "details.txt": "Detailed information in subdirectory",
-        "notes.md": "# Notes\n\nAdditional notes and information."
+        "notes.md": "# Notes\n\nAdditional notes and information.",
    }
-    
+
    # Write all files
    all_files = []
    for filename, content in main_files.items():
        file_path = temp_dir / filename
-        with open(file_path, 'w', encoding='utf-8') as f:
+        with open(file_path, "w", encoding="utf-8") as f:
            f.write(content)
        all_files.append(str(file_path))
-    
+
    for filename, content in sub_files.items():
        file_path = sub_dir / filename
-        with open(file_path, 'w', encoding='utf-8') as f:
+        with open(file_path, "w", encoding="utf-8") as f:
            f.write(content)
        all_files.append(str(file_path))
-    
+
    try:
-        print(f"Created directory structure:")
+        print("Created directory structure:")
        print(f"  Main directory: {temp_dir}")
        print(f"  Files in main: {list(main_files.keys())}")
        print(f"  Subdirectory: {sub_dir}")
        print(f"  Files in sub: {list(sub_files.keys())}")
-        
+
        # Create batch parser
        batch_parser = BatchParser(
            parser_type="mineru",
            max_workers=2,
            show_progress=True,
-            skip_installation_check=True
+            skip_installation_check=True,
        )
-        
+
        # Process entire directory recursively
-        print(f"\nProcessing entire directory recursively...")
-        
+        print("\nProcessing entire directory recursively...")
+
        result = batch_parser.process_batch(
            file_paths=[str(temp_dir)],  # Pass directory path
            output_dir=str(temp_dir / "directory_output"),
            parse_method="auto",
-            recursive=True  # Include subdirectories
+            recursive=True,  # Include subdirectories
        )
-        
+
        print("\n" + "-" * 40)
        print("DIRECTORY PROCESSING RESULTS")
        print("-" * 40)
        print(result.summary())
        print(f"Total files found and processed: {result.total_files}")
        print(f"Success rate: {result.success_rate:.1f}%")
-        
+
        if result.successful_files:
-            print(f"\nSuccessfully processed:")
+            print("\nSuccessfully processed:")
            for file_path in result.successful_files:
                relative_path = Path(file_path).relative_to(temp_dir)
                print(f"  ✅ {relative_path}")
-        
+
        return result
-        
+
    except Exception as e:
        print(f"❌ Directory processing demonstration failed: {str(e)}")
        return None
@@ -395,26 +402,26 @@ def demonstrate_error_handling():
    print("\n" + "=" * 60)
    print("ERROR HANDLING DEMONSTRATION")
    print("=" * 60)
-    
+
    temp_dir = Path(tempfile.mkdtemp())
-    
+
    # Create files with various issues
    files_with_issues = {
        "valid_file.txt": "This is a valid file that should process successfully.",
        "empty_file.txt": "",  # Empty file
        "large_file.txt": "x" * 1000000,  # Large file (1MB of 'x')
    }
-    
+
    created_files = []
    for filename, content in files_with_issues.items():
        file_path = temp_dir / filename
-        with open(file_path, 'w', encoding='utf-8') as f:
+        with open(file_path, "w", encoding="utf-8") as f:
            f.write(content)
        created_files.append(str(file_path))
-    
+
    # Add a non-existent file to the list
    created_files.append(str(temp_dir / "non_existent_file.txt"))
-    
+
    try:
        print(f"Testing error handling with {len(created_files)} files:")
        for file_path in created_files:
@@ -422,54 +429,56 @@ def demonstrate_error_handling():
            exists = Path(file_path).exists()
            size = Path(file_path).stat().st_size if exists else 0
            print(f"  - {name}: {'exists' if exists else 'missing'}, {size} bytes")
-        
+
        # Create batch parser with short timeout for demonstration
        batch_parser = BatchParser(
            parser_type="mineru",
            max_workers=2,
            show_progress=True,
            timeout_per_file=30,  # Short timeout for demo
-            skip_installation_check=True
+            skip_installation_check=True,
        )
-        
+
        # Process files and handle errors
        result = batch_parser.process_batch(
            file_paths=created_files,
            output_dir=str(temp_dir / "error_test_output"),
-            parse_method="auto"
+            parse_method="auto",
        )
-        
+
        print("\n" + "-" * 40)
        print("ERROR HANDLING RESULTS")
        print("-" * 40)
        print(result.summary())
-        
+
        if result.successful_files:
-            print(f"\nSuccessful files:")
+            print("\nSuccessful files:")
            for file_path in result.successful_files:
                print(f"  ✅ {Path(file_path).name}")
-        
+
        if result.failed_files:
-            print(f"\nFailed files with error details:")
+            print("\nFailed files with error details:")
            for file_path in result.failed_files:
                error = result.errors.get(file_path, "Unknown error")
                print(f"  ❌ {Path(file_path).name}: {error}")
-        
+
        # Demonstrate retry logic
        if result.failed_files:
-            print(f"\nDemonstrating retry logic for {len(result.failed_files)} failed files...")
-            
+            print(
+                f"\nDemonstrating retry logic for {len(result.failed_files)} failed files..."
+            )
+
            # Retry only the failed files
            retry_result = batch_parser.process_batch(
                file_paths=result.failed_files,
                output_dir=str(temp_dir / "retry_output"),
-                parse_method="auto"
+                parse_method="auto",
            )
-            
+
            print(f"Retry results: {retry_result.summary()}")
-        
+
        return result
-        
+
    except Exception as e:
        print(f"❌ Error handling demonstration failed: {str(e)}")
        return None
@@ -480,9 +489,9 @@ async def main():
    # Configure logging
    logging.basicConfig(
        level=logging.INFO,
-        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    )
-    
+
    print("RAG-Anything Batch Processing Demonstration")
    print("=" * 70)
    print("This example demonstrates various batch processing capabilities:")
@@ -491,41 +500,43 @@ async def main():
    print("  - Integration with RAG-Anything pipeline")
    print("  - Directory processing with recursive file discovery")
    print("  - Comprehensive error handling and recovery")
-    
+
    results = {}
-    
+
    # Run demonstrations
    print("\n🚀 Starting demonstrations...")
-    
+
    # Basic batch processing
-    results['basic'] = demonstrate_basic_batch_processing()
-    
+    results["basic"] = demonstrate_basic_batch_processing()
+
    # Asynchronous processing
-    results['async'] = await demonstrate_async_batch_processing()
-    
+    results["async"] = await demonstrate_async_batch_processing()
+
    # RAG integration
-    results['rag'] = await demonstrate_rag_integration()
-    
+    results["rag"] = await demonstrate_rag_integration()
+
    # Directory processing
-    results['directory'] = demonstrate_directory_processing()
-    
+    results["directory"] = demonstrate_directory_processing()
+
    # Error handling
-    results['error_handling'] = demonstrate_error_handling()
-    
+    results["error_handling"] = demonstrate_error_handling()
+
    # Summary
    print("\n" + "=" * 70)
    print("DEMONSTRATION SUMMARY")
    print("=" * 70)
-    
+
    for demo_name, result in results.items():
        if result:
-            if hasattr(result, 'success_rate'):
-                print(f"✅ {demo_name.upper()}: {result.success_rate:.1f}% success rate")
+            if hasattr(result, "success_rate"):
+                print(
+                    f"✅ {demo_name.upper()}: {result.success_rate:.1f}% success rate"
+                )
            else:
                print(f"✅ {demo_name.upper()}: Completed successfully")
        else:
            print(f"❌ {demo_name.upper()}: Failed or had limitations")
-    
+
    print("\n📊 Key Features Demonstrated:")
    print("  - Parallel document processing with configurable worker counts")
    print("  - Real-time progress tracking with tqdm progress bars")
@@ -536,7 +547,7 @@ async def main():
    print("  - Integration with RAG-Anything document pipeline")
    print("  - Retry logic for failed documents")
    print("  - Detailed processing statistics and timing")
-    
+
    print("\n💡 Best Practices Highlighted:")
    print("  - Use appropriate worker counts for your system")
    print("  - Enable progress tracking for long-running operations")
@@ -547,4 +558,4 @@ async def main():


 if __name__ == "__main__":
-    asyncio.run(main()) 
+    asyncio.run(main())
--- a/examples/enhanced_markdown_example.py
+++ b/examples/enhanced_markdown_example.py
@@ -20,6 +20,7 @@ import tempfile

 # Add project root directory to Python path
 import sys
+
 sys.path.append(str(Path(__file__).parent.parent))

 from raganything.enhanced_markdown import EnhancedMarkdownConverter, MarkdownConfig
@@ -27,7 +28,7 @@ from raganything.enhanced_markdown import EnhancedMarkdownConverter, MarkdownCon

 def create_sample_markdown_content():
    """Create comprehensive sample markdown content for testing"""
-    
+
    # Basic sample
    basic_content = """# Basic Markdown Sample

@@ -136,7 +137,7 @@ converter:

 ### Processing Times
 - **Small documents** (< 10 pages): 1-3 seconds
- **Medium documents** (10-50 pages): 3-10 seconds  
+- **Medium documents** (10-50 pages): 3-10 seconds
 - **Large documents** (> 50 pages): 10-30 seconds

 ## Advanced Features
@@ -200,17 +201,17 @@ The enhanced markdown conversion system provides professional-quality PDF genera

 ---

-*Generated on: 2024-01-15*  
+*Generated on: 2024-01-15*
 *Version: 1.0.0*
 """

    # Academic paper sample
    academic_content = """# Research Paper: Advanced Document Processing

-**Authors:** Alice Johnson¹, Bob Smith², Carol Williams¹  
-**Affiliations:**  
-¹ University of Technology  
-² Research Institute  
+**Authors:** Alice Johnson¹, Bob Smith², Carol Williams¹
+**Affiliations:**
+¹ University of Technology
+² Research Institute

 ## Abstract

@@ -294,18 +295,18 @@ Both backends support syntax highlighting through Pygments:
 def analyze_performance(backend, documents):
    '''Analyze conversion performance for given backend'''
    results = []
-    
+
    for doc in documents:
        start_time = time.time()
        success = backend.convert(doc)
        end_time = time.time()
-        
+
        results.append({
            'document': doc,
            'time': end_time - start_time,
            'success': success
        })
-    
+
    return results
 ```

@@ -356,15 +357,15 @@ This research demonstrates that enhanced markdown conversion provides significan

 ---

-**Manuscript received:** January 10, 2024  
-**Accepted for publication:** January 15, 2024  
+**Manuscript received:** January 10, 2024
+**Accepted for publication:** January 15, 2024
 **Published online:** January 20, 2024
 """

    return {
        "basic": basic_content,
        "technical": technical_content,
-        "academic": academic_content
+        "academic": academic_content,
    }


@@ -373,11 +374,11 @@ def demonstrate_basic_conversion():
    print("\n" + "=" * 60)
    print("BASIC MARKDOWN CONVERSION DEMONSTRATION")
    print("=" * 60)
-    
+
    try:
        # Create converter with default settings
        converter = EnhancedMarkdownConverter()
-        
+
        # Show backend information
        backend_info = converter.get_backend_info()
        print("Available conversion backends:")
@@ -385,32 +386,32 @@ def demonstrate_basic_conversion():
            status = "✅" if available else "❌"
            print(f"  {status} {backend}")
        print(f"Recommended backend: {backend_info['recommended_backend']}")
-        
+
        # Get sample content
        samples = create_sample_markdown_content()
        temp_dir = Path(tempfile.mkdtemp())
-        
+
        # Convert basic sample
        basic_md_path = temp_dir / "basic_sample.md"
-        with open(basic_md_path, 'w', encoding='utf-8') as f:
-            f.write(samples['basic'])
-        
+        with open(basic_md_path, "w", encoding="utf-8") as f:
+            f.write(samples["basic"])
+
        print(f"\nConverting basic sample: {basic_md_path}")
-        
+
        success = converter.convert_file_to_pdf(
            input_path=str(basic_md_path),
            output_path=str(temp_dir / "basic_sample.pdf"),
-            method="auto"  # Let the system choose the best backend
+            method="auto",  # Let the system choose the best backend
        )
-        
+
        if success:
            print("✅ Basic conversion successful!")
            print(f"   Output: {temp_dir / 'basic_sample.pdf'}")
        else:
            print("❌ Basic conversion failed")
-        
+
        return success, temp_dir
-        
+
    except Exception as e:
        print(f"❌ Basic conversion demonstration failed: {str(e)}")
        return False, None
@@ -421,71 +422,76 @@ def demonstrate_backend_comparison():
    print("\n" + "=" * 60)
    print("BACKEND COMPARISON DEMONSTRATION")
    print("=" * 60)
-    
+
    try:
        samples = create_sample_markdown_content()
        temp_dir = Path(tempfile.mkdtemp())
-        
+
        # Create technical document
        tech_md_path = temp_dir / "technical.md"
-        with open(tech_md_path, 'w', encoding='utf-8') as f:
-            f.write(samples['technical'])
-        
-        print(f"Testing different backends with technical document...")
-        
+        with open(tech_md_path, "w", encoding="utf-8") as f:
+            f.write(samples["technical"])
+
+        print("Testing different backends with technical document...")
+
        # Test different backends
        backends = ["auto", "weasyprint", "pandoc"]
        results = {}
-        
+
        for backend in backends:
            try:
                print(f"\nTesting {backend} backend...")
-                
+
                converter = EnhancedMarkdownConverter()
                output_path = temp_dir / f"technical_{backend}.pdf"
-                
+
                import time
+
                start_time = time.time()
-                
+
                success = converter.convert_file_to_pdf(
                    input_path=str(tech_md_path),
                    output_path=str(output_path),
-                    method=backend
+                    method=backend,
                )
-                
+
                end_time = time.time()
                conversion_time = end_time - start_time
-                
+
                if success:
-                    file_size = output_path.stat().st_size if output_path.exists() else 0
-                    print(f"  ✅ {backend}: Success in {conversion_time:.2f}s, {file_size} bytes")
+                    file_size = (
+                        output_path.stat().st_size if output_path.exists() else 0
+                    )
+                    print(
+                        f"  ✅ {backend}: Success in {conversion_time:.2f}s, {file_size} bytes"
+                    )
                    results[backend] = {
-                        'success': True,
-                        'time': conversion_time,
-                        'size': file_size,
-                        'output': str(output_path)
+                        "success": True,
+                        "time": conversion_time,
+                        "size": file_size,
+                        "output": str(output_path),
                    }
                else:
                    print(f"  ❌ {backend}: Failed")
-                    results[backend] = {'success': False, 'time': conversion_time}
-                    
+                    results[backend] = {"success": False, "time": conversion_time}
+
            except Exception as e:
                print(f"  ❌ {backend}: Error - {str(e)}")
-                results[backend] = {'success': False, 'error': str(e)}
-        
+                results[backend] = {"success": False, "error": str(e)}
+
        # Summary
        print("\n" + "-" * 40)
        print("BACKEND COMPARISON SUMMARY")
        print("-" * 40)
-        successful_backends = [b for b, r in results.items() if r.get('success', False)]
+        successful_backends = [b for b, r in results.items() if r.get("success", False)]
        print(f"Successful backends: {successful_backends}")
-        
+
        if successful_backends:
-            fastest = min(successful_backends, key=lambda b: results[b]['time'])
+            fastest = min(successful_backends, key=lambda b: results[b]["time"])
            print(f"Fastest backend: {fastest} ({results[fastest]['time']:.2f}s)")
-        
+
        return results, temp_dir
-        
+
    except Exception as e:
        print(f"❌ Backend comparison demonstration failed: {str(e)}")
        return None, None
@@ -496,11 +502,11 @@ def demonstrate_custom_styling():
    print("\n" + "=" * 60)
    print("CUSTOM STYLING DEMONSTRATION")
    print("=" * 60)
-    
+
    try:
        samples = create_sample_markdown_content()
        temp_dir = Path(tempfile.mkdtemp())
-        
+
        # Create custom CSS
        custom_css = """
        body {
@@ -512,7 +518,7 @@ def demonstrate_custom_styling():
            margin: 0 auto;
            padding: 20px;
        }
-        
+
        h1 {
            color: #c0392b;
            font-size: 2.2em;
@@ -520,7 +526,7 @@ def demonstrate_custom_styling():
            padding-bottom: 0.5em;
            margin-top: 2em;
        }
-        
+
        h2 {
            color: #8e44ad;
            font-size: 1.6em;
@@ -528,13 +534,13 @@ def demonstrate_custom_styling():
            padding-bottom: 0.3em;
            margin-top: 1.5em;
        }
-        
+
        h3 {
            color: #2980b9;
            font-size: 1.3em;
            margin-top: 1.2em;
        }
-        
+
        code {
            background-color: #ecf0f1;
            color: #e74c3c;
@@ -543,7 +549,7 @@ def demonstrate_custom_styling():
            font-family: 'Courier New', monospace;
            font-size: 0.9em;
        }
-        
+
        pre {
            background-color: #2c3e50;
            color: #ecf0f1;
@@ -553,13 +559,13 @@ def demonstrate_custom_styling():
            overflow-x: auto;
            font-size: 0.9em;
        }
-        
+
        pre code {
            background-color: transparent;
            color: inherit;
            padding: 0;
        }
-        
+
        blockquote {
            background-color: #f8f9fa;
            border-left: 5px solid #3498db;
@@ -568,7 +574,7 @@ def demonstrate_custom_styling():
            font-style: italic;
            color: #555;
        }
-        
+
        table {
            border-collapse: collapse;
            width: 100%;
@@ -578,7 +584,7 @@ def demonstrate_custom_styling():
            overflow: hidden;
            box-shadow: 0 2px 4px rgba(0,0,0,0.1);
        }
-        
+
        th {
            background-color: #3498db;
            color: white;
@@ -586,41 +592,41 @@ def demonstrate_custom_styling():
            text-align: left;
            font-weight: bold;
        }
-        
+
        td {
            padding: 10px 15px;
            border-bottom: 1px solid #ecf0f1;
        }
-        
+
        tr:nth-child(even) {
            background-color: #f8f9fa;
        }
-        
+
        tr:hover {
            background-color: #e8f4fd;
        }
-        
+
        ul, ol {
            margin-bottom: 1em;
            padding-left: 2em;
        }
-        
+
        li {
            margin-bottom: 0.5em;
            line-height: 1.6;
        }
-        
+
        a {
            color: #3498db;
            text-decoration: none;
            border-bottom: 1px dotted #3498db;
        }
-        
+
        a:hover {
            color: #2980b9;
            border-bottom: 1px solid #2980b9;
        }
-        
+
        .toc {
            background-color: #f8f9fa;
            border: 2px solid #e9ecef;
@@ -628,29 +634,29 @@ def demonstrate_custom_styling():
            padding: 20px;
            margin: 2em 0;
        }
-        
+
        .toc h2 {
            color: #2c3e50;
            margin-top: 0;
            border-bottom: none;
        }
-        
+
        .toc ul {
            list-style-type: none;
            padding-left: 0;
        }
-        
+
        .toc li {
            margin-bottom: 0.8em;
        }
-        
+
        .toc a {
            color: #2c3e50;
            font-weight: 500;
            border-bottom: none;
        }
        """
-        
+
        # Create custom configuration
        config = MarkdownConfig(
            page_size="A4",
@@ -659,16 +665,16 @@ def demonstrate_custom_styling():
            line_height="1.4",
            include_toc=True,
            syntax_highlighting=True,
-            custom_css=custom_css
+            custom_css=custom_css,
        )
-        
+
        converter = EnhancedMarkdownConverter(config)
-        
+
        # Convert academic sample with custom styling
        academic_md_path = temp_dir / "academic_styled.md"
-        with open(academic_md_path, 'w', encoding='utf-8') as f:
-            f.write(samples['academic'])
-        
+        with open(academic_md_path, "w", encoding="utf-8") as f:
+            f.write(samples["academic"])
+
        print("Converting academic paper with custom styling...")
        print("Custom styling features:")
        print("  - Custom color scheme (reds, purples, blues)")
@@ -677,32 +683,32 @@ def demonstrate_custom_styling():
        print("  - Styled code blocks with dark theme")
        print("  - Custom blockquote styling")
        print("  - Professional header styling")
-        
+
        success = converter.convert_file_to_pdf(
            input_path=str(academic_md_path),
            output_path=str(temp_dir / "academic_styled.pdf"),
-            method="weasyprint"  # WeasyPrint is best for custom CSS
+            method="weasyprint",  # WeasyPrint is best for custom CSS
        )
-        
+
        if success:
            print("✅ Custom styling conversion successful!")
            print(f"   Output: {temp_dir / 'academic_styled.pdf'}")
-            
+
            # Also create a default version for comparison
            default_converter = EnhancedMarkdownConverter()
            default_success = default_converter.convert_file_to_pdf(
                input_path=str(academic_md_path),
                output_path=str(temp_dir / "academic_default.pdf"),
-                method="weasyprint"
+                method="weasyprint",
            )
-            
+
            if default_success:
                print(f"   Comparison (default): {temp_dir / 'academic_default.pdf'}")
        else:
            print("❌ Custom styling conversion failed")
-        
+
        return success, temp_dir
-        
+
    except Exception as e:
        print(f"❌ Custom styling demonstration failed: {str(e)}")
        return False, None
@@ -713,7 +719,7 @@ def demonstrate_content_conversion():
    print("\n" + "=" * 60)
    print("CONTENT CONVERSION DEMONSTRATION")
    print("=" * 60)
-    
+
    try:
        # Create markdown content programmatically
        dynamic_content = f"""# Dynamic Content Example
@@ -762,38 +768,38 @@ Direct content conversion is useful for:
 - API-based document services
 - Real-time content processing
 """
-        
+
        temp_dir = Path(tempfile.mkdtemp())
        converter = EnhancedMarkdownConverter()
-        
+
        print("Converting dynamically generated markdown content...")
        print("Content includes:")
        print("  - System information")
        print("  - Dynamic tables with current values")
        print("  - Generated timestamps")
        print("  - Programmatic examples")
-        
+
        # Convert content directly to PDF
        output_path = temp_dir / "dynamic_content.pdf"
-        
+
        success = converter.convert_markdown_to_pdf(
            markdown_content=dynamic_content,
            output_path=str(output_path),
-            method="auto"
+            method="auto",
        )
-        
+
        if success:
            print("✅ Content conversion successful!")
            print(f"   Output: {output_path}")
-            
+
            # Show file size
            file_size = output_path.stat().st_size
            print(f"   Generated PDF size: {file_size} bytes")
        else:
            print("❌ Content conversion failed")
-        
+
        return success, temp_dir
-        
+
    except Exception as e:
        print(f"❌ Content conversion demonstration failed: {str(e)}")
        return False, None
@@ -804,14 +810,14 @@ def demonstrate_error_handling():
    print("\n" + "=" * 60)
    print("ERROR HANDLING DEMONSTRATION")
    print("=" * 60)
-    
+
    try:
        temp_dir = Path(tempfile.mkdtemp())
-        
+
        # Test cases with various issues
        test_cases = {
            "invalid_markdown": """# Invalid Markdown
-            
+
 This markdown has some {{invalid}} syntax and [broken links](http://nonexistent.invalid).

 ```unknown_language
@@ -841,98 +847,106 @@ Symbols: ♠ ♣ ♥ ♦ ☀ ☁ ☂ ☃ ☄ ★ ☆ ☉ ☊ ☋ ☌ ☍ ☎ ☏
 Arrows: ← ↑ → ↓ ↔ ↕ ↖ ↗ ↘ ↙
 """,
            "empty_content": "",
-            "minimal_content": "# Just a title"
+            "minimal_content": "# Just a title",
        }
-        
+
        print("Testing error handling with various content types...")
-        
+
        results = {}
-        
+
        for test_name, content in test_cases.items():
            print(f"\nTesting: {test_name}")
-            
+
            try:
                # Try multiple backends for each test case
                for backend in ["auto", "weasyprint", "pandoc"]:
                    try:
                        converter = EnhancedMarkdownConverter()
                        output_path = temp_dir / f"{test_name}_{backend}.pdf"
-                        
+
                        success = converter.convert_markdown_to_pdf(
                            markdown_content=content,
                            output_path=str(output_path),
-                            method=backend
+                            method=backend,
                        )
-                        
+
                        if success:
-                            file_size = output_path.stat().st_size if output_path.exists() else 0
+                            file_size = (
+                                output_path.stat().st_size
+                                if output_path.exists()
+                                else 0
+                            )
                            print(f"  ✅ {backend}: Success ({file_size} bytes)")
                            results[f"{test_name}_{backend}"] = {
-                                'success': True,
-                                'size': file_size
+                                "success": True,
+                                "size": file_size,
                            }
                        else:
                            print(f"  ❌ {backend}: Failed")
-                            results[f"{test_name}_{backend}"] = {'success': False}
-                            
+                            results[f"{test_name}_{backend}"] = {"success": False}
+
                    except Exception as e:
                        print(f"  ❌ {backend}: Error - {str(e)[:60]}...")
                        results[f"{test_name}_{backend}"] = {
-                            'success': False,
-                            'error': str(e)
+                            "success": False,
+                            "error": str(e),
                        }
-                        
+
            except Exception as e:
                print(f"  ❌ Test case failed: {str(e)}")
-        
+
        # Demonstrate robust conversion with fallbacks
-        print(f"\nDemonstrating robust conversion with fallback logic...")
-        
+        print("\nDemonstrating robust conversion with fallback logic...")
+
        def robust_convert(content, output_path):
            """Convert with multiple backend fallbacks"""
            backends = ["weasyprint", "pandoc", "auto"]
-            
+
            for backend in backends:
                try:
                    converter = EnhancedMarkdownConverter()
                    success = converter.convert_markdown_to_pdf(
                        markdown_content=content,
                        output_path=output_path,
-                        method=backend
+                        method=backend,
                    )
                    if success:
                        return backend, True
                except Exception:
                    continue
-            
+
            return None, False
-        
+
        # Test robust conversion
        test_content = test_cases["complex_content"]
        robust_output = temp_dir / "robust_conversion.pdf"
-        
+
        successful_backend, success = robust_convert(test_content, str(robust_output))
-        
+
        if success:
            print(f"✅ Robust conversion successful using {successful_backend}")
            print(f"   Output: {robust_output}")
        else:
            print("❌ All backends failed for robust conversion")
-        
+
        # Summary
        print("\n" + "-" * 40)
        print("ERROR HANDLING SUMMARY")
        print("-" * 40)
-        successful_conversions = sum(1 for r in results.values() if r.get('success', False))
+        successful_conversions = sum(
+            1 for r in results.values() if r.get("success", False)
+        )
        total_attempts = len(results)
-        success_rate = (successful_conversions / total_attempts * 100) if total_attempts > 0 else 0
-        
+        success_rate = (
+            (successful_conversions / total_attempts * 100) if total_attempts > 0 else 0
+        )
+
        print(f"Total conversion attempts: {total_attempts}")
        print(f"Successful conversions: {successful_conversions}")
        print(f"Success rate: {success_rate:.1f}%")
-        
+
        return results, temp_dir
-        
+
    except Exception as e:
        print(f"❌ Error handling demonstration failed: {str(e)}")
        return None, None
@@ -943,62 +957,72 @@ def main():
    # Configure logging
    logging.basicConfig(
        level=logging.INFO,
-        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+        format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    )
-    
+
    print("RAG-Anything Enhanced Markdown Conversion Demonstration")
    print("=" * 70)
-    print("This example demonstrates various enhanced markdown conversion capabilities:")
+    print(
+        "This example demonstrates various enhanced markdown conversion capabilities:"
+    )
    print("  - Basic markdown to PDF conversion")
    print("  - Multiple backend comparison (WeasyPrint vs Pandoc)")
    print("  - Custom CSS styling and professional formatting")
    print("  - Direct content conversion without file I/O")
    print("  - Comprehensive error handling and fallback mechanisms")
-    
+
    results = {}
-    
+
    # Run demonstrations
    print("\n🚀 Starting demonstrations...")
-    
+
    # Basic conversion
    success, temp_dir = demonstrate_basic_conversion()
-    results['basic'] = success
-    
+    results["basic"] = success
+
    # Backend comparison
    backend_results, _ = demonstrate_backend_comparison()
-    results['backends'] = backend_results
-    
+    results["backends"] = backend_results
+
    # Custom styling
    styling_success, _ = demonstrate_custom_styling()
-    results['styling'] = styling_success
-    
+    results["styling"] = styling_success
+
    # Content conversion
    content_success, _ = demonstrate_content_conversion()
-    results['content'] = content_success
-    
+    results["content"] = content_success
+
    # Error handling
    error_results, _ = demonstrate_error_handling()
-    results['error_handling'] = error_results
-    
+    results["error_handling"] = error_results
+
    # Summary
    print("\n" + "=" * 70)
    print("DEMONSTRATION SUMMARY")
    print("=" * 70)
-    
+
    print("✅ Features Successfully Demonstrated:")
-    if results['basic']:
+    if results["basic"]:
        print("  - Basic markdown to PDF conversion")
-    if results['backends']:
-        successful_backends = [b for b, r in results['backends'].items() if r.get('success', False)]
+    if results["backends"]:
+        successful_backends = [
+            b for b, r in results["backends"].items() if r.get("success", False)
+        ]
        print(f"  - Multiple backends: {successful_backends}")
-    if results['styling']:
+    if results["styling"]:
        print("  - Custom CSS styling and professional formatting")
-    if results['content']:
+    if results["content"]:
        print("  - Direct content conversion without file I/O")
-    if results['error_handling']:
-        success_rate = sum(1 for r in results['error_handling'].values() if r.get('success', False)) / len(results['error_handling']) * 100
+    if results["error_handling"]:
+        success_rate = (
+            sum(
+                1 for r in results["error_handling"].values() if r.get("success", False)
+            )
+            / len(results["error_handling"])
+            * 100
+        )
        print(f"  - Error handling with {success_rate:.1f}% overall success rate")
-    
+
    print("\n📊 Key Capabilities Highlighted:")
    print("  - Professional PDF generation with high-quality typography")
    print("  - Multiple conversion backends with automatic selection")
@@ -1008,7 +1032,7 @@ def main():
    print("  - Image embedding with proper scaling")
    print("  - Table of contents generation with navigation")
    print("  - Comprehensive error handling and fallback mechanisms")
-    
+
    print("\n💡 Best Practices Demonstrated:")
    print("  - Choose WeasyPrint for web-style documents and custom CSS")
    print("  - Choose Pandoc for academic papers and complex formatting")
@@ -1018,7 +1042,7 @@ def main():
    print("  - Test custom CSS with simple content first")
    print("  - Handle errors gracefully with multiple backend attempts")
    print("  - Use appropriate page sizes and margins for target use case")
-    
+
    print("\n🎯 Integration Patterns:")
    print("  - Standalone conversion for document generation")
    print("  - Integration with RAG-Anything document pipeline")
@@ -1028,4 +1052,4 @@ def main():


 if __name__ == "__main__":
-    main() 
+    main()