mirror of
https://github.com/HKUDS/RAG-Anything.git
synced 2025-08-09 13:53:04 +03:00
236 lines
7.6 KiB
Python
236 lines
7.6 KiB
Python
#!/usr/bin/env python3
|
||
"""
|
||
Image Format Parsing Test Script for RAG-Anything

This script demonstrates how to parse various image formats
using MinerU, including JPG, PNG, BMP, TIFF, GIF, and WebP files.

Requirements:
- PIL/Pillow library for format conversion
- RAG-Anything package

Usage:
    python image_format_test.py --file path/to/image.bmp
    python image_format_test.py --check-pillow
|
||
"""
|
||
|
||
import argparse
|
||
import asyncio
|
||
import sys
|
||
from pathlib import Path
|
||
from raganything import RAGAnything
|
||
|
||
|
||
def check_pillow_installation():
    """Report on stdout whether PIL/Pillow can be imported.

    On success the detected version is printed (``Unknown`` when the imported
    ``Image`` module exposes no ``__version__`` attribute). On failure an
    installation hint is printed instead.

    Returns:
        bool: True if ``from PIL import Image`` succeeded, False if it raised
        ImportError.
    """
    try:
        from PIL import Image

        # getattr with a default covers builds that lack the attribute.
        pil_version = getattr(Image, "__version__", "Unknown")
        print(f"✅ PIL/Pillow found: PIL version {pil_version}")
    except ImportError:
        for hint in (
            "❌ PIL/Pillow not found. Please install Pillow:",
            " pip install Pillow",
        ):
            print(hint)
        return False
    return True
|
||
|
||
|
||
def get_image_info(image_path: Path):
    """Collect basic properties of an image file using Pillow.

    Args:
        image_path: Location of the image to open.

    Returns:
        dict: On success, the keys ``"format"``, ``"mode"``, ``"size"`` and
        ``"has_transparency"``. The last one is True when the mode is
        ``RGBA`` or ``LA`` or when the image's ``info`` mapping contains a
        ``"transparency"`` entry. If anything raises (including Pillow being
        unavailable), a dict with the single key ``"error"`` holding the
        exception message is returned instead.
    """
    try:
        from PIL import Image

        with Image.open(image_path) as picture:
            details = {
                "format": picture.format,
                "mode": picture.mode,
                "size": picture.size,
            }
            alpha_mode = picture.mode in ("RGBA", "LA")
            details["has_transparency"] = (
                alpha_mode or "transparency" in picture.info
            )
        return details
    except Exception as exc:
        # Best-effort helper: callers check for the "error" key rather than
        # handling exceptions themselves.
        return {"error": str(exc)}
|
||
|
||
|
||
def _truncated_preview(text, limit):
    """Return ``text`` stripped and cut to at most ``limit`` characters.

    ``...`` is appended only when the stripped text is actually longer than
    ``limit``, so the marker always means that something was cut off.
    """
    stripped = text.strip()
    return f"{stripped[:limit]}{'...' if len(stripped) > limit else ''}"


def _blocks_of_type(content_list, block_type):
    """Return the dict entries of ``content_list`` whose "type" is ``block_type``."""
    return [
        item
        for item in content_list
        if isinstance(item, dict) and item.get("type") == block_type
    ]


async def test_image_format_parsing(file_path: str):
    """Test image format parsing with MinerU

    Checks that ``file_path`` exists and has a supported image extension,
    prints file and image details, then calls ``RAGAnything.parse_document``
    with ``parse_method="ocr"`` and prints a summary of the returned content
    blocks (type distribution, text preview, images, OCR text and tables).

    Args:
        file_path: Path to the image file to parse.

    Returns:
        bool: True when parsing and reporting finished without raising;
        False when the file does not exist, its extension is unsupported,
        or an exception was raised while parsing/reporting.

    Note:
        ``RAGAnything()`` is constructed outside the ``try`` block, so an
        exception raised by the constructor propagates to the caller.
    """

    print(f"🧪 Testing image format parsing: {file_path}")

    # Check if file exists and is a supported image format
    file_path = Path(file_path)
    if not file_path.exists():
        print(f"❌ File does not exist: {file_path}")
        return False

    supported_extensions = {
        ".jpg",
        ".jpeg",
        ".png",
        ".bmp",
        ".tiff",
        ".tif",
        ".gif",
        ".webp",
    }
    suffix = file_path.suffix.lower()
    if suffix not in supported_extensions:
        print(f"❌ Unsupported file format: {file_path.suffix}")
        # Sets of strings iterate in an order that can change between runs
        # (hash randomization); sort so the message is stable.
        print(f" Supported formats: {', '.join(sorted(supported_extensions))}")
        return False

    print(f"📸 File format: {file_path.suffix.upper()}")
    print(f"📏 File size: {file_path.stat().st_size / 1024:.1f} KB")

    # Get detailed image information (skipped silently if it could not be read)
    img_info = get_image_info(file_path)
    if "error" not in img_info:
        print("🖼️ Image info:")
        print(f" • Format: {img_info['format']}")
        print(f" • Mode: {img_info['mode']}")
        print(f" • Size: {img_info['size'][0]}x{img_info['size'][1]}")
        print(f" • Has transparency: {img_info['has_transparency']}")

    # Check format compatibility with MinerU
    mineru_native_formats = {".jpg", ".jpeg", ".png"}
    needs_conversion = suffix not in mineru_native_formats

    if needs_conversion:
        print(
            f"ℹ️ Format {file_path.suffix.upper()} will be converted to PNG for MinerU compatibility"
        )
    else:
        print(f"✅ Format {file_path.suffix.upper()} is natively supported by MinerU")

    # Initialize RAGAnything (only for parsing functionality)
    rag = RAGAnything()

    try:
        # Test image parsing with MinerU
        print("\n🔄 Testing image parsing with MinerU...")
        # NOTE(review): the second returned value is treated as markdown text
        # below (len/strip) — confirm against the installed RAGAnything version.
        content_list, md_content = await rag.parse_document(
            file_path=str(file_path),
            output_dir="./test_output",
            parse_method="ocr",  # Images use OCR method
            display_stats=True,
        )

        print("✅ Parsing successful!")
        print(f" 📊 Content blocks: {len(content_list)}")
        print(f" 📝 Markdown length: {len(md_content)} characters")

        # Analyze content types
        content_types = {}
        for item in content_list:
            if isinstance(item, dict):
                content_type = item.get("type", "unknown")
                content_types[content_type] = content_types.get(content_type, 0) + 1

        if content_types:
            print(" 📋 Content distribution:")
            for content_type, count in sorted(content_types.items()):
                print(f" • {content_type}: {count}")

        # Display extracted text (if any)
        if md_content.strip():
            print("\n📄 Extracted text preview (first 500 characters):")
            print(f" {_truncated_preview(md_content, 500)}")
        else:
            print("\n📄 No text extracted from the image")

        # Display image processing results
        image_items = _blocks_of_type(content_list, "image")
        if image_items:
            print(f"\n🖼️ Found {len(image_items)} processed image(s):")
            for i, item in enumerate(image_items, 1):
                print(f" {i}. Image path: {item.get('img_path', 'N/A')}")
                captions = item.get("img_caption")
                if captions:
                    print(f" Caption: {captions[0]}")

        # Display text blocks (OCR results)
        text_items = _blocks_of_type(content_list, "text")
        if text_items:
            print("\n📝 OCR text blocks found:")
            for i, item in enumerate(text_items, 1):
                # A block whose "text" is None is treated as empty instead of
                # failing the whole run with AttributeError.
                text_content = item.get("text") or ""
                if text_content.strip():
                    print(f" {i}. {_truncated_preview(text_content, 200)}")

        # Check for any tables detected in the image
        table_items = _blocks_of_type(content_list, "table")
        if table_items:
            print(f"\n📊 Found {len(table_items)} table(s) in image:")
            for i, _ in enumerate(table_items, 1):
                print(f" {i}. Table detected with content")

        print("\n🎉 Image format parsing test completed successfully!")
        print("📁 Output files saved to: ./test_output")
        return True

    except Exception as e:
        # Top-level boundary of the test: report the failure with the full
        # traceback and signal it through the return value.
        print(f"\n❌ Image format parsing failed: {str(e)}")
        import traceback

        print(f" Full error: {traceback.format_exc()}")
        return False
|
||
|
||
|
||
def main():
    """Command-line entry point for the image format parsing test.

    Parses ``--file`` and ``--check-pillow``, verifies that PIL/Pillow is
    importable, and unless only the dependency check was requested runs
    ``test_image_format_parsing`` on the given file.

    Returns:
        int: 0 when the Pillow-only check passes or the parsing test reports
        success; 1 when Pillow is missing, ``--file`` was not supplied, the
        test reports failure, the run is interrupted, or an unexpected
        exception occurs.
    """
    cli = argparse.ArgumentParser(
        description="Test image format parsing with MinerU"
    )
    cli.add_argument("--file", help="Path to the image file to test")
    cli.add_argument(
        "--check-pillow",
        action="store_true",
        help="Only check PIL/Pillow installation",
    )
    options = cli.parse_args()

    # The dependency check always runs first, whatever mode was requested.
    print("🔧 Checking PIL/Pillow installation...")
    pillow_available = check_pillow_installation()
    if not pillow_available:
        return 1

    if options.check_pillow:
        print("✅ PIL/Pillow installation check passed!")
        return 0

    # Outside of --check-pillow mode a file to parse is mandatory.
    if not options.file:
        print("❌ Error: --file argument is required when not using --check-pillow")
        cli.print_help()
        return 1

    # Run the parsing test and translate its outcome into an exit code.
    try:
        passed = asyncio.run(test_image_format_parsing(options.file))
        exit_code = 0 if passed else 1
    except KeyboardInterrupt:
        print("\n⏹️ Test interrupted by user")
        exit_code = 1
    except Exception as exc:
        print(f"\n❌ Unexpected error: {str(exc)}")
        exit_code = 1
    return exit_code
|
||
|
||
|
||
# Script entry point: use main()'s return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
|