Optimizing Images and Removing EXIF Data with Python

Technology · 1 December 2024 · 6 min read

As a website owner, image optimization is crucial for maintaining your site’s performance. Here’s a Python script that optimizes images by removing EXIF metadata, adding watermarks, and compressing images – all while maintaining quality.

Optimizing Images and Removing EXIF Data with Python Featured Image

Why Remove EXIF Data?

Digital photos contain more than just pixels. They carry EXIF data that might include:

GPS coordinates where the photo was taken
Device information (camera model, phone type)
Original creation dates and times
Technical settings used to take the photo

For privacy and security reasons, removing this metadata before publishing images online is crucial. Plus, cleaning out unnecessary EXIF data helps reduce file sizes.

The Image Optimization Challenge

Beyond privacy concerns, web images need optimization for:

Faster page loading speeds
Reduced storage space
Better user experience on mobile devices
Consistent quality across the website

Required Setup

Install these Python libraries:

pip install pillow
pip install pillow-heif

Features Overview

The script combines several powerful features using the Pillow library:

Privacy Protection

Strips all EXIF metadata after applying the orientation tag to the pixels, so images stay upright
Ensures images are clean of any personal data
Maintains image quality during the cleaning process

Optimization Features

Converts images to WebP format for better compression
Resizes large images to maximum width of 1500px
Maintains aspect ratios automatically
Sets optimal compression levels for web use

Brand Protection

Adds a watermark (© ijalfauzi.com)
Centers the text horizontally and places it 80% of the way down the image
Scales watermark size to 4.5% of image width
Applies white text at an alpha of 85 out of 255 (roughly 33% opacity)

Format Support

Handles multiple input formats (JPG, JPEG, PNG, BMP, WebP, HEIC)
Converts everything to optimized WebP
Flattens transparent PNG files onto a white background before conversion

The Streamlined Workflow

The automated process simplifies image preparation to just three steps:

Place new images in the input folder
Run the script
Collect optimized images from the output folder

What used to take 30 minutes of manual work now takes just a couple of minutes.

Real Results

The optimization process consistently delivers:

40-80% reduction in file sizes
Maintained image quality
Faster page load times
Enhanced privacy protection
Consistent visual branding

Tips for Usage

Batch Processing: Put all images in the input folder for bulk processing
Quality Control: Check the processed images in the output folder
Customization: Easily modify:
- Watermark text and position
- Maximum image width
- Output quality
- Watermark opacity

from PIL import Image, ImageDraw, ImageFont, ExifTags
import os
from PIL.ExifTags import TAGS
from pillow_heif import register_heif_opener

# Register the pillow_heif opener once, at import time, so that the
# Image.open() call in process_image() can read the .heic inputs that
# batch_process_images() accepts.
register_heif_opener()

def preserve_orientation(img):
    """
    Bake the EXIF orientation into the pixels so the image stays upright
    once its metadata has been removed.

    Args:
        img: A PIL image (anything exposing ``getexif()`` and ``transpose()``).

    Returns:
        A transposed copy when the EXIF Orientation tag (0x0112) holds a value
        from 2 to 8; otherwise ``img`` itself, unchanged.
    """
    # Reading EXIF is deliberately best-effort: a missing or corrupt EXIF
    # block must not prevent the image from being processed, so any ordinary
    # error here simply means "no orientation to apply". Unlike a bare
    # ``except``, this still lets KeyboardInterrupt/SystemExit through.
    try:
        orientation = img.getexif().get(0x0112)
    except Exception:
        return img

    # 1 (or a missing/unknown value) means the pixels are already upright.
    if orientation not in (2, 3, 4, 5, 6, 7, 8):
        return img

    # EXIF orientation value -> the single Pillow transpose that undoes it.
    # TRANSPOSE equals "mirror left-right, then rotate 90" and TRANSVERSE
    # equals "mirror left-right, then rotate 270".
    corrections = {
        2: Image.Transpose.FLIP_LEFT_RIGHT,
        3: Image.Transpose.ROTATE_180,
        4: Image.Transpose.FLIP_TOP_BOTTOM,
        5: Image.Transpose.TRANSPOSE,
        6: Image.Transpose.ROTATE_270,
        7: Image.Transpose.TRANSVERSE,
        8: Image.Transpose.ROTATE_90,
    }
    return img.transpose(corrections[orientation])

def strip_exif(img):
    """
    Return a metadata-free copy of the image with its orientation applied.

    The EXIF orientation is first baked into the pixels by
    preserve_orientation(). The pixel data is then copied into a new image
    and the copy's ``info`` dictionary is emptied, which drops the EXIF
    block and every other ancillary entry the loader stored there.

    Args:
        img: The PIL image to clean.

    Returns:
        A new PIL image with the same mode, size, pixels and palette as the
        upright input, and an empty ``info`` dictionary.
    """
    # First preserve orientation
    upright = preserve_orientation(img)

    # copy() duplicates the pixel buffer natively and keeps the palette of
    # "P"-mode images. A getdata()/putdata() round-trip would build one
    # Python object per pixel and lose that palette.
    image_without_exif = upright.copy()
    image_without_exif.info.clear()
    return image_without_exif

def resize_image(img, max_width):
    """
    Shrink the image to at most ``max_width`` pixels wide, keeping its aspect ratio.

    Args:
        img: The PIL image to resize.
        max_width: Maximum allowed width in pixels.

    Returns:
        ``img`` itself when it is already narrow enough; otherwise a new image
        exactly ``max_width`` wide, resampled with LANCZOS.
    """
    if img.width <= max_width:
        return img

    scale = max_width / img.width
    # Truncation can yield 0 for very wide, very short images, which
    # Image.resize() rejects; keep at least one row of pixels.
    new_height = max(1, int(img.height * scale))
    return img.resize((max_width, new_height), Image.Resampling.LANCZOS)

def _load_watermark_font(font_size):
    """
    Return a font for the watermark, as close to ``font_size`` pixels as possible.
    """
    # Preferred: Arial at the requested size. This fails with OSError when the
    # font file is not installed, ImportError when Pillow lacks FreeType, and
    # ValueError for a non-positive size.
    try:
        return ImageFont.truetype('arial.ttf', font_size)
    except (OSError, ImportError, ValueError):
        pass

    # Newer Pillow releases can scale the built-in font. Older ones do not
    # accept ``size`` (TypeError), so fall back to the fixed-size default.
    try:
        return ImageFont.load_default(size=font_size)
    except (TypeError, OSError, ImportError, ValueError):
        return ImageFont.load_default()


def process_image(image_path, output_path, watermark_text="© ijalfauzi.com", opacity=85,
                  max_width=1500, quality=85):
    """
    Process image: strip EXIF, resize, add watermark, convert to WebP

    Args:
        image_path: Path of the image to read.
        output_path: Path the WebP result is written to.
        watermark_text: Text drawn centred horizontally, 80% of the way down.
        opacity: Alpha of the white watermark text on a 0-255 scale
            (the default 85 is about one third opaque).
        max_width: Images wider than this many pixels are scaled down to it.
        quality: WebP quality setting passed to the encoder.

    Returns:
        True when the file was written; False when any step failed, in which
        case the error is printed instead of raised.
    """
    try:
        with Image.open(image_path) as img:
            # Handle PNG with transparency
            if img.format == 'PNG':
                if img.mode != 'RGBA':
                    img = img.convert('RGBA')
                # Flatten onto an opaque white background, using the image's
                # own alpha channel as the paste mask.
                background = Image.new('RGBA', img.size, (255, 255, 255, 255))
                background.paste(img, (0, 0), img)
                img = background

            # Strip EXIF data while preserving orientation
            img = strip_exif(img)

            # Resize image to the configured maximum width
            img = resize_image(img, max_width)

            # alpha_composite() below requires both layers to be RGBA.
            if img.mode != 'RGBA':
                img = img.convert('RGBA')

            # Watermark height is 4.5% of the (possibly resized) width.
            font = _load_watermark_font(int(img.width * 0.045))

            # The text goes on a fully transparent layer so that only the
            # glyphs are blended into the photo.
            overlay = Image.new('RGBA', img.size, (255, 255, 255, 0))
            draw = ImageDraw.Draw(overlay)

            text_bbox = draw.textbbox((0, 0), watermark_text, font=font)
            text_width = text_bbox[2] - text_bbox[0]
            text_height = text_bbox[3] - text_bbox[1]

            x = (img.width - text_width) // 2
            y = int(img.height * 0.80) - (text_height // 2)

            draw.text((x, y), watermark_text, font=font, fill=(255, 255, 255, opacity))

            watermarked = Image.alpha_composite(img, overlay)

            # The output is saved without an alpha channel.
            watermarked = watermarked.convert('RGB')

            watermarked.save(output_path, 'WEBP', quality=quality)
            return True

    except Exception as e:
        # Batch boundary: report the failure and let the caller carry on
        # with the remaining files.
        print(f"Error processing {image_path}: {str(e)}")
        return False

def batch_process_images(input_dir, output_dir):
    """
    Process every supported image in a folder and write WebP files to another.

    Files are handled in sorted name order. Each output keeps the input's
    base name with a ``.webp`` extension. When two inputs share a base name
    (``a.jpg`` and ``a.png``), later ones get the source extension appended
    (``a_png.webp``) instead of overwriting the earlier result.

    Args:
        input_dir: Folder scanned (non-recursively) for images.
        output_dir: Folder the WebP files are written to; created if missing.

    Returns:
        A ``(successful, failed)`` tuple of counts.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_dir, exist_ok=True)

    supported_formats = ('.jpg', '.jpeg', '.png', '.bmp', '.webp', '.heic')
    successful = 0
    failed = 0
    # Lower-cased output names already used in this run; lower-casing keeps
    # the check correct on case-insensitive file systems too.
    used_names = set()

    for filename in sorted(os.listdir(input_dir)):
        if not filename.lower().endswith(supported_formats):
            continue

        input_path = os.path.join(input_dir, filename)
        # A directory named like an image is not an image.
        if not os.path.isfile(input_path):
            continue

        # Change all outputs to WebP
        stem, extension = os.path.splitext(filename)
        source_kind = extension[1:].lower()
        output_filename = stem + '.webp'
        if output_filename.lower() in used_names:
            output_filename = f"{stem}_{source_kind}.webp"
        suffix = 2
        while output_filename.lower() in used_names:
            output_filename = f"{stem}_{source_kind}_{suffix}.webp"
            suffix += 1
        used_names.add(output_filename.lower())
        output_path = os.path.join(output_dir, output_filename)

        if process_image(input_path, output_path):
            successful += 1
            print(f"Processed: {filename} (Converted to WebP)")
        else:
            failed += 1
            print(f"Failed: {filename}")

    return successful, failed

if __name__ == "__main__":
    # Imported here because it is only needed when run as a script.
    import argparse

    # Both folders may be given on the command line; with no arguments the
    # script uses the same locations it was originally written for.
    parser = argparse.ArgumentParser(
        description="Strip EXIF data, resize, watermark and convert images to WebP."
    )
    parser.add_argument(
        "input_directory",
        nargs="?",
        default=r"C:\Users\Ijal Fauzi\Desktop\Optimized\opt",
        help="folder containing the images to process",
    )
    parser.add_argument(
        "output_directory",
        nargs="?",
        default=r"C:\Users\Ijal Fauzi\Desktop\Optimized",
        help="folder the WebP files are written to",
    )
    args = parser.parse_args()
    input_directory = args.input_directory
    output_directory = args.output_directory

    print("Processing images (max 1500px width, converting to WebP)...")
    success, failed = batch_process_images(input_directory, output_directory)
    print(f"\nComplete!\nSuccessful: {success}\nFailed: {failed}")

Using this script has streamlined the image optimization workflow for my website. Whether you’re handling a few images or hundreds, it makes the process automatic and consistent while maintaining quality and protecting privacy.