Python: Zero to Hero
Home/The Python Ecosystem
Share

Chapter 46: Automation and Scripting

The best programmers are lazy — in the best way. They hate doing the same thing twice. When they spot a repetitive task, they write a script to do it for them.

Renaming 500 files? Three lines of Python. Sending a weekly report by email? Ten lines. Downloading 100 PDFs and merging them? Twenty lines. Resizing 200 images? Fifteen lines.

This chapter is your automation toolkit.

File System Automation with pathlib and shutil

You already know pathlib from Chapter 8. Here's how to put it to work at scale.

Rename files in bulk

from pathlib import Path

folder = Path("photos")

# Rename all .jpeg files to .jpg
# sorted() snapshots the listing first, so renaming never disturbs the iteration.
for f in sorted(folder.glob("*.jpeg")):
    target = f.with_suffix(".jpg")
    if target.exists():
        # Path.rename() silently replaces an existing file on Unix; keep both instead.
        print(f"Skipped: {f.name} ({target.name} already exists)")
        continue
    f.rename(target)
    print(f"Renamed: {f.name} -> {target.name}")

# Add a date prefix to all .txt files
from datetime import date
today = date.today().strftime("%Y-%m-%d")

for f in sorted(folder.glob("*.txt")):
    if f.name.startswith(f"{today}_"):
        # Already prefixed by an earlier run today; do not stack prefixes.
        continue
    new_name = f.parent / f"{today}_{f.name}"
    if new_name.exists():
        print(f"Skipped: {f.name} ({new_name.name} already exists)")
        continue
    f.rename(new_name)
    print(f"Renamed: {f.name} -> {new_name.name}")

Organise files by extension

from pathlib import Path
import shutil

def organise_downloads(source: str = "Downloads") -> None:
    """Sort the files directly inside *source* into subfolders by extension.

    Only regular files at the top level of *source* are moved; existing
    subfolders are left alone. Extensions are matched case-insensitively and
    anything unrecognised goes to "Other". A file is never overwritten: on a
    name clash the moved file gets a numeric suffix (name_1.ext, name_2.ext, ...).

    Args:
        source: Folder to organise.

    Raises:
        FileNotFoundError: If *source* does not exist.
    """
    source_dir = Path(source)

    extension_map = {
        "Images":    {".jpg", ".jpeg", ".png", ".gif", ".webp", ".svg"},
        "Documents": {".pdf", ".doc", ".docx", ".txt", ".md", ".xlsx"},
        "Videos":    {".mp4", ".mov", ".avi", ".mkv"},
        "Audio":     {".mp3", ".wav", ".flac", ".aac"},
        "Archives":  {".zip", ".tar", ".gz", ".rar", ".7z"},
        "Code":      {".py", ".js", ".ts", ".html", ".css", ".json"},
    }

    # Build reverse lookup: extension -> folder name
    ext_to_folder = {
        ext: folder_name
        for folder_name, extensions in extension_map.items()
        for ext in extensions
    }

    moved = 0
    # Snapshot the listing: the loop creates folders and moves files inside
    # source_dir, and iterating a directory while it changes is unspecified.
    for file in list(source_dir.iterdir()):
        if not file.is_file():
            continue
        folder_name = ext_to_folder.get(file.suffix.lower(), "Other")
        dest_dir = source_dir / folder_name

        # A plain file that happens to share the category name (for example an
        # extension-less file called "Other") would make mkdir() raise.
        if dest_dir.exists() and not dest_dir.is_dir():
            print(f"Skipped: {file.name} ({folder_name} exists and is not a folder)")
            continue
        dest_dir.mkdir(exist_ok=True)

        # Avoid overwriting existing files
        dest = dest_dir / file.name
        i = 1
        while dest.exists():
            dest = dest_dir / f"{file.stem}_{i}{file.suffix}"
            i += 1

        shutil.move(str(file), str(dest))
        print(f"Moved: {file.name} -> {folder_name}/")
        moved += 1

    print(f"\nOrganised {moved} files.")


# Tidy the Downloads folder (the path is relative to the current working directory).
organise_downloads(source="Downloads")

Copy, move, and delete with shutil

import shutil
from pathlib import Path

# Copy a single file to a new location
shutil.copy(src="report.pdf", dst="backup/report.pdf")

# Same copy, but with copy2, which also tries to keep the file's metadata
shutil.copy2(src="report.pdf", dst="backup/report.pdf")

# Recursively copy a whole directory tree
shutil.copytree(src="project/", dst="project_backup/")

# Move (or simply rename) a file, then a directory
shutil.move(src="old_name.txt", dst="new_name.txt")
shutil.move(src="old_folder/", dst="archive/old_folder/")

# Remove a directory and everything below it
shutil.rmtree(path="temp_folder/")

# Get folder size
def folder_size(path: str) -> int:
    """Return the combined size in bytes of every file found below *path*."""
    total = 0
    for entry in Path(path).rglob("*"):
        # Directories are walked by rglob but contribute no size of their own.
        if entry.is_file():
            total += entry.stat().st_size
    return total

# Report the size of project/ in decimal megabytes (1 MB = 1,000,000 bytes).
size_bytes = folder_size(path="project/")
print(f"Folder size: {size_bytes / 1_000_000:.1f} MB")

Find files matching a pattern

from pathlib import Path

# All Python files, recursively
for f in Path(".").rglob("*.py"):
    print(f)

# Files larger than 10 MB
# Use one unit for both the filter and the display. Comparing against
# 10 * 1024 * 1024 while printing size / 1_000_000 made the two disagree.
MB = 1_000_000
for f in Path("Downloads").rglob("*"):
    if not f.is_file():
        continue
    size = f.stat().st_size   # stat once and reuse the result
    if size > 10 * MB:
        print(f"{f.name}: {size / MB:.1f} MB")

# Files modified in the last 7 days
import time
week_ago = time.time() - 7 * 24 * 3600   # cut-off as seconds since the epoch
for f in Path(".").rglob("*"):
    if f.is_file() and f.stat().st_mtime > week_ago:
        print(f)

Scheduling Tasks with schedule

pip install schedule
import schedule
import time
from datetime import datetime

def backup_database():
    """Copy production.db to a timestamped file under backups/.

    Registered with the scheduler to run every day at 2am. The backup name
    carries the start time as backup_YYYYMMDD_HHMM.db.
    """
    import shutil
    from pathlib import Path

    # shutil.copy does not create missing folders, so make sure backups/ exists.
    Path("backups").mkdir(exist_ok=True)
    shutil.copy("production.db", f"backups/backup_{datetime.now():%Y%m%d_%H%M}.db")
    print(f"Backup completed at {datetime.now()}")

def send_daily_report():
    """Print a timestamped "sending" message; the real delivery is left as a placeholder."""
    print(f"Sending report at {datetime.now()}")
    # ... send email, generate PDF, etc.

def cleanup_temp():
    """Delete the temp/ directory tree on a best-effort basis and confirm on stdout."""
    from shutil import rmtree

    # ignore_errors=True: a missing temp/ folder or an undeletable file must not raise.
    rmtree("temp/", ignore_errors=True)
    print("Temp files cleaned")

# Schedule tasks
schedule.every().day.at("02:00").do(backup_database)
schedule.every().day.at("08:00").do(send_daily_report)
schedule.every().hour.do(cleanup_temp)
schedule.every(30).minutes.do(lambda: print("Still running..."))
# Shown to demonstrate weekday scheduling: on Mondays the report job
# therefore runs twice (08:00 and 09:00).
schedule.every().monday.at("09:00").do(send_daily_report)

# Run the scheduler
print("Scheduler running. Press Ctrl+C to stop.")
try:
    while True:
        schedule.run_pending()
        time.sleep(60)   # check every minute
except KeyboardInterrupt:
    # Ctrl+C is the advertised way to stop, so exit cleanly instead of with a traceback.
    print("Scheduler stopped.")

Sending Emails with smtplib

import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text      import MIMEText
from email.mime.base      import MIMEBase
from email                import encoders
from pathlib              import Path


def send_email(
    to:          str | list[str],
    subject:     str,
    body:        str,
    attachments: list[str] = (),
    html:        bool = False,
    smtp_host:   str = "smtp.gmail.com",
    smtp_port:   int = 587,
    username:    str = "",
    password:    str = "",
) -> None:
    """Send an email with optional attachments over SMTP with STARTTLS.

    Args:
        to: One recipient address, or several addresses.
        subject: Subject line.
        body: Message text; treated as HTML when *html* is true.
        attachments: Paths of files to attach (each is sent as a generic
            binary attachment named after the file).
        html: Send *body* as text/html instead of text/plain.
        smtp_host: SMTP server host name.
        smtp_port: SMTP server port (a STARTTLS-capable submission port).
        username: Login name; also used as the From address.
        password: Login password.

    Raises:
        OSError: If an attachment cannot be read or the connection fails.
        smtplib.SMTPException: If the server rejects the login or the message.
    """
    import ssl

    # Normalise once so the To header and the summary line always agree,
    # whatever kind of iterable was passed.
    recipients = [to] if isinstance(to, str) else list(to)

    msg = MIMEMultipart()
    msg["From"]    = username
    msg["To"]      = ", ".join(recipients)
    msg["Subject"] = subject

    # Body
    msg.attach(MIMEText(body, "html" if html else "plain"))

    # Attachments
    for path_str in attachments:
        path = Path(path_str)
        part = MIMEBase("application", "octet-stream")
        part.set_payload(path.read_bytes())
        encoders.encode_base64(part)
        # Passing filename as a header parameter lets the email package quote
        # and encode it; interpolating it into the header value breaks on
        # names with spaces or non-ASCII characters.
        part.add_header("Content-Disposition", "attachment", filename=path.name)
        msg.attach(part)

    # Send
    with smtplib.SMTP(smtp_host, smtp_port) as server:
        # A default context verifies the server certificate and host name;
        # a bare starttls() encrypts without checking who is on the other end.
        server.starttls(context=ssl.create_default_context())
        server.login(username, password)
        server.send_message(msg)

    print(f"Email sent to {len(recipients)} recipient(s): {subject}")


# Usage
import os

# Credentials come from the environment. os.environ[...] raises KeyError when a
# variable is not set, so a missing credential stops the script before
# send_email is even called.
send_email(
    to       = "boss@example.com",
    subject  = "Daily Sales Report",
    body     = "<h1>Report</h1><p>See attached PDF.</p>",
    html     = True,   # the body above is HTML markup
    attachments = ["report.pdf"],
    username = os.environ["EMAIL_ADDRESS"],
    password = os.environ["EMAIL_PASSWORD"],
)

Use environment variables for credentials — never hardcode passwords. For Gmail, create an App Password in your Google Account security settings.

Working with PDFs

pip install pypdf reportlab

Read and extract text

from pypdf import PdfReader

reader = PdfReader("annual_report.pdf")

print(f"Pages: {len(reader.pages)}")

# Gather the text of every page in one pass; each page's text ends with a newline
full_text = "".join(page.extract_text() + "\n" for page in reader.pages)

print(full_text[:500])

Merge PDFs

from pypdf import PdfReader, PdfWriter

def merge_pdfs(paths: list[str], output: str) -> None:
    """Write every page of the PDFs in *paths*, in the order given, to *output*."""
    combined = PdfWriter()
    for source in paths:
        for pg in PdfReader(source).pages:
            combined.add_page(pg)

    with open(output, "wb") as out_file:
        combined.write(out_file)

    print(f"Merged {len(paths)} PDFs into {output}")


# Combine the three invoices, in the order listed, into a single file.
merge_pdfs(
    paths=["invoice_1.pdf", "invoice_2.pdf", "invoice_3.pdf"],
    output="all_invoices.pdf",
)

Split a PDF

from pypdf import PdfReader, PdfWriter
from pathlib import Path

def split_pdf(path: str, pages_per_chunk: int = 10) -> None:
    """Split a PDF into consecutive files of at most *pages_per_chunk* pages.

    The parts are written to the current working directory as
    <stem>_part_1.pdf, <stem>_part_2.pdf, ..., where <stem> is the source file
    name without its extension. The last part may hold fewer pages.

    Args:
        path: PDF file to split.
        pages_per_chunk: Maximum number of pages per output file (>= 1).

    Raises:
        ValueError: If *pages_per_chunk* is less than 1.
    """
    # Validate first: 0 would fail later with an opaque range() error and a
    # negative value would silently produce no output at all.
    if pages_per_chunk < 1:
        raise ValueError(f"pages_per_chunk must be at least 1, got {pages_per_chunk}")

    reader  = PdfReader(path)
    stem    = Path(path).stem
    total   = len(reader.pages)

    for part, start in enumerate(range(0, total, pages_per_chunk), start=1):
        writer = PdfWriter()
        end    = min(start + pages_per_chunk, total)
        for i in range(start, end):
            writer.add_page(reader.pages[i])
        output = f"{stem}_part_{part}.pdf"
        with open(output, "wb") as f:
            writer.write(f)
        print(f"Created {output} (pages {start+1}--{end})")


# Break the report into files of five pages each.
split_pdf(path="big_report.pdf", pages_per_chunk=5)

Generate a PDF from scratch with ReportLab

from reportlab.pdfgen  import canvas
from reportlab.lib.pagesizes import A4

def generate_invoice(filename: str, invoice_data: dict) -> None:
    """Render a simple invoice as an A4 PDF.

    Args:
        filename: Path of the PDF to create.
        invoice_data: Dict with the keys "number", "date", "client" and
            "items"; every item is a dict with "description", "qty" and
            "price". Each line total is qty * price.

    Long item lists continue on further pages instead of running off the
    bottom of the first one.
    """
    c = canvas.Canvas(filename, pagesize=A4)
    height = A4[1]
    top_y = height - 80      # first baseline on any page
    bottom_margin = 60       # nothing is drawn below this y coordinate

    # Title
    c.setFont("Helvetica-Bold", 20)
    c.drawString(50, top_y, "INVOICE")

    # Invoice details
    c.setFont("Helvetica", 12)
    c.drawString(50, height - 120, f"Invoice #: {invoice_data['number']}")
    c.drawString(50, height - 140, f"Date: {invoice_data['date']}")
    c.drawString(50, height - 160, f"To: {invoice_data['client']}")

    # Table header
    y = height - 220
    c.setFont("Helvetica-Bold", 11)
    c.drawString(50,  y, "Description")
    c.drawString(350, y, "Qty")
    c.drawString(420, y, "Price")
    c.drawString(490, y, "Total")
    c.line(50, y - 5, 550, y - 5)

    # Line items
    c.setFont("Helvetica", 11)
    total = 0
    for item in invoice_data["items"]:
        y -= 25
        if y < bottom_margin:
            # Page is full: start a new one. The font is set again because
            # the canvas text state does not carry over after showPage().
            c.showPage()
            c.setFont("Helvetica", 11)
            y = top_y
        subtotal = item["qty"] * item["price"]
        total   += subtotal
        c.drawString(50,  y, item["description"])
        c.drawString(350, y, str(item["qty"]))
        c.drawString(420, y, f"${item['price']:.2f}")
        c.drawString(490, y, f"${subtotal:.2f}")

    # Total (the rule and the amount need 20 points below y)
    y -= 30
    if y - 20 < bottom_margin:
        c.showPage()
        y = top_y
    c.line(50, y, 550, y)
    c.setFont("Helvetica-Bold", 12)
    c.drawString(420, y - 20, f"TOTAL: ${total:.2f}")

    c.save()
    print(f"Invoice saved to {filename}")


# Sample invoice with three line items
generate_invoice(
    filename="invoice_001.pdf",
    invoice_data={
        "number": "001",
        "date":   "2026-03-09",
        "client": "Acme Corp",
        "items": [
            {"description": "Python Consulting",  "qty": 10, "price": 150.00},
            {"description": "Code Review",        "qty": 5,  "price": 75.00},
            {"description": "Training Session",   "qty": 2,  "price": 500.00},
        ],
    },
)

Image Processing with Pillow

pip install Pillow
from PIL import Image, ImageDraw, ImageFont
from pathlib import Path


# ── Basic operations ──────────────────────────────────────────────────────────

# Open once inside a context manager so the file handle is closed when the
# examples are done (a bare Image.open() leaves it open).
with Image.open("photo.jpg") as img:
    print(img.size)    # (1920, 1080)
    print(img.mode)    # RGB

    # Resize
    img_small = img.resize((800, 450))
    img_small.save("photo_800.jpg", quality=85)

    # Crop
    region = img.crop((100, 100, 600, 400))   # (left, top, right, bottom)
    region.save("cropped.jpg")

    # Rotate
    rotated = img.rotate(90, expand=True)
    rotated.save("rotated.jpg")

    # Convert to greyscale
    grey = img.convert("L")
    grey.save("grey.jpg")

    # Convert format
    img.save("photo.png")   # saves as PNG regardless of original format


# ── Batch resize all images in a folder ───────────────────────────────────────

def batch_resize(
    folder:    str,
    max_width:  int = 800,
    max_height: int = 600,
    quality:    int = 85,
) -> None:
    """Save a resized copy of every image in *folder* into folder/resized/.

    Only files directly inside *folder* with a .jpg, .jpeg, .png or .webp
    extension (case-insensitive) are processed. Copies keep the original file
    name and aspect ratio and fit within max_width x max_height.

    Args:
        folder: Folder containing the source images.
        max_width: Maximum width of a resized copy, in pixels.
        max_height: Maximum height of a resized copy, in pixels.
        quality: Value passed to Image.save() as the quality setting.
    """
    source_dir = Path(folder)
    output_dir = source_dir / "resized"
    output_dir.mkdir(exist_ok=True)

    image_exts = {".jpg", ".jpeg", ".png", ".webp"}
    # is_file() keeps out directories that happen to carry an image-like suffix.
    files      = [f for f in source_dir.iterdir()
                  if f.is_file() and f.suffix.lower() in image_exts]

    for f in files:
        # The context manager closes each source file; otherwise a large batch
        # keeps one open handle per image.
        with Image.open(f) as img:
            img.thumbnail((max_width, max_height))   # resize in-place, keeps aspect ratio
            img.save(output_dir / f.name, quality=quality)
            new_size = img.size
        print(f"Resized: {f.name} -> {new_size}")

    print(f"Done. {len(files)} images resized.")


# Resize everything in product_photos/ using the default limits.
batch_resize(folder="product_photos")


# ── Add a watermark ───────────────────────────────────────────────────────────

def add_watermark(image_path: str, text: str, output_path: str) -> None:
    """Stamp semi-transparent white *text* in the bottom-right corner of an image.

    Args:
        image_path: Source image.
        text: Watermark text.
        output_path: Where to save the result. The image is converted to RGB
            before saving, so any transparency in the source is dropped.
    """
    # convert() returns an independent copy, so the source file can be closed
    # as soon as the RGBA version exists.
    with Image.open(image_path) as source:
        img = source.convert("RGBA")
    overlay = Image.new("RGBA", img.size, (255, 255, 255, 0))
    draw    = ImageDraw.Draw(overlay)

    # Font: scaled to the image width, never smaller than 20
    font_size = max(20, img.width // 30)
    try:
        font = ImageFont.truetype("arial.ttf", font_size)
    except OSError:
        # arial.ttf is not available; fall back to Pillow's built-in font.
        font = ImageFont.load_default()

    # Position: bottom-right, 20 px from both edges.
    # textbbox() is measured from the drawing origin and its left/top values
    # are usually not 0, so anchor on the right/bottom edges of the box rather
    # than on its width and height. max(0, ...) keeps over-long text on-canvas.
    bbox  = draw.textbbox((0, 0), text, font=font)
    x     = max(0, img.width  - bbox[2] - 20)
    y     = max(0, img.height - bbox[3] - 20)

    draw.text((x, y), text, fill=(255, 255, 255, 128), font=font)

    watermarked = Image.alpha_composite(img, overlay)
    watermarked.convert("RGB").save(output_path)
    print(f"Watermark added: {output_path}")


# Watermark one product photo and save it under a new name.
add_watermark(
    image_path="product.jpg",
    text="(c) MyBrand 2026",
    output_path="product_watermarked.jpg",
)

Automating Excel with openpyxl

pip install openpyxl
import openpyxl
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
from openpyxl.chart  import BarChart, Reference


def generate_sales_report(data: list[dict], filename: str = "report.xlsx") -> None:
    """Write a formatted sales workbook with a totals row and a revenue chart.

    Args:
        data: One dict per month with the keys "month", "revenue" and
            "orders". The average order value is revenue / orders, or 0 for a
            month without orders.
        filename: Path of the .xlsx file to create.
    """
    wb = openpyxl.Workbook()
    ws = wb.active
    ws.title = "Sales Report"

    # Style objects are created once and shared between cells.
    bold        = Font(bold=True)
    header_font = Font(bold=True, color="FFFFFF")
    header_fill = PatternFill("solid", fgColor="2196F3")
    stripe_fill = PatternFill("solid", fgColor="E3F2FD")
    centred     = Alignment(horizontal="center")

    # Header row
    headers = ["Month", "Revenue", "Orders", "Avg Order"]
    for col, header in enumerate(headers, start=1):
        cell = ws.cell(row=1, column=col, value=header)
        cell.font      = header_font
        cell.fill      = header_fill
        cell.alignment = centred

    # Data rows
    for row_idx, row in enumerate(data, start=2):
        orders = row["orders"]
        # A month with no orders has no meaningful average; avoid dividing by zero.
        avg_order = row["revenue"] / orders if orders else 0
        ws.cell(row=row_idx, column=1, value=row["month"])
        ws.cell(row=row_idx, column=2, value=row["revenue"])
        ws.cell(row=row_idx, column=3, value=orders)
        ws.cell(row=row_idx, column=4, value=avg_order)

        # Alternate row colour
        if row_idx % 2 == 0:
            for col in range(1, 5):
                ws.cell(row=row_idx, column=col).fill = stripe_fill

    # Total row
    total_row = len(data) + 2
    ws.cell(row=total_row, column=1, value="TOTAL").font = bold
    # Without data rows the SUM range would point at the header and the total
    # cell itself, so write a plain 0 instead.
    revenue_total = f"=SUM(B2:B{total_row-1})" if data else 0
    orders_total  = f"=SUM(C2:C{total_row-1})" if data else 0
    ws.cell(row=total_row, column=2, value=revenue_total).font = bold
    ws.cell(row=total_row, column=3, value=orders_total).font = bold

    # Format numbers: money with two decimals, order counts as whole numbers
    column_formats = ("#,##0.00", "#,##0", "#,##0.00")   # columns B, C, D
    for row in ws.iter_rows(min_row=2, max_row=total_row, min_col=2, max_col=4):
        for cell, number_format in zip(row, column_formats):
            cell.number_format = number_format

    # Column widths
    ws.column_dimensions["A"].width = 12
    ws.column_dimensions["B"].width = 15
    ws.column_dimensions["C"].width = 10
    ws.column_dimensions["D"].width = 15

    # Bar chart (skipped when there is nothing to plot)
    if data:
        chart = BarChart()
        chart.title = "Monthly Revenue"
        chart.y_axis.title = "Revenue (£)"
        chart.x_axis.title = "Month"

        # The data reference starts at row 1 so the header becomes the series title.
        data_ref   = Reference(ws, min_col=2, min_row=1, max_row=len(data)+1)
        cats_ref   = Reference(ws, min_col=1, min_row=2, max_row=len(data)+1)
        chart.add_data(data_ref, titles_from_data=True)
        chart.set_categories(cats_ref)
        chart.shape = 4
        ws.add_chart(chart, "F2")

    wb.save(filename)
    print(f"Report saved to {filename}")


# Six months of sample figures; the workbook goes to the default report.xlsx
generate_sales_report(
    data=[
        {"month": "Jan", "revenue": 42000, "orders": 180},
        {"month": "Feb", "revenue": 48500, "orders": 210},
        {"month": "Mar", "revenue": 45000, "orders": 195},
        {"month": "Apr", "revenue": 53000, "orders": 230},
        {"month": "May", "revenue": 61000, "orders": 265},
        {"month": "Jun", "revenue": 58000, "orders": 250},
    ],
)

Project: Automated Daily Report Generator

"""
daily_report.py

Generates a daily summary report and emails it.
Run this with: python daily_report.py
Or schedule it with cron / Task Scheduler.
"""
import os
import sqlite3
import smtplib
from datetime import date, timedelta
from email.mime.multipart import MIMEMultipart
from email.mime.text      import MIMEText
from email.mime.base      import MIMEBase
from email                import encoders
from pathlib              import Path
from reportlab.pdfgen     import canvas
from reportlab.lib.pagesizes import A4


# SQLite database file opened by fetch_yesterday_stats(); a relative path, so
# it is resolved against the current working directory.
DB_PATH      = "sales.db"
# Folder that generate_pdf_report() writes the PDF reports into.
REPORT_DIR   = Path("reports")
# Runs at import time: the folder is created if it does not exist yet.
REPORT_DIR.mkdir(exist_ok=True)


def fetch_yesterday_stats() -> dict:
    """Aggregate yesterday's rows of the orders table in DB_PATH.

    Returns:
        A dict with "date" (yesterday as YYYY-MM-DD), "orders", "revenue",
        "avg_order" and "customers". On a day without orders the counts are 0
        and the amounts 0.0.

    Raises:
        sqlite3.Error: If the database cannot be queried.
    """
    yesterday = (date.today() - timedelta(days=1)).isoformat()
    conn = sqlite3.connect(DB_PATH)
    try:
        orders, revenue, avg_order, customers = conn.execute("""
            SELECT
                COUNT(*)           AS orders,
                SUM(total)         AS revenue,
                AVG(total)         AS avg_order,
                COUNT(DISTINCT customer_id) AS customers
            FROM orders
            WHERE DATE(created_at) = ?
        """, (yesterday,)).fetchone()
    finally:
        # Close even when the query raises, so a failing run does not leak
        # the connection.
        conn.close()

    # SUM and AVG are NULL (None) when no row matches, hence the "or" fallbacks.
    return {
        "date":      yesterday,
        "orders":    orders or 0,
        "revenue":   revenue or 0.0,
        "avg_order": avg_order or 0.0,
        "customers": customers or 0,
    }


def generate_pdf_report(stats: dict) -> str:
    """Render the daily figures as a one-page A4 PDF inside REPORT_DIR.

    Args:
        stats: Dict with the keys "date", "orders", "revenue", "avg_order"
            and "customers".

    Returns:
        The path of the written file, report_<date>.pdf, as a string.
    """
    # Do not rely on the import-time mkdir alone: the folder may have been
    # removed since the module was loaded.
    REPORT_DIR.mkdir(parents=True, exist_ok=True)
    filename = REPORT_DIR / f"report_{stats['date']}.pdf"
    c = canvas.Canvas(str(filename), pagesize=A4)
    _, h = A4

    c.setFont("Helvetica-Bold", 18)
    c.drawString(50, h - 80, f"Daily Sales Report — {stats['date']}")

    items = [
        ("Total Orders",    f"{stats['orders']:,}"),
        ("Total Revenue",   f"£{stats['revenue']:,.2f}"),
        ("Avg Order Value", f"£{stats['avg_order']:.2f}"),
        ("Unique Customers", f"{stats['customers']:,}"),
    ]
    # One row per figure: bold label on the left, value in a second column.
    y = h - 140
    for label, value in items:
        c.setFont("Helvetica-Bold", 12)
        c.drawString(50, y, label + ":")
        c.setFont("Helvetica", 12)
        c.drawString(220, y, value)
        y -= 30

    c.save()
    return str(filename)


def send_report(pdf_path: str, stats: dict) -> None:
    """Email the PDF report with an HTML summary via Gmail's SMTP server.

    Sender, password and recipient are read from the environment variables
    EMAIL_FROM, EMAIL_PASSWORD and REPORT_RECIPIENT.

    Args:
        pdf_path: Path of the PDF to attach.
        stats: Dict with the keys "date", "orders", "revenue", "avg_order"
            and "customers", used for the subject and the HTML body.

    Raises:
        KeyError: If one of the environment variables is not set.
        OSError: If the PDF cannot be read or the connection fails.
        smtplib.SMTPException: If the server rejects the login or the message.
    """
    import ssl

    # Read every setting up front so a missing variable fails before any
    # network connection is opened.
    sender    = os.environ["EMAIL_FROM"]
    recipient = os.environ["REPORT_RECIPIENT"]
    password  = os.environ["EMAIL_PASSWORD"]

    msg = MIMEMultipart()
    msg["From"]    = sender
    msg["To"]      = recipient
    msg["Subject"] = f"Daily Report — {stats['date']}"

    body = f"""
    <h2>Daily Sales Summary — {stats['date']}</h2>
    <ul>
        <li><b>Orders:</b> {stats['orders']:,}</li>
        <li><b>Revenue:</b> £{stats['revenue']:,.2f}</li>
        <li><b>Avg Order:</b> £{stats['avg_order']:.2f}</li>
        <li><b>Customers:</b> {stats['customers']:,}</li>
    </ul>
    <p>Full report attached.</p>
    """
    msg.attach(MIMEText(body, "html"))

    # Label the attachment as a PDF so mail clients can preview it.
    part = MIMEBase("application", "pdf")
    part.set_payload(Path(pdf_path).read_bytes())
    encoders.encode_base64(part)
    # filename as a header parameter is quoted/encoded by the email package.
    part.add_header("Content-Disposition", "attachment",
                    filename=Path(pdf_path).name)
    msg.attach(part)

    with smtplib.SMTP("smtp.gmail.com", 587) as server:
        # Verify the server certificate instead of encrypting blindly.
        server.starttls(context=ssl.create_default_context())
        server.login(sender, password)
        server.send_message(msg)

    print(f"Report emailed to {msg['To']}")


def main() -> None:
    """Fetch yesterday's figures, render the PDF, email it, and report where it was saved."""
    stats = fetch_yesterday_stats()
    pdf = generate_pdf_report(stats)
    send_report(pdf, stats)
    print(f"Done. Report saved to {pdf}")


if __name__ == "__main__":
    main()

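Schedule it on Linux/Mac with cron. Cron runs jobs with a minimal environment and from your home directory, so make sure EMAIL_FROM, EMAIL_PASSWORD, and REPORT_RECIPIENT are defined for the job (for example at the top of the crontab) and that sales.db and the reports folder can be found from cron's working directory: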

# Run at 7am every day
0 7 * * * /usr/bin/python3 /home/user/daily_report.py

Schedule it on Windows with Task Scheduler:

  1. Open Task Scheduler -> Create Basic Task
  2. Set trigger: Daily, 7:00 AM
  3. Action: Start a program -> python.exe -> arguments: C:\scripts\daily_report.py

What You Learned in This Chapter

  • pathlib.Path.glob() and .rglob() find files matching a pattern. .rename(), .mkdir(), .stat() manage files.
  • shutil.copy(), shutil.move(), shutil.rmtree(), shutil.copytree() move and copy files and directories.
  • schedule runs Python functions on a timer — daily, hourly, weekly, or on a custom interval.
  • smtplib + email.mime.* sends emails with HTML bodies and file attachments. Always use environment variables for credentials.
  • pypdf.PdfReader reads and extracts text from PDFs. PdfWriter merges and splits them. reportlab generates PDFs from scratch.
  • PIL.Image (Pillow) opens, resizes, crops, rotates, converts, and saves images. thumbnail() resizes while preserving aspect ratio.
  • openpyxl reads and writes Excel files with formatting, formulas, and charts.
  • cron (or Task Scheduler, or schedule) + sqlite3 + reportlab + smtplib together make a fully automated reporting pipeline.

What's Next?

Chapter 47 is the first of the Advanced Projects — you'll build a complete REST API with FastAPI, SQLAlchemy, JWT authentication, and a full pytest test suite from scratch.

© 2026 Abhilash Sahoo. Python: Zero to Hero.