Chapter 46: Automation and Scripting
The best programmers are lazy — in the best way. They hate doing the same thing twice. When they spot a repetitive task, they write a script to do it for them.
Renaming 500 files? Three lines of Python. Sending a weekly report by email? Ten lines. Downloading 100 PDFs and merging them? Twenty lines. Resizing 200 images? Fifteen lines.
This chapter is your automation toolkit.
File System Automation with pathlib and shutil
You already know pathlib from Chapter 8. Here's how to put it to work at scale.
Rename files in bulk
from datetime import date
from pathlib import Path

folder = Path("photos")

# Rename all .jpeg files to .jpg.
# Snapshot the matches with list() first: renaming entries while the
# directory is still being scanned can make the scan skip or revisit files.
for f in list(folder.glob("*.jpeg")):
    target = f.with_suffix(".jpg")
    f.rename(target)
    print(f"Renamed: {f.name} -> {target.name}")

# Add a date prefix to all .txt files.
# The prefixed names still match "*.txt", so the snapshot also guarantees
# that each file is prefixed exactly once.
today = date.today().strftime("%Y-%m-%d")
for f in list(folder.glob("*.txt")):
    new_name = f.parent / f"{today}_{f.name}"
    f.rename(new_name)
    print(f"Renamed: {f.name} -> {new_name.name}")
Organise files by extension
from pathlib import Path
import shutil
def organise_downloads(source: str = "Downloads") -> None:
    """Sort the files directly inside *source* into subfolders by extension.

    Each regular file is moved into a category subfolder of *source*
    ("Images", "Documents", ...) chosen from its lower-cased suffix; files
    whose suffix is not listed go to "Other". Subdirectories are left
    untouched. When the destination name is already taken, "_1", "_2", ...
    is appended to the stem until a free name is found.

    Args:
        source: Path of the folder to organise.

    Raises:
        OSError: Propagated from listing the folder, creating a subfolder,
            or moving a file (for example when *source* does not exist).
    """
    source_dir = Path(source)
    extension_map = {
        "Images": {".jpg", ".jpeg", ".png", ".gif", ".webp", ".svg"},
        "Documents": {".pdf", ".doc", ".docx", ".txt", ".md", ".xlsx"},
        "Videos": {".mp4", ".mov", ".avi", ".mkv"},
        "Audio": {".mp3", ".wav", ".flac", ".aac"},
        "Archives": {".zip", ".tar", ".gz", ".rar", ".7z"},
        "Code": {".py", ".js", ".ts", ".html", ".css", ".json"},
    }
    # Reverse lookup: extension -> folder name
    ext_to_folder = {
        ext: folder_name
        for folder_name, extensions in extension_map.items()
        for ext in extensions
    }

    moved = 0
    # Snapshot the listing first: the loop creates subfolders in, and moves
    # files out of, the very directory it is walking.
    for file in list(source_dir.iterdir()):
        if not file.is_file():
            continue
        folder_name = ext_to_folder.get(file.suffix.lower(), "Other")
        dest_dir = source_dir / folder_name
        dest_dir.mkdir(exist_ok=True)

        # Avoid overwriting existing files: try stem_1, stem_2, ...
        dest = dest_dir / file.name
        i = 1
        while dest.exists():
            dest = dest_dir / f"{file.stem}_{i}{file.suffix}"
            i += 1

        shutil.move(str(file), str(dest))
        # Mention the new name when a collision forced a rename.
        renamed = f" (as {dest.name})" if dest.name != file.name else ""
        print(f"Moved: {file.name} -> {folder_name}/{renamed}")
        moved += 1

    print(f"\nOrganised {moved} files.")
# Example run: organise the "Downloads" folder (a path relative to the
# current working directory).
organise_downloads("Downloads")
Copy, move, and delete with shutil
import shutil
from pathlib import Path
# NOTE(review): these are standalone examples; they presumably expect the
# named files and the backup/ and archive/ folders to exist — confirm before
# running them as a script.
# Copy a file (the destination is given as a full path, file name included)
shutil.copy("report.pdf", "backup/report.pdf")
# Copy a file and preserve metadata (targets the same destination path as
# the plain copy above)
shutil.copy2("report.pdf", "backup/report.pdf")
# Copy an entire directory
shutil.copytree("project/", "project_backup/")
# Move (rename) a file or directory
shutil.move("old_name.txt", "new_name.txt")
shutil.move("old_folder/", "archive/old_folder/")
# Delete a directory tree (the folder and everything inside it)
shutil.rmtree("temp_folder/")
# Get folder size
def folder_size(path: str) -> int:
    """Return the combined size in bytes of every file found under *path*.

    The search is recursive; only entries for which ``is_file()`` is true
    contribute to the total.
    """
    total = 0
    for entry in Path(path).rglob("*"):
        if entry.is_file():
            total += entry.stat().st_size
    return total


size_bytes = folder_size("project/")
print(f"Folder size: {size_bytes / 1_000_000:.1f} MB")
Find files matching a pattern
from pathlib import Path
# All Python files, recursively
for f in Path(".").rglob("*.py"):
    print(f)

# Files larger than 10 MB.
# The threshold and the displayed size both use decimal megabytes
# (1 MB = 1_000_000 bytes) so the filter agrees with what is printed.
MB = 1_000_000
for f in Path("Downloads").rglob("*"):
    if not f.is_file():
        continue
    size = f.stat().st_size  # stat once, reuse for the test and the message
    if size > 10 * MB:
        print(f"{f.name}: {size / MB:.1f} MB")

# Files modified in the last 7 days
import time
week_ago = time.time() - 7 * 24 * 3600
for f in Path(".").rglob("*"):
    if f.is_file() and f.stat().st_mtime > week_ago:
        print(f)
Scheduling Tasks with schedule
pip install schedule
import schedule
import time
from datetime import datetime
def backup_database():
    """Copy production.db to a timestamped file under backups/.

    Intended to run every day at 2am. The backups/ folder is created when it
    is missing, so the first run on a fresh machine does not fail. The file
    name carries the start time as ``backup_YYYYMMDD_HHMM.db``.

    Raises:
        OSError: If production.db cannot be read or the copy cannot be
            written.
    """
    import shutil
    from pathlib import Path

    backup_dir = Path("backups")
    backup_dir.mkdir(parents=True, exist_ok=True)
    target = backup_dir / f"backup_{datetime.now():%Y%m%d_%H%M}.db"
    shutil.copy("production.db", target)
    print(f"Backup completed at {datetime.now()}")
def send_daily_report():
    """Log that the daily report is being sent.

    The actual delivery is left as a placeholder.
    """
    started_at = datetime.now()
    print(f"Sending report at {started_at}")
    # ... send email, generate PDF, etc.
def cleanup_temp():
    """Delete the temp/ directory tree, ignoring errors, then log it."""
    from shutil import rmtree

    rmtree("temp/", ignore_errors=True)
    print("Temp files cleaned")
# Schedule tasks
schedule.every().day.at("02:00").do(backup_database)
schedule.every().day.at("08:00").do(send_daily_report)
schedule.every().hour.do(cleanup_temp)
schedule.every(30).minutes.do(lambda: print("Still running..."))
# Registered in addition to the daily 08:00 job above.
schedule.every().monday.at("09:00").do(send_daily_report)

# Run the scheduler
print("Scheduler running. Press Ctrl+C to stop.")
try:
    while True:
        schedule.run_pending()
        time.sleep(60)  # check every minute
except KeyboardInterrupt:
    # Ctrl+C is the advertised way to stop, so leave without a traceback.
    print("Scheduler stopped.")
Sending Emails with smtplib
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.base import MIMEBase
from email import encoders
from pathlib import Path
def send_email(
    to: str | list[str],
    subject: str,
    body: str,
    attachments: list[str] | tuple[str, ...] = (),
    html: bool = False,
    smtp_host: str = "smtp.gmail.com",
    smtp_port: int = 587,
    username: str = "",
    password: str = "",
) -> None:
    """Send an email with optional attachments over SMTP with STARTTLS.

    Args:
        to: One recipient address, or a sequence of addresses.
        subject: Subject line.
        body: Message body; treated as HTML when *html* is true, otherwise
            as plain text.
        attachments: Paths of files to attach.
        html: Whether *body* is HTML.
        smtp_host: SMTP server host name.
        smtp_port: SMTP server port.
        username: Login name; also used as the From address.
        password: Login password.

    Raises:
        OSError: If an attachment cannot be read or the connection fails.
        smtplib.SMTPException: If the server rejects the login or message.
    """
    # Normalise once so the header and the summary line agree, whatever
    # sequence type the caller passed.
    recipients = [to] if isinstance(to, str) else list(to)

    msg = MIMEMultipart()
    msg["From"] = username
    msg["To"] = ", ".join(recipients)
    msg["Subject"] = subject

    # Body
    msg.attach(MIMEText(body, "html" if html else "plain"))

    # Attachments
    for path_str in attachments:
        path = Path(path_str)
        part = MIMEBase("application", "octet-stream")
        part.set_payload(path.read_bytes())
        encoders.encode_base64(part)
        # Pass the file name as a header parameter so it is quoted/encoded
        # properly; pasting it into the header text breaks on names that
        # contain spaces or non-ASCII characters.
        part.add_header("Content-Disposition", "attachment", filename=path.name)
        msg.attach(part)

    # Send
    with smtplib.SMTP(smtp_host, smtp_port) as server:
        server.starttls()
        server.login(username, password)
        server.send_message(msg)

    print(f"Email sent to {len(recipients)} recipient(s): {subject}")
# Usage
import os
# Credentials are read from the environment first; a missing variable raises
# KeyError before send_email is called.
sender_address = os.environ["EMAIL_ADDRESS"]
sender_password = os.environ["EMAIL_PASSWORD"]

send_email(
    to="boss@example.com",
    subject="Daily Sales Report",
    body="<h1>Report</h1><p>See attached PDF.</p>",
    html=True,
    attachments=["report.pdf"],
    username=sender_address,
    password=sender_password,
)
Use environment variables for credentials — never hardcode passwords. For Gmail, create an App Password in your Google Account security settings.
Working with PDFs
pip install pypdf reportlab
Read and extract text
from pypdf import PdfReader
reader = PdfReader("annual_report.pdf")
page_count = len(reader.pages)
print(f"Pages: {page_count}")

# Extract text from all pages; each page's text is followed by a newline
full_text = "".join(page.extract_text() + "\n" for page in reader.pages)

# Preview the first 500 characters
print(full_text[:500])
Merge PDFs
from pypdf import PdfReader, PdfWriter
def merge_pdfs(paths: list[str], output: str) -> None:
    """Append the pages of every PDF in *paths*, in order, and write *output*."""
    merged = PdfWriter()
    for source in paths:
        for page in PdfReader(source).pages:
            merged.add_page(page)

    with open(output, "wb") as out_file:
        merged.write(out_file)

    print(f"Merged {len(paths)} PDFs into {output}")


merge_pdfs(["invoice_1.pdf", "invoice_2.pdf", "invoice_3.pdf"], "all_invoices.pdf")
Split a PDF
from pypdf import PdfReader, PdfWriter
from pathlib import Path
def split_pdf(path: str, pages_per_chunk: int = 10) -> None:
    """Split a PDF into consecutive files of at most *pages_per_chunk* pages.

    Each part is written to the current working directory as
    ``<stem>_part_<n>.pdf``, where ``<stem>`` is the input file name without
    its extension and ``<n>`` counts from 1.

    Args:
        path: PDF file to split.
        pages_per_chunk: Maximum number of pages per output file.

    Raises:
        ValueError: If *pages_per_chunk* is less than 1.
    """
    # Reject bad chunk sizes up front: zero would fail inside range() with an
    # unrelated message, and a negative size would silently write nothing.
    if pages_per_chunk < 1:
        raise ValueError(
            f"pages_per_chunk must be at least 1, got {pages_per_chunk}"
        )

    reader = PdfReader(path)
    stem = Path(path).stem
    total = len(reader.pages)
    for start in range(0, total, pages_per_chunk):
        end = min(start + pages_per_chunk, total)
        writer = PdfWriter()
        for i in range(start, end):
            writer.add_page(reader.pages[i])
        output = f"{stem}_part_{start // pages_per_chunk + 1}.pdf"
        with open(output, "wb") as f:
            writer.write(f)
        # Page numbers in the message are 1-based and inclusive.
        print(f"Created {output} (pages {start+1}--{end})")
# Example: split big_report.pdf into parts of up to 5 pages each.
split_pdf("big_report.pdf", pages_per_chunk=5)
Generate a PDF from scratch with ReportLab
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import A4
def generate_invoice(filename: str, invoice_data: dict) -> None:
    """Render a simple invoice PDF with ReportLab.

    Args:
        filename: Path of the PDF to write.
        invoice_data: Mapping with the keys "number", "date", "client" and
            "items"; each item is a mapping with "description", "qty" and
            "price".

    Long item lists continue on additional pages instead of running off the
    bottom of the first one.
    """
    c = canvas.Canvas(filename, pagesize=A4)
    _, height = A4
    top_y = height - 80   # first baseline on a fresh page
    bottom_margin = 80    # nothing is drawn below this y coordinate

    # Title
    c.setFont("Helvetica-Bold", 20)
    c.drawString(50, top_y, "INVOICE")

    # Invoice details
    c.setFont("Helvetica", 12)
    c.drawString(50, height - 120, f"Invoice #: {invoice_data['number']}")
    c.drawString(50, height - 140, f"Date: {invoice_data['date']}")
    c.drawString(50, height - 160, f"To: {invoice_data['client']}")

    # Table header
    y = height - 220
    c.setFont("Helvetica-Bold", 11)
    c.drawString(50, y, "Description")
    c.drawString(350, y, "Qty")
    c.drawString(420, y, "Price")
    c.drawString(490, y, "Total")
    c.line(50, y - 5, 550, y - 5)

    # Line items
    c.setFont("Helvetica", 11)
    total = 0
    for item in invoice_data["items"]:
        y -= 25
        if y < bottom_margin:
            # Out of room: continue on a new page. Starting a page resets
            # the canvas font, so it has to be selected again.
            c.showPage()
            c.setFont("Helvetica", 11)
            y = top_y
        subtotal = item["qty"] * item["price"]
        total += subtotal
        c.drawString(50, y, item["description"])
        c.drawString(350, y, str(item["qty"]))
        c.drawString(420, y, f"${item['price']:.2f}")
        c.drawString(490, y, f"${subtotal:.2f}")

    # Total (rule at y, text 20 points below it)
    y -= 30
    if y - 20 < bottom_margin:
        c.showPage()
        y = top_y
    c.line(50, y, 550, y)
    c.setFont("Helvetica-Bold", 12)
    c.drawString(420, y - 20, f"TOTAL: ${total:.2f}")

    c.save()
    print(f"Invoice saved to {filename}")


generate_invoice("invoice_001.pdf", {
    "number": "001",
    "date": "2026-03-09",
    "client": "Acme Corp",
    "items": [
        {"description": "Python Consulting", "qty": 10, "price": 150.00},
        {"description": "Code Review", "qty": 5, "price": 75.00},
        {"description": "Training Session", "qty": 2, "price": 500.00},
    ],
})
Image Processing with Pillow
pip install Pillow
from PIL import Image, ImageDraw, ImageFont
from pathlib import Path
# ── Basic operations ──────────────────────────────────────────────────────────
# Open inside a ``with`` block so the underlying file is closed once every
# derived image has been saved.
with Image.open("photo.jpg") as img:
    print(img.size)  # (1920, 1080)
    print(img.mode)  # RGB

    # Resize
    img_small = img.resize((800, 450))
    img_small.save("photo_800.jpg", quality=85)

    # Crop
    region = img.crop((100, 100, 600, 400))  # (left, top, right, bottom)
    region.save("cropped.jpg")

    # Rotate
    rotated = img.rotate(90, expand=True)
    rotated.save("rotated.jpg")

    # Convert to greyscale
    grey = img.convert("L")
    grey.save("grey.jpg")

    # Convert format
    img.save("photo.png")  # saves as PNG regardless of original format
# ── Batch resize all images in a folder ───────────────────────────────────────
def batch_resize(
    folder: str,
    max_width: int = 800,
    max_height: int = 600,
    quality: int = 85,
) -> None:
    """Write resized copies of the images in *folder* to ``folder/resized``.

    Only files directly inside *folder* with a .jpg, .jpeg, .png or .webp
    suffix (case-insensitive) are processed. Each copy keeps the original
    file name and is scaled to fit within *max_width* x *max_height*.

    Args:
        folder: Folder containing the source images.
        max_width: Maximum width of a resized image, in pixels.
        max_height: Maximum height of a resized image, in pixels.
        quality: Value passed to ``Image.save`` as ``quality``.
    """
    source_dir = Path(folder)
    output_dir = source_dir / "resized"
    output_dir.mkdir(exist_ok=True)
    image_exts = {".jpg", ".jpeg", ".png", ".webp"}
    # is_file() keeps out directories that happen to carry an image suffix.
    files = [
        f for f in source_dir.iterdir()
        if f.is_file() and f.suffix.lower() in image_exts
    ]
    for f in files:
        # Close each image as soon as it is saved so a large batch does not
        # pile up open file handles.
        with Image.open(f) as img:
            img.thumbnail((max_width, max_height))  # resize in-place, keeps aspect ratio
            img.save(output_dir / f.name, quality=quality)
            print(f"Resized: {f.name} -> {img.size}")
    print(f"Done. {len(files)} images resized.")


batch_resize("product_photos")
# ── Add a watermark ───────────────────────────────────────────────────────────
def add_watermark(image_path: str, text: str, output_path: str) -> None:
    """Stamp semi-transparent white *text* in an image's bottom-right corner.

    Args:
        image_path: Image to read.
        text: Watermark text.
        output_path: Where the watermarked image is saved, after conversion
            to RGB.
    """
    # Close the source file as soon as its pixels have been converted.
    with Image.open(image_path) as source:
        img = source.convert("RGBA")

    # Draw on a fully transparent layer so the text alpha is honoured when
    # the two layers are composited.
    overlay = Image.new("RGBA", img.size, (255, 255, 255, 0))
    draw = ImageDraw.Draw(overlay)

    # Font size scales with the image width, but never drops below 20.
    font_size = max(20, img.width // 30)
    try:
        font = ImageFont.truetype("arial.ttf", font_size)
    except OSError:
        # arial.ttf could not be loaded; fall back to the default font.
        font = ImageFont.load_default()

    # Position: bottom-right, 20 px in from each edge
    bbox = draw.textbbox((0, 0), text, font=font)
    tw = bbox[2] - bbox[0]
    th = bbox[3] - bbox[1]
    x = img.width - tw - 20
    y = img.height - th - 20
    draw.text((x, y), text, fill=(255, 255, 255, 128), font=font)

    watermarked = Image.alpha_composite(img, overlay)
    watermarked.convert("RGB").save(output_path)
    print(f"Watermark added: {output_path}")


add_watermark("product.jpg", "(c) MyBrand 2026", "product_watermarked.jpg")
Automating Excel with openpyxl
pip install openpyxl
import openpyxl
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
from openpyxl.chart import BarChart, Reference
def generate_sales_report(data: list[dict], filename: str = "report.xlsx") -> None:
    """Build a formatted Excel sales report with a revenue bar chart.

    Args:
        data: One mapping per month with the keys "month", "revenue" and
            "orders".
        filename: Path of the workbook to write.

    A month with zero orders gets an average order value of 0 instead of
    raising ZeroDivisionError.
    """
    wb = openpyxl.Workbook()
    ws = wb.active
    ws.title = "Sales Report"

    # Header row
    headers = ["Month", "Revenue", "Orders", "Avg Order"]
    header_font = Font(bold=True, color="FFFFFF")
    header_fill = PatternFill("solid", fgColor="2196F3")
    for col, header in enumerate(headers, start=1):
        cell = ws.cell(row=1, column=col, value=header)
        cell.font = header_font
        cell.fill = header_fill
        cell.alignment = Alignment(horizontal="center")

    # Data rows
    stripe_fill = PatternFill("solid", fgColor="E3F2FD")
    for row_idx, row in enumerate(data, start=2):
        orders = row["orders"]
        avg_order = row["revenue"] / orders if orders else 0
        values = (row["month"], row["revenue"], orders, avg_order)
        for col, value in enumerate(values, start=1):
            ws.cell(row=row_idx, column=col, value=value)
        # Alternate row colour
        if row_idx % 2 == 0:
            for col in range(1, 5):
                ws.cell(row=row_idx, column=col).fill = stripe_fill

    # Total row
    total_row = len(data) + 2
    bold = Font(bold=True)
    ws.cell(row=total_row, column=1, value="TOTAL").font = bold
    ws.cell(row=total_row, column=2,
            value=f"=SUM(B2:B{total_row-1})").font = bold
    ws.cell(row=total_row, column=3,
            value=f"=SUM(C2:C{total_row-1})").font = bold

    # Format numbers: money columns get two decimals, the order count is a
    # whole number.
    column_formats = {2: "#,##0.00", 3: "#,##0", 4: "#,##0.00"}
    for col, number_format in column_formats.items():
        for row_idx in range(2, total_row + 1):
            ws.cell(row=row_idx, column=col).number_format = number_format

    # Column widths
    ws.column_dimensions["A"].width = 12
    ws.column_dimensions["B"].width = 15
    ws.column_dimensions["C"].width = 10
    ws.column_dimensions["D"].width = 15

    # Bar chart: revenue (column B, header row included so it becomes the
    # series title) against month names (column A)
    chart = BarChart()
    chart.title = "Monthly Revenue"
    chart.y_axis.title = "Revenue (£)"
    chart.x_axis.title = "Month"
    data_ref = Reference(ws, min_col=2, min_row=1, max_row=len(data)+1)
    cats_ref = Reference(ws, min_col=1, min_row=2, max_row=len(data)+1)
    chart.add_data(data_ref, titles_from_data=True)
    chart.set_categories(cats_ref)
    chart.shape = 4
    ws.add_chart(chart, "F2")

    wb.save(filename)
    print(f"Report saved to {filename}")


generate_sales_report([
    {"month": "Jan", "revenue": 42000, "orders": 180},
    {"month": "Feb", "revenue": 48500, "orders": 210},
    {"month": "Mar", "revenue": 45000, "orders": 195},
    {"month": "Apr", "revenue": 53000, "orders": 230},
    {"month": "May", "revenue": 61000, "orders": 265},
    {"month": "Jun", "revenue": 58000, "orders": 250},
])
Project: Automated Daily Report Generator
"""
daily_report.py
Generates a daily summary report and emails it.
Run this with: python daily_report.py
Or schedule it with cron / Task Scheduler.
"""
import os
import sqlite3
import smtplib
from datetime import date, timedelta
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.base import MIMEBase
from email import encoders
from pathlib import Path
from reportlab.pdfgen import canvas
from reportlab.lib.pagesizes import A4
# SQLite database file that fetch_yesterday_stats() connects to.
DB_PATH = "sales.db"
# Folder that generate_pdf_report() writes its PDF files into.
REPORT_DIR = Path("reports")
# Runs at import time, so the folder exists before any report is generated.
REPORT_DIR.mkdir(exist_ok=True)
def fetch_yesterday_stats() -> dict:
yesterday = (date.today() - timedelta(days=1)).isoformat()
conn = sqlite3.connect(DB_PATH)
cur = conn.cursor()
stats = cur.execute("""
SELECT
COUNT(*) AS orders,
SUM(total) AS revenue,
AVG(total) AS avg_order,
COUNT(DISTINCT customer_id) AS customers
FROM orders
WHERE DATE(created_at) = ?
""", (yesterday,)).fetchone()
conn.close()
return {
"date": yesterday,
"orders": stats[0] or 0,
"revenue": stats[1] or 0.0,
"avg_order": stats[2] or 0.0,
"customers": stats[3] or 0,
}
def generate_pdf_report(stats: dict) -> str:
    """Write a PDF summary of *stats* into REPORT_DIR and return its path.

    The file is named ``report_<date>.pdf`` after ``stats["date"]``; the
    page lists orders, revenue, average order value and unique customers.
    """
    pdf_path = REPORT_DIR / f"report_{stats['date']}.pdf"
    page = canvas.Canvas(str(pdf_path), pagesize=A4)
    _, page_height = A4

    # Heading
    page.setFont("Helvetica-Bold", 18)
    page.drawString(50, page_height - 80, f"Daily Sales Report — {stats['date']}")
    page.setFont("Helvetica", 13)

    # One "label: value" line per metric, 30 points apart
    rows = (
        ("Total Orders", f"{stats['orders']:,}"),
        ("Total Revenue", f"£{stats['revenue']:,.2f}"),
        ("Avg Order Value", f"£{stats['avg_order']:.2f}"),
        ("Unique Customers", f"{stats['customers']:,}"),
    )
    baseline = page_height - 140
    for caption, figure in rows:
        page.setFont("Helvetica-Bold", 12)
        page.drawString(50, baseline, caption + ":")
        page.setFont("Helvetica", 12)
        page.drawString(220, baseline, figure)
        baseline -= 30

    page.save()
    return str(pdf_path)
def send_report(pdf_path: str, stats: dict) -> None:
    """Email the PDF report together with an HTML summary of *stats*.

    The sender, password and recipient come from the EMAIL_FROM,
    EMAIL_PASSWORD and REPORT_RECIPIENT environment variables.

    Args:
        pdf_path: Path of the PDF to attach.
        stats: Mapping with "date", "orders", "revenue", "avg_order" and
            "customers".

    Raises:
        KeyError: If any of the three environment variables is missing;
            this is checked before any file or network access.
        OSError: If the PDF cannot be read or the connection fails.
        smtplib.SMTPException: If the server rejects the login or message.
    """
    # Read every setting up front so a missing variable fails immediately,
    # not after the SMTP connection and TLS handshake are already done.
    sender = os.environ["EMAIL_FROM"]
    recipient = os.environ["REPORT_RECIPIENT"]
    password = os.environ["EMAIL_PASSWORD"]

    msg = MIMEMultipart()
    msg["From"] = sender
    msg["To"] = recipient
    msg["Subject"] = f"Daily Report — {stats['date']}"

    body = f"""
<h2>Daily Sales Summary — {stats['date']}</h2>
<ul>
<li><b>Orders:</b> {stats['orders']:,}</li>
<li><b>Revenue:</b> £{stats['revenue']:,.2f}</li>
<li><b>Avg Order:</b> £{stats['avg_order']:.2f}</li>
<li><b>Customers:</b> {stats['customers']:,}</li>
</ul>
<p>Full report attached.</p>
"""
    msg.attach(MIMEText(body, "html"))

    attachment = Path(pdf_path)
    part = MIMEBase("application", "octet-stream")
    part.set_payload(attachment.read_bytes())
    encoders.encode_base64(part)
    # Pass the file name as a header parameter so it is quoted correctly.
    part.add_header("Content-Disposition", "attachment", filename=attachment.name)
    msg.attach(part)

    with smtplib.SMTP("smtp.gmail.com", 587) as server:
        server.starttls()
        server.login(sender, password)
        server.send_message(msg)

    print(f"Report emailed to {recipient}")
if __name__ == "__main__":
    # Pipeline: query yesterday's figures, render the PDF, email it.
    yesterday_stats = fetch_yesterday_stats()
    report_path = generate_pdf_report(yesterday_stats)
    send_report(report_path, yesterday_stats)
    print(f"Done. Report saved to {report_path}")
Schedule it on Linux/Mac with cron:
# Run at 7am every day
0 7 * * * /usr/bin/python3 /home/user/daily_report.py
Schedule it on Windows with Task Scheduler:
- Open Task Scheduler -> Create Basic Task
- Set trigger: Daily, 7:00 AM
- Action: Start a program -> python.exe -> arguments: C:\scripts\daily_report.py
What You Learned in This Chapter
- pathlib.Path.glob() and .rglob() find files matching a pattern. .rename(), .mkdir(), .stat() manage files.
- shutil.copy(), shutil.move(), shutil.rmtree(), shutil.copytree() copy, move, and delete files and directories.
- schedule runs Python functions on a timer — daily, hourly, weekly, or on a custom interval.
- smtplib + email.mime.* sends emails with HTML bodies and file attachments. Always use environment variables for credentials.
- pypdf.PdfReader reads and extracts text from PDFs. PdfWriter merges and splits them.
- reportlab generates PDFs from scratch.
- PIL.Image (Pillow) opens, resizes, crops, rotates, converts, and saves images. thumbnail() resizes while preserving aspect ratio.
- openpyxl reads and writes Excel files with formatting, formulas, and charts.
- schedule + smtplib + reportlab together make a fully automated reporting pipeline.
What's Next?
Chapter 47 is the first of the Advanced Projects — you'll build a complete REST API with FastAPI, SQLAlchemy, JWT authentication, and a full pytest test suite from scratch.