
最近折腾 Python 自动化脚本,踩了几个坑,这篇把经验整理清楚。Python 确实是自动化领域的首选语言:语法简洁、库生态丰富、跨平台。无论是文件处理、网页抓取、API(应用程序接口)调用还是系统管理,一套都能搞定。
给一堆文件重命名是最常见的痛点,用正则匹配几行搞定:
import os
import re


def batch_rename(folder, pattern, replacement):
    """Rename the entries of ``folder`` by applying a regex substitution.

    Every name returned by ``os.listdir(folder)`` (files and
    sub-directories alike) is passed through
    ``re.sub(pattern, replacement, name)``. Entries whose name changes are
    renamed in place and reported on stdout.

    An entry is skipped, with a message, when the substitution produces an
    empty name or a name that already belongs to a different entry of
    ``folder``. ``os.rename`` silently replaces an existing destination on
    POSIX, so without this check two entries that map to the same new name
    would overwrite each other.

    Args:
        folder: Directory whose direct entries are renamed.
        pattern: Regular expression, as a string or a compiled pattern.
        replacement: Replacement string or callable, as accepted by
            ``re.sub``.
    """
    regex = re.compile(pattern)  # compile once instead of once per entry
    for filename in os.listdir(folder):
        new_name = regex.sub(replacement, filename)
        if new_name == filename:
            continue

        old_path = os.path.join(folder, filename)
        new_path = os.path.join(folder, new_name)

        # An existing target is only acceptable when it is the very same
        # entry (e.g. a case-only rename on a case-insensitive filesystem).
        clobbers = os.path.lexists(new_path) and not os.path.samestat(
            os.lstat(old_path), os.lstat(new_path)
        )
        if not new_name or clobbers:
            print(
                f"Skipped: {filename} → {new_name} "
                "(target name is empty or already exists)"
            )
            continue

        os.rename(old_path, new_path)
        print(f"Renamed: {filename} → {new_name}")


# Example: add a "2025-" prefix to the names in ./docs
# NOTE: r"^" matches at the start of every name, so the "2025-" prefix is
# applied to every entry in ./docs, not only to .txt files.
batch_rename("./docs", r"^", "2025-")
日常数据导出经常需要在 CSV 和 JSON 之间互转:
import csv
import json


def csv_to_json(csv_file, json_file, encoding="utf-8-sig"):
    """Convert a CSV file with a header row into a JSON array of objects.

    Each data row becomes one JSON object keyed by the header names (all
    values stay strings, as produced by ``csv.DictReader``). A summary line
    is printed when the conversion is done.

    Args:
        csv_file: Path of the CSV file to read.
        json_file: Path of the JSON file to write (overwritten if present).
        encoding: Text encoding of the CSV file. The default ``utf-8-sig``
            reads plain UTF-8 and also drops a leading byte-order mark, so
            the BOM does not end up inside the first column name.
    """
    # newline="" hands line-ending handling to the csv module; otherwise
    # line breaks embedded in quoted fields are altered while reading.
    with open(csv_file, "r", newline="", encoding=encoding) as src:
        data = list(csv.DictReader(src))

    # ensure_ascii=False writes non-ASCII characters verbatim, so the output
    # encoding must not depend on the platform default.
    with open(json_file, "w", encoding="utf-8") as dst:
        json.dump(data, dst, indent=2, ensure_ascii=False)

    print(f"Converted {len(data)} rows: {csv_file} → {json_file}")
大多数静态网站用 requests + BeautifulSoup 这套组合拳就够用:
import requests
from bs4 import BeautifulSoup


def scrape_titles(url, timeout=10):
    """Fetch a page and return the text of its h1, h2 and h3 headings.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on an unresponsive
            host.

    Returns:
        List of heading texts with surrounding whitespace stripped. The
        list is grouped by level: all h1 headings first, then all h2, then
        all h3.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status, so
            that an error page is not scraped as if it were content.
    """
    resp = requests.get(
        url,
        headers={'User-Agent': 'Mozilla/5.0'},
        timeout=timeout,
    )
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, 'html.parser')
    return [
        elem.get_text(strip=True)
        for tag in ('h1', 'h2', 'h3')
        for elem in soup.find_all(tag)
    ]
遇到 JS(JavaScript)渲染的页面就得请出浏览器自动化工具:
from selenium import webdriver
from selenium.webdriver.common.by import By


def scrape_dynamic(url, selector='.result-item'):
    """Load a page in headless Chrome and return the text of matching elements.

    Args:
        url: Address of the page to open.
        selector: CSS selector of the elements whose text is collected.

    Returns:
        List with the ``text`` of every element matching ``selector`` at
        the moment the lookup runs.
    """
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    driver = webdriver.Chrome(options=options)
    try:
        driver.get(url)
        elements = driver.find_elements(By.CSS_SELECTOR, selector)
        return [e.text for e in elements]
    finally:
        # Always shut the browser down, even when loading or querying the
        # page raises; otherwise the Chrome process is left running.
        driver.quit()
封装一个通用的 API 客户端,后续调用各种接口都方便:
import requests
from typing import Optional, Dict, Any


class APIClient:
    """Minimal JSON API client built on a shared ``requests.Session``.

    Every request carries a bearer-token ``Authorization`` header and a JSON
    ``Content-Type`` header. The client can be used as a context manager to
    close the underlying session when done.
    """

    def __init__(self, base_url: str, api_key: str, timeout: float = 30.0):
        """Create the session and install the default headers.

        Args:
            base_url: Prefix of every request URL.
            api_key: Token sent as ``Authorization: Bearer <api_key>``.
            timeout: Seconds to wait for each request; prevents a call from
                blocking forever on an unresponsive server.
        """
        self.base_url = base_url
        self.timeout = timeout
        self.session = requests.Session()
        self.session.headers.update({
            'Authorization': f'Bearer {api_key}',
            'Content-Type': 'application/json',
        })

    def __enter__(self) -> "APIClient":
        return self

    def __exit__(self, *exc_info) -> None:
        self.close()

    def close(self) -> None:
        """Close the underlying HTTP session."""
        self.session.close()

    def _url(self, path: str) -> str:
        """Join ``base_url`` and ``path`` with exactly one slash between them."""
        if not path:
            return self.base_url
        return f"{self.base_url.rstrip('/')}/{path.lstrip('/')}"

    def _request(self, method: str, path: str, **kwargs) -> Any:
        """Send one request and return the decoded JSON body.

        Returns ``None`` when the response has no body (e.g. 204 No
        Content), where ``resp.json()`` would otherwise raise.

        Raises:
            requests.HTTPError: On a 4xx/5xx response status.
        """
        resp = self.session.request(
            method, self._url(path), timeout=self.timeout, **kwargs
        )
        resp.raise_for_status()
        return resp.json() if resp.content else None

    def get(self, path: str, params: Optional[Dict] = None) -> Any:
        """GET ``path`` with optional query ``params``; return the JSON body."""
        return self._request("GET", path, params=params)

    def post(self, path: str, data: Dict) -> Any:
        """POST ``data`` as a JSON body to ``path``; return the JSON body."""
        return self._request("POST", path, json=data)

    def put(self, path: str, data: Dict) -> Any:
        """PUT ``data`` as a JSON body to ``path``; return the JSON body."""
        return self._request("PUT", path, json=data)
服务器磁盘告警,配合邮件通知运维人员:
import shutil
import smtplib
from email.mime.text import MIMEText


def check_disk_usage(threshold=80, path='/',
                     sender='monitor@yourcompany.com',
                     recipient='ops@yourcompany.com',
                     smtp_host='localhost'):
    """Measure disk usage and e-mail an alert when it exceeds a threshold.

    Args:
        threshold: Usage percentage above which the alert is sent.
        path: Any path on the filesystem to measure.
        sender: Address placed in the ``From`` header of the alert.
        recipient: Address placed in the ``To`` header of the alert.
        smtp_host: Host of the SMTP server used to deliver the alert.

    Returns:
        The used share of the filesystem as a percentage (0-100). ``0.0``
        is returned for a filesystem that reports a total size of zero.

    Raises:
        OSError, smtplib.SMTPException: If the alert has to be sent and the
            SMTP server cannot be reached or rejects the message.
    """
    usage = shutil.disk_usage(path)
    if usage.total == 0:
        # Nothing to measure; avoid dividing by zero.
        return 0.0

    percent = usage.used / usage.total * 100
    if percent > threshold:
        msg = MIMEText(f"Disk usage: {percent:.1f}%")
        msg['Subject'] = f"⚠️ Disk Alert: {percent:.1f}% used"
        msg['From'] = sender
        msg['To'] = recipient
        with smtplib.SMTP(smtp_host) as smtp:
            smtp.send_message(msg)
    return percent
关键时刻进程挂了怎么办?自动检测重启:
import psutil
import time


def monitor_process(name, interval=60, command=None):
    """Poll the process table forever and react when ``name`` is missing.

    The function never returns; run it in a dedicated process or thread.

    Args:
        name: Process name to look for (compared for exact equality).
        interval: Seconds to sleep between two scans.
        command: Optional argument list passed to ``subprocess.Popen`` to
            start the process again when it is missing. When ``None`` the
            absence is only reported on stdout and nothing is launched.
    """
    import subprocess  # local import: only needed for the optional restart

    while True:
        # process_iter(['name']) pre-fetches the name into p.info. Reading
        # it from there, rather than calling p.name(), cannot raise
        # NoSuchProcess/AccessDenied for a process that exits mid-scan.
        running = any(
            p.info['name'] == name for p in psutil.process_iter(['name'])
        )
        if not running:
            print(f"Process {name} not found, restarting...")
            if command is not None:
                subprocess.Popen(command)
        time.sleep(interval)
轻量级定时任务不需要上 Celery,schedule 库几行搞定:
import schedule
import time


def job():
    """Task run by the scheduler; prints a marker line."""
    print("Running scheduled task...")


# Run once a day at 09:00.
schedule.every().day.at("09:00").do(job)

# Run once every hour.
schedule.every().hour.do(job)

# Wake up once a minute and execute whichever jobs are due.
while True:
    schedule.run_pending()
    time.sleep(60)
Python 自动化的核心优势:语法简洁、库生态丰富、跨平台。
从今天开始,把重复性工作交给 Python 吧!
原文链接:https://dev.to/...