from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import base64
import os
from PIL import Image
import pytesseract
import cv2
import numpy as np
# Location of the Tesseract executable that pytesseract should launch.
_TESSERACT_EXECUTABLE = r'D:\Program Files\Tesseract-OCR\tesseract.exe'
pytesseract.pytesseract.tesseract_cmd = _TESSERACT_EXECUTABLE
def preprocess_image(image_path):
    """Load an image from disk and clean it up for OCR.

    The pipeline is: grayscale -> 3x3 Gaussian blur -> inverted adaptive
    Gaussian threshold (block size 21, constant 4) -> 3x3 morphological
    close -> 3x3 median blur.

    Args:
        image_path: Path of the image file to load.

    Returns:
        A PIL ``Image`` built from the single-channel binarised array.
        Because ``THRESH_BINARY_INV`` is used, pixels darker than their
        local threshold come out white (255) and the rest black (0).
    """
    # The context manager closes the underlying file handle. convert("RGB")
    # guarantees a 3-channel array, so grayscale or palette images (which
    # would otherwise load as 2-D arrays) no longer break the colour
    # conversion below.
    with Image.open(image_path) as image:
        rgb = np.array(image.convert("RGB"))

    # Convert to grayscale, then apply a light Gaussian blur to reduce noise.
    gray = cv2.cvtColor(rgb, cv2.COLOR_RGB2GRAY)
    gray = cv2.GaussianBlur(gray, (3, 3), 0)

    # Adaptive thresholding copes with uneven background brightness.
    binary = cv2.adaptiveThreshold(
        gray,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY_INV,
        21,
        4,
    )

    # Morphological closing joins broken character strokes.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))
    closed = cv2.morphologyEx(binary, cv2.MORPH_CLOSE, kernel)

    # Median filter removes the remaining speckle noise.
    processed_image = cv2.medianBlur(closed, 3)
    return Image.fromarray(processed_image)
def recognize_captcha(image_path):
    """Run OCR on a captcha image file and return the recognised text.

    The image is first passed through ``preprocess_image``; the preprocessed
    result is also written to ``processed_debug.png`` in the current working
    directory and its absolute path is printed, to help with debugging.

    Args:
        image_path: Path of the captcha image to recognise.

    Returns:
        The text reported by Tesseract with surrounding whitespace stripped.
    """
    debug_path = "processed_debug.png"

    cleaned = preprocess_image(image_path)
    # Debug aid: keep a copy of the preprocessed image on disk.
    cleaned.save(debug_path)
    print(f"预处理后的图片已保存至:{os.path.abspath(debug_path)}")

    # Tesseract options: engine mode 3, page segmentation mode 8, and a
    # whitelist restricting output to uppercase letters and digits.
    tesseract_args = r'--oem 3 --psm 8 -c tessedit_char_whitelist=ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
    raw_text = pytesseract.image_to_string(cleaned, config=tesseract_args)
    return raw_text.strip()
def download_captcha(url="系统地址", save_path="captcha.png", timeout=10):
    """Open a page in Chrome and save its inline base64 captcha image.

    Args:
        url: Address of the page that shows the captcha. The default is the
            original placeholder string and must be replaced with a real URL
            before the script can work.
        save_path: File the decoded image bytes are written to.
        timeout: Seconds to wait for the captcha ``<img>`` to appear.

    Returns:
        ``save_path``, once the image has been written.

    Raises:
        ValueError: If the image ``src`` has no comma separating the data-URI
            header from its payload.
        selenium.common.exceptions.TimeoutException: If no matching image
            appears within ``timeout`` seconds.
    """
    driver = webdriver.Chrome()
    try:
        driver.get(url)

        # The leading "//" searches the whole document. A bare "img[...]" is
        # evaluated relative to the document node, whose only element child
        # is <html>, so it could never match. The two contains() tests accept
        # both "data:image/jpg;base64" and "data:image/jpg; base64".
        locator = (
            By.XPATH,
            "//img[contains(@src, 'data:image/jpg') and contains(@src, 'base64')]",
        )
        img_element = WebDriverWait(driver, timeout).until(
            EC.presence_of_element_located(locator)
        )

        # A data URI looks like "<header>,<payload>"; only the payload is
        # base64-encoded image data.
        src_data = img_element.get_attribute("src")
        _, separator, base64_str = src_data.partition(",")
        if not separator:
            raise ValueError("captcha image src is not a 'header,payload' data URI")
        image_data = base64.b64decode(base64_str)

        with open(save_path, "wb") as f:
            f.write(image_data)
        return save_path
    finally:
        # Always close the browser, even when something above fails.
        driver.quit()
# Script entry point: download the captcha, run OCR on it, print the result.
if __name__ == "__main__":
    image_path = download_captcha()
    if image_path:
        captcha_text = recognize_captcha(image_path)
        print(f"识别结果: {captcha_text}")
# NOTE(review): the original print line was followed by stray non-code text
# asking to "add a title to this script for publishing". It made the file a
# syntax error, so it is preserved here as a comment instead.