如何在 Python 中以编程方式将验证码图像上传到这个验证码识别服务(已给出 HTML)?

How can I programmatically upload captcha images to this solving service in python (html given)?

我正在编写一个 python 程序,需要能够解决网站上的验证码问题。我想使用 2captcha。我已经使用 selenium 编写了一个 python 脚本,除了解决验证码之外,它可以完成我需要做的所有事情。当我单击 2captcha.com "API" 选项卡时,将显示以下内容(以及其他参数):

您可以上传两种格式的验证码:

Multipart 和 Base64:

Multipart 示例:

<form method="post" action="http://2captcha.com/in.php" enctype="multipart/form-data">
<input type="hidden" name="method" value="post">
Your key:
<input type="text" name="key" value="YOUR_APIKEY">
The CAPTCHA file:
<input type="file" name="file">
<input type="submit" value="download and get the ID">
</form>

YOUR_APIKEY - 您的密钥,长度为 32 个字符。

Base64 样本:

<form method="post" action="http://2captcha.com/in.php">
<input type="hidden" name="method" value="base64">
Your key:
<input type="text" name="key" value="YOUR_APIKEY">
The CAPTCHA file body in base64 format:
<textarea name="body">BASE64_FILE</textarea>
<input type="submit" value="download and get the ID">
</form>

YOUR_APIKEY - 您的密钥,长度为 32 个字符。

BASE64_FILE - 是 base 64 编码的图像主体。

我熟悉 python 及其大部分科学和数学模块,但我对网络相关编程有点陌生。上面的代码看起来像 html。我如何让 python 程序执行上面的 html 指令?

查看requests模块

import requests

# Upload a captcha image to 2captcha as a multipart/form-data POST,
# mirroring the HTML form: hidden field method=post, text field key, file field file.
url = 'http://2captcha.com/in.php'
data = {'key': 'key', 'method': 'post'}
# Open the image in a context manager so the handle is closed once the upload is done.
with open('image.png', 'rb') as image_file:
    r = requests.post(url, files={'file': image_file}, data=data)
if r.ok:
    # do something with the response data
    print(r.text)

我来自 2captcha 团队,我们有 python 示例。您可以使用以下代码:

"""

这是一个如何通过 Google reCAPTCHA v2 的示例。我使用 python + selenium + PhantomJS 来实现。PhantomJS 是一种用于自动化 Web 测试的无头浏览器,我用它来抓取页面并绕过 Google 验证码。要使用它,您需要安装 PhantomJS(在 linux 系统上需要从源代码构建)并安装 selenium python 模块。

Google 验证码显示在页面的 iframe 中。您需要抓取其中的元素,单击图像,并在单击确定后检查错误消息。

"""

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import *
import re
import os
import sys
import time
import random
import requests
from PIL import Image

class capcha_resolver(object):
    """Pass the Google reCAPTCHA v2 demo page using PhantomJS and 2captcha.

    The resolver opens the demo page in a PhantomJS browser, clicks the
    reCAPTCHA checkbox, uploads a screenshot of the image challenge to
    2captcha and clicks the tiles named in the answer.

    Methods report their outcome with integer codes: 0 on success and
    -1 on failure.
    """

    # How many times res.php is polled for the answer, and how many seconds
    # to wait after each "not ready" reply.
    POLL_ATTEMPTS = 40
    POLL_INTERVAL = 3

    def __init__(self, captchakey, proxy = None, phantomjs_path = '/bin/phantomjs'):
        """Start a PhantomJS browser.

        captchakey     -- 2captcha API key sent with every request.
        proxy          -- optional proxy address handed to PhantomJS via --proxy.
        phantomjs_path -- path to the phantomjs executable; by default it is
                          assumed to be installed into the /bin folder.
        """
        self.TWOCAPTCHA_API_KEY = captchakey
        # Always define PROXY so the attribute exists even when no proxy is used.
        self.PROXY = proxy
        phantom_args = []
        if proxy:
            phantom_args = ['--proxy=' + proxy, '--proxy-type=http', '--proxy-type=https']
        self.driver = webdriver.PhantomJS(phantomjs_path, service_args=phantom_args)
        self.driver.set_page_load_timeout(20)

    def fail(self, msg):
        """Print an error message and save a screenshot of the page to error.png."""
        print("[!] Error: " + msg)
        self.driver.save_screenshot('error.png')

    def _abort(self, msg):
        """Report msg via fail(), leave any iframe and return the -1 failure code."""
        self.fail(msg)
        # Return to the top-level document so a later call can locate the iframes again.
        self.driver.switch_to.default_content()
        return -1

    @staticmethod
    def _parse_ok_payload(text):
        """Return the payload of an "OK|<payload>" reply, or None for any other reply.

        A prefix match is used instead of a substring search because some
        error codes contain the letters "OK" (for example ERROR_TOKEN_EXPIRED).
        """
        text = text.strip()
        if not text.startswith('OK|'):
            return None
        return text[len('OK|'):]

    def get_page(self):
        """Open the reCAPTCHA demo page and save a screenshot to page.png.

        Returns 0 on success, or -1 when the page does not load within the
        page-load timeout configured in __init__.
        """
        try:
            self.driver.get('https://www.google.com/recaptcha/api2/demo')
        except TimeoutException:
            self.fail("Timed out while loading the page")
            return -1
        self.driver.save_screenshot('page.png')
        return 0

    def send_capcha(self, filename):
        """Upload the image file to 2captcha and wait for the answer.

        Returns the answer as a list of single characters, or an empty list
        when the upload is rejected, the service reports an error, or no
        answer arrives within POLL_ATTEMPTS polls.
        """
        data = {'key': self.TWOCAPTCHA_API_KEY, 'method': 'post'}
        # Close the image file as soon as the upload has finished.
        with open(filename, 'rb') as captcha_file:
            r = requests.post('http://2captcha.com/in.php', files={'file': captcha_file}, data=data)
        reqid = self._parse_ok_payload(r.text) if r.ok else None
        if reqid is None:
            return []
        print("[+] Capcha id: " + reqid)

        query = {'key': self.TWOCAPTCHA_API_KEY, 'action': 'get', 'id': reqid}
        for _ in range(self.POLL_ATTEMPTS):
            r = requests.get('http://2captcha.com/res.php', params=query)
            answer = self._parse_ok_payload(r.text)
            if answer is not None:
                return list(answer)
            if 'CAPCHA_NOT_READY' in r.text:
                print(r.text)
                time.sleep(self.POLL_INTERVAL)
                continue
            # Any other reply (an ERROR_* code or something unexpected) is a failure.
            return []
        return []

    def bypass_captcha(self):
        """Click the checkbox, solve the image challenge and verify the result.

        Google recaptcha could be found by id. Frame with checkbox has id
        which starts with I0, recapcha frame has id with I1.

        Returns 0 when the checkbox ends up checked and -1 on any failure.
        Every failure path leaves the driver on the top-level document.
        """
        checkbox_frame = self.driver.find_element_by_xpath('//iframe[starts-with(@id, "I0")]')
        self.driver.switch_to.frame(checkbox_frame)
        time.sleep(1)
        self.driver.find_element_by_id('recaptcha-anchor').click()
        print("[*] Clicked on checkbox")
        time.sleep(2)
        self.driver.switch_to.default_content()

        challenge_frame = self.driver.find_element_by_xpath('//iframe[starts-with(@id, "I1")]')
        location = challenge_frame.location
        size = challenge_frame.size
        bounding_box = (
            location['x'],                   # left
            location['y'],                   # upper
            location['x'] + size['width'],   # right
            location['y'] + size['height'])  # bottom
        imgname = 'capcha.jpeg' #use jpeg because png images can exceed 2capcha file size limit
        time.sleep(2)
        self.driver.save_screenshot(imgname)
        # The screenshot may carry an alpha channel, which JPEG cannot store,
        # so convert the cropped challenge area to RGB before saving it.
        challenge_image = Image.open(imgname).crop(bounding_box).convert('RGB')
        challenge_image.save(imgname)

        numbers = self.send_capcha(imgname)
        if not numbers:
            return -1

        self.driver.switch_to.frame(challenge_frame)
        picturetable = self.driver.find_element_by_css_selector('.rc-imageselect-table-3')
        images = [col.find_element_by_tag_name('img')
                  for row in picturetable.find_elements_by_tag_name('tr')
                  for col in row.find_elements_by_tag_name('td')]
        if not images:
            return self._abort("Found no captcha images")

        print("[*] Got answer : " + str(numbers))
        # Validate the whole answer before clicking anything: every character
        # must be a 1-based index of an existing tile.
        for number in numbers:
            if not number.isdigit() or not 1 <= int(number) <= len(images):
                return self._abort("Unexpected answer from 2captcha: " + str(numbers))
        for number in numbers:
            index = int(number) - 1
            images[index].click()
            print('[+] clicked on image ' + str(index))
        self.driver.save_screenshot('res.png')
        self.driver.find_element_by_id('recaptcha-verify-button').click()
        print("[*] Clicked verify button")
        time.sleep(2)
        error_selectors = (
            '.rc-imageselect-incorrect-response',
            '.rc-imageselect-error-select-one',
            '.rc-imageselect-error-select-more')
        if any(self.driver.find_element_by_css_selector(selector).is_displayed()
               for selector in error_selectors):
            return self._abort("Incorrect answer from 2captcha")
        self.driver.switch_to.default_content()

        self.driver.switch_to.frame(checkbox_frame)
        checkbox = self.driver.find_element_by_css_selector('.recaptcha-checkbox')
        if checkbox.get_attribute('aria-checked') == 'false':
            return self._abort("Capctha not passed")
        self.driver.switch_to.default_content()
        self.driver.save_screenshot('passed.png')
        return 0

# Command-line entry point: python resolver.py 2CAPCHA_API_KEY [PROXY]
if len(sys.argv) < 2:
    print("Usage: python resolver.py 2CAPCHA_API_KEY [PROXY]")
    # Stop here: without the API key, sys.argv[1] below would raise IndexError.
    sys.exit(1)
proxy = sys.argv[2] if len(sys.argv) > 2 else None
resolver = capcha_resolver(sys.argv[1], proxy)

try:
    if resolver.get_page() == -1:
        print("[!] Error while getting page")
    else:
        print("[+] Opened URL")
        # Only attempt the captcha once the page has actually been opened.
        if resolver.bypass_captcha() == -1:
            print("[!] Error on captcha resolving")
        else:
            print("[+] Resolved captcha")
finally:
    # Shut the browser down so no PhantomJS process is left running.
    resolver.driver.quit()

祝你好运!