`蓝牙:bluetooth_adapter_winrt.cc:1073 获取默认适配器失败`:在 Windows 虚拟机上使用 Python 多进程(multiprocessing)和 Selenium

`Bluetooth: bluetooth_adapter_winrt.cc:1073 Getting Default Adapter failed` Windows VM Python multiprocessing selenium

我有一个使用 Python 和 Selenium 编写的爬虫:见下文(欢迎测试并留下评论/建议!)。它在我的 Mac(本地)上运行良好,但估计需要大约 6 天才能提取完整数据。所以,我决定加入多进程(multiprocessing)来缩短时间。这在我的 Mac 上仍然可以完美运行,但是当我尝试在 Windows 虚拟机(Azure D8s_v3)上运行时,我收到以下错误:

DevTools listening on ws://127.0.0.1:56800/devtools/browser/de9e5088-9659-4604-b43f-8ea1fae02a66 [11728:11308:0805/085310.771:ERROR:device_event_log_impl.cc(214)] [08:53:10.782] Bluetooth: bluetooth_adapter_winrt.cc:1073 Getting Default Adapter failed.

你们在 Windows 上运行时是否也遇到过这个错误?提前感谢!

# Jonathan Augustin

# BELOW IS THE LINK WE WOULD LIKE YOU TO SCRAPE AS A TEST OF YOUR ABILITY:
# Dixie State University : https://registration.dixie.edu/transfer-guide/

# Please write a python script to extract the “To” and “From” transfer information from the highlighted link.
# The output should be in .JSON format. We would also like you to send the python script as well.
# We want ALL of the transfer information “TO” Dixie State University, “FROM” every other institution in every state.

import requests
from bs4 import BeautifulSoup
import json
import time
from itertools import chain
import logging
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.keys import Keys
import pprint
import time
import progressbar
import threading
import multiprocessing

# Blank template for one transfer-equivalency record.  Key order is the
# order in which the fields appear in the JSON output: the sending
# ("from") school first, then the receiving ("to") school, each with its
# primary course followed by any extra courses.
jsonClass = dict(
    # Sending institution and its primary course.
    from_school="",
    from_course_department="",
    from_course_code="",
    from_course_name="",
    from_course_credit_hours="",
    # Additional sending-side courses in the same equivalency.
    from_extra_department="",
    from_extra_code="",
    from_extra_name="",
    from_extra_credit_hours="",
    # Receiving institution (fixed) and its primary course.
    to_school="Dixie State University",
    to_course_department="",
    to_course_code="",
    to_course_name="",
    to_course_credit_hours="",
    # Additional receiving-side courses in the same equivalency.
    to_extra_department="",
    to_extra_code="",
    to_extra_name="",
    to_extra_credit_hours="",
)

# Names of the 50 US states, in alphabetical order.  The scraper compares
# the state part of a school's address against this list to decide
# whether the school is processed.
states = [
    'Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California',
    'Colorado', 'Connecticut', 'Delaware', 'Florida', 'Georgia',
    'Hawaii', 'Idaho', 'Illinois', 'Indiana', 'Iowa',
    'Kansas', 'Kentucky', 'Louisiana', 'Maine', 'Maryland',
    'Massachusetts', 'Michigan', 'Minnesota', 'Mississippi', 'Missouri',
    'Montana', 'Nebraska', 'Nevada', 'New Hampshire', 'New Jersey',
    'New Mexico', 'New York', 'North Carolina', 'North Dakota', 'Ohio',
    'Oklahoma', 'Oregon', 'Pennsylvania', 'Rhode Island', 'South Carolina',
    'South Dakota', 'Tennessee', 'Texas', 'Utah', 'Vermont',
    'Virginia', 'Washington', 'West Virginia', 'Wisconsin', 'Wyoming',
]
  

class searchPage(object):
    """Selenium scraper for one slice of the transfer-equivalency widget.

    Each instance opens its own Chrome session on the widget page.
    ``updateJson`` walks the school list and handles only the entries
    whose list index satisfies ``index % numthreads == number``, so that
    several instances can split the list between them.  Every
    equivalency found is appended to ``dixie.json`` as a JSON object
    followed by a comma.
    """

    # chromedriver location used when the caller does not supply one.
    DEFAULT_DRIVER_PATH = "/Users/jonathanaugustin/Desktop/chromedriver"
    WIDGET_URL = 'https://widgets.collegetransfer.net/EquivWidget?institution=2734&name=Dixie%20State%20University&theme=/Content/Themes/Selene/jquery-ui-1.8.17.custom.css&direction=receiver&zip=84770-3876'

    def __init__(self, number, driver_path=None, headless=False):
        """Start Chrome and load the widget page.

        Args:
            number: Zero-based slot of this worker; compared against
                ``index % numthreads`` in ``updateJson``.
            driver_path: Path to the chromedriver executable.  Defaults
                to ``DEFAULT_DRIVER_PATH``.
            headless: When true, start Chrome with ``--no-sandbox``,
                ``--headless`` and ``--disable-gpu``.
                NOTE(review): these flags were reported to help on a
                Windows VM; confirm they suit the target machine.
        """
        options = Options()
        if headless:
            for flag in ('--no-sandbox', '--headless', '--disable-gpu'):
                options.add_argument(flag)
        self.number = number
        self.driver = webdriver.Chrome(driver_path or self.DEFAULT_DRIVER_PATH, options=options)
        self.driver.get(self.WIDGET_URL)

    def getSchools(self):
        """Scroll the school list to its end and return its entries.

        The list element is scrolled to the bottom repeatedly until the
        last 20 characters of its text stop changing between two
        consecutive scrolls.

        Returns:
            The ``selectableContainer`` elements inside the
            ``schoolsbyname`` element.
        """
        previous_tail = ''
        while True:
            schools = self.driver.find_element_by_id('schoolsbyname')
            self.driver.execute_script('arguments[0].scrollTop = arguments[0].scrollHeight', schools)
            # Pause before reading the text again so the page can react
            # to the scroll.
            time.sleep(0.4)
            current_tail = schools.text[-20:]
            if current_tail == previous_tail:
                break
            previous_tail = current_tail
        return schools.find_elements_by_class_name('selectableContainer')

    def updateJson(self, numthreads):
        """Scrape this worker's share of schools and append the records.

        Args:
            numthreads: Total number of workers splitting the school
                list; this instance handles indices where
                ``index % numthreads == self.number``.

        Schools whose address does not name one of ``states`` are
        skipped.  Each record starts from a fresh copy of the
        ``jsonClass`` template, so a field that cannot be read stays
        blank instead of inheriting the previous record's value.
        """
        school_entries = self.getSchools()
        with progressbar.ProgressBar(max_value=len(school_entries)) as bar:
            for index, school in enumerate(school_entries):
                bar.update(index)
                if index % numthreads != self.number:
                    continue
                if not self._is_us_school(school):
                    continue
                school_name = school.get_attribute("data-sendername")

                # First click: open the equivalency list of this school.
                school.click()
                time.sleep(2)

                equiv_list = self.driver.find_element_by_id('equivcontent')
                for equivalency in equiv_list.find_elements_by_class_name('selectableContainer'):
                    record = dict(jsonClass)
                    record["from_school"] = school_name
                    self._fill_course_fields(
                        record, "from",
                        equivalency.find_element_by_class_name('equivSourceContainer'))
                    self._fill_course_fields(
                        record, "to",
                        equivalency.find_element_by_class_name('equivTargetContainer'))

                    # Second click: open the detail view, which is where
                    # the credit hours are read from.
                    equivalency.click()
                    time.sleep(2)

                    self._fill_credit_fields(record, "from", 0)
                    self._fill_credit_fields(record, "to", 1)

                    self._append_record(record)

                    # Leave the detail view.
                    self.driver.find_element_by_id('detail').find_element_by_class_name('ui-corner-top').click()
                    time.sleep(2)

                # Leave the equivalency list of this school.
                self.driver.find_element_by_id('equivs').find_element_by_class_name('ui-state-default').click()
                time.sleep(2)

    @staticmethod
    def _is_us_school(school):
        """Return True when the second ', '-separated address part is a US state."""
        parts = school.find_element_by_class_name('address').text.split(', ')
        # An address without a ', ' separator has no state part to check;
        # treat it like a non-US school instead of raising IndexError.
        return len(parts) > 1 and parts[1] in states

    @staticmethod
    def _fill_course_fields(record, side, container):
        """Store department/code/name of the courses in *container*.

        *side* is ``"from"`` or ``"to"`` and selects the key prefix.  The
        first ``course`` element fills the ``<side>_course_*`` keys; any
        further ones are collected into lists whose ``str()`` form is
        stored under the ``<side>_extra_*`` keys.
        """
        courses = container.find_elements_by_class_name('course')
        course_id = courses[0].find_element_by_class_name('courseId').text.split()
        record[side + "_course_department"] = course_id[0]
        record[side + "_course_code"] = course_id[1]
        record[side + "_course_name"] = container.find_element_by_class_name('courseTitle').text

        extras = courses[1:]
        if not extras:
            record[side + "_extra_department"] = ""
            record[side + "_extra_code"] = ""
            record[side + "_extra_name"] = ""
            return

        departments, codes, names = [], [], []
        for extra in extras:
            extra_id = extra.find_element_by_class_name('courseId').text.split()
            departments.append(extra_id[0])
            codes.append(extra_id[1])
            names.append(extra.find_element_by_class_name('courseTitle').text)
        record[side + "_extra_department"] = str(departments)
        record[side + "_extra_code"] = str(codes)
        record[side + "_extra_name"] = str(names)

    @staticmethod
    def _read_credit(element):
        """Return the credit text under *element*, or None if unlabeled.

        Raises whatever Selenium or indexing raises when the credits
        line or its spans are missing; callers decide how to handle it.
        """
        spans = element.find_element_by_class_name('courseCreditsLine').find_elements_by_tag_name('span')
        if spans[0].text == "Credits:":
            return spans[1].text
        return None

    def _fill_credit_fields(self, record, side, container_index):
        """Store the credit hours found in one ``courseListContainer``.

        *container_index* picks the container on the detail view (0 is
        used for the "from" side, 1 for the "to" side).
        """
        container = self.driver.find_elements_by_class_name('courseListContainer')[container_index]
        details = container.find_elements_by_class_name('courseDetailContainer')
        try:
            credit = self._read_credit(container)
        except Exception:
            # Best effort: without a readable credits line the credit
            # fields of this side keep their blank template values.
            return
        if credit is not None:
            record[side + "_course_credit_hours"] = credit

        if len(details) > 1:
            extra_credits = []
            for detail in details[1:]:
                try:
                    extra_credit = self._read_credit(detail)
                except Exception:
                    # This extra course has no readable credits line.
                    continue
                if extra_credit is not None:
                    extra_credits.append(extra_credit)
            record[side + "_extra_credit_hours"] = str(extra_credits)
        else:
            record[side + "_extra_credit_hours"] = ""

    @staticmethod
    def _append_record(record):
        """Append *record* to ``dixie.json`` as JSON followed by a comma."""
        # The object and its separator go out in a single write call so
        # they are not issued as two separate appends.
        with open("dixie.json", "a") as out:
            out.write(json.dumps(record, indent=4) + ",")

    def tearDown(self):
        """End the browser session and stop the driver process."""
        # quit() shuts down the whole session; close() would only close
        # the current window.
        self.driver.quit()


def run_worker(worker_index, worker_count):
    """Scrape one worker's share of the schools inside the current process.

    The ``searchPage`` instance (and with it the browser session) is
    created here, in the child process, rather than in the parent.
    Under the ``spawn`` start method, which is the default on Windows,
    the target and arguments of a ``Process`` are pickled, and an object
    holding a live WebDriver is not expected to survive that.

    Args:
        worker_index: Zero-based slot of this worker.
        worker_count: Total number of workers splitting the school list.
    """
    page = searchPage(worker_index)
    try:
        page.updateJson(worker_count)
    finally:
        # Close the browser even when scraping fails part-way.
        page.tearDown()


def finalize_json_array(path):
    """Close the JSON array in *path*, dropping a trailing comma if present.

    Every record is written followed by ``","``, so the file ends with a
    dangling comma that would make the final ``"]"`` invalid JSON.
    """
    import os

    with open(path, "rb+") as out:
        if out.seek(0, os.SEEK_END) > 0:
            out.seek(-1, os.SEEK_END)
            if out.read(1) == b",":
                out.seek(-1, os.SEEK_END)
                out.truncate()
        out.seek(0, os.SEEK_END)
        out.write(b"]")


if __name__ == "__main__":
    worker_count = 7

    # Start a fresh output file with the opening bracket of the array.
    with open("dixie.json", "w") as out:
        out.write("[")

    workers = [
        multiprocessing.Process(target=run_worker, args=(index, worker_count))
        for index in range(worker_count)
    ]
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()

    finalize_json_array("dixie.json")
          

我通过添加以下选项之一解决了这个问题:

>     options.add_argument('--no-sandbox')
>     options.add_argument('--headless')
>     options.add_argument('--disable-gpu')