Beautifulsoup Python error (10060) - urgent

I am new to programming. I built a web scraper in Python with Beautiful Soup, but when I run the program it just opens the Python command line and the cursor blinks; nothing happens... Now I am getting these errors:

TimeoutError: [WinError 10060] A connection attempt failed because the connected party did not properly respond after a period of time, or established connection failed because connected host has failed to respond

ConnectionResetError: [WinError 10054] An existing connection was forcibly closed by the remote host
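Both errors are raised by urlopen while it waits on the remote host: 10060 means the server never answered, and 10054 means it dropped the connection mid-request. Below is a minimal sketch of how a timeout and a simple retry with a pause could be wrapped around a request helper along these lines; the helper name, timeout, retry count, and delay are illustrative assumptions, not settings the site documents:

import time
import urllib.error
import urllib.parse
import urllib.request

def get_data_with_retry(url, values, retries=3, timeout=30, delay=5):
    # Encode the form values and POST them, retrying a few times when the
    # host times out (10060) or resets the connection (10054).
    data = urllib.parse.urlencode(values).encode('utf-8')
    for attempt in range(retries):
        try:
            req = urllib.request.Request(url, data)
            response = urllib.request.urlopen(req, timeout=timeout)
            return response.read().decode('utf-8')
        except (TimeoutError, ConnectionResetError, urllib.error.URLError):
            if attempt == retries - 1:
                raise
            time.sleep(delay)  # back off before trying again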

...please don't mind the indentation,

Here is my code:

import urllib.request
import urllib.parse
import json
import xml.etree.ElementTree as ET
import csv
from bs4 import BeautifulSoup

link = 'https://maharerait.mahaonline.gov.in/searchlist/searchlist'
talukaLink = "https://maharerait.mahaonline.gov.in/SearchList/GetTaluka"
distlink = "https://maharerait.mahaonline.gov.in/SearchList/GetDistrict"
prjLink = "https://maharerait.mahaonline.gov.in/SearchList/GetProjectName"

alldata = []

links = {}
certificatedata = []

def getData(url, values):
    # POST the form values to the given URL and return the decoded response body
    data = urllib.parse.urlencode(values)
    data = data.encode('utf-8')
    req = urllib.request.Request(url, data)
    response=urllib.request.urlopen(req)
    data = response.read()
    data = data.decode("utf-8")
    return data


def getDivsion():
    ## for now we are taking 6 districts.. it needs to be updated when the data gets updated
    return range(1, 7)

def getDistrict(divId):
    global distlink
    values = {'DivID': divId}
    data = getData(distlink, values)
    return data

def parseJson(data):
    parsed = json.loads(data)
    return parsed

def getTaluka(disId):
    global talukaLink
    values = {'DisID': disId}
    data = getData(talukaLink, values)
    return data

def getProjects(divId, disId):
    global prjLink
    values = {'DisID': disId, 'DivID': divId}
    #print(values)
    data = getData(prjLink, values)
    if len(data) < 10:
        return "{}"
    return data

def getProjectsList():
    divList = getDivsion()
    flag = 0
    for divId in divList:
        disData = getDistrict(divId)
        disList = parseJson(disData)
        for disObj in disList:
            disId = disObj["ID"]
            prjData = getProjects(divId, disId)
            #print(" >>>> "+str(disId)+" >> "+str(divId))
            #print(prjData)
            # fetch and parse the project list for every district, not just the last one
            prjJson = parseJson(prjData)
            for prjObj in prjJson:
                flag += 1
                prjId = prjObj["ID"]
                values = {'ID': 0, 'pageTraverse': 1, 'Division': divId, 'hdnDistrict': '', 'hdnProject': '', 'District': disId, 'Taluka': '', 'Village': '', 'Project': prjId, 'CertiNo': '', 'btnSearch': 'Search'}
                finalPrjData = getData(link, values)
                parseXMLData(finalPrjData)
                #if len(alldata)>100:
                #    break

def parseXMLData(htmldata):
    global alldata, links
    soup = BeautifulSoup(htmldata, "html.parser")
    tables = soup.find_all("table")
    for table in tables:
        print(len(alldata))
        attr = table.attrs
        if "table" in attr['class']:
            tbody = table.find_all("tbody")
            if len(tbody) > 0:
                tbody = tbody[0]
                tr_lst = tbody.find_all("tr")
                for tr in tr_lst:
                    sublist = []
                    td_lst = tr.find_all("td")
                    if len(td_lst) > 6:
                        prjname = td_lst[1].text
                        proname = td_lst[2].text
                        certNo = td_lst[3].text
                        sublist.append(prjname)
                        sublist.append(proname)
                        sublist.append(certNo)
                        td = td_lst[4]
                        a_lst = td.find_all("a")
                        if len(a_lst) > 0:
                            a = a_lst[0]
                            href = a.attrs['href']
                            link = "https://maharerait.mahaonline.gov.in/" + href
                            links[certNo] = link
                            sublist.append(link)
                    if len(sublist) > 0:
                        alldata.append(sublist)
    return alldata


def writedata(alldata1, filename):
    print(" >>>> FINAL PRINTING DATA >>>> ")
    #import pdb; pdb.set_trace()
    with open("./" + filename, 'w', newline='') as csvfile:  # newline='' avoids blank rows on Windows
        csvfile = csv.writer(csvfile, delimiter=',')
        #csvfile.writerow(titleRow)
        csvfile.writerow("")
        for i in range(0, len( alldata1 )):
            #print(alldata1[i])
            csvfile.writerow( alldata1[i]  )


def processlinksforcert():
    global links, certificatedata
    print(">> Came in fetching certificates data >>> ")
    for certno in links.keys():
        link = links[certno]
        htmldata = getData(link, {})
        soup = BeautifulSoup(htmldata, "html.parser")
        divs = soup.find_all("div")
        for div in divs:
            attr = div.attrs
            if "id" in attr.keys() and "DivProfessional" in attr['id']:
                table = div.find_all("table")
                if len(table) <= 0:
                    continue
                t_attr = table[0].attrs
                if "table" in t_attr["class"]:
                    print(len(certificatedata))
                    table = table[0]
                    tr_lst = table.find_all("tr")
                    index = 1
                    while index < len(tr_lst):
                        #import pdb; pdb.set_trace()
                        #for tr in tr_lst:
                        #if index==0:
                        #    continue
                        tr = tr_lst[index]
                        index += 1
                        sublist = []
                        td_lst = tr.find_all("td")
                        if len(td_lst) > 2:
                            sublist.append(certno)
                            pername = formattext(td_lst[0].text)
                            cerno = formattext(td_lst[1].text)
                            proftype = formattext(td_lst[2].text)
                            sublist.append(pername)
                            sublist.append(cerno)
                            sublist.append(proftype)
                            certificatedata.append(sublist)
    return certificatedata

def formattext(text):
    while text.find("\r\n") >= 0:
        text = text.replace("\r\n", "")
    while text.find("   ") >= 0:
        text = text.replace("   ", "")
    return text

def main():
    global alldata, certificatedata
    #data = getData(url, {})
    getProjectsList()
    print("Before write the projects data to the file. Count >> 
"+str(len(alldata)))
    writedata(alldata, "data.csv")
    data = processlinksforcert()
    print("Before write the certificates data to the file. Count >> 
"+str(len(data)))
    writedata( data, "certificates.csv" )


main()

Can someone tell me what I am doing wrong... I have already installed pip, and also installed BeautifulSoup with pip... please don't mind the indentation, it only looks like this here....

I solved it with Selenium. Thank you all very much.
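For anyone who hits the same wall, here is a minimal sketch of what the Selenium route might look like; the browser choice, the implicit wait, and handing the rendered page to BeautifulSoup are assumptions for illustration, not necessarily what was actually used:

from selenium import webdriver
from bs4 import BeautifulSoup

# Load the search page in a real browser session so the server sees a normal
# client, then hand the rendered HTML to BeautifulSoup for parsing.
driver = webdriver.Chrome()
driver.get("https://maharerait.mahaonline.gov.in/searchlist/searchlist")
driver.implicitly_wait(10)  # give the page time to finish loading

soup = BeautifulSoup(driver.page_source, "html.parser")
print(len(soup.find_all("table")))  # sanity check that tables were rendered

driver.quit()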