我怎样才能让这两个脚本一起工作?
How can I make these two scripts work together?
我有两个代码可以抓取基本上是搜索引擎的页面。它从 google sheet 读取信息,在 URL 上搜索,获取一些信息,然后将它们写入 sheet.
问题是我使用了两个代码,第二个是将信息写入 google sheet.
第一个代码执行所有搜索,然后当每个搜索完成时,第二个代码开始将获取的信息写入 google sheets。
我想做的是搜索一个然后写,搜索第二个然后写.....我尝试了不同的方法,但这是我的第一个代码和我第一次编程,所以我很挣扎
k_bot.py(网页抓取工具)
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import UnexpectedAlertPresentException
import re
import time
class BOT(object):
    """Selenium scraper for the 3kplus search page.

    Opens Firefox and logs in on construction; ``search_cpfs`` then looks
    up every client code (CPF) that was handed to the constructor.
    """

    def __init__(self, cpfs):
        """Start Firefox, open the site and submit the login form.

        :param cpfs: iterable of client codes to look up later.
        """
        # SETUP FOR URL
        self.bot_url = 'http://www.3kplus.net/'
        self.cpfs = cpfs
        self.profile = webdriver.FirefoxProfile()
        self.options = Options()
        # Raw string: in a plain literal the '\U' of '\Users' starts a
        # unicode escape and the module fails to compile.
        self.driver = webdriver.Firefox(firefox_profile=self.profile,
                                        executable_path=r'C:\Users\MOISA\Documents\geckodriver.exe',
                                        options=self.options)
        # NAVIGATE TO URL
        self.driver.get(self.bot_url)
        # NOTE(review): credentials are hard-coded; consider loading them from configuration.
        login_box = self.driver.find_element_by_xpath('//*[@id="login"]/div[3]/div[2]/div[2]/input')
        login_box.send_keys('daiane')
        pass_box = self.driver.find_element_by_xpath('//*[@id="login"]/div[3]/div[2]/div[3]/input')
        pass_box.send_keys('789456')
        login_btn = self.driver.find_element_by_xpath('//*[@id="login"]/div[3]/div[2]/button')
        login_btn.click()

    def _text(self, xpath):
        """Return the text of the element matched by *xpath*."""
        return self.driver.find_element_by_xpath(xpath).text

    def _lookup(self, cpf):
        """Search one client code and return its nine fields as a tuple.

        The tuple is ``(nome, age, beneficio, concessao, salario, bankslist,
        bcardlist, consig, card)``. Every field is ``''`` when an element is
        missing or an alert appears (treated as an invalid client code).
        """
        print(f"Procurando {cpf}.")
        self.driver.get(self.bot_url)
        self.delay = 3  # seconds
        # SEARCH CLIENT CODE
        try:
            cpf_input = self.driver.find_element_by_xpath('//*[@id="search"]/div/div[1]/input')
            cpf_input.send_keys(cpf)
            cpf_btn = self.driver.find_element_by_xpath('//*[@id="search"]/div/div[2]/button')
            cpf_btn.click()
            cpf_btn.click()
            time.sleep(2)
            notified = self.driver.find_element_by_xpath('//*[@id="notification"]').is_displayed()
            if notified:
                # CLIENT CODE HAVE NOTIFICATION
                nome = self._text("/html/body/main[1]/div[1]/div[1]/div[1]/div[2]/h2")
                idade = self._text("/html/body/main[1]/div[1]/div[1]/div[1]/div[2]/ul/li[2]")
                beneficio = self._text("/html/body/main[1]/div[1]/div[1]/div[1]/div[3]/div[5]/span/b ")
                concessao = self._text("/html/body/main[1]/div[1]/div[1]/div[1]/div[3]/div[2]/span")
            else:
                # CLIENT CODE DOESN'T HAVE NOTIFICATION
                nome = self._text("/html/body/main[1]/div[1]/div[1]/div[1]/div[1]/h2")
                idade = self._text("/html/body/main[1]/div[1]/div[1]/div[1]/div[1]/ul/li[2]")
                beneficio = self._text("/html/body/main[1]/div[1]/div[1]/div[1]/div[2]/div[5]/span/b")
                concessao = self._text("/html/body/main[1]/div[1]/div[1]/div[1]/div[2]/div[2]/span")
            # The remaining fields use the same XPaths in both layouts.
            salario = self._text("/html/body/main[1]/div[1]/div[2]/div/div[3]/div[1]/div[1]/span")
            bankslist = ', '.join(re.findall(r'(?<=Banco )(\w+)', self._text('//*[@id="loans"]')))
            bcardlist = ', '.join(re.findall(r'(?<=Banco )(\w+)', self._text('//*[@id="cards"]')))
            consig = self._text("/html/body/main[1]/div[1]/div[1]/div[3]/div[2]/span")
            card = self._text("/html/body/main[1]/div[1]/div[1]/div[3]/div[3]/span")
            # Guard the match so a text without "(NN Anos" does not raise AttributeError.
            age_match = re.search(r'\((.*?)Anos', idade)
            age = age_match.group(1) if age_match else ''
            print('CPF Valido')
            if notified:
                print('NOTIFICACAO')
            print(nome, age, beneficio, concessao, salario, bankslist, bcardlist, consig, card)
        # IF THE CLIENT CODE IS WRONG
        except (NoSuchElementException, UnexpectedAlertPresentException):
            # Blank every field. beneficio used to be left out here, which
            # raised UnboundLocalError or reused the previous client's value.
            nome = age = beneficio = concessao = salario = ''
            bankslist = bcardlist = consig = card = ''
            print('CPF Invalido')
        return nome, age, beneficio, concessao, salario, bankslist, bcardlist, consig, card

    def search_cpfs(self):
        """Look up every code in ``self.cpfs``.

        :returns: tuple of nine parallel lists ``(nomes, idades, beneficios,
            concessoes, salarios, bancoss, bancoscard, consigs, cards)``,
            one entry per client code, in input order.
        """
        # SEARCH THROUGH THE LIST OF CLIENT CODES (1ST COLUMN OF THE SPREADSHEET), AND OBTAIN THESE INFOS
        columns = tuple([] for _ in range(9))
        for cpf in self.cpfs:
            for column, value in zip(columns, self._lookup(cpf)):
                column.append(value)
        return columns
cpf_updater.py(将信息写入 google sheet 的脚本)
from k_bot import BOT
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import time
from gspread.exceptions import APIError
class CpfSearch(object):
    """Fill a Google Sheet with the data scraped for each client code.

    Column 1 of the first worksheet holds the client codes below a header
    row; the scraped fields are written to fixed columns of the same row.
    """

    def __init__(self, spreadsheet_name):
        """Authorize with the service-account key file and open ``sheet1``.

        :param spreadsheet_name: title of the spreadsheet to open.
        """
        # 1-based column numbers passed to ``update_cell``.
        self.cpf_col = 1
        self.nome_col = 2
        self.age_col = 3
        self.beneficio_col = 4
        self.concessao_col = 5
        self.salario_col = 6
        self.bancos_col = 7
        self.bancocard_col = 9
        self.consig_col = 10
        self.card_col = 16
        scope = ['https://www.googleapis.com/auth/spreadsheets',
                 'https://www.googleapis.com/auth/drive.readonly']
        creds = ServiceAccountCredentials.from_json_keyfile_name('CONSULTAS.json', scope)
        client = gspread.authorize(creds)
        self.sheet = client.open(spreadsheet_name).sheet1

    def _write_row(self, row, columns, values, max_attempts=3):
        """Write *values* into *columns* of *row*, retrying after API errors.

        :returns: ``True`` once every cell was written, ``False`` if the row
            still failed after *max_attempts* tries.
        """
        for _ in range(max_attempts):
            try:
                for col, value in zip(columns, values):
                    self.sheet.update_cell(row, col, value)
            except APIError:
                # Presumably the write quota was hit: wait, then retry this
                # same row instead of moving on and losing its data.
                print('Esperando para atualizar...')
                time.sleep(100)
            else:
                print('Cliente atualizado!')
                return True
        print(f'Falha ao atualizar a linha {row}.')
        return False

    def process_cpf_list(self):
        """Search every client code, then write the results row by row."""
        # SKIP OVER COLUMN HEADING IN THE SPREADSHEET
        cpfs = self.sheet.col_values(self.cpf_col)[1:]
        bot_url = BOT(cpfs)
        results = bot_url.search_cpfs()
        columns = (self.nome_col, self.age_col, self.beneficio_col,
                   self.concessao_col, self.salario_col, self.bancos_col,
                   self.bancocard_col, self.consig_col, self.card_col)
        # UPDATE THE SHEET
        print("Atualizando...")
        # zip(*results) turns the nine parallel lists into one record per
        # client; row 1 is the header, so records start on row 2.
        for row, values in enumerate(zip(*results), start=2):
            self._write_row(row, columns, values)


cpf_updater = CpfSearch('TESTE')
cpf_updater.process_cpf_list()
编辑k_bot.py
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import UnexpectedAlertPresentException
import re
import time
class BOT(object):
    """Selenium scraper for the 3kplus search page.

    Opens Firefox and logs in on construction; ``search_cpfs`` then looks
    up one client code (CPF) per call.
    """

    def __init__(self, cpfs=None):
        """Start Firefox, open the site and submit the login form.

        :param cpfs: optional list of client codes; only stored, since
            ``search_cpfs`` receives the code to search as an argument.
        """
        # SETUP FOR URL
        self.bot_url = 'http://www.3kplus.net/'
        self.cpfs = cpfs
        self.profile = webdriver.FirefoxProfile()
        self.options = Options()
        # Raw string: in a plain literal the '\U' of '\Users' starts a
        # unicode escape and the module fails to compile.
        self.driver = webdriver.Firefox(firefox_profile=self.profile,
                                        executable_path=r'C:\Users\MOISA\Documents\geckodriver.exe',
                                        options=self.options)
        # NAVIGATE TO URL
        self.driver.get(self.bot_url)
        # NOTE(review): credentials are hard-coded; consider loading them from configuration.
        login_box = self.driver.find_element_by_xpath('//*[@id="login"]/div[3]/div[2]/div[2]/input')
        login_box.send_keys('daiane')
        pass_box = self.driver.find_element_by_xpath('//*[@id="login"]/div[3]/div[2]/div[3]/input')
        pass_box.send_keys('789456')
        login_btn = self.driver.find_element_by_xpath('//*[@id="login"]/div[3]/div[2]/button')
        login_btn.click()

    def _text(self, xpath):
        """Return the text of the element matched by *xpath*."""
        return self.driver.find_element_by_xpath(xpath).text

    def _lookup(self, cpf):
        """Search one client code and return its nine fields as a tuple.

        The tuple is ``(nome, age, beneficio, concessao, salario, bankslist,
        bcardlist, consig, card)``. Every field is ``''`` when an element is
        missing or an alert appears (treated as an invalid client code).
        """
        print(f"Procurando {cpf}.")
        self.driver.get(self.bot_url)
        self.delay = 3  # seconds
        # SEARCH CLIENT CODE
        try:
            cpf_input = self.driver.find_element_by_xpath('//*[@id="search"]/div/div[1]/input')
            cpf_input.send_keys(cpf)
            cpf_btn = self.driver.find_element_by_xpath('//*[@id="search"]/div/div[2]/button')
            cpf_btn.click()
            cpf_btn.click()
            time.sleep(2)
            notified = self.driver.find_element_by_xpath('//*[@id="notification"]').is_displayed()
            if notified:
                # CLIENT CODE HAVE NOTIFICATION
                nome = self._text("/html/body/main[1]/div[1]/div[1]/div[1]/div[2]/h2")
                idade = self._text("/html/body/main[1]/div[1]/div[1]/div[1]/div[2]/ul/li[2]")
                beneficio = self._text("/html/body/main[1]/div[1]/div[1]/div[1]/div[3]/div[5]/span/b ")
                concessao = self._text("/html/body/main[1]/div[1]/div[1]/div[1]/div[3]/div[2]/span")
            else:
                # CLIENT CODE DOESN'T HAVE NOTIFICATION
                nome = self._text("/html/body/main[1]/div[1]/div[1]/div[1]/div[1]/h2")
                idade = self._text("/html/body/main[1]/div[1]/div[1]/div[1]/div[1]/ul/li[2]")
                beneficio = self._text("/html/body/main[1]/div[1]/div[1]/div[1]/div[2]/div[5]/span/b")
                concessao = self._text("/html/body/main[1]/div[1]/div[1]/div[1]/div[2]/div[2]/span")
            # The remaining fields use the same XPaths in both layouts.
            salario = self._text("/html/body/main[1]/div[1]/div[2]/div/div[3]/div[1]/div[1]/span")
            bankslist = ', '.join(re.findall(r'(?<=Banco )(\w+)', self._text('//*[@id="loans"]')))
            bcardlist = ', '.join(re.findall(r'(?<=Banco )(\w+)', self._text('//*[@id="cards"]')))
            consig = self._text("/html/body/main[1]/div[1]/div[1]/div[3]/div[2]/span")
            card = self._text("/html/body/main[1]/div[1]/div[1]/div[3]/div[3]/span")
            # Guard the match so a text without "(NN Anos" does not raise AttributeError.
            age_match = re.search(r'\((.*?)Anos', idade)
            age = age_match.group(1) if age_match else ''
            print('CPF Valido')
            if notified:
                print('NOTIFICACAO')
            print(nome, age, beneficio, concessao, salario, bankslist, bcardlist, consig, card)
        # IF THE CLIENT CODE IS WRONG
        except (NoSuchElementException, UnexpectedAlertPresentException):
            # Blank every field. beneficio used to be left out here, which
            # raised UnboundLocalError or reused the previous client's value.
            nome = age = beneficio = concessao = salario = ''
            bankslist = bcardlist = consig = card = ''
            print('CPF Invalido')
        return nome, age, beneficio, concessao, salario, bankslist, bcardlist, consig, card

    def search_cpfs(self, cpf):
        """Look up a single client code.

        :param cpf: the client code to search.
        :returns: tuple of nine one-element lists ``(nomes, idades,
            beneficios, concessoes, salarios, bancoss, bancoscard, consigs,
            cards)``, keeping the list-per-field shape callers unpack.
        """
        return tuple([value] for value in self._lookup(cpf))
编辑cpf_updater.py
from k_bot import BOT
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import time
from gspread.exceptions import APIError
class CpfSearch(object):
    """Search client codes one at a time and write each result to the sheet.

    Column 1 of the first worksheet holds the client codes below a header
    row; the scraped fields are written to fixed columns of the same row.
    """

    def __init__(self, spreadsheet_name):
        """Authorize with the service-account key file and open ``sheet1``.

        :param spreadsheet_name: title of the spreadsheet to open.
        """
        # 1-based column numbers passed to ``update_cell``.
        self.cpf_col = 1
        self.nome_col = 2
        self.age_col = 3
        self.beneficio_col = 4
        self.concessao_col = 5
        self.salario_col = 6
        self.bancos_col = 7
        self.bancocard_col = 9
        self.consig_col = 10
        self.card_col = 16
        scope = ['https://www.googleapis.com/auth/spreadsheets',
                 'https://www.googleapis.com/auth/drive.readonly']
        creds = ServiceAccountCredentials.from_json_keyfile_name('CONSULTAS.json', scope)
        client = gspread.authorize(creds)
        self.sheet = client.open(spreadsheet_name).sheet1

    def _write_row(self, row, columns, values, max_attempts=3):
        """Write *values* into *columns* of *row*, retrying after API errors.

        :returns: ``True`` once every cell was written, ``False`` if the row
            still failed after *max_attempts* tries.
        """
        for _ in range(max_attempts):
            try:
                for col, value in zip(columns, values):
                    self.sheet.update_cell(row, col, value)
            except APIError:
                # Presumably the write quota was hit: wait, then retry this
                # same row instead of moving on and losing its data.
                print('Esperando para atualizar...')
                time.sleep(100)
            else:
                print('Cliente atualizado!')
                return True
        print(f'Falha ao atualizar a linha {row}.')
        return False

    def process_cpf_list(self):
        """Search each client code and write its result before the next search."""
        # SKIP OVER COLUMN HEADING IN THE SPREADSHEET
        cpfs = self.sheet.col_values(self.cpf_col)[1:]
        # BOT.__init__ in the accompanying k_bot.py still takes the code
        # list; the per-code search_cpfs(cpf) call below drives the lookups.
        bot_url = BOT(cpfs)
        columns = (self.nome_col, self.age_col, self.beneficio_col,
                   self.concessao_col, self.salario_col, self.bancos_col,
                   self.bancocard_col, self.consig_col, self.card_col)
        # Row 1 is the header, so the first client code lives on row 2.
        for row, cpf in enumerate(cpfs, start=2):
            results = bot_url.search_cpfs(cpf)
            # UPDATE THE SHEET
            print("Atualizando...")
            # search_cpfs returns one list per field; zip(*) regroups them
            # into per-client records (a single record per call here).
            # The former add_rows(self.sheet) call is gone: it passed a
            # worksheet where a row count is expected, and updating
            # existing rows needs no extra rows.
            for values in zip(*results):
                self._write_row(row, columns, values)


cpf_updater = CpfSearch('TESTE')
cpf_updater.process_cpf_list()
简而言之:您应该将 for cpf in self.cpfs:
从第一个脚本移动到第二个脚本。
在第一个脚本中,函数
def search_cpfs(self, cpf):
应该只搜索一个 cpf。
所以你必须从 search_cpfs()
中删除 for cpf in self.cpfs:
,并且在运行 BOT()
时不再传入 cpfs
,而是在运行 search_cpfs()
时才传入要搜索的 cpf。
在第二个脚本中,您应该使用这个 for
循环,以不同的值运行 search_cpfs(cpf)
bot_url = BOT()
for cpf in cpfs:
...variables... = bot_url.search_cpfs(cpf)
# UPDATE THE SHEET
print("Atualizando...")
编辑:
在 class BOT()
中你必须使用不带 cpfs 的 __init__(self)
,并去掉 self.cpfs = cpfs
。因为 search_cpfs(self, cpf):
只搜索一项,所以你可以改用名称 search_cpf
(不带 s
,但这不是强制性的),并且你不再需要这些列表
nomes = []
idades = []
beneficios = []
concessoes = []
salarios = []
bancoss = []
bancoscard = []
consigs = []
cards = []
但是你可以return直接得到结果
return nome, idade, beneficio, concessoe, salario, bancos, bancocard, consig, card
在 process_cpf_list
中你必须把这两个 for
循环合并起来
for cpf in cpfs:
# code 1
nomes = ...
for cpfs in range(len(nomes)):
# code 2
self.sheet.update_cell(cpfs + 2, self.nome_col, nomes[cpfs])
把它们合并成一个 for
循环
for row, cpf in enumerate(cpfs):
# code 1
nomes, idades, ... = BOT.search_cpfs()
# code 2
self.sheet.update_cell(row + 2, self.nome_col, nomes[row])
self.sheet.update_cell(row + 2, self.age_col, idades[row])
我将使用名称 row
而不是第二个 cpfs
以使其更具可读性。
因为 search_cpfs
给我一个结果列表(而不是列表列表),所以我可以使用 nome
而不是 nomes[cpfs]
然后我可以使用 row = row + 2
for row, cpf in enumerate(cpfs):
# code 1
nome, idade, ... = BOT.search_cpfs(cpf)
# code 2
row = row + 2
self.sheet.update_cell(row, self.nome_col, nome)
self.sheet.update_cell(row, self.age_col, idade)
我什至可以使用 enumerate(cpfs, 2)
而不是 row = row + 2
完整代码 - 未测试
k_bot.py
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import UnexpectedAlertPresentException
import re
import time
class BOT(object):
    """Selenium scraper for the 3kplus search page.

    Opens Firefox and logs in on construction; ``search_cpf`` then looks up
    one client code (CPF) per call.
    """

    def __init__(self):
        """Start Firefox, open the site and submit the login form."""
        # SETUP FOR URL
        self.bot_url = 'http://www.3kplus.net/'
        self.profile = webdriver.FirefoxProfile()
        self.options = Options()
        # Raw string: in a plain literal the '\U' of '\Users' starts a
        # unicode escape and the module fails to compile.
        self.driver = webdriver.Firefox(firefox_profile=self.profile,
                                        executable_path=r'C:\Users\MOISA\Documents\geckodriver.exe',
                                        options=self.options)
        # NAVIGATE TO URL
        self.driver.get(self.bot_url)
        # NOTE(review): credentials are hard-coded; consider loading them from configuration.
        login_box = self.driver.find_element_by_xpath('//*[@id="login"]/div[3]/div[2]/div[2]/input')
        login_box.send_keys('daiane')
        pass_box = self.driver.find_element_by_xpath('//*[@id="login"]/div[3]/div[2]/div[3]/input')
        pass_box.send_keys('789456')
        login_btn = self.driver.find_element_by_xpath('//*[@id="login"]/div[3]/div[2]/button')
        login_btn.click()

    def _text(self, xpath):
        """Return the text of the element matched by *xpath*."""
        return self.driver.find_element_by_xpath(xpath).text

    def search_cpf(self, cpf):
        """Search one client code and return its details.

        :param cpf: the client code to search.
        :returns: tuple ``(nome, age, beneficio, concessao, salario,
            bankslist, bcardlist, consig, card)`` of strings. Every field is
            ``''`` when an element is missing or an alert appears (treated
            as an invalid client code).
        """
        print(f"Procurando {cpf}.")
        self.driver.get(self.bot_url)
        self.delay = 3  # seconds
        # SEARCH CLIENT CODE
        try:
            cpf_input = self.driver.find_element_by_xpath('//*[@id="search"]/div/div[1]/input')
            cpf_input.send_keys(cpf)
            cpf_btn = self.driver.find_element_by_xpath('//*[@id="search"]/div/div[2]/button')
            cpf_btn.click()
            cpf_btn.click()
            time.sleep(2)
            notified = self.driver.find_element_by_xpath('//*[@id="notification"]').is_displayed()
            if notified:
                # CLIENT CODE HAVE NOTIFICATION
                nome = self._text("/html/body/main[1]/div[1]/div[1]/div[1]/div[2]/h2")
                idade = self._text("/html/body/main[1]/div[1]/div[1]/div[1]/div[2]/ul/li[2]")
                beneficio = self._text("/html/body/main[1]/div[1]/div[1]/div[1]/div[3]/div[5]/span/b ")
                concessao = self._text("/html/body/main[1]/div[1]/div[1]/div[1]/div[3]/div[2]/span")
            else:
                # CLIENT CODE DOESN'T HAVE NOTIFICATION
                nome = self._text("/html/body/main[1]/div[1]/div[1]/div[1]/div[1]/h2")
                idade = self._text("/html/body/main[1]/div[1]/div[1]/div[1]/div[1]/ul/li[2]")
                beneficio = self._text("/html/body/main[1]/div[1]/div[1]/div[1]/div[2]/div[5]/span/b")
                concessao = self._text("/html/body/main[1]/div[1]/div[1]/div[1]/div[2]/div[2]/span")
            # The remaining fields use the same XPaths in both layouts.
            salario = self._text("/html/body/main[1]/div[1]/div[2]/div/div[3]/div[1]/div[1]/span")
            bankslist = ', '.join(re.findall(r'(?<=Banco )(\w+)', self._text('//*[@id="loans"]')))
            bcardlist = ', '.join(re.findall(r'(?<=Banco )(\w+)', self._text('//*[@id="cards"]')))
            consig = self._text("/html/body/main[1]/div[1]/div[1]/div[3]/div[2]/span")
            card = self._text("/html/body/main[1]/div[1]/div[1]/div[3]/div[3]/span")
            # Guard the match so a text without "(NN Anos" does not raise AttributeError.
            age_match = re.search(r'\((.*?)Anos', idade)
            age = age_match.group(1) if age_match else ''
            print('CPF Valido')
            if notified:
                print('NOTIFICACAO')
            print(nome, age, beneficio, concessao, salario, bankslist, bcardlist, consig, card)
        # IF THE CLIENT CODE IS WRONG
        except (NoSuchElementException, UnexpectedAlertPresentException):
            # Blank every field, beneficio included, so the return below
            # never touches an unbound name.
            nome = age = beneficio = concessao = salario = ''
            bankslist = bcardlist = consig = card = ''
            print('CPF Invalido')
        # Return the processed values (age, joined bank names) that are
        # printed above; the old return named an undefined `concessoe`.
        return nome, age, beneficio, concessao, salario, bankslist, bcardlist, consig, card

    # Plural alias: the accompanying updater script calls search_cpfs(cpf).
    search_cpfs = search_cpf
cpf_updater.py
from k_bot import BOT
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import time
from gspread.exceptions import APIError
class CpfSearch(object):
    """Search client codes one at a time and write each result to the sheet.

    Column 1 of the first worksheet holds the client codes below a header
    row; the scraped fields are written to fixed columns of the same row.
    """

    def __init__(self, spreadsheet_name):
        """Authorize with the service-account key file and open ``sheet1``.

        :param spreadsheet_name: title of the spreadsheet to open.
        """
        # 1-based column numbers passed to ``update_cell``.
        self.cpf_col = 1
        self.nome_col = 2
        self.age_col = 3
        self.beneficio_col = 4
        self.concessao_col = 5
        self.salario_col = 6
        self.bancos_col = 7
        self.bancocard_col = 9
        self.consig_col = 10
        self.card_col = 16
        scope = ['https://www.googleapis.com/auth/spreadsheets',
                 'https://www.googleapis.com/auth/drive.readonly']
        creds = ServiceAccountCredentials.from_json_keyfile_name('CONSULTAS.json', scope)
        client = gspread.authorize(creds)
        self.sheet = client.open(spreadsheet_name).sheet1

    def _write_row(self, row, columns, values, max_attempts=3):
        """Write *values* into *columns* of *row*, retrying after API errors.

        :returns: ``True`` once every cell was written, ``False`` if the row
            still failed after *max_attempts* tries.
        """
        for _ in range(max_attempts):
            try:
                for col, value in zip(columns, values):
                    self.sheet.update_cell(row, col, value)
            except APIError:
                # Presumably the write quota was hit: wait, then retry this
                # same row instead of moving on and losing its data.
                print('Esperando para atualizar...')
                time.sleep(100)
            else:
                print('Cliente atualizado!')
                return True
        print(f'Falha ao atualizar a linha {row}.')
        return False

    def process_cpf_list(self):
        """Search each client code and write its result before the next search."""
        # SKIP OVER COLUMN HEADING IN THE SPREADSHEET
        cpfs = self.sheet.col_values(self.cpf_col)[1:]
        bot_url = BOT()
        columns = (self.nome_col, self.age_col, self.beneficio_col,
                   self.concessao_col, self.salario_col, self.bancos_col,
                   self.bancocard_col, self.consig_col, self.card_col)
        # Row 1 is the header, so the first client code lives on row 2.
        for row, cpf in enumerate(cpfs, start=2):
            # The single-code method on BOT is named search_cpf (no "s").
            values = bot_url.search_cpf(cpf)
            # UPDATE THE SHEET
            print("Atualizando...")
            self._write_row(row, columns, values)


cpf_updater = CpfSearch('TESTE')
cpf_updater.process_cpf_list()
我有两个代码可以抓取基本上是搜索引擎的页面。它从 google sheet 读取信息,在 URL 上搜索,获取一些信息,然后将它们写入 sheet.
问题是我使用了两个代码,第二个是将信息写入 google sheet.
第一个代码执行所有搜索,然后当每个搜索完成时,第二个代码开始将获取的信息写入 google sheets。
我想做的是搜索一个然后写,搜索第二个然后写.....我尝试了不同的方法,但这是我的第一个代码和我第一次编程,所以我很挣扎
k_bot.py(网页抓取工具)
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import UnexpectedAlertPresentException
import re
import time
class BOT(object):
    """Selenium scraper for the 3kplus search page.

    Opens Firefox and logs in on construction; ``search_cpfs`` then looks
    up every client code (CPF) that was handed to the constructor.
    """

    def __init__(self, cpfs):
        """Start Firefox, open the site and submit the login form.

        :param cpfs: iterable of client codes to look up later.
        """
        # SETUP FOR URL
        self.bot_url = 'http://www.3kplus.net/'
        self.cpfs = cpfs
        self.profile = webdriver.FirefoxProfile()
        self.options = Options()
        # Raw string: in a plain literal the '\U' of '\Users' starts a
        # unicode escape and the module fails to compile.
        self.driver = webdriver.Firefox(firefox_profile=self.profile,
                                        executable_path=r'C:\Users\MOISA\Documents\geckodriver.exe',
                                        options=self.options)
        # NAVIGATE TO URL
        self.driver.get(self.bot_url)
        # NOTE(review): credentials are hard-coded; consider loading them from configuration.
        login_box = self.driver.find_element_by_xpath('//*[@id="login"]/div[3]/div[2]/div[2]/input')
        login_box.send_keys('daiane')
        pass_box = self.driver.find_element_by_xpath('//*[@id="login"]/div[3]/div[2]/div[3]/input')
        pass_box.send_keys('789456')
        login_btn = self.driver.find_element_by_xpath('//*[@id="login"]/div[3]/div[2]/button')
        login_btn.click()

    def _text(self, xpath):
        """Return the text of the element matched by *xpath*."""
        return self.driver.find_element_by_xpath(xpath).text

    def _lookup(self, cpf):
        """Search one client code and return its nine fields as a tuple.

        The tuple is ``(nome, age, beneficio, concessao, salario, bankslist,
        bcardlist, consig, card)``. Every field is ``''`` when an element is
        missing or an alert appears (treated as an invalid client code).
        """
        print(f"Procurando {cpf}.")
        self.driver.get(self.bot_url)
        self.delay = 3  # seconds
        # SEARCH CLIENT CODE
        try:
            cpf_input = self.driver.find_element_by_xpath('//*[@id="search"]/div/div[1]/input')
            cpf_input.send_keys(cpf)
            cpf_btn = self.driver.find_element_by_xpath('//*[@id="search"]/div/div[2]/button')
            cpf_btn.click()
            cpf_btn.click()
            time.sleep(2)
            notified = self.driver.find_element_by_xpath('//*[@id="notification"]').is_displayed()
            if notified:
                # CLIENT CODE HAVE NOTIFICATION
                nome = self._text("/html/body/main[1]/div[1]/div[1]/div[1]/div[2]/h2")
                idade = self._text("/html/body/main[1]/div[1]/div[1]/div[1]/div[2]/ul/li[2]")
                beneficio = self._text("/html/body/main[1]/div[1]/div[1]/div[1]/div[3]/div[5]/span/b ")
                concessao = self._text("/html/body/main[1]/div[1]/div[1]/div[1]/div[3]/div[2]/span")
            else:
                # CLIENT CODE DOESN'T HAVE NOTIFICATION
                nome = self._text("/html/body/main[1]/div[1]/div[1]/div[1]/div[1]/h2")
                idade = self._text("/html/body/main[1]/div[1]/div[1]/div[1]/div[1]/ul/li[2]")
                beneficio = self._text("/html/body/main[1]/div[1]/div[1]/div[1]/div[2]/div[5]/span/b")
                concessao = self._text("/html/body/main[1]/div[1]/div[1]/div[1]/div[2]/div[2]/span")
            # The remaining fields use the same XPaths in both layouts.
            salario = self._text("/html/body/main[1]/div[1]/div[2]/div/div[3]/div[1]/div[1]/span")
            bankslist = ', '.join(re.findall(r'(?<=Banco )(\w+)', self._text('//*[@id="loans"]')))
            bcardlist = ', '.join(re.findall(r'(?<=Banco )(\w+)', self._text('//*[@id="cards"]')))
            consig = self._text("/html/body/main[1]/div[1]/div[1]/div[3]/div[2]/span")
            card = self._text("/html/body/main[1]/div[1]/div[1]/div[3]/div[3]/span")
            # Guard the match so a text without "(NN Anos" does not raise AttributeError.
            age_match = re.search(r'\((.*?)Anos', idade)
            age = age_match.group(1) if age_match else ''
            print('CPF Valido')
            if notified:
                print('NOTIFICACAO')
            print(nome, age, beneficio, concessao, salario, bankslist, bcardlist, consig, card)
        # IF THE CLIENT CODE IS WRONG
        except (NoSuchElementException, UnexpectedAlertPresentException):
            # Blank every field. beneficio used to be left out here, which
            # raised UnboundLocalError or reused the previous client's value.
            nome = age = beneficio = concessao = salario = ''
            bankslist = bcardlist = consig = card = ''
            print('CPF Invalido')
        return nome, age, beneficio, concessao, salario, bankslist, bcardlist, consig, card

    def search_cpfs(self):
        """Look up every code in ``self.cpfs``.

        :returns: tuple of nine parallel lists ``(nomes, idades, beneficios,
            concessoes, salarios, bancoss, bancoscard, consigs, cards)``,
            one entry per client code, in input order.
        """
        # SEARCH THROUGH THE LIST OF CLIENT CODES (1ST COLUMN OF THE SPREADSHEET), AND OBTAIN THESE INFOS
        columns = tuple([] for _ in range(9))
        for cpf in self.cpfs:
            for column, value in zip(columns, self._lookup(cpf)):
                column.append(value)
        return columns
cpf_updater.py(将信息写入 google sheet 的脚本)
from k_bot import BOT
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import time
from gspread.exceptions import APIError
class CpfSearch(object):
    """Fill a Google Sheet with the data scraped for each client code.

    Column 1 of the first worksheet holds the client codes below a header
    row; the scraped fields are written to fixed columns of the same row.
    """

    def __init__(self, spreadsheet_name):
        """Authorize with the service-account key file and open ``sheet1``.

        :param spreadsheet_name: title of the spreadsheet to open.
        """
        # 1-based column numbers passed to ``update_cell``.
        self.cpf_col = 1
        self.nome_col = 2
        self.age_col = 3
        self.beneficio_col = 4
        self.concessao_col = 5
        self.salario_col = 6
        self.bancos_col = 7
        self.bancocard_col = 9
        self.consig_col = 10
        self.card_col = 16
        scope = ['https://www.googleapis.com/auth/spreadsheets',
                 'https://www.googleapis.com/auth/drive.readonly']
        creds = ServiceAccountCredentials.from_json_keyfile_name('CONSULTAS.json', scope)
        client = gspread.authorize(creds)
        self.sheet = client.open(spreadsheet_name).sheet1

    def _write_row(self, row, columns, values, max_attempts=3):
        """Write *values* into *columns* of *row*, retrying after API errors.

        :returns: ``True`` once every cell was written, ``False`` if the row
            still failed after *max_attempts* tries.
        """
        for _ in range(max_attempts):
            try:
                for col, value in zip(columns, values):
                    self.sheet.update_cell(row, col, value)
            except APIError:
                # Presumably the write quota was hit: wait, then retry this
                # same row instead of moving on and losing its data.
                print('Esperando para atualizar...')
                time.sleep(100)
            else:
                print('Cliente atualizado!')
                return True
        print(f'Falha ao atualizar a linha {row}.')
        return False

    def process_cpf_list(self):
        """Search every client code, then write the results row by row."""
        # SKIP OVER COLUMN HEADING IN THE SPREADSHEET
        cpfs = self.sheet.col_values(self.cpf_col)[1:]
        bot_url = BOT(cpfs)
        results = bot_url.search_cpfs()
        columns = (self.nome_col, self.age_col, self.beneficio_col,
                   self.concessao_col, self.salario_col, self.bancos_col,
                   self.bancocard_col, self.consig_col, self.card_col)
        # UPDATE THE SHEET
        print("Atualizando...")
        # zip(*results) turns the nine parallel lists into one record per
        # client; row 1 is the header, so records start on row 2.
        for row, values in enumerate(zip(*results), start=2):
            self._write_row(row, columns, values)


cpf_updater = CpfSearch('TESTE')
cpf_updater.process_cpf_list()
编辑k_bot.py
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import UnexpectedAlertPresentException
import re
import time
class BOT(object):
    """Selenium scraper for the 3kplus search page.

    Opens Firefox and logs in on construction; ``search_cpfs`` then looks
    up one client code (CPF) per call.
    """

    def __init__(self, cpfs=None):
        """Start Firefox, open the site and submit the login form.

        :param cpfs: optional list of client codes; only stored, since
            ``search_cpfs`` receives the code to search as an argument.
        """
        # SETUP FOR URL
        self.bot_url = 'http://www.3kplus.net/'
        self.cpfs = cpfs
        self.profile = webdriver.FirefoxProfile()
        self.options = Options()
        # Raw string: in a plain literal the '\U' of '\Users' starts a
        # unicode escape and the module fails to compile.
        self.driver = webdriver.Firefox(firefox_profile=self.profile,
                                        executable_path=r'C:\Users\MOISA\Documents\geckodriver.exe',
                                        options=self.options)
        # NAVIGATE TO URL
        self.driver.get(self.bot_url)
        # NOTE(review): credentials are hard-coded; consider loading them from configuration.
        login_box = self.driver.find_element_by_xpath('//*[@id="login"]/div[3]/div[2]/div[2]/input')
        login_box.send_keys('daiane')
        pass_box = self.driver.find_element_by_xpath('//*[@id="login"]/div[3]/div[2]/div[3]/input')
        pass_box.send_keys('789456')
        login_btn = self.driver.find_element_by_xpath('//*[@id="login"]/div[3]/div[2]/button')
        login_btn.click()

    def _text(self, xpath):
        """Return the text of the element matched by *xpath*."""
        return self.driver.find_element_by_xpath(xpath).text

    def _lookup(self, cpf):
        """Search one client code and return its nine fields as a tuple.

        The tuple is ``(nome, age, beneficio, concessao, salario, bankslist,
        bcardlist, consig, card)``. Every field is ``''`` when an element is
        missing or an alert appears (treated as an invalid client code).
        """
        print(f"Procurando {cpf}.")
        self.driver.get(self.bot_url)
        self.delay = 3  # seconds
        # SEARCH CLIENT CODE
        try:
            cpf_input = self.driver.find_element_by_xpath('//*[@id="search"]/div/div[1]/input')
            cpf_input.send_keys(cpf)
            cpf_btn = self.driver.find_element_by_xpath('//*[@id="search"]/div/div[2]/button')
            cpf_btn.click()
            cpf_btn.click()
            time.sleep(2)
            notified = self.driver.find_element_by_xpath('//*[@id="notification"]').is_displayed()
            if notified:
                # CLIENT CODE HAVE NOTIFICATION
                nome = self._text("/html/body/main[1]/div[1]/div[1]/div[1]/div[2]/h2")
                idade = self._text("/html/body/main[1]/div[1]/div[1]/div[1]/div[2]/ul/li[2]")
                beneficio = self._text("/html/body/main[1]/div[1]/div[1]/div[1]/div[3]/div[5]/span/b ")
                concessao = self._text("/html/body/main[1]/div[1]/div[1]/div[1]/div[3]/div[2]/span")
            else:
                # CLIENT CODE DOESN'T HAVE NOTIFICATION
                nome = self._text("/html/body/main[1]/div[1]/div[1]/div[1]/div[1]/h2")
                idade = self._text("/html/body/main[1]/div[1]/div[1]/div[1]/div[1]/ul/li[2]")
                beneficio = self._text("/html/body/main[1]/div[1]/div[1]/div[1]/div[2]/div[5]/span/b")
                concessao = self._text("/html/body/main[1]/div[1]/div[1]/div[1]/div[2]/div[2]/span")
            # The remaining fields use the same XPaths in both layouts.
            salario = self._text("/html/body/main[1]/div[1]/div[2]/div/div[3]/div[1]/div[1]/span")
            bankslist = ', '.join(re.findall(r'(?<=Banco )(\w+)', self._text('//*[@id="loans"]')))
            bcardlist = ', '.join(re.findall(r'(?<=Banco )(\w+)', self._text('//*[@id="cards"]')))
            consig = self._text("/html/body/main[1]/div[1]/div[1]/div[3]/div[2]/span")
            card = self._text("/html/body/main[1]/div[1]/div[1]/div[3]/div[3]/span")
            # Guard the match so a text without "(NN Anos" does not raise AttributeError.
            age_match = re.search(r'\((.*?)Anos', idade)
            age = age_match.group(1) if age_match else ''
            print('CPF Valido')
            if notified:
                print('NOTIFICACAO')
            print(nome, age, beneficio, concessao, salario, bankslist, bcardlist, consig, card)
        # IF THE CLIENT CODE IS WRONG
        except (NoSuchElementException, UnexpectedAlertPresentException):
            # Blank every field. beneficio used to be left out here, which
            # raised UnboundLocalError or reused the previous client's value.
            nome = age = beneficio = concessao = salario = ''
            bankslist = bcardlist = consig = card = ''
            print('CPF Invalido')
        return nome, age, beneficio, concessao, salario, bankslist, bcardlist, consig, card

    def search_cpfs(self, cpf):
        """Look up a single client code.

        :param cpf: the client code to search.
        :returns: tuple of nine one-element lists ``(nomes, idades,
            beneficios, concessoes, salarios, bancoss, bancoscard, consigs,
            cards)``, keeping the list-per-field shape callers unpack.
        """
        return tuple([value] for value in self._lookup(cpf))
编辑cpf_updater.py
from k_bot import BOT
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import time
from gspread.exceptions import APIError
class CpfSearch(object):
    """Look up every client code (CPF) listed in a Google Sheet and write the results back.

    This version searches all CPFs first (one ``BOT.search_cpfs()`` call) and
    only then writes the collected results to the sheet, row by row.
    """

    def __init__(self, spreadsheet_name):
        """Authorize against Google and open the first worksheet of *spreadsheet_name*."""
        # 1-based column numbers of the sheet; the gaps (8, 11-15) are left untouched.
        self.cpf_col = 1
        self.nome_col = 2
        self.age_col = 3
        self.beneficio_col = 4
        self.concessao_col = 5
        self.salario_col = 6
        self.bancos_col = 7
        self.bancocard_col = 9
        self.consig_col = 10
        self.card_col = 16
        scope = ['https://www.googleapis.com/auth/spreadsheets',
                 'https://www.googleapis.com/auth/drive.readonly']
        creds = ServiceAccountCredentials.from_json_keyfile_name('CONSULTAS.json', scope)
        client = gspread.authorize(creds)
        self.sheet = client.open(spreadsheet_name).sheet1

    def process_cpf_list(self):
        """Search all CPFs from the first column, then update one sheet row per CPF."""
        # SKIP OVER COLUMN HEADING IN THE SPREADSHEET
        cpfs = self.sheet.col_values(self.cpf_col)[1:]
        # This BOT takes the whole CPF list in its constructor and search_cpfs()
        # returns nine parallel lists (one entry per CPF in each list).
        bot = BOT(cpfs)
        results = bot.search_cpfs()
        # Target columns, in the same order as the lists returned by search_cpfs().
        columns = (self.nome_col, self.age_col, self.beneficio_col,
                   self.concessao_col, self.salario_col, self.bancos_col,
                   self.bancocard_col, self.consig_col, self.card_col)
        # UPDATE THE SHEET
        print("Atualizando...")
        # Row 1 is the heading, so the first CPF lives in row 2.
        for row, values in enumerate(zip(*results), 2):
            try:
                for col, value in zip(columns, values):
                    self.sheet.update_cell(row, col, value)
                print('Cliente atualizado!')
            except APIError:
                # NOTE(review): presumably a quota error; the row is not retried
                # after the wait, processing just moves on to the next row.
                print('Esperando para atualizar...')
                time.sleep(100)
# Script entry point: open the spreadsheet named 'TESTE' and process its CPF list.
cpf_updater = CpfSearch('TESTE')
cpf_updater.process_cpf_list()
简而言之:您应该将 for cpf in self.cpfs:
这个循环从第一个脚本移动到第二个脚本。
在第一个脚本中你应该有函数
def search_cpfs(self, cpf):
只搜索一个 cpf
。
所以你必须从 search_cpfs()
中删除 for cpf in self.cpfs:
并且在创建 BOT()
时不再传入 cpfs
而是在运行 search_cpfs(cpf)
时才传入要搜索的那一个 cpf。
在第二个脚本中,您应该使用这个 for
循环,用不同的值多次运行 search_cpfs(cpf)
bot_url = BOT()
for cpf in cpfs:
...variables... = bot_url.search_cpfs(cpf)
# UPDATE THE SHEET
print("Atualizando...")
编辑:
在 class BOT()
中你必须使用 __init__(self)
没有 cpfs
和没有 self.cpfs = cpfs
因为 search_cpfs(self, cpf):
只搜索一项,所以你可以使用名称 search_cpf
而不用 s
(但这不是强制性的)并且你不需要列表
nomes = []
idades = []
beneficios = []
concessoes = []
salarios = []
bancoss = []
bancoscard = []
consigs = []
cards = []
但是你可以return直接得到结果
return nome, idade, beneficio, concessoe, salario, bancos, bancocard, consig, card
在 process_cpf_list
中你必须把这两个 for
循环合并起来
for cpf in cpfs:
# code 1
nomes = ...
for cpfs in range(len(nomes)):
# code 2
self.sheet.update_cell(cpfs + 2, self.nome_col, nomes[cpfs])
从而得到一个 for
循环
for row, cpf in enumerate(cpfs):
# code 1
nomes, idades, ... = BOT.search_cpfs()
# code 2
self.sheet.update_cell(row + 2, self.nome_col, nomes[row])
self.sheet.update_cell(row + 2, self.age_col, idades[row])
我将使用名称 row
而不是第二个 cpfs
以使其更具可读性。
因为 search_cpfs
现在只返回一组结果(而不是多个结果列表),所以我可以使用 nome
而不是 nomes[cpfs]
然后我可以使用 row = row + 2
for row, cpf in enumerate(cpfs):
# code 1
nome, idade, ... = BOT.search_cpfs(cpf)
# code 2
row = row + 2
self.sheet.update_cell(row, self.nome_col, nome)
self.sheet.update_cell(row, self.age_col, idade)
我什至可以使用 enumerate(cpfs, 2)
而不是 row = row + 2
完整代码 - 未测试
k_bot.py
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import UnexpectedAlertPresentException
import re
import time
class BOT(object):
    """Selenium-driven scraper that logs in to the site and looks up one client code (CPF) per call."""

    # Common XPath prefix of the name/age block and the benefit block.
    _CLIENT_CARD = "/html/body/main[1]/div[1]/div[1]/div[1]"

    def __init__(self):
        """Start Firefox, open the site and submit the login form."""
        # SETUP FOR URL
        self.bot_url = 'http://www.3kplus.net/'
        self.profile = webdriver.FirefoxProfile()
        self.options = Options()
        # Raw string: in a normal literal '\U' starts a unicode escape, which
        # makes the whole module fail to compile.
        self.driver = webdriver.Firefox(firefox_profile=self.profile,
                                        executable_path=r'C:\Users\MOISA\Documents\geckodriver.exe',
                                        options=self.options)
        # NAVIGATE TO URL
        self.driver.get(self.bot_url)
        # NOTE(review): credentials are hard-coded here; consider moving them
        # to configuration outside the source file.
        login_box = self.driver.find_element_by_xpath('//*[@id="login"]/div[3]/div[2]/div[2]/input')
        login_box.send_keys('daiane')
        pass_box = self.driver.find_element_by_xpath('//*[@id="login"]/div[3]/div[2]/div[3]/input')
        pass_box.send_keys('789456')
        login_btn = self.driver.find_element_by_xpath('//*[@id="login"]/div[3]/div[2]/button')
        login_btn.click()

    @staticmethod
    def _parse_age(idade):
        """Return the text between '(' and 'Anos' in *idade*, or '' when the pattern is absent."""
        match = re.search(r'\((.*?)Anos', idade)
        return match.group(1) if match else ''

    @staticmethod
    def _bank_names(text):
        """Return the words that follow 'Banco ' in *text*, joined with ', '."""
        return ', '.join(re.findall(r'(?<=Banco )(\w+)', text))

    def _text(self, xpath):
        """Return the text of the element located by *xpath*."""
        return self.driver.find_element_by_xpath(xpath).text

    def _scrape_client(self, header_index):
        """Read the nine result fields from the currently loaded client page.

        *header_index* is the ``div[...]`` position of the block holding name
        and age; the benefit fields are read from the div right after it.
        """
        header = f"{self._CLIENT_CARD}/div[{header_index}]"
        details = f"{self._CLIENT_CARD}/div[{header_index + 1}]"
        nome = self._text(f"{header}/h2")
        age = self._parse_age(self._text(f"{header}/ul/li[2]"))
        beneficio = self._text(f"{details}/div[5]/span/b")
        concessao = self._text(f"{details}/div[2]/span")
        salario = self._text("/html/body/main[1]/div[1]/div[2]/div/div[3]/div[1]/div[1]/span")
        bankslist = self._bank_names(self._text('//*[@id="loans"]'))
        bcardlist = self._bank_names(self._text('//*[@id="cards"]'))
        consig = self._text("/html/body/main[1]/div[1]/div[1]/div[3]/div[2]/span")
        card = self._text("/html/body/main[1]/div[1]/div[1]/div[3]/div[3]/span")
        return nome, age, beneficio, concessao, salario, bankslist, bcardlist, consig, card

    def search_cpf(self, cpf):
        """Search a single client code and return its data.

        Returns a 9-tuple ``(nome, age, beneficio, concessao, salario,
        bankslist, bcardlist, consig, card)``. Every item is an empty string
        when an expected element is missing or an alert pops up, which is
        treated as an invalid client code.
        """
        print(f"Procurando {cpf}.")
        self.driver.get(self.bot_url)
        self.delay = 3  # seconds; kept as before, not read anywhere in this class
        # SEARCH CLIENT CODE
        try:
            cpf_input = self.driver.find_element_by_xpath('//*[@id="search"]/div/div[1]/input')
            cpf_input.send_keys(cpf)
            cpf_btn = self.driver.find_element_by_xpath('//*[@id="search"]/div/div[2]/button')
            # NOTE(review): the button is clicked twice, presumably on purpose — confirm.
            cpf_btn.click()
            cpf_btn.click()
            time.sleep(2)
            # When the notification element is displayed, the name/age and
            # benefit blocks sit one div further down (div[2]/div[3] instead
            # of div[1]/div[2]).
            has_notification = self.driver.find_element_by_xpath(
                '//*[@id="notification"]').is_displayed()
            result = self._scrape_client(2 if has_notification else 1)
        # IF THE CLIENT CODE IS WRONG
        except (NoSuchElementException, UnexpectedAlertPresentException):
            print('CPF Invalido')
            return ('',) * 9
        print('CPF Valido')
        if has_notification:
            print('NOTIFICACAO')
        print(*result)
        return result

    # Alias kept so callers that still use the plural name keep working.
    search_cpfs = search_cpf
cpf_updater.py
from k_bot import BOT
import gspread
from oauth2client.service_account import ServiceAccountCredentials
import time
from gspread.exceptions import APIError
class CpfSearch(object):
    """Look up each client code (CPF) listed in a Google Sheet and write its result back at once."""

    # How many times one row is attempted before it is given up.
    MAX_WRITE_ATTEMPTS = 3
    # Seconds to wait after an APIError before the row is tried again.
    RETRY_WAIT_SECONDS = 100

    def __init__(self, spreadsheet_name):
        """Authorize against Google and open the first worksheet of *spreadsheet_name*."""
        # 1-based column numbers of the sheet; the gaps (8, 11-15) are left untouched.
        self.cpf_col = 1
        self.nome_col = 2
        self.age_col = 3
        self.beneficio_col = 4
        self.concessao_col = 5
        self.salario_col = 6
        self.bancos_col = 7
        self.bancocard_col = 9
        self.consig_col = 10
        self.card_col = 16
        scope = ['https://www.googleapis.com/auth/spreadsheets',
                 'https://www.googleapis.com/auth/drive.readonly']
        creds = ServiceAccountCredentials.from_json_keyfile_name('CONSULTAS.json', scope)
        client = gspread.authorize(creds)
        self.sheet = client.open(spreadsheet_name).sheet1

    def _write_row(self, row, values):
        """Write *values* into the result columns of *row*; return True on success.

        After an APIError the method waits and tries the whole row again, up
        to MAX_WRITE_ATTEMPTS times, so a failed row is not silently left
        half-written.
        """
        # Target columns, in the same order as the values returned by the bot.
        columns = (self.nome_col, self.age_col, self.beneficio_col,
                   self.concessao_col, self.salario_col, self.bancos_col,
                   self.bancocard_col, self.consig_col, self.card_col)
        for _ in range(self.MAX_WRITE_ATTEMPTS):
            try:
                for col, value in zip(columns, values):
                    self.sheet.update_cell(row, col, value)
            except APIError:
                # NOTE(review): presumably the Sheets API quota — confirm.
                print('Esperando para atualizar...')
                time.sleep(self.RETRY_WAIT_SECONDS)
            else:
                print('Cliente atualizado!')
                return True
        return False

    def process_cpf_list(self):
        """Search the CPFs of the first column one at a time, writing each result right away."""
        # SKIP OVER COLUMN HEADING IN THE SPREADSHEET
        cpfs = self.sheet.col_values(self.cpf_col)[1:]
        bot = BOT()
        # Row 1 is the heading, so the first CPF lives in row 2.
        for row, cpf in enumerate(cpfs, 2):
            # One search returns one 9-item result for this CPF.
            result = bot.search_cpf(cpf)
            # UPDATE THE SHEET
            print("Atualizando...")
            self._write_row(row, result)
# Script entry point: open the spreadsheet named 'TESTE' and process its CPF list.
cpf_updater = CpfSearch('TESTE')
cpf_updater.process_cpf_list()