如何添加计时器来计算我的代码执行

How to add a timer to calculate my code execution

我写了一个 python 脚本,它从 aliexpress 抓取产品。

这是我的代码:

from selenium.webdriver.edge.options import Options  
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium import webdriver  
from pymongo import MongoClient
from time import sleep
from lxml import html 
import pandas as pd
import cssselect
import pymongo
import json 
import csv 


options = Options()
options.headless = True
driver = webdriver.Edge(executable_path=r"C:\Users\aicha\Desktop\mycode\aliexpress_scrap\scrap\codes\msedgedriver",options=options)
url = 'https://www.aliexpress.com/wholesale?trafficChannel=main&d=y&CatId=0&SearchText=bluetooth+earphones&ltype=wholesale&SortType=default&page={}'
baseurl = 'https://www.aliexpress.com'

for page_nb in range(1, 2):
    print('---', page_nb, '---')
    
    driver.get(url.format(page_nb))
    sleep(2)
    current_offset = 0
    while True:
        driver.execute_script("window.scrollBy(0, window.innerHeight);")
        sleep(.5)  # JavaScript has time to add elements
        new_offset = driver.execute_script("return window.pageYOffset;")
        print(new_offset,current_offset)
        if new_offset <= current_offset:
            break
        current_offset = new_offset
    
    sleep(3)
    
    tree = html.fromstring(driver.page_source)
    
    results = []
    
    for product in tree.xpath('//div[@class="JIIxO"]//a'):
        title = product.xpath('.//h1/text()')
        
        if title:
            title = title[0]
            
            price = product.cssselect('div.mGXnE._37W_B span')
            price = [x.text for x in price]

            currency = price[0]
            price = ''.join(price[1:])
            stars = product.xpath('.//span[@class="eXPaM"]/text()')
            if stars :
                stars  = stars [0]
            else:
                stars  = 'None'
                
            nb_sold = product.xpath('.//span[@class="_1kNf9"]/text()')
            if nb_sold:
                nb_sold = nb_sold[0]
            else:
                nb_sold = 'None'
            supl = product.xpath('.//a[@class="ox0KZ"]/text()')
            if supl:
                supl = supl[0]
            else:
                supl = 'None'

            ship_cost = product.xpath('.//span[@class="_2jcMA"]/text()')
            if ship_cost:
                ship_cost = ship_cost[0]
            else:
                ship_cost = 'None'
            
            product_links = product.xpath('./@href')
            if product_links:
                product_links = str(baseurl) + str( product_links[0])
            
            row = [title, price, currency, stars, nb_sold, ship_cost, supl, product_links]
            results.append(row)
            print('len(results):', len(results))

    driver.close()
df = pd.DataFrame(results , columns=("Title","Price", "Currency", "Stars", "Orders", "Shipcost", "Supplier", "Productlinks" ))

####### Insert in database #############
client = MongoClient("mongodb://localhost:27017/")     
collection = client['db2']['aliex2']     
data = df.to_dict(orient = 'records')     
collection.insert_many(data) 

我的问题:

我需要的是添加一个计时器来计算处理时间和 returns 一个值来知道代码需要多少时间。 而且我还想要一种方法,以便在每次抓取后获取新集合,因为当我第二次 运行 我的代码时,我使用旧集合获取数据。

感谢您的帮助。谢谢!

希望您的问题在下面的代码中得到解决:

from selenium.webdriver.edge.options import Options  
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium import webdriver  
from pymongo import MongoClient
from time import sleep
from lxml import html 
import pandas as pd
import cssselect
import pymongo
import json 
import csv 
import time as Time


options = Options()
options.headless = True
driver = webdriver.Edge(executable_path=r"C:\Users\aicha\Desktop\mycode\aliexpress_scrap\scrap\codes\msedgedriver",options=options)
url = 'https://www.aliexpress.com/wholesale?trafficChannel=main&d=y&CatId=0&SearchText=bluetooth+earphones&ltype=wholesale&SortType=default&page={}'
baseurl = 'https://www.aliexpress.com'

for page_nb in range(1, 2):
    print('---', page_nb, '---')
    
    driver.get(url.format(page_nb))
    sleep(2)
    current_offset = 0
    while True:
        driver.execute_script("window.scrollBy(0, window.innerHeight);")
        sleep(.5)  # JavaScript has time to add elements
        new_offset = driver.execute_script("return window.pageYOffset;")
        print(new_offset,current_offset)
        if new_offset <= current_offset:
            break
        current_offset = new_offset
    
    sleep(3)
    
    tree = html.fromstring(driver.page_source)
    
    results = []
    
    for product in tree.xpath('//div[@class="JIIxO"]//a'):
        start_time = Time.time()
        title = product.xpath('.//h1/text()')
        
        if title:
            title = title[0]
            
            price = product.cssselect('div.mGXnE._37W_B span')
            price = [x.text for x in price]

            currency = price[0]
            price = ''.join(price[1:])
            stars = product.xpath('.//span[@class="eXPaM"]/text()')
            if stars :
                stars  = stars [0]
            else:
                stars  = 'None'
                
            nb_sold = product.xpath('.//span[@class="_1kNf9"]/text()')
            if nb_sold:
                nb_sold = nb_sold[0]
            else:
                nb_sold = 'None'
            supl = product.xpath('.//a[@class="ox0KZ"]/text()')
            if supl:
                supl = supl[0]
            else:
                supl = 'None'

            ship_cost = product.xpath('.//span[@class="_2jcMA"]/text()')
            if ship_cost:
                ship_cost = ship_cost[0]
            else:
                ship_cost = 'None'
            
            product_links = product.xpath('./@href')
            if product_links:
                product_links = str(baseurl) + str( product_links[0])
            diffrence_time = Time.time() - start_time # calculate time taken by program
            
            row = [title, price, currency, stars, nb_sold, ship_cost, supl, product_links, diffrence_time] #diffrence_time store dataframe
            results.append(row)
            print('len(results):', len(results))

    driver.close()
df = pd.DataFrame(results , columns=("Title","Price", "Currency", "Stars", "Orders", "Shipcost", "Supplier", "Productlinks", "Time Taken"))

####### Insert in database #############
client = MongoClient("mongodb://localhost:27017/")     
collection = client['db2']['aliex2']     
data = df.to_dict(orient = 'records')     
collection.insert_many(data) 

我已经找出程序在循环中花费的时间并将该差异时间存储在数据帧中