日期时间对象比较

Datetime object compare

我正在尝试查找最近 2 小时内发布的最新新闻。

方法

我的目标是获取今天的 datetime 对象并将其与我从网上抓取的文章的日期进行比较。

首先我按日期比较日期时间,然后按小时比较。

问题

然而,即使给出了正确的日期,它似乎也说它不在正确的范围内。

假失败:

代码

from bs4 import BeautifulSoup
from urllib.request import urlopen
from urllib.request import Request
from datetime import timedelta
from datetime import datetime


def newz(stock_1):
    """Scrape the newest finviz headline for one ticker and screen it.

    The function downloads the finviz quote page for ``stock_1``, prints the
    first ``n`` headline rows of its news table, scans each headline for
    upper-case warning keywords, then parses the date cell of the last
    processed row and hands it to ``date_comp``.

    Args:
        stock_1: Ticker symbol; it is appended to the finviz quote URL.

    Returns:
        True when a headline contains "INVESTIGATION" (or "SHAREHOLDER"
        together with "ALERT"), when an AttributeError is raised while
        reading the news table (e.g. no news table was found), or when
        ``date_comp`` returns True. In every other case the function falls
        off the end and returns None.
    """
    list_now=stock_1
    #list_now=chr(list_now)
    new_list=list_now  # not referenced again in this function
    list_now=[list_now]
    print("Stock:{}".format(list_now))
    n = 1 # number of article headlines processed per ticker
    tickers= list_now

    # Keyword weights; this dict is not referenced again in this function.
    new_words = {
            'Insider Sells':-3.4,
            'common':2.0,
            'up':3.4,
            'bankruptcy':-3.4,
            'underperforms':-3.4,
            'overperforms':3.4,
            'outperforms':3.4,
            'overbought':-3.4,
            'oversold':3.4,
            'down':2.0,

            }   

    finviz_url = 'https://finviz.com/quote.ashx?t='
    news_tables = {}

    # Download each quote page and keep the element with id 'news-table'.
    for ticker in tickers:
        url = finviz_url + ticker
        req = Request(url=url,headers={'user-agent': 'my-app/0.0.1'}) 
        resp = urlopen(req)    
        html = BeautifulSoup(resp, features="lxml")
        #print(html)
        news_table = html.find(id='news-table')
        news_tables[ticker] = news_table

    try:
        for ticker in tickers:
            df = news_tables[ticker]
            df_tr = df.findAll('tr')
    
            print ('\n')
            print ('Recent News Headlines for {}: '.format(ticker))
        
            for i, table_row in enumerate(df_tr):
                # Headline text comes from the row's link, the timestamp from
                # its first cell.
                a_text = table_row.a.text
                td_text = table_row.td.text
                td_text = td_text.strip()
                print("{0}  {1}".format(a_text,td_text))
                td_text=str(td_text)
                a_text=str(a_text)
                # Look for upper-case warning keywords in the headline;
                # str.find returns -1 when the keyword is absent.
                result=a_text.find("INVESTIGATION")
                result=int(result)

                result_1=a_text.find("SHAREHOLDER")
                result_1=int(result_1)

                result_2=a_text.find("ALERT")
                result_2=int(result_2)

                result_3=a_text.find("INVESTOR")
                result_3=int(result_3)

                result_4=a_text.find("NOTICE")
                result_4=int(result_4)



                # Only the first two branches return; the following ones just
                # print the message and let processing continue.
                if (result>=0 or result_1>=0) and result_2>=0:
                    print("Fails: Under Investigation")
                    return True
                elif result>=0 :
                    print("Fails: Under Investigation")
                    return True
                elif result_3>=0 and result_2>=0 :
                    print("Fails: Under Investigation")

                elif result_1>=0 and result_4>=0 :
                    print("Fails: Under Investigation")

                # NOTE(review): unreachable -- SHAREHOLDER plus ALERT is
                # already handled by the first branch above.
                elif result_1>=0 and result_2>=0 :
                    print("Fails: Under Investigation")

                elif result==-1 and result_1==-1 and result_2==-1:
                    pass                    
                       

                # Stop once n rows have been processed.
                if i == n-1:
                    break
    except AttributeError:
        return True
            
    
    # td_text is the date cell of the last processed row. The slicing below
    # assumes text shaped like 'Jan-24-22 05:48PM' -- TODO confirm against
    # the live page. NOTE(review): if the table had no rows, td_text is
    # never assigned and this line raises NameError.
    td_text=td_text.split("-",2)
    #print("A",td_text)
    month=td_text[0]
    day=td_text[1]
    # Third piece holds everything after the second dash, e.g. '22 05:48PM'.
    year=td_text[2]

  
   

    months={'Jan': 1, 'Feb':2, 'Mar': 3, 'Apr':4,'May':5, 'Jun': 6, 'Jul':7,'Aug':8,'Sep':9,'Oct':10, 'Nov':11,'Dec':12}
    # pop() is used as a plain lookup here; the dict is local to this call.
    month=months.pop(month)
    #print("MOnth",month)
    #print("Year",year)
    #print("Day",day)
    hour=year[3:5]
    #print("Hour data",hour)
    # NOTE(review): the slice [-1:-2] is always empty, so the AM/PM marker
    # is never captured and the hour is treated as a 24-hour value.
    meridian=year[-1:-2]
    #print("Meridian",meridian)
    year=year[0:2]
    year=str(year)
    day=str(day)
    month=str(month)
    year="20"+year


    
    # date we are stripping from web
    t=year+'-'+month+'-'+day+'-'+hour
    #print(t)
    # NOTE(review): parsing only '%H' yields a datetime dated 1900-01-01;
    # date_comp later compares it with bounds built from today's date.
    t_hour = datetime.strptime(hour, '%H')
    t = datetime.strptime(t, '%Y-%m-%d-%H')
    #print(t)
   
    
    
        
    
    # todays date in datetime object (the format round trip truncates it to
    # the hour)
    today =datetime.today().strftime('%Y-%m-%d-%H')
    today = datetime.strptime(today, '%Y-%m-%d-%H')

    
    hr_margin=timedelta(hours= 2)

    margin = timedelta(days = 1)
        

    

    #print( "Earnings date:{} ".format(t))
    #print("Today:",today)

    # Allowed window: one day either side of now for the date check, two
    # hours either side for the hour check.
    diff_minus=today - margin
    #print(diff_minus)
    diff_plus=today + margin

    diff_hr_plus= today +hr_margin
    diff_hr_minus= today-hr_margin
    #print(diff_plus)

    #t_hour=t_hour[10::]

    # date_comp returns True when the story is NOT within the window;
    # otherwise this function ends without an explicit return (None).
    if date_comp(t,diff_plus,diff_minus,diff_hr_plus,diff_hr_minus,t_hour,today):
        return True



# Comparison helper exactly as posted in the question: prints a diagnosis and
# returns True when the story is NOT inside the allowed window, False when it
# is. NOTE(review): the body below is not indented under the def, so this
# listing does not parse as written.
def date_comp(t,diff_plus,diff_minus,diff_hr_plus,diff_hr_minus,t_hour,today):      

# NOTE(review): both bounds are diff_plus, so this holds only when
# t == diff_plus; the lower bound was presumably meant to be diff_minus.
if diff_plus<= t <= diff_plus:
   print("Day is good")
   print("Max allowed date {}".format(diff_plus))
   print("Min allowed date {}".format(diff_minus))
   print('Stripped Datetime {}'.format(t))
   
   #print("Measured time hr",t_hour)
   # NOTE(review): the caller builds t_hour with strptime(hour, '%H'), i.e. a
   # full datetime rather than an hour count, while the bounds are today's
   # datetime plus/minus two hours.
   if  diff_hr_minus<=t_hour <=diff_hr_plus:
       print("Hour is good")
       print("Max allowed Hr {}".format(diff_hr_plus))
       print("Min allowed Hr {}".format(diff_hr_minus))
       print('Stripped Datetime {}'.format(t))
       
       print("News is up to date by Hour!! Time :{0} Story Hit: {1}".format(today,t))
       print("\n")
       return False
   else:
       print("News is NOT up to date by Hour!! Time :{0} Story Hit: {1}".format(today,t))
       print("Max allowed Hr {}".format(diff_hr_plus))
       print("Min allowed Hr {}".format(diff_hr_minus))
       print("\n")
       return True

else:
    print("News is NOT up to date by Day!! {} ".format(t))
    print("Max allowed date {}".format(diff_plus))
    print("Min allowed date {}".format(diff_minus))
    print("\n")
    return True

看看我如何通过几个步骤将您给定的代码简化为 minimal reproducible example

  1. 删除 web-scraping(因为对问题而言不是必需的)
  2. 删除所有不起解释作用的注释
  3. 删除对结构没有帮助的空行
  4. (可选)添加显示问题的测试(例如 function-call)

已解决问题

由于您声称日期时间比较存在问题,我发现了一个不一致之处:

在日比较中你有:

if diff_plus<= t <= diff_plus:

小时比较你有:

   if  diff_hr_minus<=t_hour <=diff_hr_plus:

需要解决的问题:

  • 将日期比较的下限改为 diff_minus(即 diff_minus <= t <= diff_plus)
  • 可以使用 strptime 配合合适的格式字符串(format string)来解析日期
  • 简化 date-parsing
  • 只需将 hour 与 24-hour-range 整数进行比较,使用 t.hour

最小可重现示例

from datetime import timedelta
from datetime import datetime

def compared_date_from_td(td_text):
    """Check whether a scraped timestamp lies outside the allowed window.

    Args:
        td_text: Timestamp text of the form 'Jan-24-22 05:48PM' (12-hour
            clock with AM/PM marker); surrounding whitespace is ignored.

    Returns:
        The result of ``date_comp``: True when the timestamp is NOT up to
        date (outside the day or hour margins), False when it is.

    Raises:
        ValueError: If ``td_text`` does not match the expected format.
    """
    # date from web
    # %I (12-hour clock) is required for %p to take effect; with %H the
    # AM/PM marker is accepted but ignored, so '05:48PM' would parse as 05:48.
    t = datetime.strptime(td_text.strip(), '%b-%d-%y %I:%M%p')
    print("Earnings date: {} ".format(t))

    # todays date in datetime object
    today = datetime.today()
    print("Today: {}".format(today))

    # Day window: one day either side of now.
    margin = timedelta(days=1)
    diff_minus = today - margin
    diff_plus = today + margin

    t_hour = t.hour  # hour-of-day (0-23) of the parsed timestamp

    # NOTE: these are plain hour-of-day integers, so the bounds can fall
    # below 0 or above 23 and the window does not wrap around midnight.
    diff_hr_plus = today.hour + 2
    diff_hr_minus = today.hour - 2

    # Return the comparison result directly so callers always get a bool
    # (previously the "up to date" case fell through and returned None).
    return date_comp(t, diff_plus, diff_minus, diff_hr_plus, diff_hr_minus, t_hour, today)


def date_comp(t,diff_plus,diff_minus,diff_hr_plus,diff_hr_minus,t_hour,today):
    """Print a recency diagnosis and tell whether the story is out of date.

    The day check requires ``diff_minus <= t <= diff_plus``; the hour check
    requires ``diff_hr_minus <= t_hour <= diff_hr_plus``.

    Returns:
        False when both checks pass (story is up to date), True otherwise.
    """
    day_limits = [
        "Max allowed date {}".format(diff_plus),
        "Min allowed date {}".format(diff_minus),
    ]
    hour_limits = [
        "Max allowed Hr {}".format(diff_hr_plus),
        "Min allowed Hr {}".format(diff_hr_minus),
    ]
    stripped = 'Stripped Datetime {}'.format(t)

    # Guard 1: the story must fall inside the day window.
    if not (diff_minus <= t <= diff_plus):
        report = ["News is NOT up to date by Day!! {} ".format(t)]
        report += day_limits + ["\n"]
        for line in report:
            print(line)
        return True

    for line in ["Day is good"] + day_limits + [stripped]:
        print(line)

    # Guard 2: the story's hour must fall inside the hour window.
    if not (diff_hr_minus <= t_hour <= diff_hr_plus):
        report = ["News is NOT up to date by Hour!! Time :{0} Story Hit: {1}".format(today, t)]
        report += hour_limits + ["\n"]
        for line in report:
            print(line)
        return True

    for line in ["Hour is good"] + hour_limits + [stripped]:
        print(line)
    print("News is up to date by Hour!! Time :{0} Story Hit: {1}".format(today, t))
    print("\n")
    return False


# Demo run: feed one sample timestamp through the comparison and report
# the outcome.
date_text = 'Jan-24-22 05:48PM'
not_uptodate = compared_date_from_td(date_text)
summary = "date: {}, compared as not_uptodate => {}".format(date_text, not_uptodate)
print(summary)

这输出:

Earnings date: 2022-01-24 05:48:00 
Today: 2022-01-25 00:54:16.122160
Day is good
Max allowed date 2022-01-26 00:54:16.122160
Min allowed date 2022-01-24 00:54:16.122160
Stripped Datetime 2022-01-24 05:48:00
News is NOT up to date by Hour!! Time :2022-01-25 00:54:16.122160 Story Hit: 2022-01-24 05:48:00
Max allowed Hr 2
Min allowed Hr -2


date: Jan-24-22 05:48PM, compared as not_uptodate => True

注意:某些输出格式还需要调整才有意义,例如 Max allowed Hr 2 等。另外,当时间落在允许范围之外(即 "NOT up to date")时,比较函数返回 True(如上例中,小时相差超过 2 小时)。

正确的解决方案

如果我对您目标的理解正确:测试某个日期时间是否在最近 2 小时之内。

(1) 给定一段从网络上抓取的文本,您将确定格式并将其解析为日期时间。

(2) 给定日期时间,您将计算到现在的时间增量。然后测试这个是否小于2小时:

from datetime import datetime, timedelta

scrapedText = 'Jan-24-22 05:48PM'
# (1) parse datetime. %I (12-hour clock) is required for %p (AM/PM) to be
# applied; with %H the marker is ignored and 05:48PM would parse as 05:48.
newsTime = datetime.strptime(scrapedText, '%b-%d-%y %I:%M%p')

# (2) age of the story relative to now.
# NOTE(review): both values are naive datetimes -- this assumes the scraped
# time is expressed in the local timezone; confirm against the source site.
newsAge = datetime.now() - newsTime

# Within the last 2 hours; the lower bound keeps future-dated timestamps
# (negative age) from being counted as recent.
if timedelta(0) <= newsAge < timedelta(hours=2):
    print("News is less than 2 hours old: {}".format(newsTime))

另请参阅: How to find information from the last 24 hours