如何使用 Python 从 Sqlite 数据库中提取正确的数据?

How to extract correct data from Sqlite database using Python?

我有一个关于人名和生日的数据库。生日的格式是mm/dd/yyyy,比如"3/13/1960".

我想提取在特定日期之后出生的人的列表。我把这个日期命名为 "base".

你在下面看到的程序,首先创建一个人的数据库(模拟我想要使用的真实数据库),然后提取所需的列表。问题是结果和我想的不一样:

import datetime as dt
import peewee as pw
db = pw.SqliteDatabase('people1.db')

class Person(pw.Model):
    name = pw.CharField()
    birthday = pw.DateField(formats=['%m/%d/%Y'])
    class Meta:
        database = db # This model uses the "people.db" database.

db.create_tables([Person])


bob0 = Person(name='Bob0', birthday='4/13/1940')
bob1 = Person(name='Bob1', birthday='5/13/1950')
bob2 = Person(name='Bob2', birthday='3/13/1960')
bob3 = Person(name='Bob3', birthday='3/13/1970')
bob4 = Person(name='Bob4', birthday='3/13/1980')
bob5 = Person(name='Bob5', birthday='3/13/1990')

base = Person(name="base", birthday='3/13/1960')

bob0.save()
bob1.save()
bob2.save()
bob3.save()
bob4.save()
bob5.save()
base.save()

for item in Person.select().where(Person.birthday > base.birthday):
    print item.name , item.birthday

输出:

>>> ================================ RESTART ================================
>>> 
Bob0 1940-04-13
Bob1 1950-05-13
Bob3 1970-03-13
Bob4 1980-03-13
Bob5 1990-03-13
>>> 

正如您在上面看到的,base = 3/13/1960。所以我不应该在输出中有 Bob0Bob1 !我该如何处理?

请注意,我不想更改数据库中生日的格式。我也不想获取所有行并稍后检查它们!我只想获取所需的行。

SQlite 将日期时间存储为字符串。因此,正如其他人在评论和其他 answers 中建议的那样,您应该使用不同的格式来存储日期,以便 "date ordering and lexical ordering work out the same":

import datetime as dt
import peewee as pw

db = pw.SqliteDatabase('people1.db')

class Person(pw.Model):
    name = pw.CharField()
    birthday = pw.DateField(formats=['%Y-%m-%d'])
    class Meta:
        database = db # This model uses the "people.db" database.

db.create_tables([Person])


Person.create(name='Bob0', birthday=dt.date(1940, 4, 13))
Person.create(name='Bob1', birthday=dt.date(1950, 5, 13))
Person.create(name='Bob2', birthday=dt.date(1960, 3, 13))
Person.create(name='Bob3', birthday=dt.date(1970, 3, 13))
Person.create(name='Bob4', birthday=dt.date(1980, 3, 13))
Person.create(name='Bob5', birthday=dt.date(1990, 3, 13))

base = Person.create(name="base", birthday=dt.date(1960, 3, 13))

for item in Person.select().where(Person.birthday > base.birthday):
    print item.name , item.birthday

这给出:

Bob3 1970-03-13
Bob4 1980-03-13
Bob5 1990-03-13

更新

我没有注意到你不想更改数据库的评论。

这是提取部分日期的疯狂方法:

SELECT
    birthday,
    CAST(substr(birthday, 1, instr(birthday, '/') - 1) AS integer),
    CAST(substr(substr(birthday, instr(birthday, '/') + 1), 1, instr(substr(birthday, instr(birthday, '/') + 1), '/') - 1) AS integer),
    CAST(substr(birthday, instr(birthday, '/') + instr(substr(birthday, instr(birthday, '/') + 1), '/') + 1) AS integer)
FROM person

我的测试数据给出:

4/13/1940   4   13  1940
12/13/1950  12  13  1950
3/3/1960    3   3   1960
3/25/1970   3   25  1970
3/13/1980   3   13  1980
3/13/1990   3   13  1990
3/13/1960   3   13  1960

您可以使用这些表达式将它们与给定日期的部分内容进行比较:

query = """
SELECT *
FROM person
WHERE
    (
        substr('0000' || CAST(substr(birthday, instr(birthday, '/') + instr(substr(birthday, instr(birthday, '/') + 1), '/') + 1) AS integer), -4, 4) || '-' || -- year
        substr('00' || CAST(substr(birthday, 1, instr(birthday, '/') - 1) AS integer), -2, 2) || '-' || -- month
        substr('00' || CAST(substr(substr(birthday, instr(birthday, '/') + 1), 1, instr(substr(birthday, instr(birthday, '/') + 1), '/') - 1) AS integer), -2, 2) -- day
    ) > '1960-03-03'
"""
for item in Person.raw(query):
    print item.name, item.birthday

我在这里重建ISO日期并用它来比较。

您可以使用 sqlite3.Connection.create_function 指定您自己的 sqlite 函数,它将您的日期转换为可以按字典顺序排序的内容:

import datetime as dt
import peewee as pw

# custom sqlite function to reformat our date string
def _sqlite_reformat_date(unfortunate_date_string):
    return dt.datetime \
        .strptime(unfortunate_date_string,'%m/%d/%Y') \
        .strftime('%Y-%m-%d')

# Subclass pw.SqliteDatabase to add our custom sqlite function
class MySqliteDatabase(pw.SqliteDatabase):
    def __init__(self, *args, **kwargs):
        super(MySqliteDatabase, self).__init__(*args, **kwargs)

    def _add_conn_hooks(self, conn):
        conn.create_function('reformat_date', 1, _sqlite_reformat_date)
        super(MySqliteDatabase, self)._add_conn_hooks(conn)

db = MySqliteDatabase('people1.db')

# ...
# Your model definition and data inserts from your example above
# ...

rd = pw.fn.reformat_date # Use our custom sqlite function
for item in Person.select().where(rd(Person.birthday) > rd(base.birthday)):
    print item.name , item.birthday

尽管这种方法将 'fetch the required rows only' 它仍然会 运行 这个 python 函数用于每一行!在 python 中进行日期比较比仅获取所有行好一点,甚至可能更慢!

然而,_sqlite_reformat_date 函数可以重构为更快,很高兴知道将自定义函数添加到 sqlite 中是多么容易。

SQLite 将日期存储为字符串。因此,它们应该存储为 YYYY-MM-DD,这样可以确保它们被正确排序。老实说,没有理由不这样做。

如果您查看 sqlite 的文档,它甚至无法识别您使用的格式的日期:

https://www.sqlite.org/lang_datefunc.html

因此,我的建议是更新您存储日期的方式。

否则,创建一个使用 strptime(假设使用 playhouse.sqlite_ext.SqliteExtDatabase)做正确事情的用户定义函数:

@db.func()
def fix_time(s):
    return datetime.datetime.strptime(s, '%m/%d/%Y').strftime('%Y-%m-%d')

如果你想坚持使用常规的旧 SqliteDatabase,你可以调用 sqlite3 方法 connection.create_function

只是 SQL 函数的一些链接看起来很有趣但有效,也许更快。

from datetime import datetime

import peewee as pw

db = pw.SqliteDatabase('people1.db')

class Person(pw.Model):
    name = pw.CharField()
    birthday = pw.DateField(formats=['%m/%d/%Y'])
    class Meta:
        database = db # This model uses the "people.db" database.

db.create_tables([Person])


bob0 = Person(name='Bob0', birthday='4/13/1940')
bob1 = Person(name='Bob1', birthday='5/13/1950')
bob2 = Person(name='Bob2', birthday='3/13/1960')
bob3 = Person(name='Bob3', birthday='3/13/1970')
bob4 = Person(name='Bob4', birthday='3/13/1980')
bob5 = Person(name='Bob5', birthday='3/13/1990')
bob6 = Person(name='Bob6', birthday='12/1/1990')

base = Person(name="base", birthday='3/13/1960')

bob0.save()
bob1.save()
bob2.save()
bob3.save()
bob4.save()
bob5.save()
bob6.save()
base.save()

month = 'substr(birthday,1,instr(birthday,"/")-1)'
iso_month = 'case when length({month}) = 1 then "0" || {month} else {month} end'.format(month=month)
day = 'trim(trim(birthday,"0123456789"),"/")'
iso_day = 'case when length({day}) = 1 then "0" || {day} else {day} end'.format(day=day)
year = 'substr(ltrim(ltrim(birthday,"0123456789"),"/"),instr(ltrim(ltrim(birthday,"0123456789"),"/"),"/")+1)'
iso_date = 'replace(replace(replace("yyyy-mm-dd","yyyy",{year}),"mm",{iso_month}),"dd",{iso_day})'.format(year=year,iso_month=iso_month,iso_day=iso_day)
iso_base = datetime.strptime(base.birthday,'%m/%d/%Y').date().isoformat()

if __name__ == '__main__':

    for item in Person.select().where(pw.SQL(iso_date) > iso_base):
        print item.name , item.birthday
#output
#Bob3 1970-03-13
#Bob4 1980-03-13
#Bob5 1990-03-13
#Bob6 1990-12-01