Writer.add_document() 函数错误 Whoosh - mysql 循环
Writer.add_document() function error Whoosh - mysql loop
我正在尝试从以 latin1 编码的数据库中索引大量文章。我已经解决了字符集的编码问题,但我无法将每一行添加到索引中。
我试过:
1)
# First attempt: pass every column as a keyword argument, one field per line.
writer.add_document(
    Id=unicode(row["Id"]),
    Body=unicode(row["Body"]),
    Name=unicode(row["Name"]),
    Brand=unicode(row["Brand"]),
    Familia=unicode(row["Familia"]),
)
这样确实为文档建立了索引,但没有把索引标签(字段名)考虑在内。
2)
# Second attempt: doc is passed positionally; this is the call that raises
# the "takes exactly 1 argument (2 given)" error quoted below.
writer.add_document(doc)
这会报错:add_document() takes exactly 1 argument (2 given),即 add_document() 只接受 1 个参数,却传入了 2 个。
这里是完整代码:
# Open a writer for the index
# NOTE(review): indentation was lost in the post; the nesting below is a
# reconstruction (writer > connection > row loop) -- confirm.
with ix.writer() as writer:
    con = mdb.connect(host="myhost",
                      user="myuser",
                      passwd="pass",
                      db="db",
                      charset="utf8",
                      use_unicode=True)
    with con:
        cur = con.cursor(mdb.cursors.DictCursor)
        # NOTE(review): the query is commented out in the post, so as written
        # nothing is executed before fetchall(); re-enable it to get rows.
        #cur.execute("SELECT Id, Body, Name, Brand, Familia FROM articles")
        rows = cur.fetchall()
        for row in rows:
            # Debug output. Single-argument print(...) produces the same
            # output on Python 2 and Python 3.
            print(row)
            doc6 = row["Brand"]
            doc2 = row["Name"]
            print(doc2)
            print('body')
            # Replace accented characters with plain ASCII ones. The literals
            # are unicode (u"...") because the row text is expected to be
            # unicode (use_unicode=True); mixing unicode text with non-ASCII
            # byte strings triggers an implicit ASCII decode on Python 2.
            doc3 = row["Body"].replace(u"á", u"a")
            doc3 = doc3.replace(u"é", u"e")
            doc3 = doc3.replace(u"í", u"i")
            doc3 = doc3.replace(u"ó", u"o")
            doc3 = doc3.replace(u"ú", u"u")
            doc3 = doc3.replace(u"ñ", u"n")
            # NOTE(review): this literal was garbled in the post (three bare
            # quote characters, which is a syntax error). It is restored as
            # "remove double quotes"; the original may instead have removed
            # the HTML entity &quot; -- confirm.
            doc3 = doc3.replace(u'"', u"")
            print(doc3)
            print('familia')
            doc4 = row["Familia"]
            print(doc4)
            print('id')
            doc5 = row["Id"]
            print(doc5)
            # Fields are passed as keyword arguments, one per column.
            # Note: this call indexes the raw row["Body"], not the cleaned
            # doc3 computed above.
            writer.add_document(Id=unicode(row["Id"]),
                                Body=unicode(row["Body"]),
                                Name=unicode(row["Name"]),
                                Brand=unicode(row["Brand"]),
                                Familia=unicode(row["Familia"]))
            #
            # doc = unicode(doc5),unicode(doc3), unicode(doc2), unicode(doc6), unicode(doc4)
            # writer.add_document(doc) #reports add_document() takes exactly 1 argument (2 given) Error
            #writer.add_document(Id = unicode(doc5),Body = unicode(doc3), Name = unicode(doc2), Brand = unicode(doc6), Familia = unicode(doc4))
# NOTE(review): placed after the writer block so the count is read once the
# writer has been closed; the original nesting is unknown -- confirm.
numdocs = ix.doc_count_all()
print("docs indexed = %s" % numdocs)
提前谢谢大家!
这样解决的:
# Replacement table applied to each article body before indexing. Built once,
# outside the row loop. The literals are unicode (u"...") because the row
# text is expected to be unicode; mixing unicode text with non-ASCII byte
# strings triggers an implicit ASCII decode on Python 2.
body_replacements = (
    (u"á", u"a"),
    (u"é", u"e"),
    (u"í", u"i"),
    (u"ó", u"o"),
    (u"ú", u"u"),
    (u"ñ", u"n"),
    # NOTE(review): this literal was garbled in the post (three bare quote
    # characters, which is a syntax error). It is restored as "remove double
    # quotes"; the original may instead have removed the HTML entity &quot;
    # -- confirm.
    (u'"', u""),
)

# NOTE(review): indentation was lost in the post; the nesting below is a
# reconstruction -- confirm.
with con:
    cur = con.cursor(mdb.cursors.DictCursor)
    # NOTE(review): the query is commented out in the post, so as written
    # nothing is executed before fetchall(). The example query also does not
    # select Relevancia, which the loop below reads -- adjust before enabling.
    #cur.execute("SELECT Id, Body, Name, Brand, Familia FROM articles")
    rows = cur.fetchall()
    for row in rows:
        #print row
        # Write the cleaned text back into the row so that the add_document
        # call below indexes the cleaned body.
        body = row["Body"]
        for accented, plain in body_replacements:
            body = body.replace(accented, plain)
        row["Body"] = body
        # Relevancia is passed through as-is; the other fields are converted
        # to unicode.
        writer.add_document(Id=unicode(row["Id"]),
                            Body=unicode(row["Body"]),
                            Name=unicode(row["Name"]),
                            Brand=unicode(row["Brand"]),
                            Familia=unicode(row["Familia"]),
                            Relevancia=row["Relevancia"])
# Single-argument print(...) produces the same output on Python 2 and 3.
numdocs = ix.doc_count_all()
print("docs indexed = %s" % numdocs)
特别感谢 Whoosh 团队耐心地解答了我所有的疑惑。
我正在尝试从以 latin1 编码的数据库中索引大量文章。我已经解决了字符集的编码问题,但我无法将每一行添加到索引中。
我试过: 1)
# First attempt: pass every column as a keyword argument, one field per line.
writer.add_document(
    Id=unicode(row["Id"]),
    Body=unicode(row["Body"]),
    Name=unicode(row["Name"]),
    Brand=unicode(row["Brand"]),
    Familia=unicode(row["Familia"]),
)
这样确实为文档建立了索引,但没有把索引标签(字段名)考虑在内。
2)
# Second attempt: doc is passed positionally; this is the call that raises
# the "takes exactly 1 argument (2 given)" error quoted below.
writer.add_document(doc)
这会报错:add_document() takes exactly 1 argument (2 given),即 add_document() 只接受 1 个参数,却传入了 2 个。
这里是完整代码:
# Open a writer for the index
# NOTE(review): indentation was lost in the post; the nesting below is a
# reconstruction (writer > connection > row loop) -- confirm.
with ix.writer() as writer:
    con = mdb.connect(host="myhost",
                      user="myuser",
                      passwd="pass",
                      db="db",
                      charset="utf8",
                      use_unicode=True)
    with con:
        cur = con.cursor(mdb.cursors.DictCursor)
        # NOTE(review): the query is commented out in the post, so as written
        # nothing is executed before fetchall(); re-enable it to get rows.
        #cur.execute("SELECT Id, Body, Name, Brand, Familia FROM articles")
        rows = cur.fetchall()
        for row in rows:
            # Debug output. Single-argument print(...) produces the same
            # output on Python 2 and Python 3.
            print(row)
            doc6 = row["Brand"]
            doc2 = row["Name"]
            print(doc2)
            print('body')
            # Replace accented characters with plain ASCII ones. The literals
            # are unicode (u"...") because the row text is expected to be
            # unicode (use_unicode=True); mixing unicode text with non-ASCII
            # byte strings triggers an implicit ASCII decode on Python 2.
            doc3 = row["Body"].replace(u"á", u"a")
            doc3 = doc3.replace(u"é", u"e")
            doc3 = doc3.replace(u"í", u"i")
            doc3 = doc3.replace(u"ó", u"o")
            doc3 = doc3.replace(u"ú", u"u")
            doc3 = doc3.replace(u"ñ", u"n")
            # NOTE(review): this literal was garbled in the post (three bare
            # quote characters, which is a syntax error). It is restored as
            # "remove double quotes"; the original may instead have removed
            # the HTML entity &quot; -- confirm.
            doc3 = doc3.replace(u'"', u"")
            print(doc3)
            print('familia')
            doc4 = row["Familia"]
            print(doc4)
            print('id')
            doc5 = row["Id"]
            print(doc5)
            # Fields are passed as keyword arguments, one per column.
            # Note: this call indexes the raw row["Body"], not the cleaned
            # doc3 computed above.
            writer.add_document(Id=unicode(row["Id"]),
                                Body=unicode(row["Body"]),
                                Name=unicode(row["Name"]),
                                Brand=unicode(row["Brand"]),
                                Familia=unicode(row["Familia"]))
            #
            # doc = unicode(doc5),unicode(doc3), unicode(doc2), unicode(doc6), unicode(doc4)
            # writer.add_document(doc) #reports add_document() takes exactly 1 argument (2 given) Error
            #writer.add_document(Id = unicode(doc5),Body = unicode(doc3), Name = unicode(doc2), Brand = unicode(doc6), Familia = unicode(doc4))
# NOTE(review): placed after the writer block so the count is read once the
# writer has been closed; the original nesting is unknown -- confirm.
numdocs = ix.doc_count_all()
print("docs indexed = %s" % numdocs)
提前谢谢大家!
这样解决的:
# Replacement table applied to each article body before indexing. Built once,
# outside the row loop. The literals are unicode (u"...") because the row
# text is expected to be unicode; mixing unicode text with non-ASCII byte
# strings triggers an implicit ASCII decode on Python 2.
body_replacements = (
    (u"á", u"a"),
    (u"é", u"e"),
    (u"í", u"i"),
    (u"ó", u"o"),
    (u"ú", u"u"),
    (u"ñ", u"n"),
    # NOTE(review): this literal was garbled in the post (three bare quote
    # characters, which is a syntax error). It is restored as "remove double
    # quotes"; the original may instead have removed the HTML entity &quot;
    # -- confirm.
    (u'"', u""),
)

# NOTE(review): indentation was lost in the post; the nesting below is a
# reconstruction -- confirm.
with con:
    cur = con.cursor(mdb.cursors.DictCursor)
    # NOTE(review): the query is commented out in the post, so as written
    # nothing is executed before fetchall(). The example query also does not
    # select Relevancia, which the loop below reads -- adjust before enabling.
    #cur.execute("SELECT Id, Body, Name, Brand, Familia FROM articles")
    rows = cur.fetchall()
    for row in rows:
        #print row
        # Write the cleaned text back into the row so that the add_document
        # call below indexes the cleaned body.
        body = row["Body"]
        for accented, plain in body_replacements:
            body = body.replace(accented, plain)
        row["Body"] = body
        # Relevancia is passed through as-is; the other fields are converted
        # to unicode.
        writer.add_document(Id=unicode(row["Id"]),
                            Body=unicode(row["Body"]),
                            Name=unicode(row["Name"]),
                            Brand=unicode(row["Brand"]),
                            Familia=unicode(row["Familia"]),
                            Relevancia=row["Relevancia"])
# Single-argument print(...) produces the same output on Python 2 and 3.
numdocs = ix.doc_count_all()
print("docs indexed = %s" % numdocs)
特别感谢 Whoosh 团队耐心地解答了我所有的疑惑。