Create multiple CSVs based on lists
I have a folder containing 4 CSVs. Each CSV has three columns: title, author, ISBN.
What I want to do is create a new CSV with an API link for every ISBN in each file. At the end I should have four CSVs, which I will use later. Here is my code so far:
import glob
import pandas as pd
from urllib.request import urlopen

# import generated csvs from other script
safepath = '.'  # currently the same path as the origin, should be /extract/isbn, wip
filelist = glob.glob('./extract/Reihe A/Reihe*_extract.csv', recursive=True)  # there are currently 4 files in the folder
print(filelist)

for file in filelist:
    # read csv, make a list of all isbns
    data = pd.read_csv(file, sep="\t", encoding='utf8')
    print(file)
    isbnlist = []
    print(isbnlist)
    for row in data['ISBN']:
        isbnlist.append(row)

# for each isbn in list, get data from api
apisearch = []
for isbn in isbnlist:
    url = 'http://sru.k10plus.de/gvk!rec=1?version=1.1&operation=searchRetrieve&query=pica.isb%3D' + isbn + '&maximumRecords=10&recordSchema=marcxml'
    print(url)

apisearch = []
for isbn in isbnlist:
    url = 'http://sru.k10plus.de/gvk!rec=1?version=1.1&operation=searchRetrieve&query=pica.isb%3D' + isbn + '&maximumRecords=10&recordSchema=marcxml'
    for column in url:
        apisearch.append(url)

# create new csv with data from api
urllinks = pd.DataFrame(apisearch)
urllinks.to_csv(str(safepath) + "/" + file + "_" + "isbn.csv", sep='\t', encoding='utf8')
The problem I am facing now is that all rows are pushed into a single CSV, which is not what I want.
What do I need to change so that each file is processed separately and a new CSV is created per source file?
Any help is appreciated.
Edit: link to the files, in case anyone wants to try reproducing the CSV creation: sync.com
There is no need to use pandas for this. It can be done with some string manipulation.
import glob

def add_urls(filename_in: str, filename_out: str):
    with open(filename_in, encoding="utf-8") as infile:
        # OS-agnostic split on newline
        data = infile.read().splitlines()
    with open(filename_out, "w", encoding="utf-8") as outfile:
        # grab header (by removing it from data) and add URL column (and a newline)
        outfile.write(f"{data.pop(0)}\tURL\n")
        for row in data:
            # last element is ISBN
            isbn = row.split("\t")[-1]
            # append tab, URL and newline
            row += f"\thttp://sru.k10plus.de/gvk!rec=1?version=1.1&operation=searchRetrieve&query=pica.isb%3D{isbn}&maximumRecords=10&recordSchema=marcxml\n"
            outfile.write(row)

# iterate over files
for filename in glob.glob('./Reihe A/Reihe*_extract.csv', recursive=True):
    filename_out = f"{filename[:-4]}_ISBN.csv"
    add_urls(filename, filename_out)
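If you would rather stay with pandas, the key change is the same: everything, including the write, has to happen inside the loop over files, so each iteration produces its own output CSV. Here is a minimal sketch under that assumption, reusing the base URL, glob pattern, and column name from your code (the output filename pattern is illustrative):

import glob
import pandas as pd

BASE = ('http://sru.k10plus.de/gvk!rec=1?version=1.1&operation=searchRetrieve'
        '&query=pica.isb%3D{}&maximumRecords=10&recordSchema=marcxml')

for file in glob.glob('./extract/Reihe A/Reihe*_extract.csv', recursive=True):
    data = pd.read_csv(file, sep="\t", encoding='utf8')
    # build one API link per ISBN in this file, as a new column
    data['URL'] = data['ISBN'].astype(str).map(BASE.format)
    # write one output CSV per input file, still inside the loop
    data.to_csv(f"{file[:-4]}_isbn.csv", sep='\t', encoding='utf8', index=False)

Note that neither version URL-encodes the ISBN before inserting it into the query string; that is fine for plain digits and hyphens, but would break for values containing reserved characters.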