如何使用 Python 将包含图像的文件夹转换为 Excel 文件
How to convert a folder with images into an Excel File using Python
我有一个文件夹,里面装满了以相同方式命名的图片。
文件名:
..\name_ID。
我想创建一个电子表格,然后将名称、ID 和图片的链接分别放入单独的列中。
应该使用 openpyxl、xlsxwriter 还是其他什么?
我没有使用 openpyxl 或 xlsxwriter 的经验,但如果我查看 openpyxl 的文档,我想该程序将是这样的
from openpyxl import Workbook
from openpyxl.styles import PatternFill
from scipy.misc import imread
# Sketch: colour each worksheet cell with the colour of the matching image pixel.
# excel_coordinate and RGB2hex are still TODO helpers and are not defined here.
wb = Workbook()  # was misspelled "Workboo", which raises NameError
ws = wb.active
# NOTE(review): scipy.misc.imread is no longer shipped by recent SciPy
# releases -- confirm the installed version still provides it.
img = imread('image.jpg', mode='RGB')
for i in range(len(img)):
    for j in range(len(img[0])):
        # TODO a method to turn (3, 1) into 'D2'
        index = excel_coordinate(i, j)
        # TODO a method to change RGB in a hex value, perhaps imread also support hex, not sure
        hexval = RGB2hex(img[i][j])
        cel = ws[index]
        # A fill is applied by assigning a PatternFill object to the cell's
        # `fill` attribute; the fill type name is lowercase "solid".
        cel.fill = PatternFill("solid", fgColor=hexval)
我提供的答案展示了如何使用 xlsxwriter 实现这一点。它创建了一个电子表格,并将名称、ID 以及指向对应图片的链接分别放入三个单独的列中。
答案使用urllib.request以便它可以重现(这个模块不是必需的,我只是把它放在那里下载三个测试图像)。我还将目录设置为当前目录,您可以根据需要进行修改。另外,在我的回答中,我将其设置为仅查找 .png 文件,但您也可以调整以查找其他文件格式。
import urllib.request
import xlsxwriter
import os
# Build pics_and_links.xlsx: one row per .png file in a directory, holding the
# name part, the numeric ID part and a link to the file itself.

# comment out the next 4 lines if you don't want to download 3 pictures
url = 'https://upload.wikimedia.org/wikipedia/en/thumb/4/43/Ipswich_Town.svg/255px-Ipswich_Town.svg.png'
urllib.request.urlretrieve(url, "pica_1.png")
urllib.request.urlretrieve(url, "picb_2.png")
urllib.request.urlretrieve(url, "picc_3.png")

dir_wanted = os.getcwd()
# uncomment the following line if you don't want the current directory
# (raw string: in a normal literal "\u" starts a unicode escape -> SyntaxError)
# dir_wanted = r"C:\users\doe_j"

file_list = [entry for entry in os.listdir(dir_wanted) if entry.endswith('.png')]
# os.path.join inserts the platform separator; the previous hand-written '\'
# literal was an unterminated string and did not even parse.
full_path_list = [os.path.join(dir_wanted, entry) for entry in file_list]

name_list = []
num_list = []
for item in file_list:
    # "name_ID.png" -> ("name", "_", "ID.png"); splitting on the LAST
    # underscore lets the name part itself contain underscores.
    stem, _, tail = item.rpartition('_')
    name_list.append(str(stem))
    # Drop the file extension from the ID part.
    num_list.append(str(tail.rpartition('.')[0]))

workbook = xlsxwriter.Workbook('pics_and_links.xlsx')
ws = workbook.add_worksheet('Links')

# adding column titles and making them bold
bold = workbook.add_format({'bold': True})
ws.write('A1', "Name", bold)
ws.write('B1', "Number", bold)
ws.write('C1', "Link", bold)

# putting the three lists we made into the workbook; row 1 holds the titles,
# so the data starts at spreadsheet row 2
for row_num, (name, num, path) in enumerate(
        zip(name_list, num_list, full_path_list), start=2):
    ws.write('A%d' % row_num, name)
    # int() stores the ID as a number; it raises ValueError when the ID part
    # of the file name is not numeric.
    ws.write('B%d' % row_num, int(num))
    ws.write_url('C%d' % row_num, path)

# Set the width of the column with the links in it
ws.set_column(2, 2, 40)
workbook.close()
您可以使用 pandas 包来完成:
import glob
import os
import pandas as pd
# Build output_file.xlsx with one row per file found in files_dir:
# the name part, the id part and an Excel HYPERLINK formula to the file.
files_dir = '/home/username/files_dir'  # here should be path to your directory with images
files = glob.glob(os.path.join(files_dir, '*'))

rows = []
for full_filename in files:
    filename = os.path.basename(full_filename)
    # Split on the LAST underscore only. A plain split('_') fails to unpack
    # when the name has more than one underscore or none at all; with no
    # underscore the name is '' and the whole stem becomes the id.
    name, _, id_ = filename.rpartition('_')
    id_ = os.path.splitext(id_)[0]  # remove file extension from id_
    hyperlink = '=HYPERLINK("file:///{}")'.format(full_filename)
    rows.append([name, id_, hyperlink])

# Build the frame once from the collected rows instead of growing it one
# row at a time with df.loc[i], which is slow for many files.
df = pd.DataFrame(rows, columns=['name', 'id', 'hyperlink'])
df.to_excel('output_file.xlsx', index=False)
我有一个文件夹,里面装满了以相同方式命名的图片。
文件名: ..\name_ID。
我想创建一个电子表格,然后将名称、ID 和图片的链接分别放入单独的列中。
应该使用 openpyxl、xlsxwriter 还是其他什么?
我没有使用 openpyxl 或 xlsxwriter 的经验,但如果我查看 openpyxl 的文档,我想该程序将是这样的
from openpyxl import Workbook
from openpyxl.styles import PatternFill
from scipy.misc import imread
# Sketch: colour each worksheet cell with the colour of the matching image pixel.
# excel_coordinate and RGB2hex are still TODO helpers and are not defined here.
wb = Workbook()  # was misspelled "Workboo", which raises NameError
ws = wb.active
# NOTE(review): scipy.misc.imread is no longer shipped by recent SciPy
# releases -- confirm the installed version still provides it.
img = imread('image.jpg', mode='RGB')
for i in range(len(img)):
    for j in range(len(img[0])):
        # TODO a method to turn (3, 1) into 'D2'
        index = excel_coordinate(i, j)
        # TODO a method to change RGB in a hex value, perhaps imread also support hex, not sure
        hexval = RGB2hex(img[i][j])
        cel = ws[index]
        # A fill is applied by assigning a PatternFill object to the cell's
        # `fill` attribute; the fill type name is lowercase "solid".
        cel.fill = PatternFill("solid", fgColor=hexval)
我提供的答案展示了如何使用 xlsxwriter 实现这一点。它创建了一个电子表格,并将名称、ID 以及指向对应图片的链接分别放入三个单独的列中。
答案使用urllib.request以便它可以重现(这个模块不是必需的,我只是把它放在那里下载三个测试图像)。我还将目录设置为当前目录,您可以根据需要进行修改。另外,在我的回答中,我将其设置为仅查找 .png 文件,但您也可以调整以查找其他文件格式。
import urllib.request
import xlsxwriter
import os
# Build pics_and_links.xlsx: one row per .png file in a directory, holding the
# name part, the numeric ID part and a link to the file itself.

# comment out the next 4 lines if you don't want to download 3 pictures
url = 'https://upload.wikimedia.org/wikipedia/en/thumb/4/43/Ipswich_Town.svg/255px-Ipswich_Town.svg.png'
urllib.request.urlretrieve(url, "pica_1.png")
urllib.request.urlretrieve(url, "picb_2.png")
urllib.request.urlretrieve(url, "picc_3.png")

dir_wanted = os.getcwd()
# uncomment the following line if you don't want the current directory
# (raw string: in a normal literal "\u" starts a unicode escape -> SyntaxError)
# dir_wanted = r"C:\users\doe_j"

file_list = [entry for entry in os.listdir(dir_wanted) if entry.endswith('.png')]
# os.path.join inserts the platform separator; the previous hand-written '\'
# literal was an unterminated string and did not even parse.
full_path_list = [os.path.join(dir_wanted, entry) for entry in file_list]

name_list = []
num_list = []
for item in file_list:
    # "name_ID.png" -> ("name", "_", "ID.png"); splitting on the LAST
    # underscore lets the name part itself contain underscores.
    stem, _, tail = item.rpartition('_')
    name_list.append(str(stem))
    # Drop the file extension from the ID part.
    num_list.append(str(tail.rpartition('.')[0]))

workbook = xlsxwriter.Workbook('pics_and_links.xlsx')
ws = workbook.add_worksheet('Links')

# adding column titles and making them bold
bold = workbook.add_format({'bold': True})
ws.write('A1', "Name", bold)
ws.write('B1', "Number", bold)
ws.write('C1', "Link", bold)

# putting the three lists we made into the workbook; row 1 holds the titles,
# so the data starts at spreadsheet row 2
for row_num, (name, num, path) in enumerate(
        zip(name_list, num_list, full_path_list), start=2):
    ws.write('A%d' % row_num, name)
    # int() stores the ID as a number; it raises ValueError when the ID part
    # of the file name is not numeric.
    ws.write('B%d' % row_num, int(num))
    ws.write_url('C%d' % row_num, path)

# Set the width of the column with the links in it
ws.set_column(2, 2, 40)
workbook.close()
您可以使用 pandas 包来完成:
import glob
import os
import pandas as pd
# Build output_file.xlsx with one row per file found in files_dir:
# the name part, the id part and an Excel HYPERLINK formula to the file.
files_dir = '/home/username/files_dir'  # here should be path to your directory with images
files = glob.glob(os.path.join(files_dir, '*'))

rows = []
for full_filename in files:
    filename = os.path.basename(full_filename)
    # Split on the LAST underscore only. A plain split('_') fails to unpack
    # when the name has more than one underscore or none at all; with no
    # underscore the name is '' and the whole stem becomes the id.
    name, _, id_ = filename.rpartition('_')
    id_ = os.path.splitext(id_)[0]  # remove file extension from id_
    hyperlink = '=HYPERLINK("file:///{}")'.format(full_filename)
    rows.append([name, id_, hyperlink])

# Build the frame once from the collected rows instead of growing it one
# row at a time with df.loc[i], which is slow for many files.
df = pd.DataFrame(rows, columns=['name', 'id', 'hyperlink'])
df.to_excel('output_file.xlsx', index=False)