打开多个 excel 文件,在每个文件上打开每个 sheet,并保存图像
Open multiple excel files, open every sheet on each file, and save the image
我有多个 excel 文件,里面有图片,图片位于不同的 excel sheet 上。我的目标是将这些图像保存到我的电脑。这些图像稍后将用于人脸识别。
我构建了一些代码来打开 excel 文件并抓取图像。但是它只处理了一个 sheet,而不是所有 sheet。
import face_recognition
import pandas as pd
import win32com.client as win32
from PIL import ImageGrab
import os
# Question script: attempts to save every picture found on the sheets of the
# workbooks in one folder as JPEG files.
# NOTE(review): indentation was lost in this copy, so the exact nesting of the
# loops below cannot be confirmed from the text alone.
#Read working directory
print(os.getcwd()) #get current working directory
os.chdir("E:/DATA/Master data") #set working directory
print(os.getcwd())#check updated working directory
#Reading xlsx file in a folder
# os.listdir returns every entry in the folder; nothing here filters for .xlsx.
path1="E:/DATA/Master data"
files= os.listdir(path1)
print(files)
listlength = len(files)
#Extracting data from each xlsx file
# NOTE(review): `files` is iterated here while files.pop() below removes
# entries from the same list.
for f in files:
excel = win32.gencache.EnsureDispatch('Excel.Application')
count=0
# NOTE(review): neither `count` nor `listlength` is updated anywhere in the
# visible code, so this condition never changes; presumably the loop only ends
# when files.pop() fails on an empty list -- confirm against the original.
while (count<listlength):
a = files.pop(count)
path_name = path1 + "/" + a
workbook = excel.Workbooks.Open(path_name)
wb_folder = workbook.Path
wb_name = workbook.Name
# wb_path is computed but not used again in the visible code.
wb_path = os.path.join(wb_folder, wb_name)
for sheet in workbook.Worksheets:
# enumerate restarts i at 0 for every sheet.
for i, shape in enumerate(sheet.Shapes):
# Only shapes whose name begins with 'Picture' are copied.
if shape.Name.startswith('Picture'):
shape.Copy()
image = ImageGrab.grabclipboard()
# The file name depends only on i and is relative (so it is resolved against
# the working directory set above); pictures with the same index on different
# sheets or workbooks get the same name and overwrite each other.
image.save('{}.jpg'.format(i+1), 'jpeg')
我希望从多个 excel 文件中的每个 sheet 获取所有图像。
变量 i 在每个 sheet 上都会重置,因此不同 sheet 生成的文件名相同,文件会被覆盖。请添加第二个变量,让它随每个 sheet 递增,并把它也包含在文件名中。
这已经过测试,可以正常工作,我添加了 excel.Visible 以便您可以看到 sheets 弹出窗口 :) 还可以记录日志,以便您可以看到发生了什么。我没有使用全局计数变量,而是将工作簿名称连接到 sheet 名称,然后使用每个 sheet 图像中的 "n" 变量。
import win32com.client as win32
from PIL import ImageGrab
import os
def ensureDirExists(filePath):
    """Create the directory ``filePath`` (and any missing parents) if needed.

    Args:
        filePath: Path of the directory that must exist after the call.

    Raises:
        FileExistsError: If ``filePath`` exists but is not a directory.
    """
    # exist_ok=True replaces the racy "check os.path.exists, then create"
    # sequence: the directory may appear between the check and the creation.
    os.makedirs(filePath, exist_ok=True)
def absoluteListDir(directory):
    """Lazily yield the absolute path of every file below ``directory``.

    The tree is traversed with ``os.walk``, so files in nested
    sub-directories are included; directories themselves are not yielded.
    """
    for root, _subdirs, names in os.walk(directory):
        yield from (
            os.path.abspath(os.path.join(root, name)) for name in names
        )
# Save every picture from every sheet of every workbook found under
# dataDirectory as a JPEG file in outputDirectory. Output names combine the
# workbook name, the sheet name and the shape index so they do not collide.
dataDirectory = "data"
outputDirectory = "images"
ensureDirExists(dataDirectory)
ensureDirExists(outputDirectory)
excel = win32.gencache.EnsureDispatch('Excel.Application')
excel.Visible = True  # keep Excel visible so progress can be watched
files = absoluteListDir(dataDirectory)
for file in files:
    baseName = os.path.basename(file)
    # Skip "~$..." lock files that Excel creates next to open workbooks, and
    # anything that is not an Excel workbook.
    if baseName.startswith("~$") or not baseName.lower().endswith(
            (".xls", ".xlsx", ".xlsm", ".xlsb")):
        continue
    print("=" * 20)
    print("Opening Workbook: ", file)
    workbook = excel.Workbooks.Open(file)
    try:
        for sheet in workbook.Sheets:
            print("Scraping Sheet: ", sheet.Name)
            for n, shape in enumerate(sheet.Shapes):
                # NOTE(review): default picture names are presumably localized
                # by Excel; matching on the shape type may be more robust.
                if not shape.Name.startswith("Picture"):
                    continue
                shape.Copy()
                image = ImageGrab.grabclipboard()
                # grabclipboard() may return None or a list of file names
                # instead of an image; skip the shape rather than crash.
                if not hasattr(image, "save"):
                    print("Skipping shape, no image on clipboard: ", shape.Name)
                    continue
                outputFile = "{}/{}_{}_{}.jpg".format(outputDirectory, workbook.Name, sheet.Name, n)
                print("Saving Image File: ", outputFile)
                # JPEG cannot store an alpha channel, so normalize to RGB.
                image.convert("RGB").save(outputFile, "jpeg")
    finally:
        # Always close the workbook, and do not save: extraction must not
        # modify the source files.
        print("Closing Workbook")
        workbook.Close(False)
我有多个 excel 文件,里面有图片,图片位于不同的 excel sheet 上。我的目标是将这些图像保存到我的电脑。这些图像稍后将用于人脸识别。
我构建了一些代码来打开 excel 文件并抓取图像。但是它只处理了一个 sheet,而不是所有 sheet。
import face_recognition
import pandas as pd
import win32com.client as win32
from PIL import ImageGrab
import os
# Question script: attempts to save every picture found on the sheets of the
# workbooks in one folder as JPEG files.
# NOTE(review): indentation was lost in this copy, so the exact nesting of the
# loops below cannot be confirmed from the text alone.
#Read working directory
print(os.getcwd()) #get current working directory
os.chdir("E:/DATA/Master data") #set working directory
print(os.getcwd())#check updated working directory
#Reading xlsx file in a folder
# os.listdir returns every entry in the folder; nothing here filters for .xlsx.
path1="E:/DATA/Master data"
files= os.listdir(path1)
print(files)
listlength = len(files)
#Extracting data from each xlsx file
# NOTE(review): `files` is iterated here while files.pop() below removes
# entries from the same list.
for f in files:
excel = win32.gencache.EnsureDispatch('Excel.Application')
count=0
# NOTE(review): neither `count` nor `listlength` is updated anywhere in the
# visible code, so this condition never changes; presumably the loop only ends
# when files.pop() fails on an empty list -- confirm against the original.
while (count<listlength):
a = files.pop(count)
path_name = path1 + "/" + a
workbook = excel.Workbooks.Open(path_name)
wb_folder = workbook.Path
wb_name = workbook.Name
# wb_path is computed but not used again in the visible code.
wb_path = os.path.join(wb_folder, wb_name)
for sheet in workbook.Worksheets:
# enumerate restarts i at 0 for every sheet.
for i, shape in enumerate(sheet.Shapes):
# Only shapes whose name begins with 'Picture' are copied.
if shape.Name.startswith('Picture'):
shape.Copy()
image = ImageGrab.grabclipboard()
# The file name depends only on i and is relative (so it is resolved against
# the working directory set above); pictures with the same index on different
# sheets or workbooks get the same name and overwrite each other.
image.save('{}.jpg'.format(i+1), 'jpeg')
我希望从多个 excel 文件中的每个 sheet 获取所有图像。
变量 i 在每个 sheet 上都会重置,因此不同 sheet 生成的文件名相同,文件会被覆盖。请添加第二个变量,让它随每个 sheet 递增,并把它也包含在文件名中。
这已经过测试,可以正常工作,我添加了 excel.Visible 以便您可以看到 sheets 弹出窗口 :) 还可以记录日志,以便您可以看到发生了什么。我没有使用全局计数变量,而是将工作簿名称连接到 sheet 名称,然后使用每个 sheet 图像中的 "n" 变量。
import win32com.client as win32
from PIL import ImageGrab
import os
def ensureDirExists(filePath):
    """Create the directory ``filePath`` (and any missing parents) if needed.

    Args:
        filePath: Path of the directory that must exist after the call.

    Raises:
        FileExistsError: If ``filePath`` exists but is not a directory.
    """
    # exist_ok=True replaces the racy "check os.path.exists, then create"
    # sequence: the directory may appear between the check and the creation.
    os.makedirs(filePath, exist_ok=True)
def absoluteListDir(directory):
    """Lazily yield the absolute path of every file below ``directory``.

    The tree is traversed with ``os.walk``, so files in nested
    sub-directories are included; directories themselves are not yielded.
    """
    for root, _subdirs, names in os.walk(directory):
        yield from (
            os.path.abspath(os.path.join(root, name)) for name in names
        )
# Save every picture from every sheet of every workbook found under
# dataDirectory as a JPEG file in outputDirectory. Output names combine the
# workbook name, the sheet name and the shape index so they do not collide.
dataDirectory = "data"
outputDirectory = "images"
ensureDirExists(dataDirectory)
ensureDirExists(outputDirectory)
excel = win32.gencache.EnsureDispatch('Excel.Application')
excel.Visible = True  # keep Excel visible so progress can be watched
files = absoluteListDir(dataDirectory)
for file in files:
    baseName = os.path.basename(file)
    # Skip "~$..." lock files that Excel creates next to open workbooks, and
    # anything that is not an Excel workbook.
    if baseName.startswith("~$") or not baseName.lower().endswith(
            (".xls", ".xlsx", ".xlsm", ".xlsb")):
        continue
    print("=" * 20)
    print("Opening Workbook: ", file)
    workbook = excel.Workbooks.Open(file)
    try:
        for sheet in workbook.Sheets:
            print("Scraping Sheet: ", sheet.Name)
            for n, shape in enumerate(sheet.Shapes):
                # NOTE(review): default picture names are presumably localized
                # by Excel; matching on the shape type may be more robust.
                if not shape.Name.startswith("Picture"):
                    continue
                shape.Copy()
                image = ImageGrab.grabclipboard()
                # grabclipboard() may return None or a list of file names
                # instead of an image; skip the shape rather than crash.
                if not hasattr(image, "save"):
                    print("Skipping shape, no image on clipboard: ", shape.Name)
                    continue
                outputFile = "{}/{}_{}_{}.jpg".format(outputDirectory, workbook.Name, sheet.Name, n)
                print("Saving Image File: ", outputFile)
                # JPEG cannot store an alpha channel, so normalize to RGB.
                image.convert("RGB").save(outputFile, "jpeg")
    finally:
        # Always close the workbook, and do not save: extraction must not
        # modify the source files.
        print("Closing Workbook")
        workbook.Close(False)