Python - 如何读取 SharePoint Excel 工作簿中的指定工作表
Python - how to read Sharepoint excel sheet specific worksheet
在 Python 中,我正在使用 Office 365 REST Python Client 库来访问和读取一个包含多个工作表(sheet)的 Excel 工作簿。
虽然身份验证成功,但我不知道如何把工作表名称正确地附加到文件路径上,以访问第一个或第二个工作表;因此得到的输出不是 JSON,而是我的代码无法处理的 IO 字节。
我的最终目标只是通过名称 'employee_list' 访问特定的工作表,并将其转换为 JSON 或 Pandas 数据框以供进一步使用。
代码片段如下:
import io
import json
import pandas as pd
from office365.runtime.auth.authentication_context import AuthenticationContext
from office365.runtime.auth.user_credential import UserCredential
from office365.runtime.http.request_options import RequestOptions
from office365.sharepoint.client_context import ClientContext
from office365.sharepoint.files.file import File
from io import BytesIO
# Placeholder credentials used by the example.
username = 'abc@a.com'
password = 'abcd'
# NOTE: this is a Doc.aspx document link with a query string, not a bare site URL.
site_url = 'https://sample.sharepoint.com/sites/SAMPLE/_layouts/15/Doc.aspx?OR=teams&action=edit&sourcedoc={739271873}'
# Open question: how to address a worksheet by its name in the URL above.
ctx = ClientContext(site_url).with_credentials(UserCredential(username, password))
# The request URL is formed by appending "/_api/web/" to site_url.
request = RequestOptions("{0}/_api/web/".format(site_url))
response = ctx.execute_request_direct(request)
json_data = json.loads(response.content) # Reported failure: JSON decode error, the response body is raw bytes rather than JSON
可以通过工作表索引来访问,请参考以下代码:
import xlrd
# Path of the workbook to open (placeholder value).
loc = ("File location")
wb = xlrd.open_workbook(loc)
# Select the worksheet by position; index 0 is the first sheet.
sheet = wb.sheet_by_index(0)
# Prints the cell at row index 1, column index 0 (second row, first column,
# assuming xlrd's zero-based indexing).
print(sheet.cell_value(1, 0))
您可以尝试在 URL 中加入工作表名称('sheetname')部分,例如:
https://site/lib/workbook.xlsx#'Sheet1'!A1
看起来用于访问数据的 URL 构造不正确。您应该先在浏览器中测试完整的 URL 是否可用,然后再修改代码。您可以尝试如下改动;我已经验证过,按这种方式构造的 URL 会返回 JSON 数据。
import io
import json
import pandas as pd
from office365.runtime.auth.authentication_context import AuthenticationContext
from office365.runtime.auth.user_credential import UserCredential
from office365.runtime.http.request_options import RequestOptions
from office365.sharepoint.client_context import ClientContext
from office365.sharepoint.files.file import File
from io import BytesIO
# Placeholder credentials; replace with a real account.
username = 'abc@a.com'
password = 'abcd'
# Excel Services REST URL addressing a cell range on the 'employee_list'
# worksheet and requesting JSON output. The literal is double-quoted because
# the range reference itself contains single quotes; a single-quoted literal
# here would be a SyntaxError.
# Replace RootFolder/ExcelFileName.xlsx with actual path of excel file from the root.
# Replace A1 and A10 with actual start and end of cell range.
site_url = "https://sample.sharepoint.com/_vti_bin/ExcelRest.aspx/RootFolder/ExcelFileName.xlsx/Model/Ranges('employee_list!A1%7CA10')?$format=json"
ctx = ClientContext(site_url).with_credentials(UserCredential(username, password))
# Send the request to the REST URL itself and parse the JSON body.
request = RequestOptions(site_url)
response = ctx.execute_request_direct(request)
json_data = json.loads(response.content)
我使用的较新版本(Office365-REST-Python-Client==2.3.11)可以更简单地访问 SharePoint 存储库中的 Excel 文件。
# from original_question import pd,\
# username,\
# password,\
# UserCredential,\
# File,\
# BytesIO
# Credentials object built from the username/password defined earlier.
user_credentials = UserCredential(user_name=username,
password=password)
# Absolute URL of the Excel file on SharePoint. "{*recursive_folders}" appears
# to be a placeholder for the real folder path -- replace it before running.
file_url = ('https://sample.sharepoint.com'
'/sites/SAMPLE/{*recursive_folders}'
'/sample_worksheet.xlsx')
# In-memory binary buffer that will receive the downloaded file.
excel_file = BytesIO()
# Build a File object from the absolute URL.
excel_file_online = File.from_url(abs_url=file_url)
# Attach the credentials to the File object.
excel_file_online = excel_file_online.with_credentials(
credentials=user_credentials)
# Download the file content into excel_file and execute the queued request.
excel_file_online.download(file_object=excel_file).execute_query()
现在我们得到了该 Excel 文件的一份二进制副本,保存在名为 excel_file 的 BytesIO 对象中。接下来,把它读取为 pd.DataFrame 非常简单,就像读取存储在本地磁盘上的普通 Excel 文件一样。例如:
# No sheet_name is given, so pandas uses its default sheet selection
# (the first worksheet, per the pandas documentation).
pd.read_excel(excel_file) # -> pd.DataFrame
因此,如果您只对 'employee_list' 这样的特定工作表感兴趣,最好这样读取:
# Read only the worksheet named 'employee_list' from the in-memory workbook.
employee_list = pd.read_excel(excel_file,
sheet_name='employee_list')
# -> pd.DataFrame
或
# sheet_name=None makes pandas load every worksheet and return them in a dict
# (keyed by sheet name, per the pandas documentation).
data = pd.read_excel(excel_file,
sheet_name=None) # -> dict
# dict.get yields None when there is no 'employee_list' key.
employee_list = data.get('employee_list')
# -> pd.DataFrame or None
我知道您说过无法处理 BytesIO 对象,但对于那些和我一样、想把文件读取为 BytesIO 对象而找到这里的人,可以使用 pd.read_excel 的 sheet_name 参数:
from urllib.parse import urlparse

url = "https://sharepoint.site.com/sites/MySite/MySheet.xlsx"
sheet_name = 'Sheet X'
# File.open_binary expects a server-relative path, so keep only the path part
# of the absolute URL ("/sites/MySite/MySheet.xlsx").
relative_url = urlparse(url).path
# ctx must be an already-authenticated ClientContext; it is not created in
# this snippet.
response = File.open_binary(ctx, relative_url)
# Wrap the downloaded bytes in an in-memory file positioned at the start.
bytes_file_obj = io.BytesIO(response.content)
# Read only the worksheet selected by sheet_name.
df = pd.read_excel(bytes_file_obj, sheet_name=sheet_name)
在 Python 中,我正在使用 Office 365 REST Python Client 库来访问和读取一个包含多个工作表(sheet)的 Excel 工作簿。
虽然身份验证成功,但我不知道如何把工作表名称正确地附加到文件路径上,以访问第一个或第二个工作表;因此得到的输出不是 JSON,而是我的代码无法处理的 IO 字节。
我的最终目标只是通过名称 'employee_list' 访问特定的工作表,并将其转换为 JSON 或 Pandas 数据框以供进一步使用。
代码片段如下:
import io
import json
import pandas as pd
from office365.runtime.auth.authentication_context import AuthenticationContext
from office365.runtime.auth.user_credential import UserCredential
from office365.runtime.http.request_options import RequestOptions
from office365.sharepoint.client_context import ClientContext
from office365.sharepoint.files.file import File
from io import BytesIO
# Placeholder credentials used by the example.
username = 'abc@a.com'
password = 'abcd'
# NOTE: this is a Doc.aspx document link with a query string, not a bare site URL.
site_url = 'https://sample.sharepoint.com/sites/SAMPLE/_layouts/15/Doc.aspx?OR=teams&action=edit&sourcedoc={739271873}'
# Open question: how to address a worksheet by its name in the URL above.
ctx = ClientContext(site_url).with_credentials(UserCredential(username, password))
# The request URL is formed by appending "/_api/web/" to site_url.
request = RequestOptions("{0}/_api/web/".format(site_url))
response = ctx.execute_request_direct(request)
json_data = json.loads(response.content) # Reported failure: JSON decode error, the response body is raw bytes rather than JSON
可以通过工作表索引来访问,请参考以下代码:
import xlrd
# Path of the workbook to open (placeholder value).
loc = ("File location")
wb = xlrd.open_workbook(loc)
# Select the worksheet by position; index 0 is the first sheet.
sheet = wb.sheet_by_index(0)
# Prints the cell at row index 1, column index 0 (second row, first column,
# assuming xlrd's zero-based indexing).
print(sheet.cell_value(1, 0))
您可以尝试在 URL 中加入工作表名称('sheetname')部分,例如:
https://site/lib/workbook.xlsx#'Sheet1'!A1
看起来用于访问数据的 URL 构造不正确。您应该先在浏览器中测试完整的 URL 是否可用,然后再修改代码。您可以尝试如下改动;我已经验证过,按这种方式构造的 URL 会返回 JSON 数据。
import io
import json
import pandas as pd
from office365.runtime.auth.authentication_context import AuthenticationContext
from office365.runtime.auth.user_credential import UserCredential
from office365.runtime.http.request_options import RequestOptions
from office365.sharepoint.client_context import ClientContext
from office365.sharepoint.files.file import File
from io import BytesIO
# Placeholder credentials; replace with a real account.
username = 'abc@a.com'
password = 'abcd'
# Excel Services REST URL addressing a cell range on the 'employee_list'
# worksheet and requesting JSON output. The literal is double-quoted because
# the range reference itself contains single quotes; a single-quoted literal
# here would be a SyntaxError.
# Replace RootFolder/ExcelFileName.xlsx with actual path of excel file from the root.
# Replace A1 and A10 with actual start and end of cell range.
site_url = "https://sample.sharepoint.com/_vti_bin/ExcelRest.aspx/RootFolder/ExcelFileName.xlsx/Model/Ranges('employee_list!A1%7CA10')?$format=json"
ctx = ClientContext(site_url).with_credentials(UserCredential(username, password))
# Send the request to the REST URL itself and parse the JSON body.
request = RequestOptions(site_url)
response = ctx.execute_request_direct(request)
json_data = json.loads(response.content)
我使用的较新版本(Office365-REST-Python-Client==2.3.11)可以更简单地访问 SharePoint 存储库中的 Excel 文件。
# from original_question import pd,\
# username,\
# password,\
# UserCredential,\
# File,\
# BytesIO
# Credentials object built from the username/password defined earlier.
user_credentials = UserCredential(user_name=username,
password=password)
# Absolute URL of the Excel file on SharePoint. "{*recursive_folders}" appears
# to be a placeholder for the real folder path -- replace it before running.
file_url = ('https://sample.sharepoint.com'
'/sites/SAMPLE/{*recursive_folders}'
'/sample_worksheet.xlsx')
# In-memory binary buffer that will receive the downloaded file.
excel_file = BytesIO()
# Build a File object from the absolute URL.
excel_file_online = File.from_url(abs_url=file_url)
# Attach the credentials to the File object.
excel_file_online = excel_file_online.with_credentials(
credentials=user_credentials)
# Download the file content into excel_file and execute the queued request.
excel_file_online.download(file_object=excel_file).execute_query()
现在我们得到了该 Excel 文件的一份二进制副本,保存在名为 excel_file 的 BytesIO 对象中。接下来,把它读取为 pd.DataFrame 非常简单,就像读取存储在本地磁盘上的普通 Excel 文件一样。例如:
# No sheet_name is given, so pandas uses its default sheet selection
# (the first worksheet, per the pandas documentation).
pd.read_excel(excel_file) # -> pd.DataFrame
因此,如果您只对 'employee_list' 这样的特定工作表感兴趣,最好这样读取:
# Read only the worksheet named 'employee_list' from the in-memory workbook.
employee_list = pd.read_excel(excel_file,
sheet_name='employee_list')
# -> pd.DataFrame
或
# sheet_name=None makes pandas load every worksheet and return them in a dict
# (keyed by sheet name, per the pandas documentation).
data = pd.read_excel(excel_file,
sheet_name=None) # -> dict
# dict.get yields None when there is no 'employee_list' key.
employee_list = data.get('employee_list')
# -> pd.DataFrame or None
我知道您说过无法处理 BytesIO 对象,但对于那些和我一样、想把文件读取为 BytesIO 对象而找到这里的人,可以使用 pd.read_excel 的 sheet_name 参数:
from urllib.parse import urlparse

url = "https://sharepoint.site.com/sites/MySite/MySheet.xlsx"
sheet_name = 'Sheet X'
# File.open_binary expects a server-relative path, so keep only the path part
# of the absolute URL ("/sites/MySite/MySheet.xlsx").
relative_url = urlparse(url).path
# ctx must be an already-authenticated ClientContext; it is not created in
# this snippet.
response = File.open_binary(ctx, relative_url)
# Wrap the downloaded bytes in an in-memory file positioned at the start.
bytes_file_obj = io.BytesIO(response.content)
# Read only the worksheet selected by sheet_name.
df = pd.read_excel(bytes_file_obj, sheet_name=sheet_name)