如何使用 python 从 Google 电子表格中获取工作表 ID？

Question

我想确定一种方法来获取 URL 中每个工作表在 Google 电子表格工作簿中的工作表 ID。例如，this workbook is '1244369280' , since it's url is https://docs.google.com/spreadsheets/d/1yd8qTYjRns4_OT8PbsZzH0zajvzguKS79dq6j--hnTs/edit#gid=1244369280

的 'sheet2' 的工作表 ID

我发现的一种方法是提取 Google 电子表格的 XML，因为根据 this question，获取工作表 ID 的唯一方法是向下流工作表的 XML，但示例在 Javascript 中，我需要在 Python

中执行此操作

这是我想在 Python 中执行的 Javascript 代码：

  Dim worksheetFeed As WorksheetFeed
  Dim query As WorksheetQuery
  Dim worksheet As WorksheetEntry
  Dim output As New MemoryStream
  Dim xml As String
  Dim gid As String = String.Empty

  Try
    _service = New Spreadsheets.SpreadsheetsService("ServiceName")
    _service.setUserCredentials(UserId, Password)
    query = New WorksheetQuery(feedUrl)
    worksheetFeed = _service.Query(query)
    worksheet = worksheetFeed.Entries(0)

    ' Save worksheet feed to memory stream so we can 
    ' get the xml returned from the feed url and look for
    ' the gid.  Gid allows us to download the specific worksheet tab
    Using output
      worksheet.SaveToXml(output)
    End Using

    xml = Encoding.ASCII.GetString(output.ToArray())

似乎从 Google 电子表格获取 XML 的最佳方法是使用 Gdata，因此我下载了 GData 并尝试 the Google Spreadsheet example 使用我的凭据。

见下文

#!/usr/bin/python
#
# Copyright (C) 2007 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


__author__ = 'api.laurabeth@gmail.com (Laura Beth Lincoln)'


try:
  from xml.etree import ElementTree
except ImportError:
  from elementtree import ElementTree
import gdata.spreadsheet.service
import gdata.service
import atom.service
import gdata.spreadsheet
import atom
import getopt
import sys
import string


class SimpleCRUD:

  def __init__(self, email, password):
    self.gd_client = gdata.spreadsheet.service.SpreadsheetsService()
    self.gd_client.email = 'chris@curalate.com'
    self.gd_client.password = 'jkjkdioerzumawya'
    self.gd_client.source = 'Spreadsheets GData Sample'
    self.gd_client.ProgrammaticLogin()
    self.curr_key = ''
    self.curr_wksht_id = ''
    self.list_feed = None

  def _PromptForSpreadsheet(self):
    # Get the list of spreadsheets
    feed = self.gd_client.GetSpreadsheetsFeed()
    self._PrintFeed(feed)
    input = raw_input('\nSelection: ')
    id_parts = feed.entry[string.atoi(input)].id.text.split('/')
    self.curr_key = id_parts[len(id_parts) - 1]

  def _PromptForWorksheet(self):
    # Get the list of worksheets
    feed = self.gd_client.GetWorksheetsFeed(self.curr_key)
    self._PrintFeed(feed)
    input = raw_input('\nSelection: ')
    id_parts = feed.entry[string.atoi(input)].id.text.split('/')
    self.curr_wksht_id = id_parts[len(id_parts) - 1]

  def _PromptForCellsAction(self):
    print ('dump\n'
           'update {row} {col} {input_value}\n'
           '\n')
    input = raw_input('Command: ')
    command = input.split(' ', 1)
    if command[0] == 'dump':
      self._CellsGetAction()
    elif command[0] == 'update':
      parsed = command[1].split(' ', 2)
      if len(parsed) == 3:
        self._CellsUpdateAction(parsed[0], parsed[1], parsed[2])
      else:
        self._CellsUpdateAction(parsed[0], parsed[1], '')
    else:
      self._InvalidCommandError(input)

  def _PromptForListAction(self):
    print ('dump\n'
           'insert {row_data} (example: insert label=content)\n'
           'update {row_index} {row_data}\n'
           'delete {row_index}\n'
           'Note: No uppercase letters in column names!\n'
           '\n')
    input = raw_input('Command: ')
    command = input.split(' ' , 1)
    if command[0] == 'dump':
      self._ListGetAction()
    elif command[0] == 'insert':
      self._ListInsertAction(command[1])
    elif command[0] == 'update':
      parsed = command[1].split(' ', 1)
      self._ListUpdateAction(parsed[0], parsed[1])
    elif command[0] == 'delete':
      self._ListDeleteAction(command[1])
    else:
      self._InvalidCommandError(input)

  def _CellsGetAction(self):
    # Get the feed of cells
    feed = self.gd_client.GetCellsFeed(self.curr_key, self.curr_wksht_id)
    self._PrintFeed(feed)

  def _CellsUpdateAction(self, row, col, inputValue):
    entry = self.gd_client.UpdateCell(row=row, col=col, inputValue=inputValue, 
        key=self.curr_key, wksht_id=self.curr_wksht_id)
    if isinstance(entry, gdata.spreadsheet.SpreadsheetsCell):
      print 'Updated!'

  def _ListGetAction(self):
    # Get the list feed
    self.list_feed = self.gd_client.GetListFeed(self.curr_key, self.curr_wksht_id)
    self._PrintFeed(self.list_feed)

  def _ListInsertAction(self, row_data):
    entry = self.gd_client.InsertRow(self._StringToDictionary(row_data), 
        self.curr_key, self.curr_wksht_id)
    if isinstance(entry, gdata.spreadsheet.SpreadsheetsList):
      print 'Inserted!'

  def _ListUpdateAction(self, index, row_data):
    self.list_feed = self.gd_client.GetListFeed(self.curr_key, self.curr_wksht_id)
    entry = self.gd_client.UpdateRow(
        self.list_feed.entry[string.atoi(index)], 
        self._StringToDictionary(row_data))
    if isinstance(entry, gdata.spreadsheet.SpreadsheetsList):
      print 'Updated!'

  def _ListDeleteAction(self, index):
    self.list_feed = self.gd_client.GetListFeed(self.curr_key, self.curr_wksht_id)
    self.gd_client.DeleteRow(self.list_feed.entry[string.atoi(index)])
    print 'Deleted!'

  def _StringToDictionary(self, row_data):
    dict = {}
    for param in row_data.split():
      temp = param.split('=')
      dict[temp[0]] = temp[1]
    return dict

  def _PrintFeed(self, feed):
    for i, entry in enumerate(feed.entry):
      if isinstance(feed, gdata.spreadsheet.SpreadsheetsCellsFeed):
        print '%s %s\n' % (entry.title.text, entry.content.text)
      elif isinstance(feed, gdata.spreadsheet.SpreadsheetsListFeed):
        print '%s %s %s' % (i, entry.title.text, entry.content.text)
        # Print this row's value for each column (the custom dictionary is
        # built using the gsx: elements in the entry.)
        print 'Contents:'
        for key in entry.custom:  
          print '  %s: %s' % (key, entry.custom[key].text) 
        print '\n',
      else:
        print '%s %s\n' % (i, entry.title.text)

  def _InvalidCommandError(self, input):
    print 'Invalid input: %s\n' % (input)

  def Run(self):
    self._PromptForSpreadsheet()
    self._PromptForWorksheet()
    input = raw_input('cells or list? ')
    if input == 'cells':
      while True:
        self._PromptForCellsAction()
    elif input == 'list':
      while True:
        self._PromptForListAction()


def main():
  # parse command line options
  try:
    opts, args = getopt.getopt(sys.argv[1:], "", ["user=", "pw="])
  except getopt.error, msg:
    print 'python spreadsheetExample.py --user [username] --pw [password] '
    sys.exit(2)

  user = 'fake@gmail.com'
  pw = 'fakepassword'
  key = ''
  # Process options
  for o, a in opts:
    if o == "--user":
      user = a
    elif o == "--pw":
      pw = a

  if user == '' or pw == '':
    print 'python spreadsheetExample.py --user [username] --pw [password] '
    sys.exit(2)

  sample = SimpleCRUD(user, pw)
  sample.Run()


if __name__ == '__main__':
  main()

然而这个returns出现以下错误：

Traceback (most recent call last):
  File "/Users/Chris/Desktop/gdata_test.py", line 200, in <module>
    main()
  File "/Users/Chris/Desktop/gdata_test.py", line 196, in main
    sample.Run()
  File "/Users/Chris/Desktop/gdata_test.py", line 162, in Run
    self._PromptForSpreadsheet()
  File "/Users/Chris/Desktop/gdata_test.py", line 49, in _PromptForSpreadsheet
    feed = self.gd_client.GetSpreadsheetsFeed()
  File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/gdata/spreadsheet/service.py", line 99, in GetSpreadsheetsFeed
    converter=gdata.spreadsheet.SpreadsheetsSpreadsheetsFeedFromString)
  File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/gdata/service.py", line 1074, in Get
    return converter(result_body)
  File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/gdata/spreadsheet/__init__.py", line 395, in SpreadsheetsSpreadsheetsFeedFromString
    xml_string)
  File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/atom/__init__.py", line 93, in optional_warn_function
    return f(*args, **kwargs)
  File "/Library/Frameworks/Python.framework/Versions/2.7/lib/python2.7/site-packages/atom/__init__.py", line 127, in CreateClassFromXMLString
    tree = ElementTree.fromstring(xml_string)
  File "<string>", line 125, in XML
cElementTree.ParseError: no element found: line 1, column 0
[Finished in 0.3s with exit code 1]
[shell_cmd: python -u "/Users/Chris/Desktop/gdata_test.py"]
[dir: /Users/Chris/Desktop]
[path: /usr/bin:/bin:/usr/sbin:/sbin]

我还应该提到我一直在使用 Gspread 作为与 Google 电子表格交互的方法，但是当我运行下面的代码时，我得到了 gid，但我需要有工作表ID。

gc = gspread.authorize(credentials)
sh = gc.open_by_url('google_spreadsheet_url')
sh.get_id_fields() 
>> {'spreadsheet_id': '1BgCEn-3Nor7UxOEPwD-qv8qXe7CaveJBrn9_Lcpo4W4','worksheet_id': 'oqitk0d'}

Answer 1

请参阅 self.gd_client.ProgrammaticLogin() 调用 - 这是导致主要问题的原因，因为它使用了 "ClientLogin" 授权方法，该方法首先被弃用，后来被弃用 removed on April 20, 2015。

我实际上会研究更新鲜和积极开发的 gspread 模块。

这是一个有点疯狂的示例，演示了如何为给定的电子表格和工作表名称提取实际 "gid" 值。请注意，您首先需要 generate the JSON file with the OAuth credentials（我假设您已经这样做了）。

代码（添加了注释，希望有助于理解它）：

import urlparse
import xml.etree.ElementTree as ET

import gspread
from oauth2client.service_account import ServiceAccountCredentials

SPREADSHEET_NAME = 'My Test Spreadsheet'
WORKSHEET_NAME = "Sheet2"

PATH_TO_JSON_KEYFILE = '/path/to/json/key/file.json'
NAMESPACES = {'ns0': 'http://www.w3.org/2005/Atom'}
SCOPES = ['https://spreadsheets.google.com/feeds']

# log in
credentials = ServiceAccountCredentials.from_json_keyfile_name(PATH_TO_JSON_KEYFILE, SCOPES)
gss_client = gspread.authorize(credentials)

# open spreadsheet
gss = gss_client.open(SPREADSHEET_NAME)

# extract the full feed url
root = gss._feed_entry
full_feed_url = next(elm.attrib["href"] for elm in root.findall("ns0:link", namespaces=NAMESPACES) if "full" in elm.attrib["href"])

# get the feed and extract the gid value for a given sheet name
response = gss_client.session.get(full_feed_url)
root = ET.fromstring(response.content)
sheet_entry = next(elm for elm in root.findall("ns0:entry", namespaces=NAMESPACES)
                   if elm.find("ns0:title", namespaces=NAMESPACES).text == WORKSHEET_NAME)
link = next(elm.attrib["href"] for elm in sheet_entry.findall("ns0:link", namespaces=NAMESPACES)
            if "gid=" in elm.attrib["href"])

# extract "gid" from URL
gid = urlparse.parse_qs(urlparse.urlparse(link).query)["gid"][0]
print(gid)

看起来还有一种方法可以将工作表 ID 转换为 gid 值，请参阅：

How to convert Google spreadsheet's worksheet string id to integer index (GID)?

Answer 2

2017 年 1 月

您可以使用新的 google 电子表格 api v4。您可以查看使用 api v4 的 pygsheets 库。

import pygsheets

#authorize the pygsheets
gc = pygsheets.authorize()

#open the spreadsheet
sh = gc.open('my new ssheet')

# get the worksheet and its id    
print sh.worksheet_by_title("my test sheet").id

如何使用 python 从 Google 电子表格中获取工作表 ID？

How to get the Worksheet ID from a Google Spreadsheet with python?

python

xml

gdata

gspread