如何从存储在 gitlab repos 中的文件中提取内容

How can I extract contents from a file stored in GitLab repos?

使用 python-gitlab 包,我想从所有 Dockerfile 中提取行。使用下面的代码,我能够获取项目名称以及我想要的存储库的 URL,但我如何才能确定项目中存在 Dockerfile,并读取该 Dockerfile 的内容?

import gitlab
import json
from pprint import pprint
import requests
import urllib.request


# Private-token authentication. Both arguments are placeholders to fill in;
# note that <token_here> is unquoted, so this line is not valid Python until
# it is replaced with a real string.
gl = gitlab.Gitlab('<path_to_gitlab_repo>', private_token=<token_here>)

# Authenticate against the server with the token configured above.
gl.auth()

# List projects.
# NOTE(review): no pagination argument is passed here, so this presumably
# returns only the first page of results -- confirm against python-gitlab docs.
projects = gl.projects.list()
for project in projects:
    # print(project) # prints all the meta data for the project
    print("Project: ", project.name)
    print("Gitlab URL: ", project.http_url_to_repo)
    # print("Branches: ", project.repo_branches)
    # Pretty-print the repository tree entries for this project.
    pprint(project.repository_tree(all=True))
    # This requests the repository's HTTP URL itself, not a file inside the
    # repository, so `myfile` holds whatever that URL responds with rather
    # than the contents of a Dockerfile.
    f = urllib.request.urlopen(project.http_url_to_repo)
    myfile = f.read()
    print(myfile)
    # Blank lines to separate one project's output from the next.
    print("\n\n")

我现在得到的输出是:

Gitlab URL:  <path_to_gitlab_repo>
[{'id': '0c4a64925f5c129d33557',
  'mode': '1044',
  'name': 'README.md',
  'path': 'README.md',
  'type': 'blob'}]

可以使用 project.files.get() 方法(参见文档)获取项目的 Dockerfile。

然后你可以打印 Dockerfile 的内容,或者用它做任何你想做的事情:

import gitlab
import base64


# Print the contents of the top-level Dockerfile of every accessible project.
#
# Private-token authentication. Replace both placeholder strings with the
# GitLab instance URL and a personal access token.
gl = gitlab.Gitlab('<gitlab-url>', private_token='<private-token>')

gl.auth()

# List all projects (all=True requests every page, not just the first one).
projects = gl.projects.list(all=True)
for project in projects:
    # Fetch the branch list once per project; each call is an API request.
    branches = project.branches.list()
    if not branches:
        # Skip projects without branches
        continue

    # The first branch returned is used as the ref to read from.
    branch = branches[0].name

    try:
        f = project.files.get(file_path='Dockerfile', ref=branch)
    except gitlab.exceptions.GitlabGetError:
        # Skip projects without Dockerfile
        continue

    # f.content is base64-encoded; decode it to text. The decoded text
    # already contains real newlines, so it can be printed as-is.
    file_content = base64.b64decode(f.content).decode("utf-8")
    print(file_content)

上面的代码使用返回的第一个分支;如果项目有多个分支,您可能需要调整分支名称。