背景
你可能会遇到这样的需求:需要把项目组中所有的项目拉取到本地维护。如果项目不多,一个一个手动 git clone 还好;但如果项目有几百个呢?一个个 clone 下来可能就到下班时间了,第二天还需要对这几百个项目再做 git pull 拉取新代码,是不是很绝望?!
所以一个脚本辅助能帮到我们很多。
实现方式
第一步:先将组下所有项目输出到 Excel 文件中
#!/usr/bin/python3
# -*- coding: UTF-8 -*-
"""Export basic information about every project of a GitLab group to Excel.

The script pages through the GitLab "list a group's projects" endpoint and
writes one row per project to ``excel/projects-info-<group_id>.xlsx``.
"""
import os
import sys
import requests
import time
import functools
from requests_pkcs12 import Pkcs12Adapter
from openpyxl import Workbook
from openpyxl.styles import NamedStyle, Font, Alignment, PatternFill, colors

# HTTP session that presents a PKCS#12 client certificate (file + password)
# to the GitLab host, which requires mutual TLS authentication.
session = requests.session()
BASE_HOST = 'https://gitlab.co-xxx.com'
HEADERS = {'PRIVATE-TOKEN': 'xxxxxx'}  # GitLab personal access token
session.mount(BASE_HOST, Pkcs12Adapter(pkcs12_filename='cert/client-xxxxxx.p12', pkcs12_password='xxxxxx'))

# Excel export state
_root_file_path = os.path.abspath(".")
_py_filename = os.path.basename(sys.argv[0]).split(".")[0]
wb_result = Workbook()
sheet = wb_result.active

DEFAULT_GROUP_ID = '551'  # id of the GitLab group to export
OUTPUT_DIR = 'excel'      # directory the workbook is written to
REQUEST_TIMEOUT = 30      # seconds; without it a stalled request blocks forever

# Project fields exported, in column order; also used as the header row.
PROJECT_FIELDS = (
    'id',
    'name',
    'path_with_namespace',
    'description',
    'ssh_url_to_repo',
    'default_branch',
    'created_at',
    'last_activity_at',
)
# Column letter -> width, one entry per field in PROJECT_FIELDS.
COLUMN_WIDTHS = {
    'A': 10,
    'B': 20,
    'C': 20,
    'D': 100,
    'E': 100,
    'F': 20,
    'G': 25,
    'H': 25,
}


def time_me(info='耗时'):
    """Build a decorator that prints how long the wrapped call took.

    Args:
        info: Label printed between the function name and the elapsed time.

    Returns:
        A decorator. The wrapped function's return value is passed through.
    """
    def _time_me(fn):
        @functools.wraps(fn)
        def _wrapper(*args, **kwargs):
            start = time.perf_counter()
            result = fn(*args, **kwargs)
            print('{} {} {}'.format(fn.__name__, info, int(time.perf_counter() - start)), ' 秒')
            return result
        return _wrapper
    return _time_me


def result_excel_init():
    """Write the header row and apply column widths and the header style."""
    highlight = NamedStyle(name="highlight")
    highlight.font = Font(name='Arial', size=13, color=colors.BLACK, bold=True)
    highlight.alignment = Alignment(horizontal='center', vertical='center', wrap_text=True)
    highlight.fill = PatternFill("solid", fgColor="ACB9C9")

    sheet.append(list(PROJECT_FIELDS))
    sheet.row_dimensions[1].height = 25
    for letter, width in COLUMN_WIDTHS.items():
        sheet.column_dimensions[letter].width = width
    for cell in next(sheet.iter_rows(min_row=1, max_row=1)):
        cell.style = highlight


def result_excel_end(centered_columns=(1, 3, 5, 6)):
    """Apply alignment to every data row (all rows after the header).

    Not called by ``main``; kept as an optional post-processing helper.

    Args:
        centered_columns: Column indexes that are centred horizontally as
            well as vertically; every other column is centred vertically only.
    """
    v_center = Alignment(vertical='center', wrap_text=True)
    hv_center = Alignment(horizontal='center', vertical='center', wrap_text=True)
    for row in sheet.iter_rows(min_row=2):
        for cell in row:
            # Set the alignment on the cell itself: a NamedStyle cannot be
            # re-configured per cell once it has been assigned.
            cell.alignment = hv_center if cell.column in centered_columns else v_center


def list_all_projects(group_id=DEFAULT_GROUP_ID):
    """Fetch every project of a group, following pagination until a page is empty.

    Args:
        group_id: Id of the GitLab group whose projects are listed.

    Returns:
        A list with one dict per project, as decoded from the API response.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        ValueError: If a response body is not a JSON list.
    """
    projects = []
    page = 1
    while True:
        rsp = session.get(
            '{}/api/v4/groups/{}/projects'.format(BASE_HOST, group_id),
            headers=HEADERS,
            params={'simple': True, 'per_page': 100, 'page': page},
            timeout=REQUEST_TIMEOUT,
        )
        # An error payload is a non-empty JSON object; without these checks
        # it would be merged into the result and the loop would never end.
        rsp.raise_for_status()
        projects_page = rsp.json()
        if not isinstance(projects_page, list):
            raise ValueError('接口返回的不是项目列表: {!r}'.format(projects_page))
        if not projects_page:
            return projects
        projects.extend(projects_page)
        page += 1


@time_me()
def main(group_id=DEFAULT_GROUP_ID):
    """Export all projects of ``group_id`` to an Excel workbook.

    Args:
        group_id: Id of the GitLab group to export.
    """
    try:
        projects = list_all_projects(group_id)
        result_excel_init()
        print('项目总数 {}\n'.format(len(projects)))
        for project in projects:
            print(project)
            # .get() leaves the cell empty instead of aborting the whole
            # export when a project lacks one of the fields.
            sheet.append([project.get(field) for field in PROJECT_FIELDS])
        os.makedirs(OUTPUT_DIR, exist_ok=True)
        wb_result.save(os.path.join(OUTPUT_DIR, 'projects-info-{}.xlsx'.format(group_id)))
        print('\n执行导出结果完成\n')
    except Exception as e:
        # Top-level boundary of the script: report the failure and exit normally.
        print('查询异常:', e)


if __name__ == '__main__':
    main(DEFAULT_GROUP_ID)
第二步:读取文件,批量拉取
#!/usr/bin/python3
# -*- coding: UTF-8 -*-
"""Clone every project listed in the exported Excel workbook, in parallel.

Each data row of the workbook describes one project. Rows are handed to a
thread pool whose workers run ``git clone`` inside ``base_code_path``.
"""
import os
import os.path
import subprocess
import sys
import requests
import time
import functools
import threading
import logging
import openpyxl
from requests_pkcs12 import Pkcs12Adapter
from concurrent.futures import ThreadPoolExecutor, wait, ALL_COMPLETED

# Certificate-authenticated HTTP session. NOTE(review): `session` is not
# referenced anywhere else in this script; kept so module setup is unchanged.
session = requests.session()
BASE_HOST = 'https://gitlab.co-xxxxxx.com'
HEADERS = {'PRIVATE-TOKEN': 'xxxxxxx'}
session.mount(BASE_HOST, Pkcs12Adapter(pkcs12_filename='cert/client-xxxxxx.p12', pkcs12_password='xxxxxx'))


def time_me(info='耗时'):
    """Build a decorator that prints how long the wrapped call took.

    Args:
        info: Label printed between the function name and the elapsed time.

    Returns:
        A decorator. The wrapped function's return value is passed through.
    """
    def _time_me(fn):
        @functools.wraps(fn)
        def _wrapper(*args, **kwargs):
            start = time.perf_counter()
            result = fn(*args, **kwargs)
            print('{} {} {}'.format(fn.__name__, info, int(time.perf_counter() - start)), ' 秒')
            return result
        return _wrapper
    return _time_me


def _cell_text(cell):
    """Return the cell value as stripped text, or '' when the cell is empty."""
    value = cell.value
    return '' if value is None else str(value).strip()


def batch_clone(name: str, namespace: str, default_branch: str, ssh_pash: str):
    """Clone one repository into ``base_code_path`` unless it is already there.

    Reads the module-level ``base_code_path``.

    Args:
        name: Project name; used as the directory name only when
            ``namespace`` has no usable last segment.
        namespace: Project path including its namespace; its last segment
            is the directory the repository is cloned into.
        default_branch: Branch to check out; when empty, ``-b`` is omitted
            and git picks the remote's default branch.
        ssh_pash: SSH URL of the repository.

    Returns:
        ``namespace`` when the repository was cloned or already existed,
        ``None`` when the clone failed.
    """
    worker = threading.current_thread().name
    try:
        # git names the checkout after the repository path, which can differ
        # from the project's display name, so the existence check and the
        # clone target are both derived from the path.
        repo_dir = namespace.rstrip('/').rsplit('/', 1)[-1] or name
        target = os.path.join(base_code_path, repo_dir)
        if os.path.exists(target):
            return namespace

        print('{} 开始拉取 [{}]'.format(worker, namespace))
        command = ['git', 'clone']
        if default_branch:
            command += ['-b', default_branch]
        # '--' stops option parsing so spreadsheet values are never read as flags.
        command += ['--', ssh_pash, target]
        # Argument list with no shell: values cannot be interpreted as shell
        # syntax. stderr is merged because git reports progress and errors there.
        completed = subprocess.run(
            command,
            cwd=base_code_path,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=True,
        )
        print(completed.stdout)
        if completed.returncode != 0:
            print('{} 拉取 [{}]失败, 退出码 {}'.format(worker, namespace, completed.returncode))
            return None
        print('{} 拉取 [{}]完毕'.format(worker, namespace))
        return namespace
    except Exception as e:
        # Worker boundary: report and continue so one bad row cannot abort the batch.
        print('\nclone 异常 {},{} \n'.format(namespace, ssh_pash), e)
        return None


@time_me()
def main():
    """Submit one clone task per worksheet row and wait for all of them.

    Reads the module-level ``ws_projects``, ``excluded_project`` and
    ``max_workers``. Processing stops at the first row whose id cell is empty.
    """
    all_task = []
    # The context manager shuts the pool down even if submitting a row fails.
    with ThreadPoolExecutor(max_workers=max_workers, thread_name_prefix='git-clone-exec') as pool:
        for i, row in enumerate(ws_projects.rows):
            if i == 0:
                continue  # header row
            if not row[0].value:
                break
            _pid = _cell_text(row[0])
            _name = _cell_text(row[1])
            _namespace = _cell_text(row[2])
            _ssh_url = _cell_text(row[4])
            # Empty cell -> '' (not the string 'None'), so no bogus branch is requested.
            _default_branch = _cell_text(row[5])
            if int(_pid) not in excluded_project:
                time.sleep(0.05)  # stagger submissions slightly
                all_task.append(pool.submit(batch_clone, _name, _namespace, _default_branch, _ssh_url))
        # Wait for every clone to finish
        wait(all_task, return_when=ALL_COMPLETED)
    print("\n------所有项目拉取完毕--------")
    print([task.result() for task in all_task])


if __name__ == '__main__':
    # Workbook produced by the export script (group id 551)
    project_excel_file = 'excel/projects-info-551.xlsx'
    wb = openpyxl.load_workbook(project_excel_file)
    ws_projects = wb['Sheet']
    # Directory the repositories are cloned into
    base_code_path = '/Users/liurenkui/workSpace/Java/all-code'
    os.chdir(base_code_path)
    # Ids of projects that must not be cloned
    excluded_project = []
    # Maximum number of worker threads
    max_workers = 20
    # Run
    main()
未经允许请勿转载:程序喵 » Python3 多线程批量拉取 Gitlab 项目代码