Upload large file (>100 MB) directly to GitHub with PyGithub


I am using PyGithub to upload files to my repo, but some of the files are so large that the server connection times out. My code to upload/overwrite the files from a folder is:

from os import listdir
from os.path import isfile, join

# `g` is an authenticated github.Github instance and `user` is g.get_user(),
# both created elsewhere in the notebook.

def commit(folder):
    foldername = folder.split("/")[-1]
    onlyfiles = [f for f in listdir(folder) if isfile(join(folder, f))]

    repo = g.get_repo(user.login + "/My-repo")

    # Collect the path of every file already in the repository, so we can
    # decide between create_file and update_file below.
    all_files = []
    contents = repo.get_contents("")
    while contents:
        file_content = contents.pop(0)
        if file_content.type == "dir":
            contents.extend(repo.get_contents(file_content.path))
        else:
            all_files.append(file_content.path)

    body = '''
    Line 1: Message
    Line 2: Sample Text
    Line 3: yet another line
    '''
    for i in onlyfiles:
        print(i)
        with open(join(folder, i), "rb") as input_file:
            data = input_file.read()
        if f"{foldername}/{i}" not in all_files:
            repo.create_file(f"{foldername}/{i}", "Created building data", data)
        else:
            file = repo.get_contents(f"{foldername}/{i}")
            repo.update_file(file.path, "Updated information", data, file.sha)

This code works for files smaller than 25 MB, but for larger ones I get this error:

---------------------------------------------------------------------------

GithubException                           Traceback (most recent call last)

<ipython-input-9-7d41473c81a0> in <module>()
     79 
     80 
---> 81 commit(str("/content/"+dirname))

3 frames

<ipython-input-9-7d41473c81a0> in commit(folder)
     72         input_file.close()
     73         if not(f"{foldername}/{i}" in all_files):
---> 74             repo.create_file(f"{foldername}/{i}", "Created building data", data)
     75         else:
     76             file = repo.get_contents(f"{foldername}/{i}")

/usr/local/lib/python3.7/dist-packages/github/Repository.py in create_file(self, path, message, content, branch, committer, author)
   2091             "PUT",
   2092             f"{self.url}/contents/{urllib.parse.quote(path)}",
-> 2093             input=put_parameters,
   2094         )
   2095 

/usr/local/lib/python3.7/dist-packages/github/Requester.py in requestJsonAndCheck(self, verb, url, parameters, headers, input)
    353         return self.__check(
    354             *self.requestJson(
--> 355                 verb, url, parameters, headers, input, self.__customConnection(url)
    356             )
    357         )

/usr/local/lib/python3.7/dist-packages/github/Requester.py in __check(self, status, responseHeaders, output)
    376         output = self.__structuredFromJson(output)
    377         if status >= 400:
--> 378             raise self.__createException(status, responseHeaders, output)
    379         return responseHeaders, output
    380 

GithubException: 502 {"message": "Server Error"}

I am aware that the upload limit through GitHub's web interface is 25 MB, but apparently files up to 100 MB can be pushed via the command line. How would I upload files larger than this to GitHub using PyGithub? The file is zipped, so it really is as small as it can be, but it is still ~150 MB. Is this doable? If not, is there a way to reference a larger file in GitHub which I can upload elsewhere? I am using Google Colab, in case anyone is wondering.
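
For the "upload elsewhere and reference it" idea, one option I am considering is attaching the archive to a GitHub release, since release assets are not subject to the 100 MB per-file limit of the contents API. Below is only a minimal, untested sketch of what I mean, using the same repo object as above; the tag name, release title, and file path are placeholders, not my actual values.

# Minimal sketch (untested): attach the large zip to a release instead of
# committing it through the contents API. `repo` is the same Repository
# object as in commit() above; the tag, title, and path are placeholders.
release = repo.create_git_release(
    "building-data-v1",              # tag name (placeholder)
    "Building data archive",         # release title (placeholder)
    "Large zipped building data",    # release notes
)
release.upload_asset("/content/building_data.zip")  # placeholder path
print(release.html_url)  # URL I could link from the repo's README

The idea would be to keep only a small README entry or pointer file in the repository itself that links to the asset URL, but I don't know whether this is the recommended approach.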
