PyGithub: search for and read specific files

297 Views Asked by At

I am using PyGithub to go through the files in a GitHub repository. The problem is that, starting from my_code.get_contents(""), this code goes through each and every file in all the folders and subfolders of the repo. Is there a way to make this code more efficient? I am only interested in parsing the .csproj files and the packages.config files, wherever they are found, but these files are scattered across multiple places.

from github import Github
import pathlib
import xml.etree.ElementTree as ET

def processFilesInGitRepo():
    """Walk the queued repository entries and parse the files of interest.

    Consumes the module-level ``contents`` list from the front. Directory
    entries are expanded by asking ``my_code`` for their children, which are
    appended to the same list. For file entries, ``packages.config`` files
    are handed to ``parseXMLInPackagesConfig`` and ``.csproj`` files to
    ``parseXMLInCsProj`` as decoded text. Every file entry is printed.
    """
    while contents:
        entry = contents.pop(0)

        # Directories are not parsed; their children are queued instead.
        if entry.type == 'dir':
            contents.extend(my_code.get_contents(entry.path))
            continue

        entry_path = pathlib.Path(entry.path)

        if entry_path.name == 'packages.config':
            parseXMLInPackagesConfig(entry.decoded_content.decode())

        if entry_path.suffix == '.csproj':
            parseXMLInCsProj(entry.decoded_content.decode())

        print(entry)


# Client for the GitHub API; "MyToken" is the access-token placeholder.
my_git = Github("MyToken")

# Repository whose files are scanned.
my_code = my_git.get_repo("BeclsAutomation/Echo65XPlus")

# An empty string, i.e. (""), requests the items of the repository.
# Open question: can some kind of search term be specified here, saying that
# only .csproj and packages.config files are needed?
contents = my_code.get_contents("")

processFilesInGitRepo()
1

There is 1 best solution below

0
Dayananda D R On BEST ANSWER

With PyGithub we can also fetch the content of a specific file on a specific branch.

from github import Github

def connect_to_github(token: str = "PAT TOKEN") -> "Github":
    """Create a PyGithub client authenticated with a personal access token.

    Args:
        token: Personal access token passed to ``Github``. The default is a
            placeholder and has to be replaced with a real token.

    Returns:
        The ``Github`` client instance.
    """
    # No host/base URL is passed on: the client is built from the token only.
    return Github(token)

if __name__ == "__main__":
    # Look up the repository, then fetch a single file from the given branch.
    client = connect_to_github()
    repo = client.get_repo("<organisation>/<project-name>")
    file_entry = repo.get_contents(
        path="<path of the file>",
        ref="<branch name>",
    )
    # decoded_content is bytes; decode it to text before printing.
    print(file_entry.decoded_content.decode('utf-8'))