Using the GitLab API in Python, how do I retrieve a file from a specific commit?

41 Views Asked by At

For a project, I am trying to retrieve every commit and, for each updated file, store the entire file (without the commit/diff syntax, just the vanilla file) along with which lines were updated. I am using the GitLab API in Python. While I can get the updated lines, I struggle to retrieve the file's complete contents as of the time of the commit.

Here is a snippet showing how I try to retrieve the files. The issue really lies only in `__get_file_content`; everything else works as I intend it to.

def __get_file_content(self, project, commit_id, file_path):
        """Fetch the contents of ``file_path`` as stored at ``commit_id``.

        Args:
            project: Object exposing ``files.get(file_path=..., ref=...)``.
            commit_id: Reference passed to the lookup as ``ref``.
            file_path: Repository path of the file to fetch.

        Returns:
            Whatever ``decode()`` yields on the fetched file object, or
            ``None`` when the lookup or the decoding raises.
        """
        # NOTE(review): python-gitlab documents ProjectFile.decode() as
        # returning bytes, not str -- confirm callers expect bytes.
        try:
            # Fetch and decode in one expression so that a failure in either
            # step is reported by the same handler below.
            return project.files.get(file_path=file_path, ref=commit_id).decode()
        except Exception as e:
            # Deliberate best-effort: report the failure and keep going.
            print(f"Error fetching file content: {e}, {file_path}, {commit_id}, {project}")
            return None

    
    def generate_commit_dict(self, commits, project):
        """Collect, per commit, the changed files and their contents.

        Args:
            commits: Iterable of commit objects exposing ``diff(get_all=True)``,
                ``created_at``, ``short_id`` and ``id``.
            project: Project handle forwarded to ``__get_file_content``.

        Returns:
            A dict keyed by each commit's ``short_id``. Every value is a dict
            with ``"commit_time"`` (the commit's ``created_at``) and
            ``"commits"``, a list holding one entry per diff with the keys
            ``'file'``, ``'type'``, ``'change'`` and ``'updated_lines'``.
            ``'change'`` is ``None`` for files the commit deleted and for
            files whose content could not be fetched.
        """
        commit_diffs = {}
        for commit in commits:
            # The short id remains the key of the result so existing consumers
            # are unaffected; the full SHA is used as the lookup ref because
            # an abbreviated hash is not guaranteed to resolve unambiguously.
            short_id = commit.short_id
            full_sha = commit.id

            changed_files = []
            for diff in commit.diff(get_all=True):
                path = diff['new_path']

                if diff.get('deleted_file'):
                    # The file no longer exists at this commit, so fetching it
                    # can only fail; skip the request and record no content.
                    content = None
                else:
                    content = self.__get_file_content(project, full_sha, path)

                changed_files.append({
                    'file': path,
                    'type': self.get_file_extension(path),
                    'change': content,
                    'updated_lines': self.__get_commit_diff_contents(diff['diff']),
                })

            commit_diffs[short_id] = {
                "commit_time": commit.created_at,
                "commits": changed_files,
            }
        return commit_diffs

# Look up the project; project_id is coerced to int before the lookup.
project = gl.projects.get(int(project_id))
# presumably returns the commits authored by gitlab_user in this project --
# get_commits is not shown here; verify against its definition.
commits = get_commits(gitlab_user, project)

# NOTE(review): generate_commit_dict is declared above with a `self` parameter
# but is called here as a bare function -- confirm it is invoked on an
# instance in the real code.
user_commit_contents = generate_commit_dict(commits, project)
0

There are 0 best solutions below