<**
extract text from all the images in a folder
storing the text in a single file
from PIL import Image import pytesseract as pt import pandas as pd from tabulate import tabulate from io import StringIO import os import json import csv
def main(): # path for the folder for getting the raw images path ="E:/mehr mtech p1/images/"
# link to the file in which output needs to be kept
fullTempPath ="E:/mehr mtech p1/out.txt"
# iterating the images inside the folder
for imageName in os.listdir(path):
inputPath = os.path.join(path, imageName)
img = Image.open(inputPath)
#print(imageName)
# applying ocr using pytesseract for python
pt.pytesseract.tesseract_cmd = r'C:/Program Files/Tesseract-OCR/tesseract.exe'
text = pt.image_to_string(img, lang ="eng")
#print(text)
dictionary = {'image': imageName, 'Text': text}
print(dictionary)
#Create a datafrmae from the dictionary
df = pd.DataFrame(dictionary, index=[0])
#print dataframe.
#print(df)
#print(tabulate(df, headers = 'keys', tablefmt = 'psql'))
#Creating a string of the dictionary to print the data with labels in string format in the txt file
#string = json.dumps(dictionary)
#f1 = open("E:/mehr mtech p1/mmyfile.txt","a+")
#f1.write(string)
#df = pd.read_csv(string, sep =";")
#print(df)
df.to_csv("E:/mehr mtech p1/tableimage.csv")
# saving the text for appending it to the output.txt file
# a + parameter used for creating the file if not present
# and if present then append the text content
file1 = open(fullTempPath, "a+")
# providing the name of the image
file1.write(imageName+"\n")
# providing the content in the image
file1.write(text+"\n")
file1.close()
# for printing the output file
file2 = open(fullTempPath, 'r')
print(file2.read())
file2.close()
if name == 'main': main()
**>
the extracted text was converted into a dataframe by first converting it into a dictionary. but while converting that dataframe to a csv file and transferring data to excel file..only 1record i.e., only text of 1 image is coming in csv file..what to do now
the dataframe is coming in this way