ga('set', 'anonymizeIp', 1);
How to implement OCR in Python. Advanced: identifying non-English text.
pip install pytesseract
from PIL import Image
import pytesseract
def main():
    """Run English OCR on a sample image and print the recognized text.

    Points pytesseract at the Tesseract executable, opens the image with
    Pillow, and prints the string returned by ``image_to_string``.
    """
    # Must match the location where the Tesseract executable was installed.
    pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
    # Change the image path to your own. The context manager closes the
    # underlying file once OCR has finished.
    with Image.open(r"image\testIMG.jpg") as img:
        # img.show()  # uncomment to preview the image before running OCR
        print(pytesseract.image_to_string(img, lang="eng"))


if __name__ == "__main__":
    main()
If you need to recognize non-English text, find the pre-trained language data for that language and download it into the "tessdata" folder under your Tesseract installation path.
Only "eng.traineddata" is included by default.
Then modify the code:
from PIL import Image
import pytesseract
import cv2
from pytesseract import Output
def main():
    """Run OCR on an image containing Traditional Chinese and English text.

    Loads ``test.png`` with OpenCV, converts it to RGB, and prints the text
    recognized by Tesseract using the combined ``chi_tra+eng`` languages.
    The ``chi_tra`` trained data must be present in Tesseract's ``tessdata``
    folder.

    Raises:
        FileNotFoundError: If ``test.png`` cannot be read by OpenCV.
    """
    # Must match the location where the Tesseract executable was installed.
    pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

    img = cv2.imread('test.png')
    # cv2.imread signals failure by returning None instead of raising.
    if img is None:
        raise FileNotFoundError("Could not read image 'test.png'")

    # OpenCV stores pixels as BGR, while pytesseract assumes RGB input.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Assume we want to identify some Traditional Chinese text.
    text = pytesseract.image_to_string(img, lang='chi_tra+eng')
    print(text)


if __name__ == "__main__":
    main()