1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
|
import json from tencentcloud.common import credential from tencentcloud.common.profile.client_profile import ClientProfile from tencentcloud.common.profile.http_profile import HttpProfile from tencentcloud.common.exception.tencent_cloud_sdk_exception import TencentCloudSDKException from tencentcloud.ocr.v20181119 import ocr_client, models import base64 import os import time
""" 参考资料: API 中心: https://cloud.tencent.com/document/api/866/33517 API Explorer: https://console.cloud.tencent.com/api/explorer?Product=ocr&Version=2018-11-19&Action=GeneralBasicOCR&SignVersion= """
# Wall-clock timestamp taken when the script starts; it is subtracted from the
# end timestamp at the bottom of the script to report the total running time.
start = time.time()
def get_file_content(filePath):
    """Echo *filePath* to stdout and return the raw bytes stored in that file.

    The file is opened in binary mode, so the content is returned undecoded.
    """
    print(filePath)
    with open(filePath, 'rb') as handle:
        raw_bytes = handle.read()
    return raw_bytes
def write_on_txt(content, filePath, linefeed="1"):
    """Append *content* to the text file at *filePath*.

    Args:
        content: Text to append.
        filePath: Path of the file to append to (opened in append mode).
        linefeed: Newline switch. The string "1" (the default) writes
            *content* only; any other value also writes a trailing newline
            after a successful write.

    A failure while writing *content* is reported on stdout and otherwise
    ignored, so one bad entry does not abort the caller. Errors raised while
    opening the file still propagate.
    """
    # UTF-8 is pinned explicitly: with the platform default encoding (a locale
    # code page on Windows) recognised text may be unencodable or end up in a
    # different encoding from one machine to the next.
    with open(filePath, "a", encoding="utf-8") as file:
        try:
            file.write(content)
        except Exception:
            # Best-effort by design, but no longer a bare ``except`` so that
            # KeyboardInterrupt / SystemExit are not swallowed.
            print("写入错误")
        else:
            if linefeed != "1":
                file.write("\n")
# Directory tree that is walked for input files, and the text file that
# collects the recognised text.
image_path = r"G:\Pictures\jy\test"
txt_path = r"G:\Pictures\jy\OCR\图片.txt"

# The credential, HTTP profile and client do not depend on the file being
# processed, so they are built once here instead of once per file.
# NOTE(review): the two strings are placeholders for the Tencent Cloud
# SecretId / SecretKey; avoid committing real keys to source control.
cred = credential.Credential("腾讯 api 的 id", "腾讯 api 的秘钥")
httpProfile = HttpProfile()
httpProfile.endpoint = "ocr.ap-chengdu.tencentcloudapi.com"

clientProfile = ClientProfile()
clientProfile.httpProfile = httpProfile
# NOTE(review): the endpoint names ap-chengdu while the region argument is
# ap-guangzhou; both are kept exactly as they were - confirm this is intended.
client = ocr_client.OcrClient(cred, "ap-guangzhou", clientProfile)

for root, dirs, files in os.walk(image_path):
    for file in files:
        file_dir = os.path.join(root, file)
        print(file_dir)
        # Every file gets a separator line and a header line in the output,
        # even when the OCR request for it fails afterwards.
        write_on_txt("=============================", txt_path, "0")
        write_on_txt("文件名:" + file_dir, txt_path, "0")

        # The API takes the image as base64 text inside a JSON request body.
        with open(file_dir, 'rb') as f:
            data = f.read()
        encodestr = str(base64.b64encode(data), 'utf-8')
        params = {
            "ImageBase64": encodestr,
            "LanguageType": "auto"
        }

        try:
            req = models.GeneralBasicOCRRequest()
            req.from_json_string(json.dumps(params))

            resp = client.GeneralBasicOCR(req)
            print()
            for text in resp.TextDetections:
                print(text.DetectedText)
                write_on_txt(text.DetectedText, txt_path, "0")
        except TencentCloudSDKException as err:
            # An SDK error for one file is reported and the walk continues
            # with the next file.
            print(err)

end = time.time()
print('Running time: %1.2f Seconds' % (end - start))
|