文字识别 OCR-识别结果后处理:提取特定字段导入Excel

时间:2025-02-12 15:02:41

提取特定字段导入Excel

本示例调用身份证识别API,并从获取到的JSON结果中,提取所需的字段,填入至Excel。

  • 前提条件
    • 开通身份证识别
    • 参考本地调用,安装OCR Python SDK,并执行pip install xlsxwriter命令安装依赖包。
    • 登录访问密钥页面,获取AK、SK。可以新增访问密钥,或使用已有的访问密钥。访问密钥为credentials.csv文件,包含AK/SK信息。

  • 代码示例
    # -*- coding: utf-8 -*-
    """Recognize an ID card image with the OCR service and export the fields to Excel.

    The helper functions read the module-level names ``ak``, ``sk``,
    ``credentials``, ``client`` and ``image_base64`` that are assigned in the
    ``__main__`` section at the bottom of the script.
    """
    import base64
    import json

    import xlsxwriter
    from huaweicloudsdkcore.auth.credentials import BasicCredentials
    from huaweicloudsdkcore.exceptions import exceptions
    from huaweicloudsdkcore.http.http_config import HttpConfig
    # The wildcard import supplies OcrClient, RecognizeIdCardRequest and
    # IdCardRequestBody used below.
    from huaweicloudsdkocr.v1 import *
    from huaweicloudsdkocr.v1.region.ocr_region import OcrRegion


    def recognize_id_card_request():
        """Send the module-level ``image_base64`` to the ID card recognition API.

        :return: the SDK response object, or ``None`` when the request fails
            with ``ClientRequestException`` (the error details are printed).
        """
        request = RecognizeIdCardRequest()
        request.body = IdCardRequestBody(
            image=image_base64
        )
        try:
            return client.recognize_id_card(request)
        except exceptions.ClientRequestException as e:
            print(e.status_code)
            print(e.request_id)
            print(e.error_code)
            print(e.error_msg)
            return None


    def get_credential():
        """Build SDK credentials from the module-level ``ak`` and ``sk``."""
        return BasicCredentials(ak, sk)


    def get_client():
        """Create an ``OcrClient`` for the CN_NORTH_4 region.

        Uses the module-level ``credentials``.
        """
        config = HttpConfig.get_default_config()
        # NOTE(review): this turns off TLS certificate verification for signed
        # API requests. Kept to preserve the sample's behavior; remove this line
        # (or set it to False) outside of local testing.
        config.ignore_ssl_verification = True
        return OcrClient.new_builder(OcrClient) \
            .with_credentials(credentials) \
            .with_region(OcrRegion.CN_NORTH_4) \
            .with_http_config(config) \
            .build()


    def image_to_base64(imagepath):
        """Read a local image file and return its content as a base64 string.

        :param imagepath: path of the image file to read
        :return: base64-encoded file content, decoded as UTF-8 text
        """
        with open(imagepath, "rb") as bin_data:
            image_data = bin_data.read()
        return base64.b64encode(image_data).decode("utf-8")


    def _to_cell_value(value):
        """Return *value* in a form that can be written to a single cell.

        Nested containers are serialized to JSON text because a worksheet cell
        cannot hold a dict or a list; every other value is returned unchanged.
        """
        if isinstance(value, (dict, list, tuple)):
            return json.dumps(value, ensure_ascii=False)
        return value


    def response_to_execl(save_file, data):
        """Write the ``result`` fields of an API response into an Excel file.

        Field names go into column A and the matching values into column B,
        one field per row, starting at row 1.

        :param save_file: path of the .xlsx file to create
        :param data: response converted to a dict; must contain a ``"result"``
            mapping of field name to value
        """
        result = data["result"]
        keys_list = list(result)
        values_list = [_to_cell_value(value) for value in result.values()]

        options = {'in_memory': True}
        # The context manager closes (and thereby saves) the workbook on exit,
        # so no explicit close() call is needed afterwards.
        with xlsxwriter.Workbook(save_file, options) as workbook:
            worksheet = workbook.add_worksheet()
            # Column widths: narrow column for field names, wide one for values.
            worksheet.set_column('A:A', 23)
            worksheet.set_column('B:B', 100)
            worksheet.write_column('A1', keys_list)
            worksheet.write_column('B1', values_list)


    if __name__ == '__main__':
        # Fill in the access key pair (AK / SK). Avoid committing real keys.
        ak = "填写AK"
        sk = "填写SK"

        # Init auth info
        credentials = get_credential()

        # Create OcrClient
        client = get_client()

        image_base64 = image_to_base64(r"图片的路径,例如D:\local\test.png")

        # Request the ID card service
        response = recognize_id_card_request()
        if response is None:
            # The request failed and its error details were already printed.
            raise SystemExit(1)

        # Save the data to Excel
        response_to_execl(r"excel路径,例如D:\local\test.xlsx", response.to_dict())
support.huaweicloud.com/usermanual-ocr/ocr_08_0010.html