23.12.10.(SUN).TIL.

NAVER CLOVA OCR

NAVER CLOVA OCR

import requests
import uuid
import time
import base64
import json
from google.colab import drive

# Mount Google Drive at /content/drive in this Colab runtime.
drive.mount('/content/drive')

# Placeholder values: replace them with the real OCR endpoint URL, the secret
# key sent in the X-OCR-SECRET header, and the path of the image to recognise.
api_url = 'api_url'
secret_key = 'secret_key'
image_file = 'image_file'


# Read the raw image bytes; they are sent base64-encoded inside the JSON body.
with open(image_file, 'rb') as f:
    file_data = f.read()

request_json = {
    'images': [
        {
            'format': 'jpg',
            'name': 'demo',
            'data': base64.b64encode(file_data).decode(),
        },
    ],
    'requestId': str(uuid.uuid4()),  # fresh random identifier per request
    'version': 'V2',
    'timestamp': int(round(time.time() * 1000)),  # current time in milliseconds
}

payload = json.dumps(request_json).encode('UTF-8')
headers = {
    'X-OCR-SECRET': secret_key,
    'Content-Type': 'application/json',
}

# Without a timeout, requests waits forever on an unresponsive server.
response = requests.post(api_url, headers=headers, data=payload, timeout=30)
# Fail here with a clear HTTP error instead of a confusing KeyError later when
# the response body is indexed.
response.raise_for_status()

import re

# Gather the text of every OCR field that contains the "일시" (date/time)
# keyword, concatenated in the order the fields are returned.
recognized_text = ''.join(
    field['inferText']
    for field in response.json()['images'][0]['fields']
    if '일시' in field['inferText']
)

# Everything before the first '~' is the start; what follows is the end.
start_time, *end_time = recognized_text.split('~')

# Locate "YYYY.MM.DD.<weekday> HH:MM" anywhere in the start part.
# search() is used instead of match() because the collected text also contains
# the "일시" label, so the date is not guaranteed to sit at the very beginning.
# The dot after the day and the surrounding whitespace are optional so that
# small OCR spacing differences do not break the parse.
schedule = re.search(
    r"(\d{4})\.(\d{2})\.(\d{2})\.?\s*(\S*?)\s*(\d{1,2}):(\d{2})",
    start_time,
)
if schedule is None:
    print("Invalid date format")
    # exit() is a helper injected by the site module for interactive use;
    # raising SystemExit works everywhere and reports a failure status.
    raise SystemExit(1)

year, month, day, weekday, start_hour, start_minute = schedule.groups()

# The end time is optional: take the first HH:MM found after the '~'.
end_match = re.search(r"(\d{1,2}):(\d{2})", end_time[0]) if end_time else None
end_hour, end_minute = end_match.groups() if end_match else (None, None)

# Print the parsed components in the expected output format.
print(f"년 : {year}")
print(f"월 : {month}")
print(f"일 : {day}")
print(f"요일 : {weekday}")
print(f"시작 시간 : {start_hour}:{start_minute}")
if end_hour is not None:
    # Only report an end time when one was actually recognised.
    print(f"종료 시간 : {end_hour}:{end_minute}")

import requests
import uuid
import time
import base64
import json
from google.colab import drive

# Mount Google Drive at /content/drive in this Colab runtime.
drive.mount('/content/drive')

# Placeholder values: replace them with the real OCR endpoint URL, the secret
# key sent in the X-OCR-SECRET header, and the path of the image to recognise.
api_url = 'api_url'
secret_key = 'secret_key'
image_file = 'image_file'

# Read the raw image bytes; they are sent base64-encoded inside the JSON body.
with open(image_file, 'rb') as f:
    file_data = f.read()

request_json = {
    'images': [
        {
            'format': 'jpg',
            'name': 'demo',
            'data': base64.b64encode(file_data).decode(),
        },
    ],
    'requestId': str(uuid.uuid4()),  # fresh random identifier per request
    'version': 'V2',
    'timestamp': int(round(time.time() * 1000)),  # current time in milliseconds
}

payload = json.dumps(request_json).encode('UTF-8')
headers = {
    'X-OCR-SECRET': secret_key,
    'Content-Type': 'application/json',
}

# Bound the wait so an unresponsive server cannot hang the notebook cell.
response = requests.post(api_url, headers=headers, data=payload, timeout=30)
# Surface HTTP-level failures immediately rather than as a KeyError when the
# response body is indexed afterwards.
response.raise_for_status()

# Extract the date/time text: take the first OCR field that contains the
# "일시" keyword and keep everything from the keyword onwards.
recognized_text = ''
for field in response.json()['images'][0]['fields']:
    infer_text = field['inferText']
    start_index = infer_text.find('일시')
    if start_index != -1:
        recognized_text = infer_text[start_index:]
        break

# Drop trailing characters until the text ends with a letter (isalpha() accepts
# any alphabetic character, not only Hangul). The emptiness check prevents an
# IndexError when no field contained the keyword.
while recognized_text and not recognized_text[-1].isalpha():
    recognized_text = recognized_text[:-1]

print(recognized_text)

'Coding > TIL' 카테고리의 다른 글

TIL | #14 | NAVER CLOVA OCR / SUMMARY - Local | 23.12.12.(화) (0)	2023.12.13
TIL | #13 | NAVER CLOVA SUMMARY API | 23.12.11.(월) (0)	2023.12.13
TIL | #11 | NAVER CLOVA OCR | 23.12.09.(토) (2)	2023.12.10
TIL | #10 | NAVER CLOVA OCR | 23.12.08.(금) (1)	2023.12.10
TIL | #09 | 랭체인(LangChain) | 23.12.07.(목) (2)	2023.12.08

禹

TIL | #12 | NAVER CLOVA OCR | 23.12.10.(일)

23.12.10.(SUN).TIL.

NAVER CLOVA OCR

'Coding > TIL' 카테고리의 다른 글

티스토리툴바

TIL | #12 | NAVER CLOVA OCR | 23.12.10.(일)

23.12.10.(SUN).TIL.

NAVER CLOVA OCR

'Coding > TIL' 카테고리의 다른 글

'Coding/TIL' Related Articles

티스토리툴바