23.12.09.(SAT).TIL.
NAVER CLOVA OCR
NAVER CLOVA OCR
import requests
import uuid
import time
import base64
import json
from google.colab import drive
# Mount Google Drive so that image_file may point at a file stored there.
drive.mount('/content/drive')

# Placeholders: fill in the OCR endpoint, its secret key and the image path.
api_url = 'api_url'
secret_key = 'secret_key'
image_file = 'image_file'

# The image travels inside the JSON body, so read it as raw bytes first.
with open(image_file, 'rb') as image_handle:
    file_data = image_handle.read()

encoded_image = base64.b64encode(file_data).decode()
image_entry = {
    'format': 'jpg',
    'name': 'demo',
    'data': encoded_image,
}

request_json = {
    'images': [image_entry],
    'requestId': str(uuid.uuid4()),
    'version': 'V2',
    # Current time expressed in milliseconds.
    'timestamp': int(round(time.time() * 1000)),
}
payload = json.dumps(request_json).encode('UTF-8')

headers = {
    'X-OCR-SECRET': secret_key,
    'Content-Type': 'application/json',
}

response = requests.request("POST", api_url, data=payload, headers=headers)

# Show the response body exactly as the service returned it.
print(response.text)
import requests
import uuid
import time
import base64
import json
from google.colab import drive
# Mount Google Drive so that image_file may point at a file stored there.
drive.mount('/content/drive')

# Placeholders: fill in the OCR endpoint, its secret key and the image path.
api_url = 'api_url'
secret_key = 'secret_key'
image_file = 'image_file'

with open(image_file, 'rb') as image_handle:
    file_data = image_handle.read()

# The request carries a single base64-encoded image.
encoded_image = base64.b64encode(file_data).decode()
request_json = {
    'images': [
        {'format': 'jpg', 'name': 'demo', 'data': encoded_image},
    ],
    'requestId': str(uuid.uuid4()),
    'version': 'V2',
    'timestamp': int(round(time.time() * 1000)),  # milliseconds
}
payload = json.dumps(request_json).encode('UTF-8')

headers = {
    'X-OCR-SECRET': secret_key,
    'Content-Type': 'application/json',
}
response = requests.post(api_url, data=payload, headers=headers)

# Print every recognized string on a single line, with no separator.
ocr_fields = response.json()['images'][0]['fields']
recognized_text = ''.join(entry['inferText'] for entry in ocr_fields)
print(recognized_text)
import requests
import uuid
import time
import base64
import json
from google.colab import drive
# Mount Google Drive so that image_file may point at a file stored there.
drive.mount('/content/drive')

# Placeholders: fill in the OCR endpoint, its secret key and the image path.
api_url = 'api_url'
secret_key = 'secret_key'
image_file = 'image_file'

with open(image_file, 'rb') as image_handle:
    file_data = image_handle.read()

# The request carries a single base64-encoded image.
encoded_image = base64.b64encode(file_data).decode()
request_json = {
    'images': [
        {'format': 'jpg', 'name': 'demo', 'data': encoded_image},
    ],
    'requestId': str(uuid.uuid4()),
    'version': 'V2',
    'timestamp': int(round(time.time() * 1000)),  # milliseconds
}
payload = json.dumps(request_json).encode('UTF-8')

headers = {
    'X-OCR-SECRET': secret_key,
    'Content-Type': 'application/json',
}
response = requests.post(api_url, data=payload, headers=headers)

# Print only the recognized strings that contain the keyword '일시',
# joined together with no separator.
ocr_fields = response.json()['images'][0]['fields']
recognized_text = ''.join(
    entry['inferText'] for entry in ocr_fields if '일시' in entry['inferText']
)
print(recognized_text)
import requests
import uuid
import time
import base64
import json
from google.colab import drive
# Mount Google Drive so that image_file may point at a file stored there.
drive.mount('/content/drive')

# Placeholders: fill in the OCR endpoint, its secret key and the image path.
api_url = 'api_url'
secret_key = 'secret_key'
image_file = 'image_file'

with open(image_file, 'rb') as image_handle:
    file_data = image_handle.read()

# The request carries a single base64-encoded image.
encoded_image = base64.b64encode(file_data).decode()
request_json = {
    'images': [
        {'format': 'jpg', 'name': 'demo', 'data': encoded_image},
    ],
    'requestId': str(uuid.uuid4()),
    'version': 'V2',
    'timestamp': int(round(time.time() * 1000)),  # milliseconds
}
payload = json.dumps(request_json).encode('UTF-8')

headers = {
    'X-OCR-SECRET': secret_key,
    'Content-Type': 'application/json',
}
response = requests.post(api_url, data=payload, headers=headers)

# Keep only the recognized strings that contain the keyword '일시'.
ocr_fields = response.json()['images'][0]['fields']
recognized_text = ''.join(
    entry['inferText'] for entry in ocr_fields if '일시' in entry['inferText']
)

# Take the text before the first space and the text after the last space.
start_time = recognized_text.partition(' ')[0]
end_time = recognized_text.rpartition(' ')[2]

# Print the pair in "start~end" form.
print(f"{start_time}~{end_time}")
import requests
import uuid
import time
import base64
import json
from google.colab import drive
import re

# Placeholders: fill in the OCR endpoint, its secret key and the image path.
api_url = 'api_url'
secret_key = 'secret_key'
image_file = 'image_file'

# Date, optional weekday token, start time, '~', end time.
# Example: "2023.12.09.(토) 14:00~16:00".
_SCHEDULE_RE = re.compile(
    r"(?P<year>\d{4})\.(?P<month>\d{2})\.(?P<day>\d{2})\.?\s*"
    r"(?P<weekday>[^\s\d~]*)\s*"
    r"(?P<start_hour>\d{1,2}):(?P<start_minute>\d{2})\s*~"
    r"[^~]*?(?P<end_hour>\d{1,2}):(?P<end_minute>\d{2})"
)


def parse_schedule(text):
    """Extract date, weekday, start time and end time from a schedule string.

    The pattern is searched anywhere in *text*, so a leading label such as
    '일시' does not prevent a match, and the weekday and time do not have to
    be separated by a space.

    Args:
        text: Recognized text, e.g. ``"일시 2023.12.09.(토) 14:00~16:00"``.

    Returns:
        A dict of strings with the keys ``year``, ``month``, ``day``,
        ``weekday``, ``start_hour``, ``start_minute``, ``end_hour`` and
        ``end_minute``; ``weekday`` is ``''`` when no weekday token is
        present.  ``None`` when *text* does not contain a full
        "date time~time" schedule.
    """
    found = _SCHEDULE_RE.search(text)
    if found is None:
        return None
    return found.groupdict()


def main():
    """Send the image to the OCR endpoint and print the parsed schedule."""
    # Mount Google Drive so that image_file may point at a file stored there.
    drive.mount('/content/drive')

    with open(image_file, 'rb') as f:
        file_data = f.read()

    request_json = {
        'images': [
            {
                'format': 'jpg',
                'name': 'demo',
                'data': base64.b64encode(file_data).decode(),
            },
        ],
        'requestId': str(uuid.uuid4()),
        'version': 'V2',
        'timestamp': int(round(time.time() * 1000)),  # milliseconds
    }
    payload = json.dumps(request_json).encode('UTF-8')
    headers = {
        'X-OCR-SECRET': secret_key,
        'Content-Type': 'application/json',
    }
    response = requests.post(api_url, headers=headers, data=payload)

    # Keep only the recognized strings that contain the keyword '일시'.
    recognized_text = ''.join(
        field['inferText']
        for field in response.json()['images'][0]['fields']
        if '일시' in field['inferText']
    )

    schedule = parse_schedule(recognized_text)
    if schedule is None:
        print("Invalid date format")
        # Same effect as exit(), without depending on the helper that the
        # `site` module injects for interactive use.
        raise SystemExit

    # Print each component on its own labelled line.
    print(f"년 : {schedule['year']}")
    print(f"월 : {schedule['month']}")
    print(f"일 : {schedule['day']}")
    print(f"요일 : {schedule['weekday']}")
    print(f"시작 시간 : {schedule['start_hour']}:{schedule['start_minute']}")
    print(f"종료 시간 : {schedule['end_hour']}:{schedule['end_minute']}")


if __name__ == "__main__":
    main()
import requests
import uuid
import time
import base64
import json
from google.colab import drive
# Placeholders: fill in the OCR endpoint, its secret key and the image path.
api_url = 'api_url'
secret_key = 'secret_key'
image_file = 'image_file'


def extract_schedule_text(fields):
    """Return the text starting at the first '일시' keyword, trimmed at the end.

    Only the first field whose ``'inferText'`` contains '일시' is used.  The
    result starts at the keyword and has trailing characters removed until
    it ends on an alphabetic character (``str.isalpha``, which covers
    Hangul as well as other letters).

    Args:
        fields: Iterable of mappings, each with a string under
            ``'inferText'``.

    Returns:
        The trimmed text, or ``''`` when no field contains the keyword.
    """
    for field in fields:
        text = field['inferText']
        start_index = text.find('일시')
        if start_index != -1:
            candidate = text[start_index:]
            break
    else:
        # No field matched: return an empty result instead of indexing into
        # an empty string.
        return ''

    # Walk back from the end with an index rather than re-slicing the string
    # on every step; the bound check keeps this safe for any input.
    end = len(candidate)
    while end and not candidate[end - 1].isalpha():
        end -= 1
    return candidate[:end]


def main():
    """Send the image to the OCR endpoint and print the '일시' text."""
    # Mount Google Drive so that image_file may point at a file stored there.
    drive.mount('/content/drive')

    with open(image_file, 'rb') as f:
        file_data = f.read()

    request_json = {
        'images': [
            {
                'format': 'jpg',
                'name': 'demo',
                'data': base64.b64encode(file_data).decode(),
            },
        ],
        'requestId': str(uuid.uuid4()),
        'version': 'V2',
        'timestamp': int(round(time.time() * 1000)),  # milliseconds
    }
    payload = json.dumps(request_json).encode('UTF-8')
    headers = {
        'X-OCR-SECRET': secret_key,
        'Content-Type': 'application/json',
    }
    response = requests.post(api_url, headers=headers, data=payload)

    recognized_text = extract_schedule_text(
        response.json()['images'][0]['fields']
    )
    print(recognized_text)


if __name__ == "__main__":
    main()
'Coding > TIL' 카테고리의 다른 글
TIL | #13 | NAVER CLOVA SUMMARY API | 23.12.11.(월) (0) | 2023.12.13 |
---|---|
TIL | #12 | NAVER CLOVA OCR | 23.12.10.(일) (0) | 2023.12.13 |
TIL | #10 | NAVER CLOVA OCR | 23.12.08.(금) (1) | 2023.12.10 |
TIL | #09 | 랭체인(LangChain) | 23.12.07.(목) (2) | 2023.12.08 |
TIL | #08 | ChatGPT API 활용 카카오톡 챗봇 | 23.12.06.(수) (1) | 2023.12.07 |