23.12.12.(TUE).TIL.
NAVER CLOVA OCR / SUMMARY - Local
NAVER CLOVA OCR / SUMMARY - Local
import requests
import json
import nltk
def get_text_from_image(image_path):
    """Upload an image to the Naver Clova OCR endpoint and return its text.

    Args:
        image_path: Path of the local image file to upload.

    Returns:
        The ``text`` field of the first recognition result, or ``None`` when
        the file cannot be read, the request fails, the server answers with a
        non-200 status, or the response body does not have the expected shape.
    """
    # Naver Clova OCR API URL
    url = "https://openapi.naver.com/v1/vision/ocr"
    # Request headers (placeholders: replace with real application credentials)
    headers = {
        "X-Naver-Client-Id": "YOUR_CLIENT_ID",
        "X-Naver-Client-Secret": "YOUR_CLIENT_SECRET",
    }
    try:
        # Upload the image *content* as multipart form data; sending only the
        # path string gives the server nothing to run OCR on.
        with open(image_path, "rb") as image_file:
            response = requests.post(
                url,
                headers=headers,
                files={"image": image_file},
                timeout=30,  # without a timeout requests can block forever
            )
    except OSError as exc:
        # Covers unreadable files; requests' own exceptions also derive from
        # OSError, so connection errors and timeouts land here as well.
        print(f"[ERROR] {exc}")
        return None

    if response.status_code != 200:
        print(f"[ERROR] {response.status_code}")
        return None

    try:
        return response.json()["result"]["recognition_results"][0]["text"]
    except (ValueError, KeyError, IndexError, TypeError) as exc:
        # Body was not JSON or did not contain the expected structure.
        print(f"[ERROR] unexpected OCR response: {exc!r}")
        return None
def analyze_text(text):
    """Tokenize *text*, POS-tag it with NLTK and bucket tokens by word class.

    Args:
        text: The string to analyze. ``None`` or an empty string is accepted
            and produces four empty lists instead of failing inside NLTK.

    Returns:
        A 4-tuple ``(nouns, adjectives, adverbs, verbs)`` of token lists, each
        in the order the tokens appear in *text*.
    """
    # Nothing to analyze (e.g. OCR failed and handed us None).
    if not text:
        return [], [], [], []

    # Tokenize
    tokens = nltk.word_tokenize(text)
    # Part-of-speech tagging
    pos_tags = nltk.pos_tag(tokens)

    # Split into nouns, adjectives, adverbs and verbs by tag
    noun_tags = {"NN", "NNS", "NNP", "NNPS"}
    adjective_tags = {"JJ", "JJS", "JJR"}
    adverb_tags = {"RB", "RBR", "RBS"}
    verb_tags = {"VB", "VBD", "VBG", "VBN", "VBP", "VBZ"}

    nouns = [token for token, tag in pos_tags if tag in noun_tags]
    adjectives = [token for token, tag in pos_tags if tag in adjective_tags]
    adverbs = [token for token, tag in pos_tags if tag in adverb_tags]
    verbs = [token for token, tag in pos_tags if tag in verb_tags]
    return nouns, adjectives, adverbs, verbs
def extract_event(text):
    """Build candidate event titles from every noun/verb pair found in *text*.

    Args:
        text: The text to analyze. ``None`` or an empty string yields ``[]``.

    Returns:
        A list of ``"<noun> <verb>"`` strings, one per combination of a noun
        and a verb, ordered by noun first and verb second.
    """
    # No text means no events; this also keeps a failed OCR call from
    # reaching the tokenizer.
    if not text:
        return []

    # analyze_text returns (nouns, adjectives, adverbs, verbs); only the
    # nouns and verbs are needed here.
    nouns, _adjectives, _adverbs, verbs = analyze_text(text)

    # Combine every noun with every verb
    return [f"{noun} {verb}" for noun in nouns for verb in verbs]
def register_event(event, calendar_id, access_token="YOUR_ACCESS_TOKEN"):
    """POST one event to the KakaoTalk Calendar endpoint and print the outcome.

    Args:
        event: Title of the event to register.
        calendar_id: Identifier of the target calendar. It is accepted for
            interface compatibility but is not part of the request sent here.
        access_token: OAuth access token placed in the ``Authorization``
            header. Defaults to a placeholder that must be replaced.

    Returns:
        None. Prints a success line on HTTP 200, otherwise the status code.
    """
    # NOTE(review): confirm the endpoint URL and the payload field names
    # against Kakao's Talk Calendar REST API reference, and whether
    # calendar_id should be included in the request.
    # KakaoTalk Calendar API URL
    url = "https://kapi.kakao.com/v2/calendar/events"
    # Request headers
    headers = {
        "Authorization": f"Bearer {access_token}",
    }
    # Request payload (the start/end times are fixed values)
    data = {
        "title": event,
        "start_time": "2023-12-20T10:00:00+09:00",
        "end_time": "2023-12-20T11:00:00+09:00",
    }
    # Send the request; the timeout keeps a stalled connection from hanging
    response = requests.post(url, headers=headers, data=data, timeout=30)
    # Report the result
    if response.status_code == 200:
        print("[SUCCESS] 이벤트 등록 성공")
    else:
        print(f"[ERROR] {response.status_code}")
# Test run: OCR an image, derive candidate events and print them
image_path = "image.jpg"
text = get_text_from_image(image_path)
# get_text_from_image returns None when the OCR request fails; skip the
# analysis in that case instead of passing None to the tokenizer.
events = extract_event(text) if text is not None else []
print(events)
# Register each candidate event
calendar_id = "YOUR_CALENDAR_ID"
for event in events:
    register_event(event, calendar_id)
[ERROR] 401
Traceback (most recent call last):
File "d:\Coding\workspace\python-naver-clova-ocr-summary\ChatCalendar.py", line 86, in <module>
events = extract_event(text)
^^^^^^^^^^^^^^^^^^^
File "d:\Coding\workspace\python-naver-clova-ocr-summary\ChatCalendar.py", line 48, in extract_event
nouns, verbs = analyze_text(text)
^^^^^^^^^^^^^^^^^^
File "d:\Coding\workspace\python-naver-clova-ocr-summary\ChatCalendar.py", line 33, in analyze_text
tokens = nltk.word_tokenize(text)
^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Python\Lib\site-packages\nltk\tokenize\__init__.py", line 129, in word_tokenize
sentences = [text] if preserve_line else sent_tokenize(text, language)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Python\Lib\site-packages\nltk\tokenize\__init__.py", line 107, in sent_tokenize
return tokenizer.tokenize(text)
^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Python\Lib\site-packages\nltk\tokenize\punkt.py", line 1281, in tokenize
return list(self.sentences_from_text(text, realign_boundaries))
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Python\Lib\site-packages\nltk\tokenize\punkt.py", line 1341, in sentences_from_text
return [text[s:e] for s, e in self.span_tokenize(text, realign_boundaries)]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Python\Lib\site-packages\nltk\tokenize\punkt.py", line 1341, in <listcomp>
return [text[s:e] for s, e in self.span_tokenize(text, realign_boundaries)]
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Python\Lib\site-packages\nltk\tokenize\punkt.py", line 1329, in span_tokenize
for sentence in slices:
File "C:\Python\Lib\site-packages\nltk\tokenize\punkt.py", line 1459, in _realign_boundaries
for sentence1, sentence2 in _pair_iter(slices):
File "C:\Python\Lib\site-packages\nltk\tokenize\punkt.py", line 321, in _pair_iter
prev = next(iterator)
^^^^^^^^^^^^^^
File "C:\Python\Lib\site-packages\nltk\tokenize\punkt.py", line 1431, in _slices_from_text
for match, context in self._match_potential_end_contexts(text):
File "C:\Python\Lib\site-packages\nltk\tokenize\punkt.py", line 1395, in _match_potential_end_contexts
for match in self._lang_vars.period_context_re().finditer(text):
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: expected string or bytes-like object, got 'NoneType'