본문 바로가기

Coding/TIL

TIL | #14 | NAVER CLOVA OCR / SUMMARY - Local | 23.12.12.(화)

23.12.12.(TUE).TIL.

NAVER CLOVA OCR / SUMMARY - Local

NAVER CLOVA OCR / SUMMARY - Local

import requests
import json
import nltk

def get_text_from_image(image_path):
    # Naver Clova OCR API URL
    url = "https://openapi.naver.com/v1/vision/ocr"

    # 요청 헤더
    headers = {
        "X-Naver-Client-Id": "YOUR_CLIENT_ID",
        "X-Naver-Client-Secret": "YOUR_CLIENT_SECRET",
    }

    # 요청 데이터
    data = {
        "image": image_path,
    }

    # 요청
    response = requests.post(url, headers=headers, data=data)

    # 응답
    if response.status_code == 200:
        response_json = json.loads(response.content)
        return response_json["result"]["recognition_results"][0]["text"]
    else:
        print(f"[ERROR] {response.status_code}")
        return None

def analyze_text(text):
    # 토큰화
    tokens = nltk.word_tokenize(text)

    # 품사 태깅
    pos_tags = nltk.pos_tag(tokens)

    # 명사, 형용사, 부사, 동사로 분류
    nouns = [token for token, tag in pos_tags if tag in ["NN", "NNS", "NNP", "NNPS"]]
    adjectives = [token for token, tag in pos_tags if tag in ["JJ", "JJS", "JJR"]]
    adverbs = [token for token, tag in pos_tags if tag in ["RB", "RBR", "RBS"]]
    verbs = [token for token, tag in pos_tags if tag in ["VB", "VBD", "VBG", "VBN", "VBP", "VBZ"]]

    return nouns, adjectives, adverbs, verbs

def extract_event(text):
    # 명사와 동사를 추출합니다.
    nouns, verbs = analyze_text(text)

    # 명사와 동사의 조합을 추출합니다.
    events = []
    for noun in nouns:
        for verb in verbs:
            events.append(f"{noun} {verb}")

    return events

def register_event(event, calendar_id):
    # KakaoTalk Calendar API URL
    url = "https://kapi.kakao.com/v2/calendar/events"

    # 요청 헤더
    headers = {
        "Authorization": f"Bearer {YOUR_ACCESS_TOKEN}",
    }

    # 요청 데이터
    data = {
        "title": event,
        "start_time": "2023-12-20T10:00:00+09:00",
        "end_time": "2023-12-20T11:00:00+09:00",
    }

    # 요청
    response = requests.post(url, headers=headers, data=data)

    # 응답
    if response.status_code == 200:
        print(f"[SUCCESS] 이벤트 등록 성공")
    else:
        print(f"[ERROR] {response.status_code}")

# 테스트
image_path = "image.jpg"
text = get_text_from_image(image_path)
events = extract_event(text)
print(events)

# 이벤트 등록
calendar_id = "YOUR_CALENDAR_ID"
for event in events:
    register_event(event, calendar_id)

[ERROR] 401
Traceback (most recent call last):
  File "d:\Coding\workspace\python-naver-clova-ocr-summary\ChatCalendar.py", line 86, in <module>
    events = extract_event(text)
             ^^^^^^^^^^^^^^^^^^^
  File "d:\Coding\workspace\python-naver-clova-ocr-summary\ChatCalendar.py", line 48, in extract_event
    nouns, verbs = analyze_text(text)
                   ^^^^^^^^^^^^^^^^^^
  File "d:\Coding\workspace\python-naver-clova-ocr-summary\ChatCalendar.py", line 33, in analyze_text
    tokens = nltk.word_tokenize(text)
             ^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Python\Lib\site-packages\nltk\tokenize\__init__.py", line 129, in word_tokenize
    sentences = [text] if preserve_line else sent_tokenize(text, language)
                                             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Python\Lib\site-packages\nltk\tokenize\__init__.py", line 107, in sent_tokenize
    return tokenizer.tokenize(text)
           ^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Python\Lib\site-packages\nltk\tokenize\punkt.py", line 1281, in tokenize
    return list(self.sentences_from_text(text, realign_boundaries))
                ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Python\Lib\site-packages\nltk\tokenize\punkt.py", line 1341, in sentences_from_text
    return [text[s:e] for s, e in self.span_tokenize(text, realign_boundaries)]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Python\Lib\site-packages\nltk\tokenize\punkt.py", line 1341, in <listcomp>
    return [text[s:e] for s, e in self.span_tokenize(text, realign_boundaries)]
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Python\Lib\site-packages\nltk\tokenize\punkt.py", line 1329, in span_tokenize
    for sentence in slices:
  File "C:\Python\Lib\site-packages\nltk\tokenize\punkt.py", line 1459, in _realign_boundaries
    for sentence1, sentence2 in _pair_iter(slices):
  File "C:\Python\Lib\site-packages\nltk\tokenize\punkt.py", line 321, in _pair_iter
    prev = next(iterator)
           ^^^^^^^^^^^^^^
  File "C:\Python\Lib\site-packages\nltk\tokenize\punkt.py", line 1431, in _slices_from_text
    for match, context in self._match_potential_end_contexts(text):
  File "C:\Python\Lib\site-packages\nltk\tokenize\punkt.py", line 1395, in _match_potential_end_contexts
    for match in self._lang_vars.period_context_re().finditer(text):
                 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: expected string or bytes-like object, got 'NoneType'