본문 바로가기

Coding/TIL

TIL | #13 | NAVER CLOVA SUMMARY API | 23.12.11.(월)

23.12.11.(MON).TIL.

NAVER CLOVA SUMMARY API

NAVER CLOVA SUMMARY API

import requests
import uuid
import time
import base64
import json
from google.colab import drive
drive.mount('/content/drive')

api_url = ''
secret_key = ''
image_file = ''

with open(image_file, 'rb') as f:
  file_data = f.read()

request_json = {
    'images': [
        {
            'format': 'jpg',
            'name': 'demo',
            'data': base64.b64encode(file_data).decode()
        }
    ],
    'requestId': str(uuid.uuid4()),
    'version': 'V2',
    'timestamp': int(round(time.time() * 1000))
}

payload = json.dumps(request_json).encode('UTF-8')
headers = {
    'X-OCR-SECRET': secret_key,
    'Content-Type': 'application/json'
}

response = requests.request("POST", api_url, headers=headers, data=payload)

json_data = json.loads(response.text)

text = []

for result in json_data['images'][0]['fields']:
  text.append(result['inferText'])

print('모든 텍스트:', text)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
모든 텍스트: ['「2023', '세종', 'UNION', '청년창업캠프」', '운영', '계획(안)', '<2023.', '8.', '창업지원팀', '김지혜>', 'I', '개요', '□', '추진목적', '0', '세종지역', '청년 대학생,', '(예비)창업자', '등을', '대상으로', '창업문화', '확산', '및', '기업가', '정신', '함양을', '도모하기', '위해', '세종청년창업협의회*', '기관', '공동사업의', '일환으로', '청년창업캠프', '개최', '(9개', '기관)', '고려대학교', '세종창업교육센터,', '한국영상대학교', '창업교육센터,', '홍익대학교', '세종창', '업교육센터,', '한국전통문화대학교', '취창업센터,', '청년희망팩토리', '사회적협동조합,', '한국청년기업', '가정신재단,', '세종청년센터', '세청나래,', '한국발명진흥회', '세종지부,', '세종창조경제혁신센터', '□행사개요', '0', '프로그램명 :', '2023', '세종', 'UNION', '청년창업캠프 -', '내', '안의', "'리더'", '찾기', '0', '일시:', '2023.', '09.', '21.(목)', '10:00~17:00', '0', '장소 :', '룩백', '카페(세종 연기면 공단로', '184', '0', '운영:', '(재)세종창조경제혁신센터(창업지원팀)', '0', '주관 :', '세종청년창업협의회', '0', '참여자 :', '고려대학교,', '한국영상대,', '한국전통문화대,', '홍익대학교', '대학생', '및', '유관기관', '담당자', '80여명', '0', '주요행사 :', '창업특강,', '팀빌딩,', '모의창업실습', '등']
import requests
import uuid
import time
import base64
import json
from google.colab import drive
drive.mount('/content/drive')

api_url = ''
secret_key = ''
image_file = ''

with open(image_file, 'rb') as f:
  file_data = f.read()

request_json = {
    'images': [
        {
            'format': 'jpg',
            'name': 'demo',
            'data': base64.b64encode(file_data).decode()
        }
    ],
    'requestId': str(uuid.uuid4()),
    'version': 'V2',
    'timestamp': int(round(time.time() * 1000))
}

payload = json.dumps(request_json).encode('UTF-8')
headers = {
    'X-OCR-SECRET': secret_key,
    'Content-Type': 'application/json'
}

response = requests.request("POST", api_url, headers=headers, data=payload)

json_data = json.loads(response.text)

text = []

for result in json_data['images'][0]['fields']:
  text.append(result['inferText'])

# 작은 따옴표 제거
for i in range(len(text)):
  text[i] = text[i].replace('\'', '')

print('모든 텍스트:', text)
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
모든 텍스트: ['「2023', '세종', 'UNION', '청년창업캠프」', '운영', '계획(안)', '<2023.', '8.', '창업지원팀', '김지혜>', 'I', '개요', '□', '추진목적', '0', '세종지역', '청년 대학생,', '(예비)창업자', '등을', '대상으로', '창업문화', '확산', '및', '기업가', '정신', '함양을', '도모하기', '위해', '세종청년창업협의회*', '기관', '공동사업의', '일환으로', '청년창업캠프', '개최', '(9개', '기관)', '고려대학교', '세종창업교육센터,', '한국영상대학교', '창업교육센터,', '홍익대학교', '세종창', '업교육센터,', '한국전통문화대학교', '취창업센터,', '청년희망팩토리', '사회적협동조합,', '한국청년기업', '가정신재단,', '세종청년센터', '세청나래,', '한국발명진흥회', '세종지부,', '세종창조경제혁신센터', '□행사개요', '0', '프로그램명 :', '2023', '세종', 'UNION', '청년창업캠프 -', '내', '안의', '리더', '찾기', '0', '일시:', '2023.', '09.', '21.(목)', '10:00~17:00', '0', '장소 :', '룩백', '카페(세종 연기면 공단로', '184', '0', '운영:', '(재)세종창조경제혁신센터(창업지원팀)', '0', '주관 :', '세종청년창업협의회', '0', '참여자 :', '고려대학교,', '한국영상대,', '한국전통문화대,', '홍익대학교', '대학생', '및', '유관기관', '담당자', '80여명', '0', '주요행사 :', '창업특강,', '팀빌딩,', '모의창업실습', '등']
import requests
import uuid
import time
import base64
import json
from google.colab import drive
drive.mount('/content/drive')

api_url = ''
secret_key = ''
image_file = ''

with open(image_file, 'rb') as f:
  file_data = f.read()

request_json = {
    'images': [
        {
            'format': 'jpg',
            'name': 'demo',
            'data': base64.b64encode(file_data).decode()
        }
    ],
    'requestId': str(uuid.uuid4()),
    'version': 'V2',
    'timestamp': int(round(time.time() * 1000))
}

payload = json.dumps(request_json).encode('UTF-8')
headers = {
    'X-OCR-SECRET': secret_key,
    'Content-Type': 'application/json'
}

response = requests.request("POST", api_url, headers=headers, data=payload)

json_data = json.loads(response.text)

text = ''

for result in json_data['images'][0]['fields']:
  text += result['inferText'].replace('\'', '').replace(',', '')

print('모든 텍스트:', text)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
모든 텍스트: 「2023세종UNION청년창업캠프」운영계획(안)<2023.8.창업지원팀김지혜>I개요□추진목적0세종지역청년 대학생(예비)창업자등을대상으로창업문화확산및기업가정신함양을도모하기위해세종청년창업협의회*기관공동사업의일환으로청년창업캠프개최(9개기관)고려대학교세종창업교육센터한국영상대학교창업교육센터홍익대학교세종창업교육센터한국전통문화대학교취창업센터청년희망팩토리사회적협동조합한국청년기업가정신재단세종청년센터세청나래한국발명진흥회세종지부세종창조경제혁신센터□행사개요0프로그램명 :2023세종UNION청년창업캠프 -내안의리더찾기0일시:2023.09.21.(목)10:00~17:000장소 :룩백카페(세종 연기면 공단로1840운영:(재)세종창조경제혁신센터(창업지원팀)0주관 :세종청년창업협의회0참여자 :고려대학교한국영상대한국전통문화대홍익대학교대학생및유관기관담당자80여명0주요행사 :창업특강팀빌딩모의창업실습등
import requests
import uuid
import time
import base64
import json
from google.colab import drive
drive.mount('/content/drive')

api_url = ''
secret_key = ''
image_file = ''

with open(image_file, 'rb') as f:
  file_data = f.read()

request_json = {
    'images': [
        {
            'format': 'jpg',
            'name': 'demo',
            'data': base64.b64encode(file_data).decode()
        }
    ],
    'requestId': str(uuid.uuid4()),
    'version': 'V2',
    'timestamp': int(round(time.time() * 1000))
}

payload = json.dumps(request_json).encode('UTF-8')
headers = {
    'X-OCR-SECRET': secret_key,
    'Content-Type': 'application/json'
}

response = requests.request("POST", api_url, headers=headers, data=payload)

json_data = json.loads(response.text)

text = ''

for result in json_data['images'][0]['fields']:
  text += result['inferText'].replace('\'', '').replace(',', ' ')

print('모든 텍스트:', text)

import requests
import uuid
import time
import base64
import json
from google.colab import drive
drive.mount('/content/drive')

api_url = ''
secret_key = ''
image_file = ''

with open(image_file, 'rb') as f:
  file_data = f.read()

request_json = {
    'images': [
        {
            'format': 'jpg',
            'name': 'demo',
            'data': base64.b64encode(file_data).decode()
        }
    ],
    'requestId': str(uuid.uuid4()),
    'version': 'V2',
    'timestamp': int(round(time.time() * 1000))
}

payload = json.dumps(request_json).encode('UTF-8')
headers = {
    'X-OCR-SECRET': secret_key,
    'Content-Type': 'application/json'
}

response = requests.request("POST", api_url, headers=headers, data=payload)

json_data = json.loads(response.text)

text = ''

for result in json_data['images'][0]['fields']:
  text += result['inferText'].replace('\'', '').replace(',', ' ')

schedule = ''

for match in re.finditer(r'(일시|일정)(.*)장소', text):
  schedule = match.group(1)

print(schedule)
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
일시
---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-26-c97359214abf> in <cell line: 53>()
     51 schedule = match.group(1)
     52 schedule = schedule.split(':')
---> 53 schedule = schedule[0] + ' ' + schedule[1] + ' ' + schedule[2]
     54 print(schedule)

IndexError: list index out of range
import requests
import uuid
import time
import base64
import json
from google.colab import drive
drive.mount('/content/drive')

api_url = ''
secret_key = ''
image_file = ''

with open(image_file, 'rb') as f:
  file_data = f.read()

request_json = {
    'images': [
        {
            'format': 'jpg',
            'name': 'demo',
            'data': base64.b64encode(file_data).decode()
        }
    ],
    'requestId': str(uuid.uuid4()),
    'version': 'V2',
    'timestamp': int(round(time.time() * 1000))
}

payload = json.dumps(request_json).encode('UTF-8')
headers = {
    'X-OCR-SECRET': secret_key,
    'Content-Type': 'application/json'
}

response = requests.request("POST", api_url, headers=headers, data=payload)

json_data = json.loads(response.text)

text = ''

for result in json_data['images'][0]['fields']:
  text += result['inferText'].replace('\'', '').replace(',', ' ')

schedule = ''

for i in range(len(text)):
  if text[i] == '일시' or text[i] == '일정':
    start = i
  elif text[i] == '장소':
    end = i
    schedule = text[start:end]
    break

print(schedule)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
import requests
import uuid
import time
import base64
import json
from google.colab import drive
drive.mount('/content/drive')

api_url = ''
secret_key = ''
image_file = ''

with open(image_file, 'rb') as f:
  file_data = f.read()

request_json = {
    'images': [
        {
            'format': 'jpg',
            'name': 'demo',
            'data': base64.b64encode(file_data).decode()
        }
    ],
    'requestId': str(uuid.uuid4()),
    'version': 'V2',
    'timestamp': int(round(time.time() * 1000))
}

payload = json.dumps(request_json).encode('UTF-8')
headers = {
    'X-OCR-SECRET': secret_key,
    'Content-Type': 'application/json'
}

response = requests.request("POST", api_url, headers=headers, data=payload)

json_data = json.loads(response.text)

text = ''

for result in json_data['images'][0]['fields']:
  text += result['inferText'].replace('\'', '').replace(',', ' ')

schedule = ''

for i in range(len(text)):
  if text[i] == '일시' or text[i] == '일정':
    start = i
  elif text[i] == '장소':
    end = i
    schedule = text[start:end]
    break

print(schedule)
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
import requests
import uuid
import time
import base64
import json
from google.colab import drive
drive.mount('/content/drive')

api_url = ''
secret_key = ''
image_file = ''

with open(image_file, 'rb') as f:
  file_data = f.read()

request_json = {
    'images': [
        {
            'format': 'jpg',
            'name': 'demo',
            'data': base64.b64encode(file_data).decode()
        }
    ],
    'requestId': str(uuid.uuid4()),
    'version': 'V2',
    'timestamp': int(round(time.time() * 1000))
}

payload = json.dumps(request_json).encode('UTF-8')
headers = {
    'X-OCR-SECRET': secret_key,
    'Content-Type': 'application/json'
}

response = requests.request("POST", api_url, headers=headers, data=payload)

json_data = json.loads(response.text)

text = ''

for result in json_data['images'][0]['fields']:
  text += result['inferText'].replace('\'', '').replace(',', ' ')

print('모든 텍스트:', text)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
모든 텍스트: 「2023세종UNION청년창업캠프」운영계획(안)<2023.8.창업지원팀김지혜>I개요□추진목적0세종지역청년 대학생 (예비)창업자등을대상으로창업문화확산및기업가정신함양을도모하기위해세종청년창업협의회*기관공동사업의일환으로청년창업캠프개최(9개기관)고려대학교세종창업교육센터 한국영상대학교창업교육센터 홍익대학교세종창업교육센터 한국전통문화대학교취창업센터 청년희망팩토리사회적협동조합 한국청년기업가정신재단 세종청년센터세청나래 한국발명진흥회세종지부 세종창조경제혁신센터□행사개요0프로그램명 :2023세종UNION청년창업캠프 -내안의리더찾기0일시:2023.09.21.(목)10:00~17:000장소 :룩백카페(세종 연기면 공단로1840운영:(재)세종창조경제혁신센터(창업지원팀)0주관 :세종청년창업협의회0참여자 :고려대학교 한국영상대 한국전통문화대 홍익대학교대학생및유관기관담당자80여명0주요행사 :창업특강 팀빌딩 모의창업실습등
import requests
import uuid
import time
import base64
import json
from google.colab import drive
drive.mount('/content/drive')

api_url = ''
secret_key = ''
image_file = ''

with open(image_file, 'rb') as f:
  file_data = f.read()

request_json = {
  'images': [
    {
      'format': 'jpg',
      'name': 'demo',
      'data': base64.b64encode(file_data).decode()
    }
  ],
  'requestId': str(uuid.uuid4()),
  'version': 'V2',
  'timestamp': int(round(time.time() * 1000))
}

payload = json.dumps(request_json).encode('UTF-8')
headers = {
  'X-OCR-SECRET': secret_key,
  'Content-Type': 'application/json'
}

response = requests.post(api_url, headers=headers, data=payload)

json_data = json.loads(response.text)

text = ''

for result in json_data['images'][0]['fields']:
  text += result['inferText'].replace('\'', '').replace(',', ' ')

print('모든 텍스트:', text)

summary_api_url = ""
summary_secret_key = ""

payload = {
  'content': text,
  'language': 'ko'
}

summary_headers = {
    'X-NCP-APIGW-API-KEY-ID': 'rtm3c8iicm',
    'X-NCP-APIGW-API-KEY': ''
}

response = requests.post(summary_api_url, headers=summary_headers, data=json.dumps(payload))

summary_json_data = json.loads(response.text)

# 수정 후 코드
summary = summary_json_data['contents']

print('일정 또는 일시:', summary)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
모든 텍스트: 「2023세종UNION청년창업캠프」운영계획(안)<2023.8.창업지원팀김지혜>I개요□추진목적0세종지역청년 대학생 (예비)창업자등을대상으로창업문화확산및기업가정신함양을도모하기위해세종청년창업협의회*기관공동사업의일환으로청년창업캠프개최(9개기관)고려대학교세종창업교육센터 한국영상대학교창업교육센터 홍익대학교세종창업교육센터 한국전통문화대학교취창업센터 청년희망팩토리사회적협동조합 한국청년기업가정신재단 세종청년센터세청나래 한국발명진흥회세종지부 세종창조경제혁신센터□행사개요0프로그램명 :2023세종UNION청년창업캠프 -내안의리더찾기0일시:2023.09.21.(목)10:00~17:000장소 :룩백카페(세종 연기면 공단로1840운영:(재)세종창조경제혁신센터(창업지원팀)0주관 :세종청년창업협의회0참여자 :고려대학교 한국영상대 한국전통문화대 홍익대학교대학생및유관기관담당자80여명0주요행사 :창업특강 팀빌딩 모의창업실습등
---------------------------------------------------------------------------
gaierror                                  Traceback (most recent call last)
/usr/local/lib/python3.10/dist-packages/urllib3/connection.py in _new_conn(self)
    202         try:
--> 203             sock = connection.create_connection(
    204                 (self._dns_host, self.port),

16 frames
gaierror: [Errno -2] Name or service not known

The above exception was the direct cause of the following exception:

NameResolutionError                       Traceback (most recent call last)
NameResolutionError: <urllib3.connection.HTTPSConnection object at 0x7a25e6842890>: Failed to resolve 'aiopen.navercorp.com' ([Errno -2] Name or service not known)

The above exception was the direct cause of the following exception:

MaxRetryError                             Traceback (most recent call last)
MaxRetryError: HTTPSConnectionPool(host='aiopen.navercorp.com', port=443): Max retries exceeded with url: /v1/analysis/document/summary (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x7a25e6842890>: Failed to resolve 'aiopen.navercorp.com' ([Errno -2] Name or service not known)"))

During handling of the above exception, another exception occurred:

ConnectionError                           Traceback (most recent call last)
/usr/local/lib/python3.10/dist-packages/requests/adapters.py in send(self, request, stream, timeout, verify, cert, proxies)
    517                 raise SSLError(e, request=request)
    518
--> 519             raise ConnectionError(e, request=request)
    520
    521         except ClosedPoolError as e:

ConnectionError: HTTPSConnectionPool(host='aiopen.navercorp.com', port=443): Max retries exceeded with url: /v1/analysis/document/summary (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x7a25e6842890>: Failed to resolve 'aiopen.navercorp.com' ([Errno -2] Name or service not known)"))
import re
import sys
import requests
import uuid
import time
import base64
import json
from google.colab import drive
drive.mount('/content/drive')

api_url = ''
secret_key = ''
image_file = ''

with open(image_file, 'rb') as f:
  file_data = f.read()

request_json = {
    'images': [
        {
            'format': 'jpg',
            'name': 'demo',
            'data': base64.b64encode(file_data).decode()
        }
    ],
    'requestId': str(uuid.uuid4()),
    'version': 'V2',
    'timestamp': int(round(time.time() * 1000))
}

payload = json.dumps(request_json).encode('UTF-8')
headers = {
    'X-OCR-SECRET': secret_key,
    'Content-Type': 'application/json'
}

response = requests.request("POST", api_url, headers=headers, data=payload)

json_data = json.loads(response.text)

text = ''

for result in json_data['images'][0]['fields']:
  text += result['inferText'].replace('\'', '').replace(',', ' ')

print('모든 텍스트:', text)

##

summary_headers = {
    'X-NCP-APIGW-API-KEY-ID': 'rtm3c8iicm',
    'X-NCP-APIGW-API-KEY': '',
    "Content-Type": "application/json"
}

language = "ko" # Language of document (ko, ja )
model = "general" # Model used for summaries (general, news)
tone = "2" # Converts the tone of the summarized result. (0, 1, 2, 3)
summaryCount = "3" # This is the number of sentences for the summarized document.
url = "https://naveropenapi.apigw.ntruss.com/text-summary/v1/summarize"

# content 변수에 모든 텍스트 저장
content = text

# document 객체 생성
document = {
  "content" : text.replace('\n', ' ').split('.')
}

# 텍스트의 길이가 10자 이상인지 확인
if len(text) < 10:
  raise ValueError("Text length must be more than 10 characters")

# 텍스트에 공백 문자만 포함되어 있는지 확인
if not text.strip():
  raise ValueError("Text must not contain only whitespace characters")

# 텍스트에 특수문자가 포함되어 있는지 확인
if re.search(r"[~!@#$%^&*()_+|<>?]", text):
  # 특수문자를 제거
  text = re.sub(r"[~!@#$%^&*()_+|<>?]", "", text)
  document["content"] = text.split('.')

# data 객체 생성
data = {
  "document": document,
  "option": {
    "language": language,
    "model": model,
    "tone": tone,
    "summaryCount" : summaryCount
  }
}

response = requests.post(url, data=json.dumps(data), headers=summary_headers)
rescode = response.status_code
if(rescode == 200):
    # json.loads() 함수를 사용하여 JSON 객체로 변환
    content = json.loads(response.text)
    # summary 키가 없는 경우 에러 처리
    if 'summary' not in content:
        print("Error: 'summary' key not found in response")
    else:
        # summary 키의 값을 content 변수에 저장
        content = content['summary']
        # summary가 문자열이 아닌 경우 에러 처리
        if not isinstance(content, str):
            print("Error: 'summary' is not a string")
        else:
            # content 출력
            print(content)
else:
    print("Error : " + response.text)
import re
import sys
import requests
import uuid
import time
import base64
import json
from google.colab import drive
drive.mount('/content/drive')

api_url = ''
secret_key = ''
image_file = ''

with open(image_file, 'rb') as f:
  file_data = f.read()

request_json = {
    'images': [
        {
            'format': 'jpg',
            'name': 'demo',
            'data': base64.b64encode(file_data).decode()
        }
    ],
    'requestId': str(uuid.uuid4()),
    'version': 'V2',
    'timestamp': int(round(time.time() * 1000))
}

payload = json.dumps(request_json).encode('UTF-8')
headers = {
    'X-OCR-SECRET': secret_key,
    'Content-Type': 'application/json'
}

response = requests.request("POST", api_url, headers=headers, data=payload)

json_data = json.loads(response.text)

text = ''

for result in json_data['images'][0]['fields']:
  text += result['inferText'].replace('\'', '').replace(',', ' ')

print('모든 텍스트:', text)


# 헤더 설정
summary_headers = {
  'X-NCP-APIGW-API-KEY-ID': '',
  'X-NCP-APIGW-API-KEY': '',
  "Content-Type": "application/json"
}

language = "ko" # Language of document (ko, ja )
model = "general" # Model used for summaries (general, news)
tone = "2" # Converts the tone of the summarized result. (0, 1, 2, 3)
summaryCount = "3" # This is the number of sentences for the summarized document.
url = "https://naveropenapi.apigw.ntruss.com/text-summary/v1/summarize"

# content 변수에 모든 텍스트 저장
content = text

# document 객체 생성
document = {
 "content": content.strip() if content.strip() else []
}

# option 객체 생성
option = {
    "language": language,
    "model": model,
    "tone": tone,
    "summaryCount": summaryCount
}

# data 객체 생성
data = {
  "document": document,
  "option": {
    "language": language,
    "model": model,
    "tone": tone,
    "summaryCount" : summaryCount
  }
}

# 텍스트의 길이가 10자 이상인지 확인
if len(text) < 10:
  raise ValueError("Text length must be more than 10 characters")

# 텍스트에 공백 문자만 포함되어 있는지 확인
if not text.strip():
  raise ValueError("Text must not contain only whitespace characters")

# 텍스트에 특수문자가 포함되어 있는지 확인
if re.search(r"[~!@#$%^&*()_+|<>?]", text):
  # 특수문자를 제거
  text = re.sub(r"([~!@#$%^&*()_+|<>?])", "", text)
  document["content"] = text.split('.')

response = requests.post(url, data=json.dumps(data), headers=summary_headers)
rescode = response.status_code
if(rescode == 200):
    # json.loads() 함수를 사용하여 JSON 객체로 변환
    content = json.loads(response.text)
    # summary 키가 없는 경우 에러 처리
    if 'summary' not in content:
        print("Error: 'summary' key not found in response")
    else:
        # summary 키의 값을 content 변수에 저장
        content = content['summary']
        # summary가 문자열이 아닌 경우 에러 처리
        if not isinstance(content, str):
            print("Error: 'summary' is not a string")
        else:
            # content 출력
            print(content)
else:
    print("Error : " + response.text)