-
OPEN_API를 사용하여 데이터 수집하기 (Python, 2020. 6. 15. 18:12)
WEB-API(Application Programming Interface)
-
웹서비스의 정보를 웹사이트를 통해 제공 : 크롤링으로 수집(비공식 수집)
-
웹API : API 정보로 수집(공식적으로 수집)
공공데이터 포털
-
공공데이터 포털 가입 https://www.data.go.kr
-
앱 ID 등록(개발자 계정)
-
각 서비스 별로 신청
출입국관광통계서비스
-
사이트에서 검색 후 활용 신청
-
출입국관광통계조회
관광자원통계서비스
-
전국 유료 관광지 입장객 정보 수집
import requests import os import sys import datetime import time
#pip install matplotlib import matplotlib.pyplot as plt import matplotlib from matplotlib import font_manager, rc
endpoint
-
http://openapi.tour.go.kr/openapi/service 는 공통 endpoint
-
문서에 있는 오퍼레이션 명을 확인하여 원하는 주소 확인
# Country codes -- China: 112 / Japan: 130 / USA: 275
url = 'http://openapi.tour.go.kr/openapi/service/EdrcntTourismStatsService/getEdrcntTourismStatsList'

# The key below is written in URL-encoded form; decode it with the unquote
# helper exposed by requests before passing it as a query parameter.
# NOTE(review): this looks like a real service key committed in source --
# consider loading it from configuration instead.
serviceKey = requests.utils.unquote(
    'lg%2BwPhMeEjyYL6esKsQyaf8EGOFrJ0qfss5guvYbipJVC1e9KLfW7M92%2BF7EtBgwZf%2FcVV7PR57T%2FQtMBarUTA%3D%3D'
)

# Query parameters for one month ('202004') and one country ('112').
params = dict(
    _type='json',  # services that support JSON need the _type parameter
    serviceKey=serviceKey,
    YM='202004',
    NAT_CD='112',
    ED_CD='E',
)

response = requests.get(url, params=params)
response.status_code
200
# 사이트에서 주는 serviceKey와 response.url의 Key값이 다르다 # 공공데이터 포털의 키는 1시간 이후에 정상작동됨 # response.url response.text
'{"response":{"header":{"resultCode":"0000","resultMsg":"OK"},"body":{"items":{"item":{"ed":"방한외래관광객","edCd":"E","natCd":112,"natKorNm":"중 국","num":3935,"rnum":1,"ym":202004}},"numOfRows":10,"pageNo":1,"totalCount":1}}}'
def get_request_url(ym, nat_cd, ed_cd='E', timeout=10):
    """Request one month of entry/departure tourism statistics as JSON.

    Args:
        ym: Target month as a 'YYYYMM' string, sent as the ``YM`` parameter.
        nat_cd: Country code string, sent as the ``NAT_CD`` parameter.
        ed_cd: Value for the ``ED_CD`` parameter (default 'E').
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot block the caller forever.

    Returns:
        The decoded JSON response body when the HTTP status is 200,
        otherwise None. Request failures and undecodable bodies are
        printed and also yield None.
    """
    url = 'http://openapi.tour.go.kr/openapi/service/EdrcntTourismStatsService/getEdrcntTourismStatsList'

    # The key is expected in URL-encoded form; decode it with the unquote
    # helper exposed by requests before passing it as a query parameter.
    serviceKey = requests.utils.unquote('__ user service key__')

    params = {
        '_type': 'json',  # services that support JSON need the _type parameter
        'serviceKey': serviceKey,
        'YM': ym,
        'NAT_CD': nat_cd,
        'ED_CD': ed_cd,
    }

    try:
        response = requests.get(url, params=params, timeout=timeout)
        if response.status_code != 200:
            # status_code is an int; format it instead of concatenating so the
            # real status is reported rather than a masked TypeError.
            print('status_code : {}'.format(response.status_code))
            return None
        print('{} Url Request Success'.format(datetime.datetime.now()))
        return response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # RequestException: connection/timeout/HTTP-layer failures.
        # ValueError: the body could not be decoded as JSON.
        print(e)
        return None
# Try the helper once for a single month and country, then display the result.
ym, nat_cd = '202004', '112'
response = get_request_url(ym, nat_cd, ed_cd='E')
response
2020-06-11 14:36:35.555986 Url Request Success {'response': {'header': {'resultCode': '0000', 'resultMsg': 'OK'}, 'body': {'items': {'item': {'ed': '방한외래관광객', 'edCd': 'E', 'natCd': 112, 'natKorNm': '중 국', 'num': 3935, 'rnum': 1, 'ym': 202004}}, 'numOfRows': 10, 'pageNo': 1, 'totalCount': 1}}}
# Collect one record per month for a single country into jsonResult.
jsonResult = []

# Country codes -- China: 112 / Japan: 130 / USA: 275
nStartYear = 2005
nEndYear = 2020  # exclusive upper bound of range(): last year fetched is nEndYear - 1
nat_cd = '275'

for year in range(nStartYear, nEndYear):
    for month in range(1, 13):
        # Build 'YYYYMM' with a zero-padded month.
        ym = '{0}{1:0>2}'.format(year, month)

        response = get_request_url(ym, nat_cd, ed_cd='E')
        if response is None:
            # get_request_url returns None on failure (and has already
            # printed the reason); skip this month instead of crashing.
            continue

        check = response['response']['header']['resultMsg']
        if check != 'OK':
            continue

        # NOTE(review): assumes a month with data yields items = {'item': {...}};
        # anything else (e.g. an empty result) is skipped -- confirm against
        # the API guide.
        items = response['response']['body'].get('items')
        record = items.get('item') if isinstance(items, dict) else None
        if not isinstance(record, dict):
            continue

        natKorNm = record['natKorNm']
        num = record['num']
        item = {
            'nat-name': natKorNm,
            'nat_cd': nat_cd,
            'yyyymm': ym,
            'visit_cnt': num,
        }
        jsonResult.append(item)
2020-06-11 15:10:28.270967 Url Request Success 2020-06-11 15:10:28.317832 Url Request Success 2020-06-11 15:10:28.364696 Url Request Success 2020-06-11 15:10:28.395938 Url Request Success 2020-06-11 15:10:28.442804 Url Request Success 2020-06-11 15:10:28.474056 Url Request Success 2020-06-11 15:10:28.531002 Url Request Success 2020-06-11 15:10:28.557449 Url Request Success 2020-06-11 15:10:28.588729 Url Request Success 2020-06-11 15:10:28.635578 Url Request Success 2020-06-11 15:10:28.682422 Url Request Success 2020-06-11 15:10:28.713667 Url Request Success 2020-06-11 15:10:28.768715 Url Request Success 2020-06-11 15:10:28.790848 Url Request Success 2020-06-11 15:10:28.837733 Url Request Success 2020-06-11 15:10:28.884597 Url Request Success 2020-06-11 15:10:28.915840 Url Request Success 2020-06-11 15:10:28.967746 Url Request Success 2020-06-11 15:10:29.011739 Url Request Success 2020-06-11 15:10:29.055673 Url Request Success 2020-06-11 15:10:29.088710 Url Request Success 2020-06-11 15:10:29.123493 Url Request Success 2020-06-11 15:10:29.154731 Url Request Success 2020-06-11 15:10:29.211546 Url Request Success 2020-06-11 15:10:29.240691 Url Request Success 2020-06-11 15:10:29.271954 Url Request Success 2020-06-11 15:10:29.303210 Url Request Success 2020-06-11 15:10:29.350063 Url Request Success 2020-06-11 15:10:29.396924 Url Request Success 2020-06-11 15:10:29.440333 Url Request Success 2020-06-11 15:10:29.478008 Url Request Success 2020-06-11 15:10:29.506847 Url Request Success 2020-06-11 15:10:29.538078 Url Request Success 2020-06-11 15:10:29.584942 Url Request Success 2020-06-11 15:10:29.631806 Url Request Success 2020-06-11 15:10:29.675396 Url Request Success 2020-06-11 15:10:29.707610 Url Request Success 2020-06-11 15:10:29.738872 Url Request Success 2020-06-11 15:10:29.785717 Url Request Success 2020-06-11 15:10:29.816985 Url Request Success 2020-06-11 15:10:29.863844 Url Request Success 2020-06-11 15:10:29.912767 Url Request Success 2020-06-11 
15:10:29.941107 Url Request Success 2020-06-11 15:10:29.972384 Url Request Success 2020-06-11 15:10:30.003625 Url Request Success 2020-06-11 15:10:30.050479 Url Request Success 2020-06-11 15:10:30.097342 Url Request Success 2020-06-11 15:10:30.124261 Url Request Success 2020-06-11 15:10:30.172563 Url Request Success 2020-06-11 15:10:30.206857 Url Request Success 2020-06-11 15:10:30.238117 Url Request Success 2020-06-11 15:10:30.269360 Url Request Success 2020-06-11 15:10:30.300605 Url Request Success 2020-06-11 15:10:30.342276 Url Request Success 2020-06-11 15:10:30.376429 Url Request Success 2020-06-11 15:10:30.407112 Url Request Success 2020-06-11 15:10:30.453951 Url Request Success 2020-06-11 15:10:30.485213 Url Request Success 2020-06-11 15:10:30.532075 Url Request Success 2020-06-11 15:10:30.578907 Url Request Success 2020-06-11 15:10:30.608051 Url Request Success 2020-06-11 15:10:30.654939 Url Request Success 2020-06-11 15:10:30.686178 Url Request Success 2020-06-11 15:10:30.733042 Url Request Success 2020-06-11 15:10:30.783953 Url Request Success 2020-06-11 15:10:30.817861 Url Request Success 2020-06-11 15:10:30.841845 Url Request Success 2020-06-11 15:10:30.873088 Url Request Success 2020-06-11 15:10:30.914338 Url Request Success 2020-06-11 15:10:30.929965 Url Request Success 2020-06-11 15:10:30.961205 Url Request Success 2020-06-11 15:10:31.010780 Url Request Success 2020-06-11 15:10:31.044739 Url Request Success 2020-06-11 15:10:31.073482 Url Request Success 2020-06-11 15:10:31.114742 Url Request Success 2020-06-11 15:10:31.192876 Url Request Success 2020-06-11 15:10:31.240657 Url Request Success 2020-06-11 15:10:31.275604 Url Request Success 2020-06-11 15:10:31.299065 Url Request Success 2020-06-11 15:10:31.331399 Url Request Success 2020-06-11 15:10:31.378237 Url Request Success 2020-06-11 15:10:31.416029 Url Request Success 2020-06-11 15:10:31.459998 Url Request Success 2020-06-11 15:10:31.491130 Url Request Success 2020-06-11 15:10:31.516279 Url 
Request Success 2020-06-11 15:10:31.563163 Url Request Success 2020-06-11 15:10:31.594428 Url Request Success 2020-06-11 15:10:31.632216 Url Request Success 2020-06-11 15:10:31.688199 Url Request Success 2020-06-11 15:10:31.725293 Url Request Success 2020-06-11 15:10:31.756975 Url Request Success 2020-06-11 15:10:31.788240 Url Request Success 2020-06-11 15:10:31.822478 Url Request Success 2020-06-11 15:10:31.869372 Url Request Success 2020-06-11 15:10:31.919184 Url Request Success 2020-06-11 15:10:31.940641 Url Request Success 2020-06-11 15:10:31.987526 Url Request Success 2020-06-11 15:10:32.023304 Url Request Success 2020-06-11 15:10:32.070178 Url Request Success 2020-06-11 15:10:32.101420 Url Request Success 2020-06-11 15:10:32.160681 Url Request Success 2020-06-11 15:10:32.190745 Url Request Success 2020-06-11 15:10:32.223533 Url Request Success 2020-06-11 15:10:32.254798 Url Request Success 2020-06-11 15:10:32.286048 Url Request Success 2020-06-11 15:10:32.340371 Url Request Success 2020-06-11 15:10:32.369427 Url Request Success 2020-06-11 15:10:32.405738 Url Request Success 2020-06-11 15:10:32.439063 Url Request Success 2020-06-11 15:10:32.485927 Url Request Success 2020-06-11 15:10:32.523772 Url Request Success 2020-06-11 15:10:32.574619 Url Request Success 2020-06-11 15:10:32.610587 Url Request Success 2020-06-11 15:10:32.641223 Url Request Success 2020-06-11 15:10:32.672504 Url Request Success 2020-06-11 15:10:32.719334 Url Request Success 2020-06-11 15:10:32.755121 Url Request Success 2020-06-11 15:10:32.801744 Url Request Success 2020-06-11 15:10:32.840419 Url Request Success 2020-06-11 15:10:32.858197 Url Request Success 2020-06-11 15:10:32.905084 Url Request Success 2020-06-11 15:10:32.940391 Url Request Success 2020-06-11 15:10:32.987250 Url Request Success 2020-06-11 15:10:33.034420 Url Request Success 2020-06-11 15:10:33.057391 Url Request Success 2020-06-11 15:10:33.104277 Url Request Success 2020-06-11 15:10:33.141080 Url Request Success 
2020-06-11 15:10:33.172328 Url Request Success 2020-06-11 15:10:33.225715 Url Request Success 2020-06-11 15:10:33.274503 Url Request Success 2020-06-11 15:10:33.306312 Url Request Success 2020-06-11 15:10:33.325940 Url Request Success 2020-06-11 15:10:33.357186 Url Request Success 2020-06-11 15:10:33.388429 Url Request Success 2020-06-11 15:10:33.419671 Url Request Success 2020-06-11 15:10:33.441801 Url Request Success 2020-06-11 15:10:33.473066 Url Request Success 2020-06-11 15:10:33.514950 Url Request Success 2020-06-11 15:10:33.541086 Url Request Success 2020-06-11 15:10:33.572368 Url Request Success 2020-06-11 15:10:33.619215 Url Request Success 2020-06-11 15:10:33.657500 Url Request Success 2020-06-11 15:10:33.704343 Url Request Success 2020-06-11 15:10:33.737771 Url Request Success 2020-06-11 15:10:33.757747 Url Request Success 2020-06-11 15:10:33.788991 Url Request Success 2020-06-11 15:10:33.820234 Url Request Success 2020-06-11 15:10:33.842384 Url Request Success 2020-06-11 15:10:33.873664 Url Request Success 2020-06-11 15:10:33.922123 Url Request Success 2020-06-11 15:10:33.958850 Url Request Success 2020-06-11 15:10:33.990649 Url Request Success 2020-06-11 15:10:34.027411 Url Request Success 2020-06-11 15:10:34.089904 Url Request Success 2020-06-11 15:10:34.127466 Url Request Success 2020-06-11 15:10:34.159075 Url Request Success 2020-06-11 15:10:34.186990 Url Request Success 2020-06-11 15:10:34.216909 Url Request Success 2020-06-11 15:10:34.247827 Url Request Success 2020-06-11 15:10:34.275796 Url Request Success 2020-06-11 15:10:34.310623 Url Request Success 2020-06-11 15:10:34.353169 Url Request Success 2020-06-11 15:10:34.390836 Url Request Success 2020-06-11 15:10:34.425894 Url Request Success 2020-06-11 15:10:34.462592 Url Request Success 2020-06-11 15:10:34.499554 Url Request Success 2020-06-11 15:10:34.536676 Url Request Success 2020-06-11 15:10:34.576791 Url Request Success 2020-06-11 15:10:34.619813 Url Request Success 2020-06-11 
15:10:34.657456 Url Request Success 2020-06-11 15:10:37.711709 Url Request Success 2020-06-11 15:10:37.758574 Url Request Success 2020-06-11 15:10:37.796381 Url Request Success 2020-06-11 15:10:37.827624 Url Request Success 2020-06-11 15:10:37.881035 Url Request Success 2020-06-11 15:10:37.921455 Url Request Success 2020-06-11 15:10:37.960350 Url Request Success 2020-06-11 15:10:37.989361 Url Request Success 2020-06-11 15:10:38.004982 Url Request Success 2020-06-11 15:10:38.051845 Url Request Success
# Number of monthly records collected into jsonResult.
len(jsonResult)
180
json 파일로 저장
import json
# Write the collected records to a UTF-8 JSON file, keeping non-ASCII text
# readable (ensure_ascii=False) and keys sorted.
fn = '해외방문객정보.json'
dump_options = {'indent': 4, 'sort_keys': True, 'ensure_ascii': False}

with open(fn, mode='w', encoding='utf-8') as outfile:
    retJson = json.dumps(jsonResult, **dump_options)
    outfile.write(retJson)
그래프로 시각화 하기
# Split the collected records into parallel lists for plotting.
cnVisit = [item['visit_cnt'] for item in jsonResult]  # y values: visitor counts
visitYM = [item['yyyymm'] for item in jsonResult]     # tick labels: 'YYYYMM'
index = list(range(len(jsonResult)))                  # x positions: 0, 1, 2, ...
# Show the first element of each list built from jsonResult.
cnVisit[0], visitYM[0], index[0]
(34769, '200501', 0)
# Korean font setup: resolve the family name from the font file and make it
# the default so the Korean axis labels render.
font_loca = 'c:/windows/fonts/malgun.ttf'
font_props = font_manager.FontProperties(fname=font_loca)
font_name = font_props.get_name()
rc('font', family=font_name)

# x axis: one tick per record position, labelled with its 'YYYYMM' value.
plt.xticks(index, visitYM)
# Line of visitor counts over the record positions.
plt.plot(index, cnVisit)

plt.xlabel('방문월')    # x-axis title
plt.ylabel('방문객수')  # y-axis title
plt.grid(True)          # draw grid lines
plt.show()
'Python' 카테고리의 다른 글
Pandas 기초 (0) 2020.07.16 Numpy 기본 (0) 2020.07.16 Naver_API_JSON (0) 2020.06.10 Naver_API_XML (0) 2020.06.10 Open API_Naver (0) 2020.06.10 -