Engineering
Seaborn 활용 2
덞웖이
2024. 10. 4. 17:59
환경
사전 세팅
패키지
import requests as rq
from bs4 import BeautifulSoup
# Mapping with a User-Agent entry that the crawl loop hands to rq.get().
# NOTE(review): the value looks like a placeholder string - confirm before real use.
user_agent = {'User-Agent': 'Mozilla/5.0 bla bla bla bla'}
import time
from collections import Counter
실행
# CRAWL
# Tally how often each tag label appears on the question-list pages of
# qna.programmers.co.kr and print the resulting Counter.
cnt = Counter()
idx = 0
while idx < 11:  # only 11 pages (page=0 .. page=10)
    # The User-Agent must be passed via `headers=`: the second positional
    # argument of requests.get() is `params`, which would put the mapping
    # into the URL query string instead of the request headers.
    res = rq.get(f'https://qna.programmers.co.kr/?page={idx}', headers=user_agent)
    time.sleep(1)  # pause between consecutive requests
    parsed = BeautifulSoup(res.text, 'html.parser')
    ul = parsed.find('ul', 'question-list')
    # find() returns None when no matching <ul> exists; treat such a page
    # as having no items rather than failing on `None.find_all`.
    lists = ul.find_all('li', 'question-list-item') if ul is not None else []
    for li in lists:
        # Every <li class="label-tag"> inside a question item is one tag.
        cnt.update(tag.text.strip() for tag in li.find_all('li', 'label-tag'))
    idx += 1
print(cnt)
# PLOT
# 폰트
%rm ~/.cache/matplotlib -rf
font_path = './NanumGothicCoding.ttf'
font_prop = fm.FontProperties(fname=font_path)
print(font_prop.get_name())
plt.rcParams['font.family'] = font_prop.get_name()
# 카운터 상위 10 컷
top_ten = cnt.most_common(10)
# plot setup
plt.figure(figsize=(8, 4)) # this goes before sns
sns.barplot(x=[e[0] for e in top_ten], y=[e[1] for e in top_ten])
plt.title('Thread topics')
plt.xlabel('Names')
plt.ylabel('Frequency')
plt.show()