환경
- Ubuntu Server 22.04
- VSCode Insider
- Jupyter Extension
- Remote SSH Extension
- Python 3.10 venv
사전 세팅
# Running headless requires a few extra setup steps
# Install Chrome
sudo apt update
wget https://dl.google.com/linux/direct/google-chrome-stable_current_amd64.deb
# Install through apt (not dpkg -i) so Chrome's missing dependencies are resolved automatically
sudo apt install ./google-chrome-stable_current_amd64.deb
rm google-chrome-stable_current_amd64.deb
# Install the driver
google-chrome --version # check the version, then substitute it in the URL below
wget https://storage.googleapis.com/chrome-for-testing-public/<버전>/linux64/chromedriver-linux64.zip
# The archive and the directory it extracts to are both named with a hyphen: chromedriver-linux64
unzip chromedriver-linux64.zip
sudo mv chromedriver-linux64/chromedriver /usr/local/bin/
sudo chmod +x /usr/local/bin/chromedriver
rm -r chromedriver-linux64 && rm chromedriver-linux64.zip
# In Jupyter, prefix with %; in a terminal, activate the target kernel (venv, conda, etc.) first
pip install selenium webdriver-manager -q
패키지
# driver loading
from webdriver_manager.chrome import ChromeDriverManager
# browser execution
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
# for interfacing
from selenium.webdriver.common.by import By
from selenium.webdriver import ActionChains
# render-waits
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import time
# exceptions for loops
from selenium.common.exceptions import NoSuchElementException, TimeoutException
# Chrome options needed to run the browser headless
chrm_options = Options()
for chrome_flag in ("--headless", "--no-sandbox", "--disable-dev-shm-usage"):
    chrm_options.add_argument(chrome_flag)
실행
- 요소만 가져오기
def _title_xpath(position):
    """Return the XPath of the title paragraph of the `position`-th (1-based) list entry."""
    return f'//*[@id="__next"]/div/main/div[2]/div/div[4]/div[1]/div[{position}]/div/a/div[2]/p[1]'


chrome_service = Service(ChromeDriverManager().install())
with webdriver.Chrome(service=chrome_service, options=chrm_options) as driver:
    driver.get('https://indistreet.com/live?sortOption=startDate%3AASC')
    position = 1
    # Wait (max 5 sec) until the first entry is present;
    # see also 'driver.implicitly_wait(5)' for simpler use cases.
    first_entry = (By.XPATH, _title_xpath(position))
    entry = WebDriverWait(driver, 5).until(EC.presence_of_element_located(first_entry))
    # find_element raises instead of returning None, so the loop ends via the except branch.
    while True:
        print(entry.text)
        position += 1
        try:
            entry = driver.find_element(By.XPATH, _title_xpath(position))
        except NoSuchElementException:
            print("✋End of list🚫")
            break
- 입력 인터페이스 사용
# Open the browser in a context manager so it is always quit, even when
# navigation itself fails before the try block is reached.
with webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=chrm_options) as driver:
    driver.get('https://www.naver.com/')  # the hashcode site does not work here
    time.sleep(1)
    # Login button
    xpath = '//*[@id="account"]/div/a'
    try:
        # Wait (max 5 sec) until the button is clickable
        button = WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.XPATH, xpath)))
        if button.is_displayed() and button.is_enabled():
            # A single action does not need an ActionChains
            button.click()
            time.sleep(1)
            # Submit wrong / non-existent credentials
            # (named id_input rather than id to avoid shadowing the builtin)
            id_input = driver.find_element(By.ID, 'id')
            pw_input = driver.find_element(By.ID, 'pw')
            login_btn = driver.find_element(By.ID, 'log.login')
            ActionChains(driver)\
                .send_keys_to_element(id_input, 'test')\
                .send_keys_to_element(pw_input, 'test')\
                .click(login_btn).perform()
            time.sleep(1)
            # Print the error message shown by the login page
            error_xpath = '//*[@id="err_common"]/div'
            error = driver.find_element(By.XPATH, error_xpath)
            print(error.text)
        else:
            print("not visible or not enabled")
    except TimeoutException:
        print("Was not clickable within the period")
    except NoSuchElementException:
        print("Element not found")
'Engineering' 카테고리의 다른 글
Seaborn 활용 2 (0) | 2024.10.04 |
---|---|
Seaborn 활용 1 (0) | 2024.10.04 |
BS4만으로 Crawl (0) | 2024.10.03 |
자료구조/알고리즘 (0) | 2024.10.01 |
test3 (1) | 2024.01.18 |