2. 사용법 : php webdriver를 활용한 크롤링
- 프로그래밍/크롤링
- 2022. 5. 9.
※ 참고하면 좋은 php-webdriver API 문서:
https://php-webdriver.github.io/php-webdriver/latest/Facebook/WebDriver.html
<?php
defined('BASEPATH') OR exit('No direct script access allowed');
//namespace Facebook\WebDriver;
use Facebook\WebDriver\Remote\DesiredCapabilities;
use Facebook\WebDriver\Remote\RemoteWebDriver;
use Facebook\WebDriver\Chrome\ChromeOptions;
use Facebook\WebDriver\WebDriverBy;
use Facebook\WebDriver\WebDriverExpectedCondition;
use Facebook\WebDriver\Cookie;
class Welcome extends CI_Controller {
    /**
     * Demonstrates a basic php-webdriver crawling session.
     *
     * Connects to a WebDriver server at http://localhost:4444/, starts a
     * headless Chrome session, opens a Wikipedia article, searches for "PHP",
     * opens the article's history tab, replaces the session cookies with a
     * single test cookie and echoes what it sees along the way.
     *
     * The browser session is always terminated, even when a lookup or wait
     * throws, so a failed run does not leave an orphaned browser on the
     * WebDriver server.
     *
     * Routing (CodeIgniter): maps to
     *     http://example.com/index.php/welcome
     *  - or -
     *     http://example.com/index.php/welcome/index
     *  - or -, since this controller is set as the default controller in
     * config/routes.php, http://example.com/
     *
     * Any other public method not prefixed with an underscore maps to
     * /index.php/welcome/<method_name>.
     *
     * @see https://codeigniter.com/user_guide/general/urls.html
     */
    public function index()
    {
        $host = 'http://localhost:4444/';

        // Run Chrome without a visible window; --no-sandbox is passed so that
        // Chrome can start in environments where its sandbox is unavailable.
        $options = new ChromeOptions();
        $options->addArguments(['--headless', '--no-sandbox']);

        $capabilities = DesiredCapabilities::chrome();
        $capabilities->setCapability(ChromeOptions::CAPABILITY, $options);

        // Created outside the try block: if the session cannot be created
        // there is nothing to quit.
        $driver = RemoteWebDriver::create($host, $capabilities);

        try {
            // Open the start page.
            $driver->get('https://en.wikipedia.org/wiki/Selenium_(software)');

            // Type "PHP" into the search input (looked up by element id) and
            // submit the form that contains it.
            $driver->findElement(WebDriverBy::id('searchInput'))
                ->sendKeys('PHP')
                ->submit();

            // Block until the page heading contains "PHP", i.e. until the
            // result page has actually loaded.
            $driver->wait()->until(
                WebDriverExpectedCondition::elementTextContains(WebDriverBy::id('firstHeading'), 'PHP')
            );

            // Title and URL of the current page.
            echo "The title is '" . $driver->getTitle() . "'\n";
            echo "The current URL is '" . $driver->getCurrentURL() . "'\n";

            // Locate the history tab link via a CSS selector.
            $historyButton = $driver->findElement(
                WebDriverBy::cssSelector('#ca-history a')
            );

            // Print the link text, then click it.
            echo "About to click to button with text: '" . $historyButton->getText() . "'\n";
            $historyButton->click();

            // Wait until the target page is loaded.
            $driver->wait()->until(
                WebDriverExpectedCondition::titleContains('Revision history')
            );

            // Title and URL of the current page.
            echo "The title is '" . $driver->getTitle() . "'\n";
            echo "The current URI is '" . $driver->getCurrentURL() . "'\n";

            // Delete all cookies, then add a new one.
            $driver->manage()->deleteAllCookies();
            $cookie = new Cookie('cookie_set_by_selenium', 'cookie_value');
            $driver->manage()->addCookie($cookie);

            // Dump the current cookies to the output.
            $cookies = $driver->manage()->getCookies();
            print_r($cookies);
        } finally {
            // Terminate the session and close the browser, on success and on
            // failure alike.
            $driver->quit();
        }
    }
}
'프로그래밍 > 크롤링' 카테고리의 다른 글
4. 윈도우 설정 : php webdriver를 활용한 크롤링 (0) | 2022.05.16 |
---|---|
윈도우 컴포저 windows composer 설치 및 사용 (0) | 2022.05.16 |
3. xPath사용법 : php webdriver를 활용한 크롤링 (0) | 2022.05.10 |
1. 리눅스 설정 : php webdriver를 활용한 크롤링 (0) | 2022.05.09 |
Headless Browser란? (0) | 2022.05.06 |