2. 사용법: php-webdriver를 활용한 크롤링

** 참고하면 좋은 php-webdriver API

https://php-webdriver.github.io/php-webdriver/latest/Facebook/WebDriver.html

 

<?php

defined('BASEPATH') OR exit('No direct script access allowed');

//namespace Facebook\WebDriver;

use Facebook\WebDriver\Remote\DesiredCapabilities;
use Facebook\WebDriver\Remote\RemoteWebDriver;
use Facebook\WebDriver\Chrome\ChromeOptions;
use Facebook\WebDriver\WebDriverBy;
use Facebook\WebDriver\WebDriverExpectedCondition;
use Facebook\WebDriver\Cookie;

class Welcome extends CI_Controller {

	/**
	 * Index page for this controller: a php-webdriver crawling demo.
	 *
	 * Opens a headless Chrome session through the WebDriver server at
	 * http://localhost:4444/, loads a Wikipedia article, searches for "PHP",
	 * opens that article's history tab, and echoes page titles, URLs and the
	 * session cookies along the way.
	 *
	 * The browser session is always terminated, even when a lookup or wait
	 * throws, so a failed run does not leave a browser process behind on the
	 * WebDriver server.
	 *
	 * As a CodeIgniter controller method this maps to
	 * 		http://example.com/index.php/welcome
	 *	- or -
	 * 		http://example.com/index.php/welcome/index
	 *
	 * @see https://codeigniter.com/user_guide/general/urls.html
	 * @see https://php-webdriver.github.io/php-webdriver/latest/Facebook/WebDriver.html
	 */
	public function index()
	{
		// Address of the running WebDriver (Selenium / chromedriver) server.
		$host = 'http://localhost:4444/';

		$capabilities = DesiredCapabilities::chrome();

		// Run Chrome without a visible window; --no-sandbox is typically
		// required when Chrome runs as root or inside a container.
		$options = new ChromeOptions();
		$options->addArguments(['--headless','--no-sandbox']);
		$capabilities->setCapability(ChromeOptions::CAPABILITY, $options);

		// If this throws there is no session yet, so there is nothing to clean up.
		$driver = RemoteWebDriver::create($host, $capabilities);

		try {
			// Open the start page.
			$driver->get('https://en.wikipedia.org/wiki/Selenium_(software)');

			// Type "PHP" into the search input (looked up by its id) and
			// submit the form that contains it.
			$driver->findElement(WebDriverBy::id('searchInput'))
			    ->sendKeys('PHP')
			    ->submit();

			// Block until the page heading contains 'PHP', i.e. until the
			// search result page has actually loaded.
			$driver->wait()->until(
			    WebDriverExpectedCondition::elementTextContains(WebDriverBy::id('firstHeading'), 'PHP')
			);

			// Title of the current page.
			echo "The title is '" . $driver->getTitle() . "'\n";

			// URL of the current page.
			echo "The current URL is '" . $driver->getCurrentURL() . "'\n";

			// Locate the history tab link with a CSS selector.
			$historyButton = $driver->findElement(
			    WebDriverBy::cssSelector('#ca-history a')
			);

			// Print the link text before clicking it.
			echo "About to click to button with text: '" . $historyButton->getText() . "'\n";

			$historyButton->click();

			// Wait until the target page is loaded.
			$driver->wait()->until(
			    WebDriverExpectedCondition::titleContains('Revision history')
			);

			// Title of the current page.
			echo "The title is '" . $driver->getTitle() . "'\n";

			// URL of the current page.
			echo "The current URI is '" . $driver->getCurrentURL() . "'\n";

			// Delete all cookies of the current session.
			$driver->manage()->deleteAllCookies();

			// Add a new cookie.
			$cookie = new Cookie('cookie_set_by_selenium', 'cookie_value');
			$driver->manage()->addCookie($cookie);

			// Dump the current cookies to the output.
			$cookies = $driver->manage()->getCookies();
			print_r($cookies);
		} finally {
			// Terminate the session and close the browser on success and on
			// failure alike; the exception, if any, still propagates.
			$driver->quit();
		}
	}
}

 

댓글

Designed by JB FACTORY