Skip to main content Link Menu Expand (external link) Document Search Copy Copied

해당 프로젝트는 학사 진행 중, 안드로이드 텀 프로젝트로 진행한 맛집 찾기 프로젝트입니다.

  • 참여인원 : 8인 팀(팀원 역할)
  • 기간 : 2019년 04월 ~ 2019년 05월(2개월)
  • 나의 역할
    • ✍️ kakao 지도에서 맛집을 크롤링하는 알고리즘 제작
  • Github : https://github.com/pnu-005-team1/projectTeam1

📃 ✍️ kakao 지도에서 맛집 크롤러 제작

셀레니움 크롤러 코드
import org.openqa.selenium.By;
import org.openqa.selenium.OutputType;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.TimeUnit;

/**
 * Selenium-driven crawler for the Kakao Map web site.
 *
 * <p>{@link #crawler()} opens Chrome, submits the first prepared search
 * keyword, lets {@link #getDadd(WebDriver)} walk the paginated result list
 * and then opens every collected detail page to read its menu. All results
 * are accumulated in the static lists of this class.
 */
public class Daum {

	/** Start page that {@link #crawler()} opens. */
	static String Daum_map_URL = "https://map.kakao.com";
	/** City part of every search keyword. */
	static String busan = "๋ถ€์‚ฐ";
	/**
	 * District part of the search keywords.
	 *
	 * NOTE(review): the second-to-last entry does not look like an existing
	 * district name (possibly a misspelling of Haeundae-gu), and the text of
	 * these literals looks mis-encoded in this copy of the file - verify
	 * against the repository before relying on the generated keywords.
	 */
	static String[] busan_gu = { "๊ฐ•์„œ๊ตฌ", "๊ธˆ์ •๊ตฌ", "๋‚จ๊ตฌ", "๋™๊ตฌ", "๋™๋ž˜๊ตฌ", "์ง„๊ตฌ", "๋ถ๊ตฌ", "์‚ฌ์ƒ๊ตฌ", "์‚ฌํ•˜๊ตฌ", "์„œ๊ตฌ", "์ˆ˜์˜๊ตฌ", "์—ฐ์ œ๊ตฌ", "์˜๋„๊ตฌ",
			"์ค‘๊ตฌ", "ํ•ด๋‹น๋Œ€๊ตฌ", "๊ธฐ์žฅ๊ตฐ" };
	/** Food part of the search keywords; only the first entry is used. */
	static String[] menu = { "๋ˆ๊ฐ€์Šค" };

	/** Number of detail links collected by {@link #getDadd(WebDriver)} that still have to be visited. */
	static int count = 0;
	/** Index into {@link #places} of the next detail page to visit. */
	static int count2 = 0;

	/** Search keywords in the form "city district food". */
	static ArrayList<String> all = new ArrayList<>();

	/** Flat list of alternating restaurant name and address entries. */
	static ArrayList<String> restaurant = new ArrayList<>();

	/**
	 * Flat menu list: for each visited restaurant its name, then alternating
	 * menu item and price entries, closed by a single-backslash separator
	 * entry.
	 */
	static ArrayList<String> menu2 = new ArrayList<>();

	/** URLs of the detail pages, one per search result whose detail link could be read. */
	static ArrayList<String> places = new ArrayList<>();

	/** Pauses for one second so the page can react to the previous click. */
	private static void sleep() {
		sleep(1000);
	}

	/**
	 * Pauses the current thread.
	 *
	 * @param millis pause length in milliseconds
	 */
	private static void sleep(long millis) {
		try {
			Thread.sleep(millis);
		} catch (InterruptedException e) {
			System.out.println(e.getMessage());
			// Keep the interruption visible to callers instead of discarding it.
			Thread.currentThread().interrupt();
		}
	}

	/**
	 * Opens the next unvisited detail page from {@link #places} in its own
	 * Chrome instance and appends the restaurant name, its menu items and
	 * prices and a closing separator entry to {@link #menu2}.
	 *
	 * <p>The browser is always closed, even when loading the page fails.
	 *
	 * @param driver2 not used; kept so existing callers keep compiling
	 */
	private static void getMenu(WebDriver driver2) {
		if (count2 >= places.size()) {
			// Nothing left to visit; avoids indexing past the collected links.
			return;
		}
		String detailUrl = places.get(count2++);

		ChromeDriver driver = new ChromeDriver();
		try {
			driver.manage().timeouts().pageLoadTimeout(10, TimeUnit.SECONDS);
			driver.manage().timeouts().setScriptTimeout(20, TimeUnit.SECONDS);
			driver.get(detailUrl);
			sleep();

			expandMenu(driver);
			collectMenu(driver);

			System.out.println(menu2);
			menu2.add("\\");
		} finally {
			driver.quit();
		}
	}

	/**
	 * Clicks the "show more" control of the menu section up to five times.
	 * A failed click is only logged.
	 */
	private static void expandMenu(WebDriver driver) {
		for (int attempt = 0; attempt < 5; attempt++) {
			try {
				driver.findElement(By.xpath("//*[@id=\"mArticle\"]/div[2]/a/span[1]")).click();
				System.out.println("๋”๋ณด๊ธฐ ํด๋ฆญ");
				sleep();
			} catch (Exception e) {
				System.out.println("๋ฉ”๋‰ด ๋”๋ณด๊ธฐ X");
			}
		}
	}

	/**
	 * Appends the place name followed by the (menu, price) pairs of the
	 * first menu entry and of list positions 2 to 29 to {@link #menu2}.
	 * Reading stops at the first element that cannot be found.
	 */
	private static void collectMenu(WebDriver driver) {
		try {
			menu2.add(driver.findElement(By.cssSelector("#mArticle > div.cont_essential > div:nth-child(1) > div.place_details > div > h2")).getText());
			addMenuEntry(driver, "#mArticle > div.cont_menu > ul > li.nophoto_type.menu_fst");
			for (int position = 2; position < 30; position++) {
				addMenuEntry(driver, "#mArticle > div.cont_menu > ul > li:nth-child(" + position + ")");
				System.out.println("๋ฉ”๋‰ด์ถ”๊ฐ€");
			}
		} catch (Exception e) {
			// The lookup fails once the list has no further entry; that ends the scan.
			System.out.println("๋ฉ”๋‰ด X");
		}
	}

	/** Reads name and price below the given list item and appends both to {@link #menu2}. */
	private static void addMenuEntry(WebDriver driver, String itemSelector) {
		String name = driver.findElement(By.cssSelector(itemSelector + " > div > span")).getText();
		String price = driver.findElement(By.cssSelector(itemSelector + " > div > em.price_menu")).getText();
		menu2.add(name);
		menu2.add(price);
	}

	/**
	 * Walks the paginated search result list and records every entry.
	 *
	 * <p>Clicks the "more" control, then visits page buttons 1 to 5 and reads
	 * up to 15 rows per page. After page 5 it clicks "next" and repeats. The
	 * walk ends when a page button or a row cannot be read, or when "next"
	 * cannot be clicked.
	 *
	 * @param driver browser that currently shows the search results
	 */
	public static void getDadd(WebDriver driver) {
		driver.findElement(By.cssSelector("#info\\2e search\\2e place\\2e more")).click();
		// Give the page time to switch.
		sleep();

		driver.findElement(By.cssSelector("#info\\2e search\\2e page\\2e no2")).click();
		sleep();

		while (true) {
			for (int page = 1; page < 6; page++) {
				try {
					driver.findElement(By.cssSelector("#info\\2e search\\2e page\\2e no" + page)).click();
					sleep();
					for (int row = 1; row < 16; row++) {
						readResultRow(driver, row);
					}
					System.out.println("next page");
				} catch (Exception e) {
					System.out.println("16๊ฐœ X");
					return;
				}
			}
			try {
				driver.findElement(By.cssSelector("#info\\2e search\\2e page\\2e next")).click();
			} catch (Exception e) {
				// Without "next" the same five pages would be read again forever.
				System.out.println(e.getMessage());
				return;
			}
			sleep();
		}
	}

	/**
	 * Reads name, address and detail link of one result row.
	 *
	 * <p>{@link #count} is raised only after the link was stored, so it never
	 * exceeds the number of entries added to {@link #places}.
	 */
	private static void readResultRow(WebDriver driver, int row) {
		String name = driver
				.findElement(By.cssSelector("#info\\2e search\\2e place\\2e list > li:nth-child("
						+ row + ") > div.head_item.clickArea > strong > a.link_name"))
				.getText();
		String address = driver
				.findElement(By.cssSelector("#info\\2e search\\2e place\\2e list > li:nth-child("
						+ row + ") > div.info_item > div.addr > p:nth-child(1)"))
				.getText();
		restaurant.add(name);
		restaurant.add(address);

		String detailUrl = driver
				.findElement(By.cssSelector("#info\\.search\\.place\\.list > li:nth-child(" + row
						+ ") > div.info_item > div.contact.clickArea > a.moreview"))
				.getAttribute("href");
		places.add(detailUrl);
		count++;
	}

	/**
	 * Runs one crawl: builds the search keywords, searches for the first one,
	 * collects the result list and then visits every collected detail page.
	 *
	 * <p>On Windows a default chromedriver path is configured unless the
	 * {@code webdriver.chrome.driver} system property is already set. The
	 * browser is closed when the method ends, normally or with an exception.
	 *
	 * @throws IOException declared for existing callers; nothing in this
	 *                     method throws it directly
	 */
	public static void crawler() throws IOException {
		// Selenium setup.
		boolean onWindows = System.getProperty("os.name").toLowerCase().contains("window");
		if (onWindows && System.getProperty("webdriver.chrome.driver") == null) {
			System.setProperty("webdriver.chrome.driver",
					"C:\\Users\\ghkdq\\Desktop\\chromedriver_win32 (2)\\chromedriver.exe");
		}

		ChromeDriver driver = new ChromeDriver();
		try {
			driver.manage().timeouts().pageLoadTimeout(10, TimeUnit.SECONDS);
			driver.manage().timeouts().setScriptTimeout(20, TimeUnit.SECONDS);

			driver.get(Daum_map_URL);
			// NOTE(review): presumably these two clicks dismiss start-up overlays - confirm.
			driver.findElement(By.xpath("/html/body/div[10]/div/div[2]/a")).click();
			driver.findElement(By.xpath("/html/body/div[10]/div/div/div/span")).click();

			WebElement searchBox = driver.findElement(By.id("search.keyword.query"));
			for (String district : busan_gu) {
				all.add(busan + " " + district + " " + menu[0]);
			}
			// Only the first keyword is searched.
			searchBox.sendKeys(all.get(0));
			driver.findElement(By.xpath("//*[@id=\"search.keyword.submit\"]")).click();
			// Wait for the result list after the click.
			sleep(2000);

			getDadd(driver);

			System.out.println("Finish");

			for (int i = 0; i < count; i++) {
				getMenu(driver);
			}
			count = 0;
		} finally {
			driver.quit();
		}
	}

	/** Runs the crawler once and prints the collected name/address list. */
	public static void main(String[] args) {

		try {
			crawler();
			System.out.println(restaurant);

		} catch (IOException e) {
			e.printStackTrace();
		}
	}
}