-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathscraping-a-text-list.php
84 lines (59 loc) · 1.49 KB
/
scraping-a-text-list.php
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
<?php
require 'vendor/autoload.php';
use GuzzleHttp\Client;
use Symfony\Component\DomCrawler\Crawler;
// Download the "text list" test page and isolate the markup of its list
// container. Guzzle throws on transport errors and (by default) on 4xx/5xx
// responses, so reaching the next statement means a body was received.
$client = new Client();
$response = $client->request('GET', 'http://testing-ground.scraping.pro/textlist');
$body = $response->getBody()->getContents();

// Parse the document and keep only the inner HTML of the element with
// id "case_textlist"; everything below works on that raw markup string.
$crawler = new Crawler($body);
$filter = $crawler->filter('#case_textlist')->html();
/**
 * Turn the raw markup of the text list into clean, individual lines.
 *
 * The markup is cut at the 24-dash rule that follows the header; the segment
 * right after the first rule is split on "<br>" tags. Each resulting line is
 * whitespace-trimmed so prefix checks at offset 0 work even when a newline
 * follows every "<br>", and blank lines are dropped.
 *
 * Note: "<br>" is deliberately never passed to trim() as its second argument.
 * trim() treats that argument as a set of characters, so it would also strip
 * a leading "<b>" or mangle a trailing "</b>".
 *
 * @param string $html Inner HTML of the list container.
 *
 * @return string[] Non-empty lines in document order; an empty array when the
 *                  dash rule is not present.
 */
function extractTextLines($html)
{
    $segments = explode("------------------------", (string) $html);
    if (!isset($segments[1])) {
        // No rule found: there is nothing that can be identified as list content.
        return [];
    }

    $lines = [];
    foreach (explode("<br>", $segments[1]) as $rawLine) {
        $line = trim($rawLine);
        if ($line !== '') {
            $lines[] = $line;
        }
    }

    return $lines;
}

$lines = extractTextLines($filter);
// Debug dump of every extracted line, wrapped in <pre> so the print_r
// layout survives when viewed in a browser.
echo "<pre>", "All the text lines:<br/>", print_r($lines, true), "</pre>";
// Sort the lines into three overlapping buckets:
//   - $cities_with_notes    receives every line, notes included;
//   - $cities_without_notes receives every line that is not a note;
//   - $bold_cities          receives the non-note lines that open with "<b>".
// A line counts as a note when it starts with "(" or with "change:".
$cities_without_notes = [];
$cities_with_notes = [];
$bold_cities = [];

foreach ($lines as $entry) {
    $cities_with_notes[] = $entry;

    $is_note = strncmp($entry, "(", 1) === 0
        || strncmp($entry, "change:", 7) === 0;
    if ($is_note) {
        continue;
    }

    if (strncmp($entry, "<b>", 3) === 0) {
        $bold_cities[] = $entry;
    }
    $cities_without_notes[] = $entry;
}
// Print the three result lists, each as: blank line, heading, <pre> dump.
$sections = [
    "Cities with population without notes:<br/>" => $cities_without_notes,
    "Cities with population and notes:<br/>" => $cities_with_notes,
    "Cities marked with bold:<br/>" => $bold_cities,
];

foreach ($sections as $heading => $entries) {
    echo "<br/>", $heading, "<pre>", print_r($entries, true), "</pre>";
}