Scraping In The Name Of!

Displaying The Information

Now that we have our information, we need to display it in some glorious fashion (or inglorious, if you prefer). We will use some basic HTML to create our display and style it with some CSS. We will be using some elements of CSS3, so some things may not work in older versions of Internet Explorer, but who uses that anyway? =D

Code:
  1. <?php
  /**
   * Fetch one weather page with cURL and store its raw HTML in $weather.
   *
   * The URL must end in ".../weather/<page>/<code>". On success the response
   * body is stored at $weather[<code>][<page>]. Nothing is stored when the URL
   * does not match or the download fails, so consumers never see a non-string
   * entry in the container.
   *
   * @param string $url     The URL to fetch.
   * @param array  $weather Container (passed by reference) that receives the HTML.
   *
   * @return bool true when the page was fetched and stored, false otherwise.
   */
  function scrapeWebsite ($url, &$weather)
  {
      // Parse the URL to retrieve the page name and location code
      $result = preg_match ("/^.*\/weather\/(?P<page>[^\/]+)\/(?P<code>[^\/]+)$/", $url, $matches);

      // Anything other than exactly one match means the URL is not usable
      if ($result !== 1)
      {
          return false;
      }

      $page = $matches['page'];
      $code = $matches['code'];

      // Initialize a new session and return a cURL handle
      $crl = curl_init ();

      if ($crl === false)
      {
          return false;
      }

      // Set options for cURL
      curl_setopt ($crl, CURLOPT_URL, $url); // The URL to fetch
      curl_setopt ($crl, CURLOPT_RETURNTRANSFER, 1); // Return the transfer as a string
      curl_setopt ($crl, CURLOPT_CONNECTTIMEOUT, 5); // Allow 5 seconds for connecting

      // Execute the given cURL session, then release the handle on every path
      $content = curl_exec ($crl);
      curl_close ($crl);

      // curl_exec() returns false on failure; an empty body is equally useless
      // to the HTML parser, so neither is stored
      if ($content === false || $content === '')
      {
          return false;
      }

      // Store the content in the container using $code and $page as the keys
      // (the nested arrays are created on demand)
      $weather[$code][$page] = $content;

      return true;
  }
  41.  
  // Raw HTML keyed by location code, then by page name (filled by scrapeWebsite)
  $weather = array ();
  // Parsed values keyed by location title, then by page name, then by field
  $display = array ();
  $urls = array (
      'http://www.weather.com/weather/right-now/USCO0357',
      'http://www.weather.com/weather/today/USCO0357',
      'http://www.weather.com/weather/right-now/USCO0105',
      'http://www.weather.com/weather/today/USCO0105'
  );

  foreach ($urls as $url)
  {
      scrapeWebsite ($url, $weather);
  }

  require_once ('./simple_html_dom.php');

  // NOTE: the selectors below assume the page markup they were written against;
  // apart from the guards shown, a missing element ends in a call on null.
  foreach ($weather as $code => $page)
  {
      // Reset the location
      $location = null;

      foreach ($page as $key => $content)
      {
          // Skip entries that hold no HTML (e.g. a failed download)
          if (!is_string ($content) || $content === '')
          {
              continue;
          }

          // Create DOM from HTML string
          $html = str_get_html ($content);

          // The parser hands back a falsy value when it rejects the input
          if (!$html)
          {
              continue;
          }

          // Resolve the location title once per location code
          if ($location === null)
          {
              $title = $html->find('div.wx-location-title', 0);
              $heading = ($title !== null) ? $title->find('h1', 0) : null;

              // Fall back to the location code when the title is not on the page
              $location = ($heading !== null) ? $heading->plaintext : $code;
              $display[$location] = array ();
          }

          // Handle each page type we are scraping
          switch ($key)
          {
              case 'right-now' :
                  $tmp = array ();

                  $main = $html->find('div#wx-main', 0);
                  $featured = $main->find('div.wx-featured', 0);
                  $tmp['temp'] = $featured->find('li.wx-temp', 0)->plaintext;
                  $wind = $main->find('div.wx-cc-blocks-1', 0);

                  // find() with an index yields a single element or null, so test
                  // for null; count() on that value is an error on current PHP.
                  // Without a wind speed element the value sits in a span with
                  // class 'wx-temp'.
                  if ($wind !== null && $wind->find('div.wx-cc-wind-speed', 0) !== null)
                  {
                      $tmp['wind'] = $main->find('div.wx-cc-wind-speed', 0)->plaintext;
                  }
                  else
                  {
                      $tmp['wind'] = $main->find('span.wx-temp', 0)->plaintext;
                  }
                  $tmp['feels'] = $featured->find('li.wx-feels', 0)->plaintext;
                  $tmp['phrase'] = $featured->find('li.wx-phrase', 0)->plaintext;

                  $display[$location][$key] = $tmp;
                  break;
              case 'today' :
                  $tmp = array ();

                  $container = $html->find('div.wx-12hour', 0);
                  $day = $container->find('div.wx-daypart', 0);
                  $night = $container->find('div.wx-daypart', 1);

                  // Determine if the high for the day has already been observed as this affects how it is displayed on weather.com
                  if (strpos($day->class, 'observed') !== false)
                  {
                      $text = $day->find('p.wx-observed', 0)->innertext;
                      $result = preg_match ('/^[a-zA-Z\' ]+(?P<temp>-?\d+<sup>[^<]+<\/sup>)(.*?)\bwere (?P<phrase>.*)$/', $text, $matches);

                      if ($result !== 1)
                      {
                          $tmp['high'] = 'N/A';
                          $tmp['high-phrase'] = 'Error getting High';
                      }
                      else
                      {
                          $tmp['high-obs'] = $matches['temp'];
                          $tmp['high-phrase'] = $matches['phrase'];
                      }
                  }
                  else
                  {
                      // Blank out the label span so only the temperature markup remains
                      $high = $day->find('p.wx-temp', 0);
                      $high->find('span.wx-label', 0)->outertext = '';
                      $tmp['high'] = $high->innertext;
                      $tmp['high-phrase'] = $day->find('p.wx-phrase', 0)->innertext;
                  }

                  // Same label removal for the low, then append 'F' inside the <sup>
                  $low = $night->find('p.wx-temp', 0);
                  $low->find('span.wx-label', 0)->outertext = '';
                  $unit = $low->find('sup', 0);
                  $unit->innertext = $unit->innertext . 'F';
                  $tmp['low'] = $low->innertext;
                  $tmp['low-phrase'] = $night->find('p.wx-phrase', 0)->innertext;

                  $display[$location][$key] = $tmp;
                  break;
          }

          // Everything kept in $display is a plain string by now, so the DOM
          // can be released before the next page is parsed
          $html->clear ();
          unset ($html);
      }
  }
  140. ?>
  141.  
  <html>
  <head>
  <style type="text/css">
  /* Page reset and base font */
  html, body {
      margin: 0;
      padding: 0;
      font-family: sans-serif;
  }

  /* Location heading above each table */
  h1 {
      font-size: 160%;
      margin: 14px 12px 8px 12px;
  }

  /* Outer frame of one location's table */
  .weather-box {
      border: 1px solid #4e5e86;
      background: #efefef;
      box-shadow: 1px 1px 3px rgba(0, 0, 0, 0.45);
      border-radius: 2px;
      margin: 0 0 0 24px;
      width: 360px;
  }

  .weather-box th, .weather-box td {
      margin: 0;
      border: 0;
      text-align: left;
  }

  /* Section header row; the solid colour is the fallback when no gradient
     declaration is understood, and the unprefixed one comes last so it wins */
  .weather-box th {
      line-height: 120%;
      font-size: 110%;
      background-color: #4e5e86;
      background: -webkit-gradient(linear, left top, left bottom, from(#4e5e86), to(#6c82ba));
      background: -moz-linear-gradient(center top, #4e5e86, #6c82ba) repeat scroll 0 0 transparent;
      background: -o-linear-gradient(top, #4e5e86, #6c82ba);
      background: linear-gradient(to bottom, #4e5e86, #6c82ba);
      color: #fafafa;
      text-shadow: 1px 1px 1px rgba(0, 0, 0, 0.45);
      padding: 4px 8px;
  }

  /* Data rows */
  .weather-box tr td {
      background: -webkit-gradient(linear, left top, left bottom, from(#dfdfdf), to(#ffffff));
      background: -moz-linear-gradient(center top, #dfdfdf, #ffffff) repeat scroll 0 0 transparent;
      background: -o-linear-gradient(top, #dfdfdf, #ffffff);
      background: linear-gradient(to bottom, #dfdfdf, #ffffff);
      border-top: 1px solid #abbce5;
  }

  /* Every second data row gets a blue tint */
  .weather-box tr:nth-child(even) td {
      background: -webkit-gradient(linear, left top, left bottom, from(#d0d0ff), to(#f0f0ff));
      background: -moz-linear-gradient(center top, #d0d0ff, #f0f0ff) repeat scroll 0 0 transparent;
      background: -o-linear-gradient(top, #d0d0ff, #f0f0ff);
      background: linear-gradient(to bottom, #d0d0ff, #f0f0ff);
  }

  /* Left column: field name */
  .weather-box td.label {
      text-align: right;
      font-weight: bold;
      padding: 4px 8px 4px 24px;
      text-shadow: 1px 1px 0px rgba(255, 255, 255, 0.55);
  }

  /* Right column: field value */
  .weather-box td.value {
      font-family: serif;
      padding: 4px 12px 4px 4px;
  }
  </style>
  </head>
  3.  
  <body>

  <?php

  // Captions for the section headers (page keys) and for the row labels
  // (field keys) produced by the scraper
  $labels = array (
      'right-now' => 'Current Conditions',
      'today' => 'Forecast',
      'temp' => 'Temperature',
      'wind' => 'Wind Speed',
      'phrase' => 'Conditions',
      'feels' => 'Wind Chill',
      'high' => 'High',
      'high-obs' => 'High (Observed)',
      'high-phrase' => 'Conditions',
      'low' => 'Low',
      'low-phrase' => 'Conditions'
  );

  // One heading and one table per location
  foreach ($display as $location => $info): ?>

  <?php // The title comes from the scraped page, so escape it; double_encode is off to keep any entities it already carries ?>
  <h1><?=htmlspecialchars ($location, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false)?></h1>

  <table class="weather-box" border="0" cellspacing="0" cellpadding="0">

  <?php foreach ($info as $key => $content): ?>

  <?php // A table may hold several <tbody> groups but only one <thead>, so each section header gets its own <tbody>; unknown keys fall back to the raw key ?>
  <tbody>
  <tr>
  <th colspan="2"><?=isset ($labels[$key]) ? $labels[$key] : $key?></th>
  </tr>
  </tbody>

  <tbody>
  <?php foreach ($content as $label => $value): ?>
  <tr>
  <td class="label"><?=isset ($labels[$label]) ? $labels[$label] : $label?></td>
  <?php // NOTE(review): $value is scraped markup (some fields carry <sup> tags) and is echoed as-is; it is only as trustworthy as the remote page ?>
  <td class="value"><?=$value?></td>
  </tr>
  <?php endforeach; ?>
  </tbody>

  <?php endforeach; ?>

  </table>

  <?php endforeach; ?>

  </body>
  </html>

Let's take a look and see if our hard work paid off.



;