/
var
/
www
/
biocabaz_new
/
web
/
Upload File
HOME
<?php
// Cache warm-up script.
//
// Order of operations at the bottom of this file:
//   1. delete the previous snapshot folder (wpo-cache1),
//   2. request both home pages and every same-site link found on them,
//   3. copy the 'wpo-cache' folder to 'wpo-cache1'.
//
// NOTE(review): the cache paths are relative, so they resolve against the
// current working directory — confirm the script is always started from the
// site root.
// NOTE(review): 'wpo-cache' is presumably the page cache written by a caching
// plugin while the pages are requested — confirm.

// Show every error while the script runs; this does not load WordPress.
ini_set('display_errors', 1);
ini_set('display_startup_errors', 1);
error_reporting(E_ALL);

/**
 * Request a URL and print it.
 *
 * The response body is not used; only the request is made. A failed request
 * surfaces as the warning emitted by file_get_contents().
 *
 * @param string $url Absolute URL to request.
 */
function force_load_url($url)
{
    file_get_contents($url);
    echo $url . "<br>";
}

/**
 * Request every URL listed in a sitemap, following nested sitemap indexes.
 *
 * A sitemap that cannot be fetched or parsed is reported and skipped instead
 * of aborting the whole run with an uncaught exception.
 *
 * @param string $sitemap_url Absolute URL of a sitemap or sitemap index.
 */
function crawl_sitemap($sitemap_url)
{
    $sitemap = file_get_contents($sitemap_url);
    if ($sitemap === false || trim($sitemap) === '') {
        echo 'Skipping unreadable sitemap: ' . $sitemap_url . "<br>";
        return;
    }

    try {
        $xml = new SimpleXMLElement($sitemap);
    } catch (Exception $e) {
        // SimpleXMLElement throws when the payload is not well-formed XML.
        echo 'Skipping malformed sitemap: ' . $sitemap_url . "<br>";
        return;
    }

    // Page entries of a regular sitemap.
    foreach ($xml->url as $url) {
        $loc = trim((string) $url->loc);
        if ($loc !== '') {
            force_load_url($loc);
        }
    }

    // Child sitemaps of a sitemap index; iterating a missing element is a no-op.
    foreach ($xml->sitemap as $sub_sitemap) {
        $sub_sitemap_url = trim((string) $sub_sitemap->loc);
        if ($sub_sitemap_url !== '') {
            crawl_sitemap($sub_sitemap_url);
        }
    }
}

/**
 * Copy the 'wpo-cache' folder to 'wpo-cache1' and print the outcome.
 */
function copy_cache_folder()
{
    $source_folder = 'wp-conteudos/cache/wpo-cache';
    $destination_folder = 'wp-conteudos/cache/wpo-cache1';

    if (!file_exists($source_folder)) {
        echo 'wpo-cache folder does not exist.';
        return;
    }

    if (copy_directory($source_folder, $destination_folder)) {
        echo 'wpo-cache folder copied to wpo-cache1 successfully.';
    } else {
        echo 'Failed to copy wpo-cache folder.';
    }
}

/**
 * Recursively copy a directory.
 *
 * The copy is best-effort: one failing entry does not stop the remaining
 * entries from being copied, but it makes the overall result false.
 *
 * @param string $source      Directory to copy from.
 * @param string $destination Directory to copy into; created if missing.
 *
 * @return bool True only when the destination exists and every entry was copied.
 */
function copy_directory($source, $destination)
{
    // The second is_dir() covers another process creating the folder between
    // the first check and mkdir().
    if (!is_dir($destination) && !mkdir($destination, 0777, true) && !is_dir($destination)) {
        return false;
    }

    $dir = dir($source);
    if ($dir === false) {
        return false;
    }

    $all_copied = true;
    while (false !== ($entry = $dir->read())) {
        if ($entry === '.' || $entry === '..') {
            continue;
        }

        $source_path = $source . '/' . $entry;
        $destination_path = $destination . '/' . $entry;

        // The copy call is the left operand so that it always runs, even
        // after an earlier entry has already failed.
        if (is_dir($source_path)) {
            $all_copied = copy_directory($source_path, $destination_path) && $all_copied;
        } else {
            $all_copied = copy($source_path, $destination_path) && $all_copied;
        }
    }
    $dir->close();

    return $all_copied;
}

/**
 * Recursively delete a directory and everything inside it.
 *
 * Does nothing when $dir is not a directory.
 *
 * @param string $dir Directory to remove.
 */
function rrmdir($dir)
{
    if (!is_dir($dir)) {
        return;
    }

    $objects = scandir($dir);
    if ($objects === false) {
        return;
    }

    foreach ($objects as $object) {
        if ($object === '.' || $object === '..') {
            continue;
        }

        $path = $dir . '/' . $object;

        // is_dir() follows symlinks; a linked directory is unlinked rather
        // than descended into, so nothing outside $dir is deleted.
        if (is_dir($path) && !is_link($path)) {
            rrmdir($path);
        } else {
            unlink($path);
        }
    }

    rmdir($dir);
}

/**
 * Fetch a page and request every same-site link found on it.
 *
 * Each distinct href is requested once per page and printed after the
 * request. 'mailto:' links and links not containing $domain are ignored.
 *
 * @param string $url    Page whose anchors are scanned.
 * @param string $domain Substring an href must contain to be requested.
 *
 * @return array The hrefs that were requested, in document order.
 */
function warm_linked_pages($url, $domain = 'biocabaz.pt')
{
    $html = file_get_contents($url);
    if ($html === false || trim($html) === '') {
        // DOMDocument::loadHTML() rejects an empty source on PHP 8, which
        // would otherwise end the script before the cache folder is copied.
        echo 'Skipping unreachable page: ' . $url . "<br>";
        return [];
    }

    // Collect markup problems internally instead of emitting a warning for
    // each one, then drop them so the buffer does not grow.
    libxml_use_internal_errors(true);
    $dom = new DOMDocument();
    $dom->loadHTML($html);
    libxml_clear_errors();

    $important_pages = [];
    foreach ($dom->getElementsByTagName('a') as $link) {
        $href = $link->getAttribute('href');

        if ($href === ''
            || strpos($href, $domain) === false
            || strpos($href, 'mailto:') !== false
            || in_array($href, $important_pages, true)
        ) {
            continue;
        }

        // Only the request matters; the response body is discarded.
        file_get_contents($href);
        echo $href;
        $important_pages[] = $href;
    }

    return $important_pages;
}

// Remove the previous snapshot. The content directory on this site is
// 'wp-conteudos', not the default 'wp-content'.
rrmdir("wp-conteudos/cache/wpo-cache1/");

// Both host variants are scanned, each with its own list of seen links.
foreach (['https://www.biocabaz.pt/web/', 'https://biocabaz.pt/web/'] as $home_url) {
    warm_linked_pages($home_url);
}

copy_cache_folder();