diff --git a/composer.json b/composer.json index 523c100..4eb8cb2 100644 --- a/composer.json +++ b/composer.json @@ -1,5 +1,5 @@ { - "description": "Scrapping French 'Drive' Supermarket wWbsite", + "description": "Scrapping French 'Drive' Supermarket Website", "license": "MIT", "name": "chrismile0/scrapper", "autoload": { diff --git a/src/scrapper_auchan.php b/src/scrapper_auchan.php index 360928e..474d366 100644 --- a/src/scrapper_auchan.php +++ b/src/scrapper_auchan.php @@ -45,6 +45,8 @@ use Facebook\WebDriver\WebDriverBy as WebDriverBy; use Facebook\WebDriver\WebDriverExpectedCondition as WebDriverExpectedCondition; use Facebook\WebDriver\WebDriverKeys as WebDriverKeys; +use Facebook\WebDriver\Firefox\FirefoxDriver as FirefoxDriver; +use Facebook\WebDriver\Firefox\FirefoxProfile as FirefoxProfile; require __DIR__ . '/../../../autoload.php'; // EXPORT //require __DIR__ . '/../vendor/autoload.php'; // DEV @@ -71,23 +73,40 @@ function change_quantity_a(string $libelle) : string { * [BRIEF] generate an instance of a firefox driver with 'geckodriver' server * (localhost:4444) * @param int $p port + * @param bool $web_server false: connect to the geckodriver server on localhost:$p (RemoteWebDriver); true: start the driver from this process (FirefoxDriver::start) * @example generate_driver_a() * @author chriSmile0 * @return / */ -function generate_driver_a(int $p) { +function generate_driver_a(int $p, bool $web_server) { //-----------------Remote with geckodriver in terminal--------------------// - $host = 'http://localhost:'.$p.'/'; + if(!$web_server) { + $host = 'http://localhost:'.$p.'/'; + $capabilities = DesiredCapabilities::firefox(); + $firefoxOptions = new FirefoxOptions; + $firefoxOptions->addArguments(['-headless']); + $capabilities->setCapability(FirefoxOptions::CAPABILITY, $firefoxOptions); + try { + return RemoteWebDriver::create($host,$capabilities); + } + catch (Exception $e) { + echo "ERRRRRR_REMOTE : ".$e->getMessage()."\n"; + return NULL; + } + } + //------------FirefoxDriver, geckodriver directly on this process--------// + //shell_exec("kill -s kill `ps -e | grep -e 
geckodriver | grep -Eo '[0-9]{1,10}' | head -n 1`");sleep(1); + $firefoxOptions = new FirefoxOptions(); + $firefoxOptions->setProfile(new FirefoxProfile()); $capabilities = DesiredCapabilities::firefox(); - $firefoxOptions = new FirefoxOptions; $firefoxOptions->addArguments(['--headless']); $capabilities->setCapability(FirefoxOptions::CAPABILITY, $firefoxOptions); try { - return RemoteWebDriver::create($host,$capabilities); + return FirefoxDriver::start($capabilities); } catch (Exception $e) { - echo "ERRRRRR_REMOTE : ".$e->getMessage()."\n"; + echo "ERRRRRR : ".$e->getMessage()."\n"; return NULL; } } @@ -337,13 +356,14 @@ function extract_source_auchan(string $url,$driver, string $town, string $target * @param string $target_product the target product * @param string $town the research area * @param int $p port + * @param bool $web_server true or false, forwarded to generate_driver_a() * @example content_scrap_auchan((@see URL1),"lardons","Paris") * @author chriSmile0 * @return array array of all product with specific information that we needed */ -function content_scrap_auchan(string $target_product, string $town, int $p) : array { +function content_scrap_auchan(string $target_product, string $town, int $p, bool $web_server) : array { $url = "https://www.auchan.fr/"; - $driver = generate_driver_a($p); + $driver = generate_driver_a($p,$web_server); if($driver === NULL) return array(); @@ -400,15 +420,15 @@ function content_scrap_auchan(string $target_product, string $town, int $p) : ar * test or if the scrapping failed */ function main_a($argc, $argv) : bool { - if($argc == 5) { - if(empty(content_scrap_auchan($argv[1],$argv[2],$argv[3]))) { + if($argc == 6) { + if(empty(content_scrap_auchan($argv[1],$argv[2],$argv[3],strtolower($argv[4])==="true"))) { echo "NO CORRESPONDENCE FOUND \n"; return 0; } return 1; } else { - echo "ERROR : format : ". $argv[0] . "[research_product_type] [town] [port] --with-openssl\n"; + echo "ERROR : format : ". $argv[0] . 
"[research_product_type] [town] [port] [?webserver] --with-openssl\n"; return 0; } echo "EXECUTION FINISH WITH SUCCESS \n"; diff --git a/src/scrapper_carrefour.php b/src/scrapper_carrefour.php index 3fa7fd7..607f705 100644 --- a/src/scrapper_carrefour.php +++ b/src/scrapper_carrefour.php @@ -49,6 +49,8 @@ use Facebook\WebDriver\Remote\RemoteWebDriver; use Facebook\WebDriver\WebDriverBy as WebDriverBy; use Facebook\WebDriver\WebDriverExpectedCondition as WebDriverExpectedCondition; +use Facebook\WebDriver\Firefox\FirefoxDriver as FirefoxDriver; +use Facebook\WebDriver\Firefox\FirefoxProfile as FirefoxProfile; require __DIR__ . '/../../../autoload.php'; // EXPORT //require __DIR__ . '/../vendor/autoload.php'; // DEV @@ -107,23 +109,40 @@ function change_quantity_c($libelle) { * [BRIEF] generate an instance of a firefox driver with 'geckodriver' server * (localhost:4444) * @param int $p port + * @param bool $web_server false: connect to the geckodriver server on localhost:$p (RemoteWebDriver); true: start the driver from this process (FirefoxDriver::start) * @example generate_driver_c() * @author chriSmile0 * @return / */ -function generate_driver_c(int $p) { +function generate_driver_c(int $p, bool $web_server) { //-----------------Remote with geckodriver in terminal--------------------// - $host = 'http://localhost:'.$p.'/'; + if(!$web_server) { + $host = 'http://localhost:'.$p.'/'; + $capabilities = DesiredCapabilities::firefox(); + $firefoxOptions = new FirefoxOptions; + $firefoxOptions->addArguments(['-headless']); + $capabilities->setCapability(FirefoxOptions::CAPABILITY, $firefoxOptions); + try { + return RemoteWebDriver::create($host,$capabilities); + } + catch (Exception $e) { + echo "ERRRRRR_REMOTE : ".$e->getMessage()."\n"; + return NULL; + } + } + //------------FirefoxDriver, geckodriver directly on this process--------// + //shell_exec("kill -s kill `ps -e | grep -e geckodriver | grep -Eo '[0-9]{1,10}' | head -n 1`");sleep(1); + $firefoxOptions = new FirefoxOptions(); + $firefoxOptions->setProfile(new FirefoxProfile()); $capabilities = DesiredCapabilities::firefox(); - 
$firefoxOptions = new FirefoxOptions; $firefoxOptions->addArguments(['--headless']); $capabilities->setCapability(FirefoxOptions::CAPABILITY, $firefoxOptions); try { - return RemoteWebDriver::create($host,$capabilities); + return FirefoxDriver::start($capabilities); } catch (Exception $e) { - echo "ERRRRRR_REMOTE : ".$e->getMessage()."\n"; + echo "ERRRRRR : ".$e->getMessage()."\n"; return NULL; } } @@ -472,14 +491,15 @@ function extract_info_for_all_products_c(array $tab_json, array $needed_key) : a * @param string $target_product the target product * @param string $city the city to target * @param int $p port + * @param bool $web_server true or false, forwarded to generate_driver_c() * @example content_scrap_carrefour((@see URL1),"lardons") * @author chriSmile0 * @return array array of all product with specific information that we needed */ -function content_scrap_carrefour(string $target_product, string $city, int $p) : array { +function content_scrap_carrefour(string $target_product, string $city, int $p, bool $web_server) : array { $url = "https://www.carrefour.fr/courses"; $rtn = array(); - $driver = generate_driver_c($p); + $driver = generate_driver_c($p,$web_server); if($driver !== NULL) { $product_needed_key = [ // On ATTRIBUTES @@ -551,14 +571,14 @@ function content_scrap_carrefour(string $target_product, string $city, int $p) : * test or if the scrapping failed */ function main_c($argc, $argv) : bool { - if($argc == 5) { - if(empty(content_scrap_carrefour($argv[1],$argv[2],$argv[3]))) { + if($argc == 6) { + if(empty(content_scrap_carrefour($argv[1],$argv[2],$argv[3],strtolower($argv[4])==="true"))) { echo "NO CORRESPONDENCE FOUND \n"; return 0; } } else { - echo "ERROR : format : ". $argv[0] . " [research_product_type] [city] [port] --with-openssl\n"; + echo "ERROR : format : ". $argv[0] . 
" [research_product_type] [city] [port] [?webserver] --with-openssl\n"; return 0; } echo "EXECUTION FINISH WITH SUCCESS \n"; diff --git a/src/scrapper_intermarche.php b/src/scrapper_intermarche.php index d8aaab1..103da4e 100644 --- a/src/scrapper_intermarche.php +++ b/src/scrapper_intermarche.php @@ -56,6 +56,8 @@ use Facebook\WebDriver\WebDriverBy as WebDriverBy; use Facebook\WebDriver\WebDriverExpectedCondition as WebDriverExpectedCondition; use Facebook\WebDriver\WebDriverKeys as WebDriverKeys; +use Facebook\WebDriver\Firefox\FirefoxDriver as FirefoxDriver; +use Facebook\WebDriver\Firefox\FirefoxProfile as FirefoxProfile; require __DIR__ . '/../../../autoload.php'; // EXPORT //require __DIR__ . '/../vendor/autoload.php'; // DEV @@ -113,23 +115,40 @@ function change_quantity_i(string $libelle, $kg_price, $price) : string { * [BRIEF] generate an instance of a firefox driver with 'geckodriver' server * (localhost:4444) * @param int $p port + * @param bool $web_server false: connect to the geckodriver server on localhost:$p (RemoteWebDriver); true: start the driver from this process (FirefoxDriver::start) * @example generate_driver_i() * @author chriSmile0 * @return / */ -function generate_driver_i(int $p) { +function generate_driver_i(int $p, bool $web_server) { //-----------------Remote with geckodriver in terminal--------------------// - $host = 'http://localhost:'.$p.'/'; + if(!$web_server) { + $host = 'http://localhost:'.$p.'/'; + $capabilities = DesiredCapabilities::firefox(); + $firefoxOptions = new FirefoxOptions; + $firefoxOptions->addArguments(['-headless']); + $capabilities->setCapability(FirefoxOptions::CAPABILITY, $firefoxOptions); + try { + return RemoteWebDriver::create($host,$capabilities); + } + catch (Exception $e) { + echo "ERRRRRR_REMOTE : ".$e->getMessage()."\n"; + return NULL; + } + } + //------------FirefoxDriver, geckodriver directly on this process--------// + //shell_exec("kill -s kill `ps -e | grep -e geckodriver | grep -Eo '[0-9]{1,10}' | head -n 1`");sleep(1); + $firefoxOptions = new FirefoxOptions(); + $firefoxOptions->setProfile(new FirefoxProfile()); 
$capabilities = DesiredCapabilities::firefox(); - $firefoxOptions = new FirefoxOptions; $firefoxOptions->addArguments(['--headless']); $capabilities->setCapability(FirefoxOptions::CAPABILITY, $firefoxOptions); try { - return RemoteWebDriver::create($host,$capabilities); + return FirefoxDriver::start($capabilities); } catch (Exception $e) { - echo "ERRRRRR_REMOTE : ".$e->getMessage()."\n"; + echo "ERRRRRR : ".$e->getMessage()."\n"; return NULL; } } @@ -532,14 +551,15 @@ function extract_needed_information_pro_i(array $json, array $needed_key) : arra * @param string $target_product the target product * @param string $town the town * @param int $p port + * @param bool $web_server true or false, forwarded to generate_driver_i() * @example content_scrap_intermarche((@see URL1),"lardons") * @author chriSmile0 * @return array array of all product with specific information that we needed */ -function content_scrap_intermarche(string $target_product, string $town, int $p) : array { +function content_scrap_intermarche(string $target_product, string $town, int $p, bool $web_server) : array { $url = "https://www.intermarche.com/"; $rtn = array(); - $driver = generate_driver_i($p); + $driver = generate_driver_i($p,$web_server); if($driver !== NULL) { $product_needed_key = [ // On ATTRIBUTES @@ -606,14 +626,14 @@ function content_scrap_intermarche(string $target_product, string $town, int $p) * test or if the scrapping failed */ function main_i($argc, $argv) : bool { - if($argc == 5) { - if(empty(content_scrap_intermarche($argv[1],$argv[2],$argv[3]))) { + if($argc == 6) { + if(empty(content_scrap_intermarche($argv[1],$argv[2],$argv[3],strtolower($argv[4])==="true"))) { echo "NO CORRESPONDENCE FOUND \n"; return 0; } } else { - echo "ERROR : format : ". $argv[0] . "[research_product_type] [town] [port] --with-openssl\n"; + echo "ERROR : format : ". $argv[0] . 
"[research_product_type] [town] [port] [?webserver]--with-openssl\n"; return 0; } echo "EXECUTION FINISH WITH SUCCESS \n"; diff --git a/src/scrapper_monoprix.php b/src/scrapper_monoprix.php index e8d4a01..9b1717d 100644 --- a/src/scrapper_monoprix.php +++ b/src/scrapper_monoprix.php @@ -43,6 +43,8 @@ use Facebook\WebDriver\Firefox\FirefoxOptions as FirefoxOptions; use Facebook\WebDriver\Remote\DesiredCapabilities as DesiredCapabilities; use Facebook\WebDriver\Remote\RemoteWebDriver as RemoteWebDriver; +use Facebook\WebDriver\Firefox\FirefoxDriver as FirefoxDriver; +use Facebook\WebDriver\Firefox\FirefoxProfile as FirefoxProfile; require __DIR__ . '/../../../autoload.php'; // EXPORT //require __DIR__ . '/../vendor/autoload.php'; // DEV @@ -100,24 +102,42 @@ function change_quantity_m(string $libelle) : string { /** * [BRIEF] generate an instance of a firefox driver with 'geckodriver' server * (localhost:4444) - * @param int $o port + * @param int $p port + * @param bool $web_server false: connect to the geckodriver server on localhost:$p (RemoteWebDriver); true: start the driver from this process (FirefoxDriver::start) * @example generate_driver_m() * @author chriSmile0 * @return / */ -function generate_driver_m(int $p) { +function generate_driver_m(int $p, bool $web_server) { //-----------------Remote with geckodriver in terminal--------------------// - $host = 'http://localhost:'.$p.'/'; + var_dump($web_server); + if(!$web_server) { + $host = 'http://localhost:'.$p.'/'; + $capabilities = DesiredCapabilities::firefox(); + $firefoxOptions = new FirefoxOptions; + $firefoxOptions->addArguments(['-headless']); + $capabilities->setCapability(FirefoxOptions::CAPABILITY, $firefoxOptions); + try { + return RemoteWebDriver::create($host,$capabilities); + } + catch (Exception $e) { + echo "ERRRRRR_REMOTE : ".$e->getMessage()."\n"; + return NULL; + } + } + //------------FirefoxDriver, geckodriver directly on this process--------// + //shell_exec("kill -s kill `ps -e | grep -e geckodriver | grep -Eo '[0-9]{1,10}' | head -n 1`");sleep(1); + $firefoxOptions = new FirefoxOptions(); + 
$firefoxOptions->setProfile(new FirefoxProfile()); $capabilities = DesiredCapabilities::firefox(); - $firefoxOptions = new FirefoxOptions; - $firefoxOptions->addArguments(['-headless']); + $firefoxOptions->addArguments(['--headless']); $capabilities->setCapability(FirefoxOptions::CAPABILITY, $firefoxOptions); try { - return RemoteWebDriver::create($host,$capabilities); + return FirefoxDriver::start($capabilities); } catch (Exception $e) { - echo "ERRRRRR_REMOTE : ".$e->getMessage()."\n"; + echo "ERRRRRR : ".$e->getMessage()."\n"; return NULL; } } @@ -128,13 +148,15 @@ function generate_driver_m(int $p) { * @param string $url the url to get in the browser * @param int $js_or_selenium 0 for js 1 for sele * @param int $p port + * @param bool $web_server true or false * @example extract_source_monoprix((@see URL1),1) * @author chriSmile0 * @return string the source code */ -function extract_source_monoprix(string $url, int $js_or_selenium, int $p) : string { +function extract_source_monoprix(string $url, int $js_or_selenium, int $p, + bool $web_server) : string { if($js_or_selenium == 1) { - $driver = generate_driver_m($p); + $driver = generate_driver_m($p,$web_server); if($driver == NULL) return ""; $driver->get($url); @@ -321,15 +343,16 @@ function extract_needed_information_pro_m(array $json, array $needed_key) : arra * * @param string $target_product the target product * @param int $p port + * @param bool $web_server true or false * @example content_scrap_monoprix((@see URL1),"lardons") * @author chriSmile0 * @return array array of all product with specific information that we needed */ -function content_scrap_monoprix(string $target_product, int $p) : array { +function content_scrap_monoprix(string $target_product, int $p, bool $web_server) : array { $url = "https://courses.monoprix.fr/products/search?q="; $rtn = array(); //check if $target_product is in the list of product (lardons,oeufs , etc) - $script = extract_source_monoprix($url.$target_product,1,$p); + 
$script = extract_source_monoprix($url.$target_product,1,$p,$web_server); if(empty($prods = all_subcontent_with_trunk_v21_m($script,"{\"productId\":",[",\"retailerFinancingPlanIds\""],false,0,"}"))) return $rtn; @@ -356,14 +379,14 @@ function content_scrap_monoprix(string $target_product, int $p) : array { * test or if the scrapping failed */ function main_m($argc, $argv) : bool { - if($argc == 4) { - if(empty(content_scrap_monoprix($argv[1],$argv[2]))) { + if($argc == 5) { + if(empty(content_scrap_monoprix($argv[1],$argv[2],strtolower($argv[3])==="true"))) { echo "NO CORRESPONDENCE FOUND \n"; return 0; } } else { - echo "ERROR : format : ". $argv[0] . "[research_product_type] [port] --with-openssl\n"; + echo "ERROR : format : ". $argv[0] . "[research_product_type] [port] [?webserver] --with-openssl\n"; return 0; } echo "EXECUTION FINISH WITH SUCCESS \n"; diff --git a/your_project/process_p.php b/your_project/process_p.php index a1a2a78..42d84b3 100644 --- a/your_project/process_p.php +++ b/your_project/process_p.php @@ -143,10 +143,9 @@ function sort_list(array $products) { //-----------------------------END UTILS--------------------------------------// - -function main_(array $elements) { +function main_(array $elements, int $web_server) { $str = my_json_encoding($elements); - exec("php usage.php $str > out.txt"); + exec("php usage.php $str $web_server > out.txt"); $rtn = file_get_contents("out.txt"); $parsing = parse_exec_usage($rtn); return $parsing; @@ -154,6 +153,8 @@ function main_(array $elements) { $arr = array("Auchan"=>["Lardons","Paris"]); $arr2 = array("Leclerc"=>["lardons","Voglans"]); +$arr3 = array("Monoprix"=>["lardons"]); +$arr23 = array("Leclerc"=>["lardons","Voglans"],"Monoprix"=>["lardons"]); function my_json_encoding(array $to_encode) { // FOR MAIN PARAMETER @@ -263,7 +264,7 @@ function display_compare(array $ens, string $product, string $label, array $citi $elements = array_merge($elements,[$e=>[$product,$cities[$i]]]); $i++; } - $to_display 
= main_($elements); // OK + $to_display = main_($elements,1); // OK $full_rtn = display_each_brand(create_cmp_product($to_display,$label)); return $full_rtn; } @@ -352,5 +353,6 @@ function main(string $command_line, bool $web) { } } -main(implode(" ",$argv),false); +//main(implode(" ",$argv),false); +main_($arr23,2); ?> \ No newline at end of file diff --git a/your_project/usage.php b/your_project/usage.php index a865175..e7962b7 100644 --- a/your_project/usage.php +++ b/your_project/usage.php @@ -9,31 +9,30 @@ use function ChriSmile0\Scrapper\content_scrap_intermarche; use function ChriSmile0\Scrapper\content_scrap_monoprix; use function ChriSmile0\Scrapper\content_scrap_systemeu; -use function ChriSmile0\Scrapper\new_version; require_once('../vendor/autoload.php'); function use_scrapper(string $url, bool $with_js) { // OK return scrap_https($url,$with_js); } -function use_content_scrapper_auchan(string $product, string $city, int $p) { // OK - return content_scrap_auchan($product,$city,$p); +function use_content_scrapper_auchan(string $product, string $city, int $p, bool $web_server) { // OK + return content_scrap_auchan($product,$city,$p,$web_server); } -function use_content_scrapper_carrefour(string $product, string $city, int $p) { // OK - return content_scrap_carrefour($product,$city,$p); +function use_content_scrapper_carrefour(string $product, string $city, int $p, bool $web_server) { // OK + return content_scrap_carrefour($product,$city,$p,$web_server); } function use_content_scrapper_leclerc(string $product, string $city) { // OK return content_scrap_leclerc($product,$city); } -function use_content_scrapper_intermarche(string $product, string $city, int $p) { // OK - return content_scrap_intermarche($product,$city,$p); +function use_content_scrapper_intermarche(string $product, string $city, int $p, bool $web_server) { // OK + return content_scrap_intermarche($product,$city,$p,$web_server); } -function use_content_scrapper_monoprix(string $product, int 
$p) { // OK - return content_scrap_monoprix($product,$p); +function use_content_scrapper_monoprix(string $product, int $p, bool $web_server) { // OK + return content_scrap_monoprix($product,$p,$web_server); } function use_content_scrapper_systemeu(string $product, string $city) { // OK @@ -62,6 +61,7 @@ function use_content_scrapper_systemeu(string $product, string $city) { // OK "Intermarche" => ["lardons","Paris"], "Monoprix" => ["lardons"] ]; + $scrappers_usages_min = [ "Carrefour" => ["lardons","Paris"] ]; @@ -84,6 +84,11 @@ function use_content_scrapper_systemeu(string $product, string $city) { // OK "Auchan" => ["lardons","Paris"] // NOT STABLE ]; +$scrappers_usages_min_mc = [ + "Monoprix" => ["lardons"], + "Leclerc" => ["Lardons","Voglans"] +]; + $scrappers_usages_min5 = [ "Carrefour" => ["lardons","Paris"], "Monoprix" => ["lardons"], @@ -172,16 +177,16 @@ function parrallelize_scrapping_process(string $key, array $scrapper_usage, $exec_scrapper = 1; switch($ens) { case "A": - $content = content_scrap_auchan($scrapper_usage[0],$scrapper_usage[1],$port); + $content = content_scrap_auchan($scrapper_usage[0],$scrapper_usage[1],$port,false); break; case "C": - $content = content_scrap_carrefour($scrapper_usage[0],$scrapper_usage[1],$port); + $content = content_scrap_carrefour($scrapper_usage[0],$scrapper_usage[1],$port,false); break; case "I": - $content = content_scrap_intermarche($scrapper_usage[0],$scrapper_usage[1],$port); + $content = content_scrap_intermarche($scrapper_usage[0],$scrapper_usage[1],$port,false); break; case "M": - $content = content_scrap_monoprix($scrapper_usage[0],$port); + $content = content_scrap_monoprix($scrapper_usage[0],$port,false); break; default: $exec_scrapper = 2; // ERROR @@ -209,6 +214,60 @@ function parrallelize_scrapping_process(string $key, array $scrapper_usage, return ","; } +/** + * @version 2 -> use web_server element to true because geckodriver is in the execution + * [BRIEF] If A/C/I/M then : + * - We launch 2 
processes, one for the geckodriver, and the other for the duplicate + * - When we are sure the geckodriver is launched we call the target scrapper by the `$key` + * - We kill all sub processes and geckodriver processes + * Else (no process needed) + * The return is the size of the return or just a ',' if we found nothing + * + * @param string $key the supermarket name + * @param array $scrapper_usage the research and the target city + * @param int $port port to connect geckodriver in case we use the port + * @example parrallelize_scrapping_process_v2("Monoprix",["lardons"],4444=default) + * @author chriSmile0 + * @return string content_scrap_... return or "," if not found +*/ +function parrallelize_scrapping_process_v2(string $key, array $scrapper_usage, + int $port) : string { // gecko -> auchan,monoprix,carrefour,intermarche + $ens = $key[0]; + $rtn = ""; + $content = array(); + if($ens !== "S" && $ens !== "L") { + switch($ens) { + case "A": + $content = content_scrap_auchan($scrapper_usage[0],$scrapper_usage[1],$port,true); + break; + case "C": + $content = content_scrap_carrefour($scrapper_usage[0],$scrapper_usage[1],$port,true); + break; + case "I": + $content = content_scrap_intermarche($scrapper_usage[0],$scrapper_usage[1],$port,true); + break; + case "M": + $content = content_scrap_monoprix($scrapper_usage[0],$port,true); + break; + default: + break; + } + } + else if($ens === "L") { // NO GECKODRIVER + $content = content_scrap_leclerc($scrapper_usage[0],$scrapper_usage[1]); + } + else if($ens === "S") { // PUPPETEER + $content = content_scrap_systemeu($scrapper_usage[0],$scrapper_usage[1]); + } + else { // UNKNOWN + $content = ""; + } + $rtn = (check_scrapper_return($content)==-1) ? 
"" : json_encode($content); + if($rtn !== "") + return "".strlen($rtn).",".$rtn; + return ","; +} + /** * [BRIEF] * @param array $scrappers_usage target supermarket and target_product and town @@ -216,7 +275,7 @@ function parrallelize_scrapping_process(string $key, array $scrapper_usage, * @author chriSmile0 * @return the scrapping content of each usage in the array in parameter */ -function globals_execs(array $scrappers_usage) { // OK +function globals_execs(array $scrappers_usage, int $v1_or_v2) { // OK $childs = array(); $recv_content = array(); $ports = 4444; @@ -234,7 +293,7 @@ function globals_execs(array $scrappers_usage) { // OK die('Fork failed'); exit(0); case 0: - $rtn = parrallelize_scrapping_process($key,$usages,$port); // DUMP RESULT IN PIPE + $rtn = ($v1_or_v2==1) ? parrallelize_scrapping_process($key,$usages,$port) : parrallelize_scrapping_process_v2($key,$usages,$port); // DUMP RESULT IN PIPE $offset = 0; $size = substr($rtn,0,$offset=strpos($rtn,",")); $size = ($size===FALSE || $size==="") ? "0" : $size; // FALSE -> 7.2, "" -> 8.0 // FOR REST OF THE PACKAGE FOR COMPATIBILITY!!! 
@@ -294,18 +353,20 @@ function my_json_encoding_2($to_encode) { // FOR MAIN PARAMETER function main_u($argc, $argv) { + $web_server = intval($argv[2]); + array_pop($argv); $arr = implode(",",array_slice($argv,1)); $elements = json_decode($arr,true);// OK - echo json_encode(globals_execs($elements)); // recv this in process_p for print + echo json_encode(globals_execs($elements,$web_server)); // recv this in process_p for print } //echo json_encode(globals_execs($scrappers_usages_min_ca)); main_u($argc,$argv); -//var_dump(globals_execs($scrappers_usages_min_l)); +//var_dump(globals_execs($scrappers_usages_min_mc,2)); //var_dump(use_content_scrapper_auchan("Lardons","Paris",4444)); //var_dump(use_content_scrapper_leclerc("Saumon","Annecy")); //var_dump(use_content_scrapper_carrefour("Lardons fume","Paris",4444)); //var_dump(use_content_scrapper_intermarche("Lardons","Paris",4444)); -//var_dump(use_content_scrapper_monoprix("Saumon",4444)); +//var_dump(use_content_scrapper_monoprix("Lardons",4444,true)); // TRY THIS //var_dump(use_content_scrapper_systemeu("Lardons","Paris")); ?> \ No newline at end of file