基于PHP的cURL快速入门教程 (小偷采集程序)

2019-04-09 06:33:16 丽君

// Concurrent link checker built on the curl_multi API.
//
// Inputs read from the enclosing scope: $max_connections (how many transfers
// to start up front) and $url_list (passed to add_url_to_multi_handle()).
// Outputs appended to: $dead_urls, $not_found_urls, $working_urls.
// NOTE(review): add_url_to_multi_handle() is defined outside this excerpt;
// it is assumed to attach the next pending URL to $mh and to return a truthy
// value only when a handle was actually added -- confirm against its definition.
// NOTE(review): the three result arrays are assumed to be initialised before
// this point, and $mh is assumed to be released with curl_multi_close() by
// the code that follows -- confirm.

// 1. Create the multi handle that drives all transfers
$mh = curl_multi_init();
// 2. Queue the initial batch of URLs
for ($i = 0; $i < $max_connections; $i++) {
    add_url_to_multi_handle($mh, $url_list);
}
// 3. Start the transfers
do {
    $mrc = curl_multi_exec($mh, $active);
} while ($mrc === CURLM_CALL_MULTI_PERFORM);
// 4. Main loop
while (true) {
    // 5. Drain EVERY pending completion message. Several transfers can finish
    //    during a single curl_multi_exec() call; reading only one message per
    //    pass would drop the rest once $active reaches zero.
    while ($mhinfo = curl_multi_info_read($mh)) {
        // 6. Fetch the transfer details from the finished easy handle
        $chinfo = curl_getinfo($mhinfo['handle']);
        if (!$chinfo['http_code']) {
            // 7. No HTTP status at all: dead link
            $dead_urls[] = $chinfo['url'];
        } elseif ($chinfo['http_code'] === 404) {
            // 8. The server answered, but the page is missing
            $not_found_urls[] = $chinfo['url'];
        } else {
            // 9. Any other status counts as a working link
            $working_urls[] = $chinfo['url'];
        }
        // 10. Detach and release the finished handle
        curl_multi_remove_handle($mh, $mhinfo['handle']);
        curl_close($mhinfo['handle']);
        // 11. Refill the freed slot and start the new transfer right away
        if (add_url_to_multi_handle($mh, $url_list)) {
            do {
                $mrc = curl_multi_exec($mh, $active);
            } while ($mrc === CURLM_CALL_MULTI_PERFORM);
        }
    }
    // 12. Stop once nothing is running (or the multi stack reported an error).
    //     The check comes after draining so no finished transfer is lost.
    if (!$active || $mrc !== CURLM_OK) {
        break;
    }
    // 13. Wait for socket activity. A return of -1 means select() could not
    //     wait; sleep briefly instead of spinning, then still drive the
    //     transfers so the loop always makes progress.
    if (curl_multi_select($mh) === -1) {
        usleep(1000);
    }
    // 14. Do the work
    do {
        $mrc = curl_multi_exec($mh, $active);
    } while ($mrc === CURLM_CALL_MULTI_PERFORM);
}

相关文章 大家在看