一些php常用函数积累

本文链接
<?php

// id: ecffe70d3af54df9bad97b61918ace7d 
// Module bootstrap: sets the global switches used by the CT()/CT_log()/clog()
// helpers below and opens the trace/log files when paths were supplied.
// $ct_path and $ct_log_path are expected to be set by whoever includes this
// file; when neither is set no file handle is opened.
global $ct_path, $ct_log_path;
// File that clog() appends to and clear_log() deletes.
$log_path = "test_php.txt";
// Whether CT() first collects its lines in a buffer that CT_flush() later
// writes to the file in one go.
$ct_log_buffer = true;
// When true, CT() returns immediately without recording anything.
$CT_off = true;
// Identifier that CT() puts on every (non-raw) line it records.
$request_num = uniqid();
// "raw" makes CT() record the content only, without indentation or timing.
$CT_format = "";

// $file is the handle CT()/CT_flush() write to; it doubles as the CT_log()
// handle unless $ct_log_path overrides that below.
if ($ct_path) {
    $dir = dirname($ct_path);
    if (!file_exists($dir)) {
        mkdir($dir, 0777, true);
    }
    $file = fopen($ct_path, "a+");
    $file_ct_log = $file;
}
// A dedicated CT_log() target replaces the handle assigned above.
if ($ct_log_path) {
    $dir = dirname($ct_log_path);
    if (!file_exists($dir)) {
        mkdir($dir, 0777, true);
    }
    $file_ct_log = fopen($ct_log_path, "a+");
}

// Lines recorded by CT() while buffering is enabled.
$ct_buffer = [];

// Optional per-installation file, loaded only when it exists next to this one
// (presumably used to override the settings above - confirm).
$path_my = __DIR__ . "/common.my.php";
if (is_file($path_my)) {
    require $path_my;
}

/**
 * Delete the log file named by the global $log_path.
 *
 * Does nothing when the file is absent, so the function can be called
 * repeatedly (or before anything was logged) without raising a warning.
 */
function clear_log() {
    global $log_path;
    if ($log_path && is_file($log_path)) {
        unlink($log_path);
    }
}

/**
 * Append $content to the file named by the global $log_path and mirror it
 * through CT_log().
 *
 * @param mixed $content  text to write
 * @param bool  $with_lf  when true, a "\n" is written after $content and an
 *                        extra CT_log("\n") call is made
 */
function clog($content, $with_lf = true) {
    global $log_path;
    // fopen() returns false when the file cannot be opened; in that case the
    // file writes are skipped instead of handing false to fwrite()/fclose().
    $handle = fopen($log_path, "a+");
    if ($handle) {
        fwrite($handle, $content);
    }
    CT_log($content);
    if ($with_lf) {
        if ($handle) {
            fwrite($handle, "\n");
        }
        CT_log("\n");
    }
    if ($handle) {
        fclose($handle);
    }
}

/**
 * Read $obj[$key], falling back to $def when the key is missing or its
 * value is null (isset() semantics).
 *
 * @param array|ArrayAccess $obj container to read from
 * @param int|string        $key key to look up
 * @param mixed             $def value returned when the key is not set
 * @return mixed
 */
function get_safe($obj, $key, $def = NULL) {
    return isset($obj[$key]) ? $obj[$key] : $def;
}

/**
 * Render the current call stack as text: a header line carrying $title,
 * followed by one line per frame reported by debug_backtrace().
 *
 * @param string $title label appended to the header line
 * @return string
 */
function get_stack_trace($title = "") {
    $lines = ["=================stack trace:" . $title . "\n"];
    foreach (debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS) as $frame) {
        // Frames may lack file/line/class; get_safe() yields null for those.
        $lines[] = sprintf(
            "file:%s, line:%d, class:%s, function:%s\n",
            get_safe($frame, 'file'),
            get_safe($frame, 'line'),
            get_safe($frame, 'class'),
            get_safe($frame, 'function')
        );
    }
    return implode("", $lines);
}

/**
 * Write the current stack trace (see get_stack_trace()) to the log via clog().
 *
 * @param string $title label passed through to get_stack_trace()
 */
function log_stack_trace($title = "") {
    $trace = get_stack_trace($title);
    clog($trace);
}

/**
 * Error handler that logs the error level, code and message followed by a
 * stack trace.
 *
 * Install it with:
 *   $old_error_handler = set_error_handler("err_handler");
 *
 * @param int    $errno   error level reported by PHP
 * @param string $errstr  error message
 * @param string $errfile file the error was raised in (not logged here)
 * @param int    $errline line the error was raised on (not logged here)
 * @return bool always true, so PHP's internal error handler is not executed
 */
function err_handler($errno, $errstr, $errfile, $errline)
{
    $errno_map = array(1 => "E_ERROR", 2 => "E_WARNING", 4 => "E_PARSE", 8 => "E_NOTICE",
        16 => "E_CORE_ERROR", 32 => "E_CORE_WARNING", 64 => "E_COMPILE_ERROR",
        128 => "E_COMPILE_WARNING", 256 => "E_USER_ERROR", 512 => "E_USER_WARNING",
        1024 => "E_USER_NOTICE", 2048 => "E_STRICT", 4096 => "E_RECOVERABLE_ERROR",
        8192 => "E_DEPRECATED", 16384 => "E_USER_DEPRECATED", 32767 => "E_ALL");
    // A level missing from the map must not raise a second error from inside
    // the handler itself, so fall back to a placeholder name.
    $level = isset($errno_map[$errno]) ? $errno_map[$errno] : "E_UNKNOWN";
    clog(sprintf("------------ %s(%d), msg:%s", $level, $errno, $errstr));
    log_stack_trace("");
    /* Don't execute PHP internal error handler */
    return true;
}

/**
 * Return the current system time in seconds as a float.
 *
 * Side effect: sets the default timezone to Asia/Shanghai on every call.
 *
 * @return float
 */
function get_time() {
    date_default_timezone_set('Asia/Shanghai');
    // microtime() without arguments yields "<fraction> <seconds>".
    $parts = explode(" ", microtime());
    $fraction = (float)$parts[0];
    $seconds = (float)$parts[1];
    return $fraction + $seconds;
}

/**
 * Prefix placed in front of CT()/CT_log() lines.
 *
 * The prefix is currently switched off, so this always returns "". The code
 * behind the switch would build "<pid> <remote ip> <m-d H:i:s> ", caching pid
 * and ip in globals on first use.
 *
 * @return string
 */
function get_prefix() {
    $prefix_enabled = false;
    if (!$prefix_enabled) {
        return "";
    }

    global $ip, $pid;
    if (!isset($ip)) {
        $pid = getmypid();
        $ip = $_SERVER['REMOTE_ADDR'];
    }
    return $pid.' '.$ip.' '.date("m-d H:i:s ");
}

/**
 * Record one trace line for $content.
 *
 * Nothing happens when the global $CT_off is truthy or no CT file handle
 * ($file) is open. In "raw" format the line is just $content plus "\n".
 * Otherwise the line is indented by the call-stack depth, carries the request
 * id, and ends with timing figures: absolute time, time since the first CT()
 * call and time since the previous CT() call (flagged "----overtime" when the
 * latter exceeds 10 ms).
 *
 * The line is appended to $ct_buffer while $ct_log_buffer is true, otherwise
 * it is written to $file directly.
 *
 * @param string $content text to record
 */
function CT($content) {
    Global $CT_off, $file;
    if($CT_off || !$file)
        return;

    Global $last_time, $first_time, $is_first, $ct_log_buffer, $ct_buffer, $request_num, $CT_format;
    if ($CT_format == "raw") {
        $all_out = $content . "\n";
    } else {
        // Derive the indentation from the stack trace
        $array =debug_backtrace(DEBUG_BACKTRACE_IGNORE_ARGS);
        $ignore_count = 0;
        $count = count($array);
        // Frames that are only call plumbing do not add indentation
        $ignore_names = ["call_user_func_array", "call_user_func", "spl_autoload_call"];
        $ignore_classes = ["ReflectionClass"];
        for ($i = 2; $i < $count; $i++) {
            $frame = $array[$i];
            if (in_array($frame["function"], $ignore_names) || isset($frame["class"]) && in_array($frame["class"], $ignore_classes)) {
                $ignore_count++;
            }
        }
        $all_out = get_prefix() . str_pad("", $count - 2 - $ignore_count, " ") . $request_num . " " . $content;
        $cur_time=get_time();
        // $is_first becomes true once the first call has set the reference times
        if(!$is_first) {
            $is_first = true;
            $last_time = $first_time = $cur_time;
        }
        $total_time=$cur_time-$first_time;
        $delta_time=$cur_time-$last_time;
        $overtime_flag = "";
        // Add the overtime marker when more than 10 ms passed since the last call
        if($delta_time * 1000 > 10)
            $overtime_flag = "----overtime";

        $all_out = $all_out." cur_time: $cur_time, total_time: $total_time, delta_time: $delta_time $overtime_flag\n";
        $last_time=$cur_time;
    }

    if ($ct_log_buffer === true) {
        $ct_buffer[] = $all_out;
    } else {
        fwrite($file, $all_out);
    }
}

/**
 * Write the buffered CT lines to the CT file and empty the buffer.
 *
 * Does nothing when no CT file handle is open.
 *
 * @param bool $turn_off_buffer whether to switch buffering off afterwards, so
 *                              that lines recorded later (e.g. by functions run
 *                              through register_shutdown_function) are written
 *                              straight to the file
 */
function CT_flush($turn_off_buffer)
{
    global $file, $ct_buffer, $ct_log_buffer;
    if ($file) {
        $pending = implode("", $ct_buffer);
        fwrite($file, $pending);
        $ct_buffer = [];
        $ct_log_buffer = !$turn_off_buffer;
    }
}

/**
 * Log $content (rendered with print_r) followed by a newline.
 *
 * With $path the line is appended to that file, creating its directory when
 * needed; otherwise it goes to the already opened global CT log handle.
 * Nothing is logged when neither a path nor an open handle is available.
 *
 * @param mixed       $content value to log
 * @param string|null $path    optional file to append to instead of the global handle
 */
function CT_log($content = "", $path = NULL) {
    global $file_ct_log;
    if (!$path && !$file_ct_log) {
        return;
    }

    $line = get_prefix() . print_r($content, true) . "\n";
    if (!$path) {
        fwrite($file_ct_log, $line);
        return;
    }

    file_create_path($path);
    $handle = fopen($path, "a+");
    if ($handle) {
        fwrite($handle, $line);
        fclose($handle);
    }
}

/**
 * Log a message at info level by forwarding to SeasLog::info().
 *
 * @param $content message passed as the first argument
 * @param array $params passed through unchanged as the second argument
 *                      (presumably placeholder values for $content - confirm
 *                      against the SeasLog documentation)
 * @param string $logger passed through as the third argument; defaults to "default"
 */
function slog($content, $params = [], $logger = "default")
{
    SeasLog::info($content, $params, $logger);
}

/**
 * Log a message at debug level by forwarding to SeasLog::debug().
 *
 * @param $content message passed as the first argument
 * @param array $params passed through unchanged as the second argument
 *                      (presumably placeholder values for $content - confirm
 *                      against the SeasLog documentation)
 * @param string $logger passed through as the third argument; defaults to "default"
 */
function slog_debug($content, $params = [], $logger = "default")
{
    SeasLog::debug($content, $params, $logger);
}

/**
 * Log a message at error level by forwarding to SeasLog::error().
 *
 * @param $content message passed as the first argument
 * @param array $params passed through unchanged as the second argument
 *                      (presumably placeholder values for $content - confirm
 *                      against the SeasLog documentation)
 * @param string $logger passed through as the third argument; defaults to "default"
 */
function slog_error($content, $params = [], $logger = "default")
{
    SeasLog::error($content, $params, $logger);
}

/**
 * Log a message at warning level by forwarding to SeasLog::warning().
 *
 * @param $content message passed as the first argument
 * @param array $params passed through unchanged as the second argument
 *                      (presumably placeholder values for $content - confirm
 *                      against the SeasLog documentation)
 * @param string $logger passed through as the third argument; defaults to "default"
 */
function slog_warning($content, $params = [], $logger = "default")
{
    SeasLog::warning($content, $params, $logger);
}

/**
 * Return the value of one HTTP response header.
 *
 * $response must be a curl result that still contains the header block
 * (CURLOPT_HEADER enabled), e.g.:
 *
 *   HTTP/1.1 200 OK
 *   Server: Tengine/2.1.2
 *   Date: Sun, 02 Apr 2017 02:49:34 GMT
 *   Content-Type: text/html; charset=gb2312
 *   Content-Length: 124378
 *   Connection: keep-alive
 *   Cache-Control: private
 *
 * @param resource|CurlHandle $ch       handle the response was fetched with
 * @param string              $response raw response (headers + body)
 * @param string              $key      header name without the colon
 * @return string trimmed value of the first matching header, "" when absent
 */
function curl_get_header($ch, $response, $key)
{
    $header_size = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
    $header = substr($response, 0, $header_size);

    $prefix = $key . ":";
    $prefix_len = strlen($prefix);
    foreach (explode("\r\n", $header) as $line) {
        // Header field names are case-insensitive (HTTP/2 responses arrive
        // lower-cased), so the comparison must ignore case.
        if (strncasecmp($line, $prefix, $prefix_len) === 0) {
            return trim(substr($line, $prefix_len));
        }
    }
    return "";
}

/**
 * Extract the status code from the first line of the response headers.
 *
 * @param resource|CurlHandle $ch       handle the response was fetched with
 * @param string              $response raw response including the header block
 * @return string second space-separated token of the status line, e.g. "200"
 */
function curl_get_status($ch, $response)
{
    $header = substr($response, 0, curl_getinfo($ch, CURLINFO_HEADER_SIZE));
    $lines = explode("\r\n", $header);
    $status_parts = explode(" ", $lines[0]);
    return $status_parts[1];
}

/**
 * Tell whether $haystack begins with $needle. An empty needle always matches.
 *
 * @param string $haystack
 * @param string $needle
 * @return bool
 */
function startsWith($haystack, $needle)
{
    if ($needle === "") {
        return true;
    }
    // Compare only the first strlen($needle) bytes of both strings.
    return strncmp($haystack, $needle, strlen($needle)) === 0;
}

/**
 * Fetch $url with a GET request and return the response body.
 *
 * 301/302 responses are followed manually, with at most three requests in
 * total. Side effects: the upstream Content-Type header (when present) is
 * forwarded to the client with header(), and an upstream 404 also emits a
 * "Status: 404 Not Found" header.
 *
 * NOTE(review): certificate verification is switched off
 * (CURLOPT_SSL_VERIFYPEER => 0); confirm this is acceptable for the URLs
 * passed in.
 *
 * @param string $url    URL to fetch
 * @param mixed  $status receives the last HTTP status code seen, or -1 when no
 *                       response was obtained
 * @return string body on 200, otherwise a message describing the failure
 */
function http_get_core($url, &$status = null)
{
    CT_log("-----------http_get:" . $url);
    $ch = curl_init();
    $status = -1;

    // At most 3 requests are made while following 301/302 redirects.
    for ($i = 0; $i < 3; $i++) {
        $options = array(
            CURLOPT_HEADER => 1,          // keep the header block in the response
            CURLOPT_POST => 0,            // not a POST request
            CURLOPT_URL => $url,          // request URL
            CURLOPT_RETURNTRANSFER => 1,  // return the response instead of printing it
            CURLOPT_TIMEOUT_MS => 30000,  // timeout (ms)
            CURLOPT_POSTFIELDS => http_build_query(array()), // empty request parameters
            CURLOPT_SSL_VERIFYPEER => 0,  // certificates are not verified
        );
        curl_setopt_array($ch, $options);
        curl_setopt($ch, CURLOPT_ENCODING, "gzip,deflate"); // original note: Baidu does not support this
        curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');

        $response = curl_exec($ch);
        if (!$response) {
            $msg = array(
                "status" => -1
            );
            $content = "<p id='http_util_message_block' style='display: none'>" . json_encode($msg) . "</p>";
            $content .= "invoke error[" . curl_error($ch) . "]";
            curl_close($ch);
            return $content;
        }

        // Some sites declare different encodings in the HTTP header and in
        // <head>, which garbles the page; forward the header's encoding
        // information to the client when there is any.
        $content_type = curl_get_header($ch, $response, "Content-Type");
        if ($content_type) {
            header("Content-Type: " . $content_type);
        }

        $header_size = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
        $status = $code = curl_get_status($ch, $response);
        if ($code == 301 || $code == 302) {
            $redirect_url = curl_get_header($ch, $response, "Location");
            if ($redirect_url === "") {
                // A redirect without a target cannot be followed.
                curl_close($ch);
                return "http error, code=" . $code . "\n" . "redirect without Location header";
            }
            // Resolves absolute, root-relative and relative targets against
            // the URL that produced the redirect.
            $url = get_absolute_url($redirect_url, $url);
            continue;
        }

        if ($code == 404) {
            header("Status: 404 Not Found");
            $msg = array(
                "status" => 404
            );
            $content = "<p id='http_util_message_block' style='display: none'>" . json_encode($msg) . "</p>";
            $content .= "404, not found!";
        } else if ($code == 200) {
            $content = substr($response, $header_size);
        } else {
            $content = "http error, code=" . $code . "\n" . substr($response, $header_size);
        }
        curl_close($ch);
        return $content;
    }

    // Every attempt ended in a redirect: release the handle and report it
    // instead of silently returning nothing.
    curl_close($ch);
    return "http error, too many redirects, last url=" . $url;
}

/**
 * Make sure the directory that will contain $path exists, creating missing
 * parent directories recursively (mode 0755). $path itself is not created.
 *
 * @param string $path file path whose directory is needed
 */
function file_create_path($path)
{
    $parent = dirname($path);
    if (!$parent) {
        return;
    }
    if (file_exists($parent)) {
        return;
    }
    mkdir($parent, 0755, true);
}

/**
 * Save $content to $save_path, creating the target directory when needed.
 *
 * Original note: $save_path needs to be GBK-encoded.
 *
 * @param string $content   data to write
 * @param string $save_path destination file
 * @param bool   $append    true appends to the file, false replaces its contents
 */
function file_save($content, $save_path, $append)
{
    file_create_path($save_path);

    if (!$append) {
        file_put_contents($save_path, $content);
        return;
    }

    $handle = fopen($save_path, "a+");
    if ($handle) {
        fwrite($handle, $content);
        fclose($handle);
    }
}

/**
 * Default curl wrapper: run one request against $url and hand back the
 * handle, the response body and the curl error text.
 *
 * @param string $url             URL to request
 * @param bool   $close_after_use close the handle before returning
 * @return array ["handle" => curl handle, "response" => body or false, "err" => error text]
 */
function curl_do($url, $close_after_use = true)
{
    CT_log("curl_do: " . $url);

    $ch = curl_init();
    curl_setopt_array($ch, [
        CURLOPT_HEADER => 0,          // body only, no header block
        CURLOPT_POST => 0,            // not a POST request
        CURLOPT_URL => $url,          // request URL
        CURLOPT_RETURNTRANSFER => 1,  // return the response instead of printing it
        CURLOPT_TIMEOUT_MS => 30000,  // timeout (ms)
        CURLOPT_POSTFIELDS => http_build_query([]), // empty request parameters
        CURLOPT_SSL_VERIFYPEER => 0,  // certificates are not verified
    ]);
    curl_setopt($ch, CURLOPT_ENCODING, "gzip");

    $result = ["handle" => $ch, "response" => curl_exec($ch), "err" => curl_error($ch)];
    if ($result["err"]) {
        CT_log("curl error: " . $result["err"]);
    }
    if ($close_after_use) {
        curl_close($ch);
    }
    return $result;
}

/**
 * Resolve $url against $base. The user/pass URL components are not handled.
 * See get_absolute_url_tests() for the detailed expectations.
 *
 * $url is returned untouched when $base is empty or $url already has a host.
 * Otherwise the result is built from the base's scheme/host/port (when the
 * base has a host) plus either $url itself (root-relative) or the directory
 * part of the base path followed by $url.
 *
 * @param $url
 * @param $base
 * @return string
 */
function get_absolute_url($url, $base)
{
    if (!$base) {
        return $url;
    }
    if (parse_url($url, PHP_URL_HOST)) {
        return $url;
    }

    $parts = parse_url($base);
    $host = get_safe($parts, "host", "");
    $path = get_safe($parts, "path");

    // "scheme://host[:port]" when the base names a host, otherwise empty.
    $origin = "";
    if ($host) {
        $port = isset($parts["port"]) ? ":" . $parts["port"] : "";
        $origin = get_safe($parts, "scheme", "") . "://" . $host . $port;
    }

    if (startsWith($url, "/")) {
        return $origin . $url;
    }

    // Directory of the base path, including its trailing "/".
    $slash = $path ? strrpos($path, "/") : false;
    if ($slash !== false) {
        return $origin . substr($path, 0, $slash + 1) . $url;
    }

    return $host ? $origin . "/" . $url : $url;
}

/**
 * Self-test for get_absolute_url(). Each case is [url, base, expected].
 * Every computed URL is echoed; a run stops at its first mismatch and ends
 * with a "pass: " line ("1" when all cases matched, empty otherwise).
 */
function get_absolute_url_tests()
{
    $cases = [
        // empty tests
        [null, null, null],
        ["", null, ""],
        [null, "", null],
        ["", "", ""],

        [null, "/", "/"],
        [null, "/a", "/"],
        [null, "/a/", "/a/"],

        ["a.html", "b", "a.html"],
        ["a.html", "", "a.html"],
        ["a.html", "/", "/a.html"],
        ["a/b/c/a.html", "http://1.1.1.1", "http://1.1.1.1/a/b/c/a.html"],
        ["a.html", "http://1.1.1.1:83", "http://1.1.1.1:83/a.html"],
        ["a.html", "http://1.1.1.1:83/", "http://1.1.1.1:83/a.html"],
        ["a.html", "http://1.1.1.1:83/a/b", "http://1.1.1.1:83/a/a.html"],
        ["a.html", "http://1.1.1.1:83/?", "http://1.1.1.1:83/a.html"],
        ["a.html", "http://1.1.1.1:83?", "http://1.1.1.1:83/a.html"],
        ["a.html", "http://1.1.1.1:83?a=b", "http://1.1.1.1:83/a.html"],
        ["a.html", "https://1.1.1.1:83?a=b#1", "https://1.1.1.1:83/a.html"],
        ["a.html", "www.baidu.com?a=b#1", "a.html"], // "www..." is treated as a path

        // starts with "/"
        ["/a.html", "b", "/a.html"],
        ["/a.html", "", "/a.html"],
        ["/a.html", "/", "/a.html"],
        ["/a/b/c/a.html", "http://1.1.1.1", "http://1.1.1.1/a/b/c/a.html"],
        ["/a.html", "http://1.1.1.1:83", "http://1.1.1.1:83/a.html"],
        ["/a.html", "http://1.1.1.1:83/", "http://1.1.1.1:83/a.html"],
        ["/a.html", "http://1.1.1.1:83/a/b", "http://1.1.1.1:83/a.html"],
        ["/a.html", "http://1.1.1.1:83/?", "http://1.1.1.1:83/a.html"],
        ["/a.html", "http://1.1.1.1:83?", "http://1.1.1.1:83/a.html"],
        ["/a.html", "http://1.1.1.1:83?a=b", "http://1.1.1.1:83/a.html"],
        ["/a.html", "https://1.1.1.1:83?a=b#1", "https://1.1.1.1:83/a.html"],
        ["/a.html", "www.baidu.com?a=b#1", "/a.html"], // "www..." is treated as a path
    ];

    $passed = true;
    foreach ($cases as $case) {
        $actual = get_absolute_url($case[0], $case[1]);
        echo $actual . "\n";
        if ($actual !== $case[2]) {
            $passed = false;
            break;
        }
    }
    echo "pass: " . $passed . "\n";

    // URLs that already carry a host must come back unchanged, whatever the base.
    $self_cases = [
        ["http://test.com/a.html", "b", ""],
        ["http://test.com/a.html", "", ""],
        ["http://test.com/a.html", "/", ""],
        ["http://test.com/a/b/c/a.html", "http://1.1.1.1", ""],
        ["http://test.com/a.html", "http://1.1.1.1:83", ""],
        ["http://test.com/a.html", "http://1.1.1.1:83/", ""],
        ["http://test.com/a.html", "http://1.1.1.1:83/a/b", ""],
        ["http://test.com/a.html", "http://1.1.1.1:83/?", ""],
        ["http://test.com/a.html", "http://1.1.1.1:83?", ""],
        ["http://test.com/a.html", "http://1.1.1.1:83?a=b", ""],
        ["http://test.com/a.html", "https://1.1.1.1:83?a=b#1", ""],
        ["http://test.com/a.html", "www.baidu.com?a=b#1", ""], // "www..." is treated as a path
    ];

    echo "-------------------return self test--------:\n";
    $passed = true;
    foreach ($self_cases as $case) {
        $actual = get_absolute_url($case[0], $case[1]);
        echo $actual . "\n";
        if ($actual !== $case[0]) {
            $passed = false;
            break;
        }
    }
    echo "pass: " . $passed . "\n";
}

/**
 * Normalize a path by resolving ".." segments. Backslashes are converted to
 * forward slashes first; "." and empty segments are kept as they are.
 *
 * eg:
 * a/../b => b
 * ../a/../../b => ../../b
 * ./a/../b => ./b
 * /a/../b => /b
 *
 * ".." segments that climb above the start of a relative path are preserved
 * as a leading run of "../"; on an absolute path they stop at the root.
 *
 * @param $path
 * @return mixed|string the normalized path; empty input is returned unchanged
 */
function normalize_path($path)
{
    if (!$path) {
        return $path;
    }
    $segments = explode("/", str_replace("\\", "/", $path));
    // An absolute path starts with an empty segment (the part before the first "/").
    $isAbsolute = ($segments[0] === "");

    $leadingParents = 0; // ".." segments that could not be resolved
    $stack = [];
    foreach ($segments as $segment) {
        if ($segment !== "..") {
            $stack[] = $segment;
            continue;
        }
        if ($isAbsolute && count($stack) === 1) {
            // ".." at the root stays at the root; never drop the root marker.
            continue;
        }
        if (count($stack) === 0) {
            $leadingParents++;
        } else {
            array_pop($stack);
        }
    }

    if ($isAbsolute && count($stack) === 1) {
        return "/";
    }
    if ($leadingParents > 0) {
        // One ".." element per unresolved step, joined like any other segment.
        $stack = array_merge(array_fill(0, $leadingParents, ".."), $stack);
    }
    return implode("/", $stack);
}

/**
 * is_file() that tries the path in both GBK and UTF-8.
 *
 * The path is checked as given first; if that fails it is converted to the
 * other encoding (GBK -> UTF-8 when it is detected as GB2312, otherwise
 * UTF-8 -> GBK) and checked again.
 *
 * @param string $path
 * @return bool
 */
function is_file_ex($path)
{
    if (is_file($path)) {
        return true;
    }
    $looks_gbk = mb_detect_encoding($path, "gb2312", true) === 'EUC-CN';
    $alternate = $looks_gbk
        ? iconv("gbk", "utf-8", $path)
        : iconv("utf-8", "gbk", $path);
    return is_file($alternate);
}

/**
 * file_get_contents() that tries the path in both GBK and UTF-8.
 *
 * The path is tried as given first; if no such file exists it is converted to
 * the other encoding (GBK -> UTF-8 when it is detected as GB2312, otherwise
 * UTF-8 -> GBK) and tried again.
 *
 * @param string $path
 * @return string|false file contents, or false when neither variant is a file
 */
function file_get_contents_ex($path)
{
    if (is_file($path)) {
        return file_get_contents($path);
    }
    $looks_gbk = mb_detect_encoding($path, "gb2312", true) === 'EUC-CN';
    $alternate = $looks_gbk
        ? iconv("gbk", "utf-8", $path)
        : iconv("utf-8", "gbk", $path);
    return is_file($alternate) ? file_get_contents($alternate) : false;
}

/**
 * Create a lock file, creating its directory first when needed.
 *
 * The file is opened in "x" mode, which fails when it already exists, so only
 * one caller obtains a handle. An "already exists" failure is the normal
 * "lock is taken" outcome and is not reported; any other failure (e.g. a
 * permission problem) is described in $output.
 *
 * @param string $path   lock file path
 * @param mixed  $output receives a description of an unexpected failure
 * @return resource|false|null file handle when the lock was obtained, a falsy
 *                             value otherwise
 */
function create_file_lock($path, &$output = null)
{
    file_create_path($path);
    $f = null;
    $message = null;
    $detail = null;

    // fopen() reports failures as warnings rather than exceptions, so capture
    // the warning text for the duration of the call instead of printing it.
    set_error_handler(function ($errno, $errstr) use (&$message, &$detail) {
        $message = $detail = $errstr;
        return true;
    });
    try {
        $f = fopen($path, "x");
    } catch (Exception $e) {
        $message = $e->getMessage();
        $detail = $e->getMessage() . "\n" . $e->getTraceAsString();
    } finally {
        restore_error_handler();
    }

    if ($message !== null
        && strpos($message, "File exists") === false
        && strpos($message, "文件已存在") === false) {
        $output = $detail;
    }
    return $f;
}

/**
 * Close and release a file lock: closes the handle and deletes the lock file.
 * Does nothing when $f is falsy (the lock was never obtained).
 *
 * @param resource|false|null $f    handle returned by create_file_lock()
 * @param string              $path lock file path
 */
function release_file_lock($f, $path)
{
    if (!$f) {
        return;
    }
    fclose($f);
    unlink($path);
}

/**
 * date() with microsecond support: every unescaped "u" in $format is replaced
 * by the six-digit, zero-padded microsecond part of $utimestamp.
 *
 * echo udate('Y-m-d H:i:s.u T');
 *
 * @param string $format date() format; "u" stands for microseconds
 * @param null $utimestamp Unix timestamp with fractional seconds; defaults to
 *                         microtime(true)
 * @return false|string
 */
function udate ($format = 'u', $utimestamp = null)
{
    if (is_null($utimestamp))
        $utimestamp = microtime(true);

    $timestamp = (int) floor($utimestamp);
    $microseconds = (int) round(($utimestamp - $timestamp) * 1000000);
    // Rounding can reach a full second; carry it into the seconds part.
    if ($microseconds >= 1000000) {
        $timestamp++;
        $microseconds -= 1000000;
    }
    // Always six digits, so .0625 s renders as "062500" and not "62500".
    $padded = sprintf('%06d', $microseconds);

    return date(preg_replace('`(?<!\\\\)u`', $padded, $format), $timestamp);
}
?>

学习汇总 2019-12-2

Nginx与前端开发 “Nginx是一款轻量级的HTTP服务器,采用事件驱动的异步非阻塞处理方式框架,这让其具有极好的IO性能,时常用于服务端的反向代理和负载均衡。”
nginx 这一篇就够了 安装、架构、配置、指令、应用。。。

Nginx负载均衡 Nginx做为一个强大的Web服务器软件,具有高性能、高并发性和低内存占用的特点。此外,其也能够提供强大的反向代理功能。
Nginx入门到实践-Nginx中间件 第1章 课程前言 总览课程,介绍课程学习须知,环境准备,了解课程意义。 1-1 课程介绍试看 1-2 学习环境准备

轻松理解webpack热更新原理 Hot Module Replacement,简称HMR,无需完全刷新整个页面的同时,更新模块。HMR的好处,在日常开发工作中体会颇深:节省宝贵的开发时间、提升开发体验

一步步从零开始用 webpack 搭建一个大型项目 很多人都或多或少使用过 webpack,但是很少有人能够系统的学习 webpack 配置,遇到错误的时候就会一脸懵,不知道从哪查起?性能优化时也不知道能做什么,网上的优化教程是不是符合自己的项目?等一系列问题!本文从最基础配置一步步到一个完善的大型项目的过程。让你对 webpack 再也不会畏惧,让它真正成为你的得力助手!

nginx 启动报错:无法找到 “/usr/local/nginx/logs/nginx.pid”文件

在写一个自动部署脚本时,需要修改nginx配置,然后重启:

nginx -s stop

nginx

但是有时候会报错:无法找到 “/usr/local/nginx/logs/nginx.pid”文件。因为不是必然重现,排查了好久,最后怀疑是nginx还没有stop,就执行nginx,导致不能锁定nginx.pid文件。然后试了循环重启:

nginx -s stop
# Wait for nginx to stop, retrying up to 10 times (about 10 seconds)
try_count=0
while (($try_count < 10)); do :
    # Only the grep process itself still matches once nginx is gone
    a=$(ps -ef|grep nginx|wc -l)
    if [ "$a" == "1" ]
    then
        echo "nginx stopped!"
        break;
    fi
    ((try_count++))
    sleep 1
done
if (($try_count >= 10))
then
    echo "nginx stop error!"
fi
nginx

果然,不报错了。但这样写好丑陋,多个地方都需要重启nginx,还得提取一个函数。最后的最后,脑子突然开窍,不是要重启嘛,这样不就行了:

nginx -s reload

因为一开始的代码都是先stop,再启动,我也就一直沿用这个思路。所以,有时候陷入了思维定式,真的很可怕。。。

nginx+pm2+nuxt前端ssr配置+301重定向实战

id: fdlskjfewiohflskdjfoweuqljdsflhavskbvkh

在用nuxt做ssr时,需要实现301重定向,便于搜索引擎把权重都加在同一个网址上。参见nginx 301永久重定向配置。最后决定用nginx反向代理来实现,搜索了一阵,发现挺简单的:

 # Catch-all server: requests for any host other than the front host are
 # answered with a permanent redirect to the front host.
 server {
  listen 80;
    # all others than "front host" redirect 301
    # server_name xx.com;
    return 301 http://__front_host$request_uri;
}

# Backend that the front host proxies to.
upstream nuxtserver{
    server 127.0.0.1:8888;
    keepalive 64;
}
server {
    listen 80;
    server_name __front_host;  # replace with your server's domain name
    location / {
        proxy_pass http://nuxtserver;  # must match the name of the upstream block defined above
        index index.html index.htm;
    }
}

注意:__front_host 替换为你的域名;其他域名配置到第一个 server 中(也可以不配置 server_name,这样所有其他的域名或 IP 都会返回 301 重定向)。

延伸阅读:301 重定向

HTTP 301 – 维基百科,自由的百科全书 301 Move Permanently 是HTTP协议中的一个状态码(Status Code)。可以简单地理解为该资源已经被永久改变了位置,通常会发送HTTP Location来重定向到正确的

301 Moved Permanently – HTTP | MDN – Mozilla HTTP 301 永久重定向 说明请求的资源已经被移动到了由 Location 头部指定的url上,是固定的不会再改变。搜索引擎会根据该响应修正。

PHP实现文件转存,解决掘金等防盗链问题

现在越来越多的网站出于自身的利益开启了防盗链功能,这样导致我们转载、保存的文章无法看到图片。最近开始用wordpress来搭建自己的网站,于是想着把文章转存到自己服务器,解决防盗链问题。

实现的思路:wordpress 数据库中的 wp_posts 表保存了文章内容,我们把文章读取出来,然后遍历所有的图片,依次通过 curl 获取图片,保存到相应目录,然后替换图片 url 即可。是不是很简单,说做就做!

最终代码用 php 实现:

PicFetcher.php:

<?php

/**---------------------------------------------------------------------------
 * 注意调用前需要加载 common.php
 * deps: logs, startsWith, file_save
 */

/**
 * Fetch an image resource.
 *
 * 301/302 redirects are followed manually, with at most three requests in
 * total; running out of attempts is reported as an error.
 *
 * NOTE(review): certificate verification is switched off
 * (CURLOPT_SSL_VERIFYPEER => 0); confirm this is acceptable.
 *
 * @param $url image url
 * @return array [errno, errmsg, fetch_status, content, ext]; all five fields
 *               are always present. errno is 0 only when an image was fetched.
 */
function fetchPic($url)
{
    slog("fetching: " . $url);
    $ch = curl_init();
    $fetch_status = -1; // status code returned by the remote server
    $content = ""; // fetched body; filled only when it is an image
    $ext = "";
    // Pessimistic default: this is what gets reported when every attempt
    // ended in a redirect.
    $errno = -1;
    $errmsg = "too many redirects!";

    // At most 3 requests are made while following 301/302 redirects.
    for ($i = 0; $i < 3; $i++) {
        $options = array(
            CURLOPT_HEADER => 1,
            CURLOPT_POST => 0,
            CURLOPT_URL => $url,
            CURLOPT_RETURNTRANSFER => 1,
            CURLOPT_TIMEOUT_MS => 15000,
            CURLOPT_SSL_VERIFYPEER => 0,
        );
        curl_setopt_array($ch, $options);
        curl_setopt($ch, CURLOPT_ENCODING, "gzip,deflate");
        curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');

        $response = curl_exec($ch);
        if (!$response) {
            $errmsg = "curl error!";
            slog("curl errno: " . curl_errno($ch) . ", errmsg: " . curl_error($ch));
            break;
        }

        $fetch_status = $code = curl_get_status($ch, $response);
        if ($code == 301 || $code == 302) {
            $redirect_url = curl_get_header($ch, $response, "Location");
            slog("redirect_url: $redirect_url");
            $parsed_re = parse_url($redirect_url);
            if (isset($parsed_re["host"])) {
                $url = $redirect_url;
            } else {
                $parsed = parse_url($url);
                $origin = $parsed["scheme"] . "://" . $parsed["host"]
                    . (isset($parsed["port"]) ? ":" . $parsed["port"] : "");
                if (startsWith($redirect_url, "/")) {
                    $url = $origin . $redirect_url;
                } else {
                    // TODO normalize the joined relative path (/a/../b/c.html -> /b/c.html)
                    // Directory of the current path without its trailing "/",
                    // so "/a" yields "" and "/a/b/" yields "/a/b".
                    $path = isset($parsed["path"]) ? $parsed["path"] : "/";
                    $dir = substr($path, 0, (int) strrpos($path, "/"));
                    $url = $origin . $dir . "/" . $redirect_url;
                }
            }
            continue;
        }

        if ($code == 200) {
            $content_type = curl_get_header($ch, $response, "Content-Type");
            if (startsWith($content_type, "image/")) {
                $header_size = curl_getinfo($ch, CURLINFO_HEADER_SIZE);
                $content = substr($response, $header_size);
                // Drop parameters such as "; charset=..." before deriving the extension.
                $mime = trim(explode(";", $content_type)[0]);
                $ext = "." . substr($mime, strlen("image/"));
                $errno = 0;
                $errmsg = "OK";
            } else {
                $errmsg = "resource is not image!";
            }
        } else {
            $errmsg = "unexpected status code get!";
        }
        break;
    }
    // Single exit point, so the handle is released on every path.
    curl_close($ch);

    $data = [
        "errno" => $errno,
        "errmsg" => $errmsg,
        "fetch_status" => $fetch_status,
        "content" => $content,
        "ext" => $ext
    ];
    return $data;
}

/**
 * Fetch a remote image and store it below $saveRoot.
 *
 * The file is stored as "<first 2 md5 chars>/<remaining md5 chars><ext>",
 * where the md5 is taken over the image content, and the returned "url" is
 * $pic_base . "/" . that name. URLs that start with the picture host (or with
 * "/") are considered our own images and are returned without fetching.
 *
 * @param $url       image url
 * @param $saveRoot  directory prefix the file name is appended to
 * @param $pic_base  url prefix under which saved images are served
 * @param $pic_host  prefix identifying our own images; defaults to $pic_base
 * @return array fetchPic() result without "content", plus "url" ("" on failure)
 */
function fetchSavePic($url, $saveRoot, $pic_base, $pic_host = "")
{
    // Do not fetch our own images.
    // uTODO: the check below can be bypassed in the following ways; catching
    // every entry point may need configuration.
    // 1 http:///somehost
    // 2 http://ip
    // 3 pic/xxx.xxx
    // The host of $url should be computed and compared instead.

    // Turn "//xx.com" into "xx.com"
    if (startsWith($url, "//")) {
        $url = substr($url, 2);
    }

    if (!$pic_host) {
        $pic_host = $pic_base;
    }
    $lowered = strtolower($url);
    $is_inner = startsWith($lowered, $pic_host) || startsWith($lowered, "/");
    if ($is_inner) {
        return [
            "errno" => 0,
            "errmsg" => "do NOT fetch inner pic!",
            "url" => $url
        ];
    }

    $data = fetchPic($url);
    $saveUrl = "";
    if ($data["errno"] === 0) {
        $content = $data["content"];
        $hash = md5($content);
        $save_name = substr($hash, 0, 2) . "/" . substr($hash, 2) . $data["ext"];
        file_save($content, $saveRoot . $save_name, false);
        $saveUrl = $pic_base . "/" . $save_name;
    }

    // The (possibly large) body is not part of the result.
    unset($data["content"]);
    $data["url"] = $saveUrl;
    slog(print_r($data, true));
    return $data;
}

WPPicFetcher.php

<?php
// Application root: two directory levels above this file.
define('APPROOT', __DIR__ . '/../../');

// Directory SeasLog writes its log files to.
SeasLog::setBasePath(APPROOT . '/logs/seaslog');

// Project configuration and helpers, then the bundled phpQuery library
// (resolved through the include path / current directory).
require_once APPROOT . "/system/config.php";
require_once APPROOT . "/system/common.php";
require_once APPROOT . "/system/PicFetcher.php";
require_once "phpQuery-onefile.php";

/**
 * Re-host the images of WordPress posts.
 *
 * Selects published posts whose post_modified_gmt is later than their
 * pic_fetched value, fetches every <img> source through fetchSavePic(),
 * points successfully fetched images at the saved copy, and writes the
 * updated content back together with a new pic_fetched timestamp (GMT).
 * Every image also gets alt="x" so that broken images stay visible.
 *
 * A database error is logged and terminates the script.
 *
 * @param $configPath string path of an extra configuration file; ignored when
 *                           it does not exist
 */
function fetchWPPic($configPath)
{
    // Bind the global before loading the extra configuration, so that
    // assignments the file makes to $g_config reach the global array
    // (presumably the file sets $g_config entries - confirm).
    global $g_config;

    slog("+++++++++++++++++++++++++++++ wp pic processing: $configPath");
    if (file_exists($configPath)) {
        require_once $configPath;
    }

    try {
        $saveRoot = $g_config["save_root"];
        $picBase = $g_config["pic_base"];
        $dbh = new PDO(
            "mysql:host={$g_config['host']};dbname={$g_config['wp_dbname']}",
            $g_config["username"],
            $g_config["pwd"]
        );
        $dbh->setAttribute(PDO::ATTR_ERRMODE, PDO::ERRMODE_EXCEPTION);
        $dbh->query("set names utf8;");
        $sql = '
SELECT id, post_content from wp_posts where post_status = "publish" and post_type="post" and `post_modified_gmt` > `pic_fetched`
';
        // Read the whole result first so the updates below never run while
        // the select is still being iterated on the same connection.
        $rows = $dbh->query($sql)->fetchAll(PDO::FETCH_ASSOC);
        // All values, including the id, are bound rather than interpolated.
        $stmt = $dbh->prepare("update wp_posts set post_content=?, pic_fetched=? where id=?");

        foreach ($rows as $row) {
            $id = $row["id"];
            $content = $row["post_content"];
            slog("++++:processing id:$id");
            $doc = \phpQuery::newDocument($content);
            $elements = pq("img", $doc)->elements;
            foreach ($elements as $ele) {
                // Keep broken images visible
                $ele->setAttribute("alt", "x");
                // Store a copy of the image
                $src = $ele->getAttribute("src");
                $r = fetchSavePic($src, $saveRoot, $picBase);
                if ($r["errno"] === 0) {
                    $ele->setAttribute("src", $r["url"]);
                    slog(">>> fetch ok");
                } else {
                    slog(">>> fetch failed");
                }
            }

            $stmt->bindValue(1, $doc->html());
            $stmt->bindValue(2, gmdate("Y-m-d H:i:s"));
            $stmt->bindValue(3, $id);
            $stmt->execute();
        }
        $stmt = null;
        $dbh = null;
    } catch (PDOException $e) {
        // ob_clean() raises a notice when no output buffer is active.
        if (ob_get_level() > 0) {
            ob_clean();
        }
        // Log message and location only: a full dump of the exception can
        // include the connection arguments (user name / password).
        slog(get_class($e) . ": " . $e->getMessage() . " at " . $e->getFile() . ":" . $e->getLine());
        die();
    }
}

主文件 WPPic.php

<?php
// Entry script: runs fetchWPPic() with the configuration file named by the
// first command-line argument, resolved relative to this directory.
define('DOCSPATH', __DIR__ . '/');

require_once "WPPicFetcher.php";

if ($argc !== 1) {
    $config_path = __DIR__ . "/" . $argv[1];
    fetchWPPic($config_path);
} else {
    // No argument given: print the usage text and stop.
    echo "Usage: php WPPic.php config-path1";
    echo " Note: only relative path is supported";
}

这个是同事的站点,处理之后效果良好: http://wptp.rongyipiao.com/

当然还有些问题,待有时间后再慢慢优化。