避免在 varnish 中缓存来自 Special:Random 的重定向

Avoid caching redirect from Special:Random in varnish

我正在使用 Varnish 将我的 MediaWiki 安装中的内容提供给我的用户。几乎每个页面都被正确缓存,如 vcl 中所定义:

vcl 4.0;

# Origin backend named "default"; vcl_recv assigns it to req.backend_hint
# for every request before any thumbnail rewriting happens.
# Host and port are redacted placeholders ("xxxx") in this listing.
backend default {
    .host = "xxxx";
    .port = "xxxx";
}

# Secondary backend named "thumbor"; vcl_recv routes rewritten /thumb/ image
# requests here when the client advertises image/webp support.
# Host and port are redacted placeholders ("xxxx") in this listing.
backend thumbor {
    .host = "xxxx";
    .port = "xxxx";
}

# Clients allowed to send PURGE requests; vcl_recv answers 405 to everyone else.
# The single entry is a redacted placeholder ("xxxx") in this listing.
acl purge {
    "xxxx";
}

# Entry point for every client request: handles PURGE, decides between
# pipe / pass / cache lookup, normalises headers that would otherwise fragment
# the cache, reroutes WebP-capable thumbnail requests to thumbor, and computes
# the x-wap header that vcl_hash uses to separate mobile from desktop variants.
sub vcl_recv {
    # X-Forwarded-For is intentionally NOT touched here: Varnish 4 appends
    # client.ip to it before vcl_recv runs, so appending it again by hand
    # duplicated the address (and did so once more on every restart).
    set req.backend_hint = default;

    # PURGE is only honoured for clients listed in the "purge" ACL.
    if (req.method == "PURGE") {
        if (!client.ip ~ purge) {
            return (synth(405, "Not allowed."));
        } else {
            return (purge);
        }
    }

    # Methods Varnish does not understand are piped straight through.
    if (req.method != "GET" && req.method != "HEAD" &&
        req.method != "PUT" && req.method != "POST" &&
        req.method != "TRACE" && req.method != "OPTIONS" &&
        req.method != "DELETE") {
        return (pipe);
    }

    # Only GET and HEAD are candidates for caching.
    if (req.method != "GET" && req.method != "HEAD") {
        return (pass);
    }

    # Conditional requests carrying an ETag validator go to the backend.
    if (req.http.If-None-Match) {
        return (pass);
    }

    # Debug header forces a backend round trip.
    if (req.http.X-Debug-Server) {
        return (pass);
    }

    # NOTE(review): ban() expects a ban expression (e.g. "req.url == /foo"),
    # not a bare URL, so this call may be rejected at runtime; if it is made
    # to work it should also be restricted to trusted clients, since the
    # Cache-Control request header is client-controlled. Left unchanged.
    if (req.http.Cache-Control ~ "no-cache") {
        ban(req.url);
    }

    # Collapse Accept-Encoding to one canonical value so that equivalent
    # client headers do not produce separate cache variants.
    if (req.http.Accept-Encoding) {
        if (req.http.User-Agent ~ "MSIE 6") {
            unset req.http.Accept-Encoding;
        } elsif (req.http.Accept-Encoding ~ "gzip") {
            set req.http.Accept-Encoding = "gzip";
        } elsif (req.http.Accept-Encoding ~ "deflate") {
            set req.http.Accept-Encoding = "deflate";
        } else {
            unset req.http.Accept-Encoding;
        }
    }

    # JPEG/PNG thumbnails requested by WebP-capable clients are rewritten to a
    # thumbor URL. x-orig-url keeps the original URL so vcl_deliver can fall
    # back to it, and x-no-thumbor prevents rewriting again after that restart.
    if (req.url ~ "(?i)\.(jpg|jpeg|jpe|png)$" && req.url ~ "(?i)/thumb/" && req.http.Accept ~ "(?i)image/webp" && req.http.x-no-thumbor != "yes") {
        set req.http.x-orig-url = req.url;
        # \1 re-inserts everything after /thumb/; without it every thumbnail
        # was rewritten to the same path with the image name dropped.
        set req.url = regsub(req.url, ".*\/thumb\/(.*)", "/unsafe/filters:format(webp)/thumb/\1");
        set req.backend_hint = thumbor;
    }

    # AMP pages are cached without cookies and without a mobile variant.
    if (req.url ~ "action=amp$") {
        unset req.http.Cookie;
        unset req.http.x-wap;
        return (hash);
    }

    # Authenticated or logged-in traffic is never served from cache.
    if (req.http.Authorization || req.http.Cookie ~ "session" || req.http.Cookie ~ "Token") {
        return (pass);
    }

    # The client must not be able to supply its own x-wap value.
    unset req.http.x-wap;
    if (req.http.User-Agent ~ "(?i)^(lg-|sie-|nec-|lge-|sgh-|pg-)|(mobi|240x240|240x320|320x320|alcatel|android|audiovox|bada|benq|blackberry|cdm-|compal-|docomo|ericsson|hiptop|htc[-_]|huawei|ipod|kddi-|kindle|meego|midp|mitsu|mmp\/|mot-|motor|ngm_|nintendo|opera.m|palm|panasonic|philips|phone|playstation|portalmmm|sagem-|samsung|sanyo|sec-|sendo|sharp|softbank|symbian|teleca|up.browser|webos)" && req.url !~ "(\?|&)(action=amp)") {
        set req.http.x-wap = "no";
    }

    # This test must run BEFORE the cookie header is normalised below;
    # afterwards the header can no longer contain mf_useformat.
    if (req.http.Cookie ~ "mf_useformat=") {
        set req.http.x-wap = "no";
    }

    # Keep only the cookie-warning flag; every other cookie is dropped so
    # anonymous requests share cache entries.
    if (req.http.Cookie ~ "droidwikiwikicookiewarning_dismissed=true") {
        set req.http.Cookie = "droidwikiwikicookiewarning_dismissed=true";
    } else {
        unset req.http.Cookie;
    }

    return (hash);
}

# Adds the x-wap request header (set or cleared in vcl_recv / vcl_purge) to
# the cache key so that requests with and without it get separate objects.
# There is no return statement, so Varnish's built-in vcl_hash still runs
# after this one.
sub vcl_hash {
    hash_data(req.http.x-wap);
}

# Called when vcl_recv returns (pipe). Forces the backend connection to be
# closed after the piped request so later requests on the same client
# connection are not tunnelled to the backend without passing through VCL.
sub vcl_pipe {
    # The header must be set on bereq: the backend request has already been
    # built from req when vcl_pipe runs, so changing req.http here would not
    # reach the backend.
    set bereq.http.connection = "close";
}

# Runs after an object has been purged. Uses restarts to purge the related
# variants of the same page as well: the "action=amp" URL and the variant
# hashed with x-wap = "no". Each restart re-enters vcl_recv with the PURGE
# method still set, which purges again and comes back here.
sub vcl_purge {
    # Step 1: the URL just purged was not the AMP one, so purge the AMP URL
    # next. X-Original remembers the URL to return to afterwards.
    if (req.url !~ "(\?|&)(action=amp)") {
        set req.http.X-Original = req.url;
        # Choose the separator by whether a query string already exists.
        # Testing for "&" instead turned "/index.php?title=Foo" into
        # "/index.php?title=Foo?action=amp".
        if (req.url ~ "\?") {
            set req.url = req.url + "&action=amp";
        } else {
            set req.url = req.url + "?action=amp";
        }
        return (restart);
    }

    # Step 2: the AMP URL has been purged; restore the original URL.
    if (req.http.X-Original) {
        set req.url = req.http.X-Original;
    }

    # Step 3: repeat the whole sequence once more with x-wap = "no" so the
    # objects hashed under that value are purged too. When x-wap is already
    # set, fall through to Varnish's built-in vcl_purge.
    if (!req.http.x-wap) {
        set req.http.x-wap = "no";
        return (restart);
    }
}

# Called when a cache lookup finds an object.
sub vcl_hit {
        # NOTE(review): vcl_recv in this file answers every PURGE request with
        # either return (purge) or a 405 synth, so this branch looks
        # unreachable - confirm before relying on it. ban() is also given a
        # bare URL here rather than a ban expression.
        if (req.method == "PURGE") {
            ban(req.url);
            return (synth(200, "Purged"));
        }

        # Objects whose TTL has run out are not served from cache; the request
        # is passed to the backend instead.
        if (!obj.ttl > 0s) {
            return (pass);
        }
}

# Called when a cache lookup finds no object.
sub vcl_miss {
        # Answers PURGE for an uncached URL with a 200 "Not in cache".
        # NOTE(review): vcl_recv in this file answers every PURGE request with
        # either return (purge) or a 405 synth, so this branch looks
        # unreachable - confirm.
        if (req.method == "PURGE")  {
            return (synth(200, "Not in cache"));
        }
}

# Last step before the response is sent to the client.
sub vcl_deliver {
    # Fallback for the thumbor rewrite: when a response tagged
    # x-origin = "thumbor" is not a 200, restore the URL saved in x-orig-url,
    # set x-no-thumbor so vcl_recv does not rewrite it again, and restart the
    # request. In this file x-origin is only set by vcl_backend_error.
    if (resp.http.x-origin == "thumbor" && resp.status != 200) {
        set req.url = req.http.x-orig-url;
        set req.http.x-no-thumbor = "yes";
        return (restart);
    }

    # Expose whether the response came from cache, for debugging.
    if (obj.hits > 0) {
        set resp.http.X-Cache = "HIT";
    } else {
        set resp.http.X-Cache = "MISS";
    }
}

# Called when a backend fetch fails. Records the name of the backend on the
# error response in the x-origin header; vcl_deliver compares that header
# against "thumbor" to decide whether to retry with the original URL.
sub vcl_backend_error {
    set beresp.http.x-origin = beresp.backend.name;
}

# Called after response headers arrive from the backend. Decides whether the
# object may be cached and for how long.
sub vcl_backend_response {
    # Allow stale objects to be served for up to two minutes past their TTL.
    set beresp.grace = 120s;

    # Cacheability must be decided BEFORE the TTL floor below is applied.
    # Raising the TTL first made the "ttl <= 0" test unreachable and forced
    # every response into the cache for 48h, including responses the backend
    # marked as private / no-cache (for example a redirect that must differ
    # on every request). The condition and the short TTL for the
    # uncacheable marker follow Varnish's built-in vcl_backend_response.
    if (beresp.ttl <= 0s ||
        beresp.http.Cache-Control ~ "(?i)(no-cache|no-store|private)") {
        set beresp.ttl = 120s;
        set beresp.uncacheable = true;
        return (deliver);
    }

    # Everything the backend allows to be cached is kept for at least 48h.
    if (beresp.ttl < 48h) {
        set beresp.ttl = 48h;
    }

    # Responses that set cookies are specific to one client.
    if (beresp.http.Set-Cookie) {
        set beresp.uncacheable = true;
        return (deliver);
    }

    # NOTE(review): Authorization is normally a request header; this test
    # reads it from the backend response, so it may never match - confirm
    # whether bereq.http.Authorization was intended.
    if (beresp.http.Authorization && !beresp.http.Cache-Control ~ "public") {
        set beresp.uncacheable = true;
        return (deliver);
    }

    return (deliver);
}

但是,当前配置也会导致重定向被缓存。对于普通的重定向页面(如文章 A 重定向到文章 B),这完全是预期的行为;但对于 Special:Random 页面来说却不是。每次打开此页面,它都应该把我带到一个随机页面。然而,由于重定向被 Varnish 缓存了,我总是被重定向到同一个页面。

我已经想到了两种方案:(1) 完全不缓存 302 重定向——但这可能会对其他重定向产生负面影响,而我通常希望缓存它们;(2) 针对 Special:Random 设置 URL 过滤器——但这样我就需要为 wiki 支持的所有语言都添加这一例外才能保证可靠,我对此不太满意。

我现在的问题是:有没有人知道如何通过不需要维护且仅涵盖 Special:Random 的规则来实现?也许是维基媒体维基也使用的解决方案?在那里,Special:Random 页面总是重定向到另一个页面,但是,我无法在他们的代码库中找到实现该页面的代码块:(

Varnish 默认会遵循应用程序发送的常规 Cache-Control HTTP 响应 header(前提是 VCL 没有在 vcl_backend_response 中覆盖 beresp.ttl,也没有跳过对该 header 的检查)。

我不是 MediaWiki 专家,但我注意到 MediaWiki 提供了可用于扩展其行为的钩子(hooks)。如果您可以挂接到 MediaWiki 的请求/响应流程并检测到 Special:Random 的情况,就可以注入一个 Cache-Control: private, no-cache, no-store header,强制 Varnish 不缓存这些页面。

这将是获得预期结果的理想方式,而无需编写 VCL。

一般来说,利用 HTTP 的缓存 headers 来控制反向缓存代理(如 Varnish)的行为是一种很好的做法。

  • 它使应用程序更便携
  • 它使应用程序对特定缓存技术的依赖性降低
  • 它使开发人员能够从应用程序的体系结构内部考虑缓存