How to exclude PHPSSID from responses returned by Varnish

345 Views Asked by At

We are currently configure Varnish on our server. This is the mess...

We encounter some difficulties with Varnish. Indeed when we return the response for uncached page, varnish include systematically the PHPSSID of the current user in the response. When another user ask the page wich is cached, he inherites of the PHPSSSID of a foreign user...

vcl 4.0;

include "includes/devicedetect.vcl";

import std;

backend local {
    .host = "127.0.0.1";
    .port = "80";
    .connect_timeout = 5s;
    .first_byte_timeout = 300s;
    .between_bytes_timeout = 300s;
}

acl purge {
    # ACL we'll use later to allow purges
    # web01
    # web02
    "localhost";
    "127.0.0.1";
}


sub vcl_recv {
    # Disable all
    # return (pass);

    # Allow purging
    if (req.method == "PURGE") {
        if (!std.ip(regsub(req.http.X-Forwarded-For, "[, ].*$", ""), client.ip) ~ purge) {
            # Not from an allowed IP? Then die with an error.
            return (synth(405, "This IP is not allowed to send PURGE requests."));
        }
        # If you got this stage (and didn't error out above), purge the cached result
        return (purge);
    }

    # Allow ban (global purge)
    if (req.method == "BAN") {
        if (!std.ip(regsub(req.http.X-Forwarded-For, "[, ].*$", ""), client.ip) ~ purge) {
            # Not from an allowed IP? Then die with an error.
            return (synth(405, "This IP is not allowed to send BAN requests."));
        }
        # If you got this stage (and didn't error out above), purge the cached result
        ban("obj.http.x-url ~ /");
        return (synth(200, "Ban added"));
    }

    # Only cache GET or HEAD requests. This makes sure the POST requests are always passed.
    if (req.method != "GET" && req.method != "HEAD") {
        return (pass);
    }

    if (req.http.Authorization) {
        return (pass);
    }

    if (req.http.host ~ "(api\.domain\.com|api2\.domain\.com)") {
        return (pass);
    }

    # Do not cache logout/login/forgot password url
    if (req.url ~ "/(logout|forgotten-password|login|register|facebookLogin)") {
    return (pass);
    }

    # Do not cache when Set-Cookie contain user[id] OR identity OR PHPSESSID OR PHPSERVERID
    if (req.http.Set-Cookie ~ "(user\[id\]|identity|SERVERID)") {
        return (pass);
    }

    # Do not cache customer french page
    if (req.url ~ "/fr/(client|personnaliser|panier)") {
        return (pass);
    }

    # Do not cache customer english page
    if (req.url ~ "/en/(customer|personalize|cart)") {
        return (pass);
    }

    # Do not cache checkout
    if (req.url ~ "/checkout") {
        return (pass);
    }

    # Normalize the header, remove the port (in case you're testing this on various TCP ports)
    set req.http.Host = regsub(req.http.Host, ":[0-9]+", "");

    # Remove the proxy header (see https://httpoxy.org/#mitigate-varnish)
    unset req.http.proxy;

    # Strip hash, server doesn't need it.
    if (req.url ~ "\#") {
        set req.url = regsub(req.url, "\#.*$", "");
    }       

    # Strip a trailing ? if it exists
    if (req.url ~ "\?$") {
        set req.url = regsub(req.url, "\?$", "");
    }

    # Normalize the query arguments
    # set req.url = std.querysort(req.url);

    # Remove the backend cache parameter for pagination
    if (req.url ~ "(\&|\?)_=[0-9]+$") {
        set req.url = regsub(req.url, "(\&|\?)_=[0-9]+$", "");
    }

    # Cache static files
    if (req.url ~ "^[^?]*\.(7z|avi|bmp|bz2|css|csv|doc|docx|eot|flac|flv|gif|gz|ico|jpeg|jpg|js|less|mka|mkv|mov|mp3|mp4|mpeg|mpg|odt|otf|ogg|ogm|opus|pdf|png|ppt|pptx|rar|rtf|svg|svgz|swf|tar|tbz|tgz|ttf|txt|txz|wav|webm|webp|woff|woff2|xls|xlsx|xml|xz|zip)(\?.*)?$") {
        unset req.http.Cookie;
        return (hash);
    }

    # Mobile Detection set req.http.X-UA-Device
    call devicedetect;

    # Normalize accept-encoding
    if (req.http.Accept-Encoding) {
        if (req.http.Accept-Encoding ~ "gzip") {
            set req.http.Accept-Encoding = "gzip";
        } elsif (req.http.Accept-Encoding ~ "deflate") {
            set req.http.Accept-Encoding = "deflate";
        } else {
            unset req.http.Accept-Encoding;
        }
    }

    # Send Surrogate-Capability headers to announce ESI support to backend
    set req.http.Surrogate-Capability = "abc=ESI/1.0";

    if (req.http.cookie ~ "id_devise=") {
        set req.http.Devise = regsuball(req.http.cookie, "(.*?)(id_devise=[^;]*)(.*)$", "\2");
    }

    if (req.http.cookie ~ "id_mesure=") {
        set req.http.Mesure = regsuball(req.http.cookie, "(.*?)(id_mesure=[^;]*)(.*)$", "\2");
    }

    # Cache the rest
    return (hash);
}

sub vcl_backend_response {
    if (bereq.http.X-UA-Device) {
        if (!beresp.http.Vary) { # no Vary at all
            set beresp.http.Vary = "X-UA-Device";
        } elsif (beresp.http.Vary !~ "X-UA-Device") { # add to existing Vary
            set beresp.http.Vary = beresp.http.Vary + ", X-UA-Device";
        }
    }

    if (bereq.url ~ "/esi/" || bereq.url ~ "checkout" || bereq.url ~ "/fr/(client|personnaliser)" || bereq.url ~ "/en/(customer|personalize)") {
        unset beresp.http.Surrogate-Control;
        set beresp.ttl = 0s;
    } else {
        set beresp.do_esi = true;
        set beresp.ttl = 10m;
    }

    set beresp.grace = 6h;

    return (deliver);
}

sub vcl_backend_error {
    if (beresp.status == 503) {
        set beresp.http.Content-Type = "text/html; charset=utf-8";
        synthetic(std.fileread("/home/domain/www/503.html"));
        return(deliver);
    }
    if (beresp.status == 500) {
        set beresp.http.Content-Type = "text/html; charset=utf-8";
        synthetic(std.fileread("/home/domain/www/500.html"));
        return(deliver);
    }
}

sub vcl_hash {
    hash_data(req.url);

    if (req.http.host) {
        hash_data(req.http.host);
    } else {
        hash_data(server.ip);
    }

    if (req.http.Devise) {
        hash_data(req.http.Devise);
    }

    if (req.http.Mesure) {
        hash_data(req.http.Mesure);
    }

    if (req.http.X-Requested-With) {
        hash_data(req.http.X-Requested-With);
    }

    hash_data(req.http.cookie);

    return (lookup);
}

sub vcl_hit {
    if (obj.ttl == 0s) {
        return (fetch);
    } else if (obj.ttl >= 0s) {
        return (deliver);
    }

    if (!std.healthy(req.backend_hint)) {
        # backend is sick - use full grace
        if (obj.ttl + obj.grace > 0s) {
            return (deliver);
        }
    }

    return (fetch);
}

sub vcl_deliver {
    if (obj.hits > 0) {
        set resp.http.X-Cache = "HIT";
    } else {
        set resp.http.X-Cache = "MISS";
    }

    if ((req.http.X-UA-Device) && (resp.http.Vary)) {
        set resp.http.Vary = regsub(resp.http.Vary, "X-UA-Device", "User-Agent");
    }

    set resp.http.X-Cache-Hits = obj.hits;

    unset resp.http.X-Powered-By;
    unset resp.http.Server;
    unset resp.http.X-Drupal-Cache;
    unset resp.http.X-Varnish;
    unset resp.http.Via;
    unset resp.http.Link;
    unset resp.http.X-Generator;

    return (deliver);
}

sub vcl_pipe {
    set bereq.http.Connection = "Close";

    return (pipe);
}

sub vcl_pass {
    return (fetch);
}

sub vcl_purge {
    # Only handle actual PURGE HTTP methods, everything else is discarded
    if (req.method != "PURGE") {
        # restart request
        set req.http.X-Purge = "Yes";
        return(restart);
    }
}

sub vcl_synth {
    if (resp.status == 720) {
        # We use this special error status 720 to force redirects with 301 (permanent) redirects
        # To use this, call the following from anywhere in vcl_recv: return (synth(720, "http://host/new.html"));
        set resp.http.Location = resp.reason;
        set resp.status = 301;
        return (deliver);
    } elseif (resp.status == 721) {
        # And we use error status 721 to force redirects with a 302 (temporary) redirect
        # To use this, call the following from anywhere in vcl_recv: return (synth(720, "http://host/new.html"));
        set resp.http.Location = resp.reason;
        set resp.status = 302;
        return (deliver);
    }

    return (deliver);
}
1

There are 1 best solutions below

1
On

1) By default Varnish does not cache any responses which set a cookie:

sub vcl_backend_response {
    if (beresp.ttl <= 0s ||
      beresp.http.Set-Cookie ||
      beresp.http.Surrogate-control ~ "no-store" ||
      (!beresp.http.Surrogate-Control &&
        beresp.http.Cache-Control ~ "no-cache|no-store|private") ||
      beresp.http.Vary == "*") {
        /*
        * Mark as "Hit-For-Pass" for the next 2 minutes
        */
        set beresp.ttl = 120s;
        set beresp.uncacheable = true;
    }
    return (deliver);
}

See https://github.com/varnish/Varnish-Cache/blob/4.1/bin/varnishd/builtin.vcl

In your "sub vcl_backend_response" you always "return (deliver);". This means that the Varnish builtin VCL is never executed. Just remove "return (deliver);" from your own "sub vcl_backend_response" and things should get better.

2) Set-Cookie is a respose header and not a request header so this does not make sense to me

# Do not cache when Set-Cookie contain user[id] OR identity OR PHPSESSID OR PHPSERVERID
if (req.http.Set-Cookie ~ "(user\[id\]|identity|SERVERID)") {
    return (pass);
}

3) In general you should not always call "return" in your own versions of "vcl_*" as it will always prevent the builtin VCL (with sensible default behaviour) from being executed. For example you should not call "return (hash);" at the end of "vcl_receive". If you did not do this you would not need stuff like

if (req.method != "GET" && req.method != "HEAD") {
    return (pass);
}

if (req.http.Authorization) {
    return (pass);
}

in your "vcl_receive" because this would also be handled in the builtin version.

4) Also your "sub vcl_hash" looks very dodgy to me. I would never hash on cookies like you do:

hash_data(req.http.cookie);

By default Varnish does not cache any request which has a Cookie - also see the builtin.vcl

if (req.http.Authorization || req.http.Cookie) {
        /* Not cacheable by default */
        return (pass);
}

That is why normally you remove any Cookies in your own "vcl_receive" for requests you want Varnish to cache, like you do for static files.

So yes - it is a bit of a mess.