高可扩展性、高性能、大数据量、高并发、分布式的系统架构---varnish浅谈 weir 2015-06-16 13:41:37.0 java,分布式 2858 Varnish对于互联网公司又不陌生,我们都知道作为计算机读取速度最快的那个东西是什么呀,有人会说内存有人会说CPU寄存器或缓存,但是我们最能利用空间大的还是内存,如果你想让你的系统运算速度快首选的就是内存。我亲身经历这样一个环境就是把数据库安装到内存里面,你可以想象这是什么结果速度提升那不是几十倍而是几百倍几千倍,还有现在比较火热的大数据最快的运算不还是在内存里面么。你可以说我们现在有固态硬盘速度也是杠杠的,但是固态硬盘也有它的致命缺陷我们不做过多分析,总之计算机发展到现在的水平在内存里面读写数据应该是最理想也是运算速度最客观的。Varnish就是这样一个存活在内存里面并极致运用内存的东西,在分布是高性能的架构中varnish所担任的角色就是把静态资源和部分动态页面缓存到内存里面供下次系统调用的这么一个高性能的http web加速器。 Varnish也是基于一样配置文件的所以需要学习各种参数和配置规则,就像nginx这点最讨厌了。Varnish要比nginx麻烦得多,当然这要看你怎么使用它了,如果是仅仅缓存下来静态资源这很简单,甚至一些动态的页面我们也可以单独去做缓存这都不是很复杂。Varnish里面有个命中率的概念,什么意思呢?就是你想要缓存下来的东西varnish真正起到多大作用是不是都向我们预期的那样缓存下来了,这就是命中率。

# This is a basic VCL configuration file for varnish. See the vcl(7)
# man page for details on VCL syntax and semantics.
#
# Default backend definition. Set this to point to your content
# server.
#
# NOTE(review): this listing uses Varnish 3.x VCL (req.request,
# vcl_fetch, hit_for_pass, beresp.saintmode, "error <code>") — it will
# not load on Varnish 4+ without conversion.
import std;

# Primary backend (application server on 127.0.0.1:8080) with an
# inline health probe.
backend b1 {
    .host = "127.0.0.1";
    .port = "8080";
    ## Max time to wait when establishing a connection to the backend.
    .connect_timeout = 1s;
    ## Max time to wait for the first byte coming back from the backend.
    .first_byte_timeout = 5s;
    ## Max idle time allowed between two consecutive bytes received.
    .between_bytes_timeout = 2s;
    # Inline health probe: GET /index.jsp must answer within .timeout.
    .probe = {
        .url = "/index.jsp";
        .timeout = 0.3 s;
        .window = 8;      // How many of the most recent polls to examine.
        .threshold = 3;   // How many polls within .window must succeed for the backend to count as healthy.
        .initial = 3;     // How many probes are assumed good when varnish starts.
    }
}

# Named, reusable health probe — same parameters as b1's inline probe;
# referenced below by backend b2.
probe p1 {
    .url = "/index.jsp";
    .timeout = 0.3 s;
    .window = 8;      // How many of the most recent polls to examine.
    .threshold = 3;   // How many polls within .window must succeed for the backend to count as healthy.
    .initial = 3;     // How many probes are assumed good when varnish starts.
}

# Secondary backend (127.0.0.1:9080), used by the fallback director
# when b1 is unhealthy.
backend b2 {
    .host = "127.0.0.1";
    .port = "9080";
    .connect_timeout = 1s;
    .first_byte_timeout = 5s;
    .between_bytes_timeout = 2s;
    ## Upper limit on concurrent connections to this backend.
    .max_connections=1000;
    .probe = p1;
}

# Fallback director: members are tried in declaration order, so b2
# only receives traffic while b1 is failing its probe.
director d1 fallback {
    {
        .backend = b1;  // Reference an already-declared backend.
    }
    {
        .backend = b2;
    }
}

# Hosts allowed to send PURGE requests (cache invalidation).
acl purgeallow {
    "127.0.0.1";
    #"192.168.2.114";
}

# Request entry point: routing, grace, PURGE auth, cookie stripping,
# and the standard default.vcl GET/HEAD gatekeeping.
sub vcl_recv {
    # All requests go through the fallback director declared above.
    set req.backend=d1;
    # If the chosen backend is sick, accept stale objects up to 30
    # minutes old; otherwise only 5 seconds of grace.
    if(!req.backend.healthy){
        set req.grace = 30m;
    }else{
        set req.grace = 5s;
    }
    # Only IPs in the purgeallow ACL may invalidate cached objects.
    if(req.request == "PURGE") {
        if(!client.ip ~ purgeallow) {
            error 405 "not allowed.";
        }
        return(lookup);
    }
    # Strip cookies from static-asset requests so they become cacheable
    # (a Cookie header would otherwise force a pass below).
    if (req.request == "GET" && req.url ~ "\.(jpg|png|gif|swf|flv|ico|jpeg)$") {
        unset req.http.cookie;
    }
    # Dynamic JSP pages are never cached (case-insensitive match on
    # ".jsp" at end of path or before a query string).
    if (req.request =="GET" && req.url ~ "(?i)\.jsp($|\?)"){
        return (pass);
    }
    # Standard default.vcl logic: record the client chain once, on the
    # first (non-restarted) pass through vcl_recv.
    if (req.restarts == 0) {
        if (req.http.x-forwarded-for) {
            set req.http.X-Forwarded-For = req.http.X-Forwarded-For + ", " + client.ip;
        } else {
            set req.http.X-Forwarded-For = client.ip;
        }
    }
    if (req.request != "GET" &&
        req.request != "HEAD" &&
        req.request != "PUT" &&
        req.request != "POST" &&
        req.request != "TRACE" &&
        req.request != "OPTIONS" &&
        req.request != "DELETE") {
        /* Non-RFC2616 or CONNECT which is weird. */
        return (pipe);
    }
    if (req.request != "GET" && req.request != "HEAD") {
        /* We only deal with GET and HEAD by default */
        return (pass);
    }
    if (req.http.Authorization || req.http.Cookie) {
        /* Not cacheable by default */
        return (pass);
    }
    return (lookup);
}
#
sub vcl_pipe {
#     # Note that only the first request to the backend will have
#     # X-Forwarded-For set. If you use X-Forwarded-For and want to
#     # have it set for all requests, make sure to have:
#     # set bereq.http.connection = "close";
#     # here. It is not set by default as it might break some broken web
#     # applications, like IIS with NTLM authentication.
    return (pipe);
}
#
sub vcl_pass {
    return (pass);
}
#
# Cache key: URL plus Host header (or server IP when no Host is sent).
sub vcl_hash {
    hash_data(req.url);
    if (req.http.host) {
        hash_data(req.http.host);
    } else {
        hash_data(server.ip);
    }
    return (hash);
}
#
sub vcl_hit {
    return (deliver);
}
#
sub vcl_miss {
    # Log every cache miss to the shared memory log (readable via
    # varnishlog) — useful when tuning the hit rate.
    std.log("now miss url===" + req.url);
    return (fetch);
}
#
# Backend-response handling: grace, saint mode on 500s, hit-for-pass,
# and per-content-type TTL overrides.
sub vcl_fetch {
    # Keep objects 30 minutes past their TTL so they can be served as
    # stale content under the grace set in vcl_recv.
    set beresp.grace = 30m;
    # Saint mode: on a 500, avoid this object from this backend for
    # 20s and restart the request (letting the fallback director try
    # the other backend).
    if(beresp.status == 500){
        set beresp.saintmode = 20s;
        return(restart);
    }
    if (beresp.ttl <= 0s ||
        beresp.http.Set-Cookie ||
        beresp.http.Vary == "*") {
        /*
         * Mark as "Hit-For-Pass" for the next 2 minutes
         */
        set beresp.ttl = 120 s;
        return (hit_for_pass);
    }
    # Responses that forbid caching are delivered without storing.
    if (beresp.http.Pragma ~ "no-cache" || beresp.http.Cache-Control ~ "no-cache" || beresp.http.Cache-Control ~ "private") {
        return(deliver);
    }
    # Collapse 404/300 backend answers into the synthetic 404 error page.
    if(beresp.status == 404 || beresp.status == 300) {
        error 404;
    }
    # TTL overrides: images 1 minute, static HTML 1 day.
    if (req.request == "GET" && req.url ~ "\.(jpg|png|gif|swf|flv|ico|jpeg)$") {
        set beresp.ttl = 1m;
    }
    if (req.request == "GET" && req.url ~ "\.(htm|html)$") {
        set beresp.ttl = 1d;
    }
#    if (req.url ~ "\.(png|gif|jpg)$") {
#        unset beresp.http.set-cookie;
#        set beresp.ttl = 1h;
#    }
    return (deliver);
}
#
# Response delivery: expose hit/miss via X-Cache and hide server
# implementation headers.
sub vcl_deliver {
    # NOTE(review): header name casing differs between the two
    # branches ("X-Cache" vs "x-Cache"); harmless since HTTP header
    # names are case-insensitive, but inconsistent.
    if (obj.hits > 0) {
        set resp.http.X-Cache = "cached";
    } else {
        set resp.http.x-Cache = "uncached";
    }
    # Remove some headers: PHP version
    unset resp.http.X-Powered-By;
    # Remove some headers: Apache version & OS
    unset resp.http.Server;
    return (deliver);
}

# Synthetic error page (also used for the PURGE 405 and the forced 404
# above). NOTE(review): the stray "ccccc…<br>" line below is leftover
# test text and is emitted verbatim to clients.
sub vcl_error {
    set obj.http.Content-Type = "text/html; charset=utf-8";
    set obj.http.Retry-After = "5";
    synthetic {"<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
<html>
ccccccccccccccccccccccccc<br>
<head>
<title>"} + obj.status + " " + obj.response + {"</title>
</head>
<body>
<h1>Error "} + obj.status + " " + obj.response + {"</h1>
<p>"} + obj.response + {"</p>
<h3>Guru Meditation:</h3>
<p>XID: "} + req.xid + {"</p>
<hr>
<p>Varnish cache server</p>
</body>
</html>
"};
    return (deliver);
}

sub vcl_init {
    return (ok);
}
#
sub vcl_fini {
    return (ok);
}
#
# Below is a commented-out copy of the default VCL logic. If you
# redefine any of these subroutines, the built-in logic will be
# appended to your code.
# sub vcl_recv {
#     if (req.restarts == 0) {
#         if (req.http.x-forwarded-for) {
#             set req.http.X-Forwarded-For =
#                 req.http.X-Forwarded-For + ", " + client.ip;
#         } else {
#             set req.http.X-Forwarded-For = client.ip;
#         }
#     }
#     if (req.request != "GET" &&
#         req.request != "HEAD" &&
#         req.request != "PUT" &&
#         req.request != "POST" &&
#         req.request != "TRACE" &&
#         req.request != "OPTIONS" &&
#         req.request != "DELETE") {
#         /* Non-RFC2616 or CONNECT which is weird. */
#         return (pipe);
#     }
#     if (req.request != "GET" && req.request != "HEAD") {
#         /* We only deal with GET and HEAD by default */
#         return (pass);
#     }
#     if (req.http.Authorization || req.http.Cookie) {
#         /* Not cacheable by default */
#         return (pass);
#     }
#     return (lookup);
# }
#
# sub vcl_pipe {
#     # Note that only the first request to the backend will have
#     # X-Forwarded-For set. If you use X-Forwarded-For and want to
#     # have it set for all requests, make sure to have:
#     # set bereq.http.connection = "close";
#     # here. It is not set by default as it might break some broken web
#     # applications, like IIS with NTLM authentication.
#     return (pipe);
# }
#
# sub vcl_pass {
#     return (pass);
# }
#
# sub vcl_hash {
#     hash_data(req.url);
#     if (req.http.host) {
#         hash_data(req.http.host);
#     } else {
#         hash_data(server.ip);
#     }
#     return (hash);
# }
#
# sub vcl_hit {
#     return (deliver);
# }
#
# sub vcl_miss {
#     return (fetch);
# }
#
# sub vcl_fetch {
#     if (beresp.ttl <= 0s ||
#         beresp.http.Set-Cookie ||
#         beresp.http.Vary == "*") {
#         /*
#          * Mark as "Hit-For-Pass" for the next 2 minutes
#          */
#         set beresp.ttl = 120 s;
#         return (hit_for_pass);
#     }
#     return (deliver);
# }
#
# sub vcl_deliver {
#     return (deliver);
# }
#
# sub vcl_error {
#     set obj.http.Content-Type = "text/html; charset=utf-8";
#     set obj.http.Retry-After = "5";
#     synthetic {"
# <?xml version="1.0" encoding="utf-8"?>
# <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
# "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
# <html>
# <head>
# <title>"} + obj.status + " " + obj.response + {"</title>
# </head>
# <body>
# <h1>Error "} + obj.status + " " + obj.response + {"</h1>
# <p>"} + obj.response + {"</p>
# <h3>Guru Meditation:</h3>
# <p>XID: "} + req.xid + {"</p>
# <hr>
# <p>Varnish cache server</p>
# </body>
# </html>
# "};
#     return (deliver);
# }
#
# sub vcl_init {
#     return (ok);
# }
#
# sub vcl_fini {
#     return (ok);
# }

说实话像这样的东西基本就是学习各种配置参数和怎么使用它们,它们的灵活度很高带来的却是我们要付出很大的代价去学会运用它,不想java中的框架一样我们稍微做一下配置就可以发挥它们的专长。从某种意义上来说所谓的分布式高并发高性能说到底就是去学习每个工具怎么使用,而且更多的还是在实践中摸索,这么多配置和规则单单是学习是远远达不到实战的效果的。这也是经验积累的过程,这个过程需要实现去完成,这也是我现在最头疼的事情,没有找到一个好的平台去实践更确切的说是没有人提供给你这样一个环境去实践。 我这里给你说再多的优化规则和怎么配置也没用。还有这些配置中的含义你要不要每一个都消化了,你除了查资料动手去做,没有什么好办法。 我么在这里先做一些浅层次的分析知道这个东西是干什么的,每一个工具的学习都是需要不断的实践过程。