ELK 系列八、全链路监控

时间:2024-03-19 17:29:08

一、简介

写这篇文章的出发点是总结这一段时间工作中全链路监控实施过程的重点配置,整体架构和具体细节都由本人完成,以下的配置是研究时搭建的,本篇侧重数据采集、入库和查询展现。全链路监控有很多实现方式,我这边是以网关为切入点进行监控,具体的设计流程如下:

其中lbid为负载均衡节点序号,rid为requestid(请求ID)。其中有很多技术细节这边就不记录了,因为没那么多时间,本文章主要记录以下三部分:

数据源:nginx和kong的配置

采集器:filebeat和logstash的配置

数据展现:kibana

ELK 系列八、全链路监控

二、数据源配置

2.1 nginx的配置如下:

vim /usr/local/openresty/nginx/conf/nginx.conf,其中重点是

log_format elk '[$time_iso8601] $msec "$http_host" $remote_addr "$server_addr:$server_port" $status $request_time $bytes_sent "$server_protocol" "$request_method" "$request_uri" "$http_referer" "$request_body" "$upstream_addr" $upstream_response_time "$http_user_agent" "$http_x_forwarded_for" "$request_id" "$http_rid" "$http_qc_lb_id"';

proxy_set_header rid  $uuid;


# Front nginx (OpenResty) configuration used as the entry point of the
# full-link tracing setup: it writes one structured access-log line per
# request (format "elk") and forwards a request id header ("rid") upstream.

#user  nobody;
worker_processes  1;

# NOTE(review): "debug" is very verbose; the commented alternatives below are
# the usual choices outside of a lab setup.
error_log  logs/error.log debug;
#error_log  logs/error.log  notice;
#error_log  logs/error.log  info;

pid        logs/nginx.pid;

events {
    worker_connections  102400;
}


http {
    include       mime.types;
    default_type  application/octet-stream;
    server_names_hash_bucket_size 64;

    # log_format  main  '$remote_addr - $remote_user [$time_local] "$request" '
    #                   '$status $body_bytes_sent "$http_referer" '
    #                   '"$http_user_agent" "$http_x_forwarded_for"';

    # Access-log format parsed by the Logstash grok filter. The last three
    # fields carry the tracing data: nginx's own $request_id, the incoming
    # "rid" request header and the incoming "qc_lb_id" request header.
    log_format  elk '[$time_iso8601] $msec "$http_host" $remote_addr "$server_addr:$server_port" $status $request_time $bytes_sent "$server_protocol" "$request_method" "$request_uri" "$http_referer" "$request_body" "$upstream_addr" $upstream_response_time "$http_user_agent" "$http_x_forwarded_for" "$request_id" "$http_rid" "$http_qc_lb_id"';
    # access_log  logs/access.log  main;
    access_log  logs/access_elk.log  elk;

    sendfile        on;
    #tcp_nopush     on;

    #keepalive_timeout  0;
    keepalive_timeout  65;
    gzip  on;
    # Accept request header names that contain underscores
    # (presumably needed for a header such as qc_lb_id -- confirm with the LB).
    underscores_in_headers on;
    lua_code_cache on;


    server {
        listen 80;
        charset utf-8;
        default_type  text/html;

        server_name dev-qll.otosaas.com;

        location ^~ / {
            proxy_redirect   off;
            proxy_set_header Host            $host;
            proxy_set_header X-Real-Ip       $remote_addr;
            proxy_set_header X-Forwarded-for $remote_addr;

            # Request-id propagation: when the caller sent no "rid" header,
            # start a new trace with nginx's $request_id ...
            if ( $http_rid = '' ){
                set $uuid $request_id;
            }

            # ... otherwise keep the caller's rid so every hop logs the same id.
            if ( $http_rid != '' ){
                set $uuid $http_rid;
            }

            proxy_set_header rid  $uuid;

            proxy_pass http://127.0.0.1:8080;

            # CORS response headers.
            # Fixed: the header name was misspelled "Access-Control-Allow-Cerdentials",
            # which browsers ignore.
            # NOTE(review): browsers reject credentialed requests when
            # Access-Control-Allow-Origin is the wildcard '*'; echo a concrete
            # origin if credentials are really required.
            add_header  'Access-Control-Allow-Origin' '*';
            add_header  'Access-Control-Allow-Credentials' 'true';
            add_header  'Access-Control-Allow-Methods' 'OPTIONS,POST,GET';
            add_header  'Access-Control-Allow-Headers' 'rid';
        }
    }
}

ELK 系列八、全链路监控

使修改的配置生效

nginx -s reload

2.2 kong的配置如下:

vim /usr/local/share/lua/5.1/kong/templates/nginx_kong.lua

kong的配置模板比较大,这里先贴出修改后的完整模板,需要更新的部分在后面单独说明

 全部配置如下:

 

-- Kong nginx configuration template: this Lua chunk returns the template
-- text as one long string.
-- Lines starting with ">" look like template control lines and ${{NAME}}
-- like placeholders substituted at render time -- presumably handled by
-- Kong's template renderer; confirm against the Kong version in use.
--
-- Request-tracing related parts of the template:
--   * the "elk" log_format, whose last three fields are $request_id,
--     the incoming "rid" header and the incoming "qc_lb_id" header;
--   * in the "kong" proxy server, a second access_log that writes
--     logs/access_elk.log using the "elk" format;
--   * in that server's "location /", the two "if" blocks that set $uuid to
--     the incoming rid header when present, or to $request_id otherwise,
--     followed by "proxy_set_header rid $uuid" to pass it upstream.
return [[
charset UTF-8;

> if anonymous_reports then
${{SYSLOG_REPORTS}}
> end

log_format  main  '$remote_addr - $remote_user [$time_local] "$http_host" "$request" '
                  '$status $body_bytes_sent "$http_referer" "$http_user_agent" $request_time';

log_format  elk '[$time_iso8601] $msec "$http_host" $remote_addr "$server_addr:$server_port" $status $request_time $bytes_sent "$server_protocol" "$request_method" "$request_uri" "$http_referer" "$request_body" "$upstream_addr" $upstream_response_time "$http_user_agent" "$http_x_forwarded_for" "$request_id" "$http_rid" "$http_qc_lb_id"';

error_log ${{PROXY_ERROR_LOG}} ${{LOG_LEVEL}};

> if nginx_optimizations then
>-- send_timeout 60s;          # default value
>-- keepalive_timeout 75s;     # default value
>-- client_body_timeout 60s;   # default value
>-- client_header_timeout 60s; # default value
>-- tcp_nopush on;             # disabled until benchmarked
>-- proxy_buffer_size 128k;    # disabled until benchmarked
>-- proxy_buffers 4 256k;      # disabled until benchmarked
>-- proxy_busy_buffers_size 256k; # disabled until benchmarked
>-- reset_timedout_connection on; # disabled until benchmarked
> end

client_max_body_size 0;
proxy_ssl_server_name on;
underscores_in_headers on;

real_ip_header X-Forwarded-For;
set_real_ip_from 0.0.0.0/0;
real_ip_recursive on;

lua_package_path '${{LUA_PACKAGE_PATH}};;';
lua_package_cpath '${{LUA_PACKAGE_CPATH}};;';
lua_code_cache ${{LUA_CODE_CACHE}};
lua_socket_pool_size ${{LUA_SOCKET_POOL_SIZE}};
lua_max_running_timers 4096;
lua_max_pending_timers 16384;
lua_shared_dict kong 4m;
lua_shared_dict cache ${{MEM_CACHE_SIZE}};
lua_shared_dict cache_locks 100k;
lua_shared_dict process_events 1m;
lua_shared_dict cassandra 5m;
lua_socket_log_errors off;
> if lua_ssl_trusted_certificate then
lua_ssl_trusted_certificate '${{LUA_SSL_TRUSTED_CERTIFICATE}}';
lua_ssl_verify_depth ${{LUA_SSL_VERIFY_DEPTH}};
> end

init_by_lua_block {
    require 'luarocks.loader'
    require 'resty.core'
    kong = require 'kong'
    kong.init()
}

init_worker_by_lua_block {
    kong.init_worker()
}

proxy_next_upstream_tries 999;

upstream kong_upstream {
    server 0.0.0.1;
    balancer_by_lua_block {
        kong.balancer()
    }
    keepalive ${{UPSTREAM_KEEPALIVE}};
}

map $http_upgrade $upstream_connection {
    default keep-alive;
    websocket upgrade;
}

map $http_upgrade $upstream_upgrade {
    default '';
    websocket websocket;
}

server {
    server_name kong;
    listen ${{PROXY_LISTEN}};
    error_page 404 408 411 412 413 414 417 /kong_error_handler;
    error_page 500 502 503 504 /kong_error_handler;

    access_log ${{PROXY_ACCESS_LOG}} main;
    access_log logs/access_elk.log elk;
    error_log ${{PROXY_ERROR_LOG}} ${{LOG_LEVEL}};


> if ssl then
    listen ${{PROXY_LISTEN_SSL}} ssl;
    ssl_certificate ${{SSL_CERT}};
    ssl_certificate_key ${{SSL_CERT_KEY}};
    ssl_protocols TLSv1.1 TLSv1.2;
    ssl_certificate_by_lua_block {
        kong.ssl_certificate()
    }

    ssl_session_cache shared:SSL:10m;
    ssl_session_timeout 10m;
    ssl_prefer_server_ciphers on;
    ssl_ciphers ${{SSL_CIPHERS}};
> end

> if client_ssl then
    proxy_ssl_certificate ${{CLIENT_SSL_CERT}};
    proxy_ssl_certificate_key ${{CLIENT_SSL_CERT_KEY}};
> end

    location / {
        set $upstream_host nil;
        set $upstream_scheme nil;

        rewrite_by_lua_block {
            kong.rewrite()
        }

        access_by_lua_block {
            kong.access()
        }

        proxy_http_version 1.1;
        proxy_set_header X-Real-IP $remote_addr;
        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
        proxy_set_header X-Forwarded-Proto $scheme;
        proxy_set_header Host $upstream_host;
        proxy_set_header Upgrade $upstream_upgrade;
        proxy_set_header Connection $upstream_connection;
        proxy_pass_header Server;
       
	if ( $http_rid = '' ){
            set $uuid $request_id;
        }

        if ( $http_rid != '' ){
            set $uuid $http_rid;
        }

        proxy_set_header rid  $uuid; 
        proxy_ssl_name $upstream_host;

        proxy_pass $upstream_scheme://kong_upstream;

        header_filter_by_lua_block {
            kong.header_filter()
        }

        body_filter_by_lua_block {
            kong.body_filter()
        }

        log_by_lua_block {
            kong.log()
        }

    }

    location = /kong_error_handler {
        internal;
        content_by_lua_block {
            require('kong.core.error_handlers')(ngx)
        }
    }
}

server {
    server_name kong_admin;
    listen ${{ADMIN_LISTEN}};

    access_log ${{ADMIN_ACCESS_LOG}};
    error_log ${{ADMIN_ERROR_LOG}} ${{LOG_LEVEL}};

    client_max_body_size 10m;
    client_body_buffer_size 10m;

> if admin_ssl then
    listen ${{ADMIN_LISTEN_SSL}} ssl;
    ssl_certificate ${{ADMIN_SSL_CERT}};
    ssl_certificate_key ${{ADMIN_SSL_CERT_KEY}};
    ssl_protocols TLSv1.1 TLSv1.2;

    ssl_session_cache shared:SSL:10m;
    ssl_session_timeout 10m;
    ssl_prefer_server_ciphers on;
    ssl_ciphers ${{SSL_CIPHERS}};
> end

    location / {
        default_type application/json;
        content_by_lua_block {
            ngx.header['Access-Control-Allow-Origin'] = '*'

            if ngx.req.get_method() == 'OPTIONS' then
                ngx.header['Access-Control-Allow-Methods'] = 'GET,HEAD,PUT,PATCH,POST,DELETE'
                ngx.header['Access-Control-Allow-Headers'] = 'Content-Type'
                ngx.exit(204)
            end

            require('lapis').serve('kong.api')
        }
    }

    location /nginx_status {
        internal;
        access_log off;
        stub_status;
    }

    location /robots.txt {
        return 200 'User-agent: *\nDisallow: /';
    }
}
]]

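下面配置为修改配置(对应上面模板中的三处:新增的 log_format elk、proxy server 中新增的 access_log logs/access_elk.log elk,以及 location / 中根据 $http_rid 设置 rid 请求头的部分):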

 

ELK 系列八、全链路监控ELK 系列八、全链路监控

ELK 系列八、全链路监控

ELK 系列八、全链路监控

 

使修改的配置生效

kong restart

三、采集器设置

3.1 filebeat设置如下

 vim /data/filebeat-6.5.3-linux-x86_64/filebeat.yml (每个人的目录都可能不一样)


# Filebeat: tail the Kong "elk" access log and ship each line to Logstash.
filebeat.inputs:
  - type: log
    enabled: true
    paths:
      - /usr/local/kong/logs/access_elk.log
    # Put the custom field at the top level of the event (logtype, not
    # fields.logtype) so the Logstash pipeline can test [logtype] directly.
    fields_under_root: true
    fields:
      logtype: otosaas_qlllog

filebeat.config.modules:
  path: ${path.config}/modules.d/*.yml
  reload.enabled: false

setup.template.settings:
  index.number_of_shards: 3

setup.kibana:

output.logstash:
  # The Logstash hosts (beats input port)
  hosts:
    - "192.168.0.15:5044"

processors:
  - add_host_metadata: ~
  - add_cloud_metadata: ~

使配置生效,我这边是使用守护进程重启的,具体守护进程的安装方式详见

https://blog.csdn.net/wu2700222/article/details/85044117

supervisorctl restart  filebeat

3.2 logstash 的设置如下

vim logstash 的 pipeline 配置文件(例如 /data/logstash-6.5.3/config/logstash.conf,每个人的目录都可能不一样;注意这里编辑的不是 filebeat.yml)

其中grok就是解析之前网关中的日志,mutate是对字段类型的转化,如果不转化默认都是字符串

# Logstash pipeline for the gateway "elk" access log:
# beats input -> grok parse + type conversion + GeoIP -> Elasticsearch.

# Receive events from Filebeat on port 5044.
input {
    beats {
        port => "5044"
    }
}

# Parsing / enrichment.
filter {
  # Full-link tracing access log (logtype is set by Filebeat).
  if [logtype] == "otosaas_qlllog" {
    # Split the nginx/Kong "elk" log_format line into named fields.
    # Field names are kept exactly as before so existing indices and
    # dashboards keep working.
    grok {
        match => { "message" => '\[%{TIMESTAMP_ISO8601:timestamp}\] %{NUMBER:mesc} %{DATA:httphost} %{IPORHOST:client_ip} \"%{DATA:server_ip}\" %{NUMBER:request_status} %{BASE16FLOAT:request_time} %{NUMBER:bytes_sent} \"(?:HTTP/%{NUMBER:httpversion})\" \"%{WORD:request_method}\" \"%{DATA:request_uri}\" \"%{DATA:http_referer}\" \"(%{DATA:request_body}?|-)\" \"%{DATA:upstream_ip}\" (?:%{NUMBER:upstream_time}|-) %{QS:agent} %{QS:referrer} \"%{WORD:request_id}\" \"%{DATA:http_rid}\" \"%{DATA:http_lbid}\"' }
    }
    # grok captures are strings by default; convert the numeric ones.
    mutate{
        convert => {
         "request_time" => "float"
         "request_status" => "integer"
         "bytes_sent" => "integer"
         "httpversion" => "float"
        }
    }
    # Fixed: the source must be the field produced by grok above, which is
    # "client_ip". The previous value "clientip" does not exist on the event,
    # so the GeoIP lookup could never succeed.
    geoip {
        source => "client_ip"
    }
  }
}

# Write to the local Elasticsearch service (port 9200), one index per day.
output {
  if [logtype] == "otosaas_qlllog" {
    elasticsearch {
        user => "elastic"
        password => "es的密码"
        hosts => ["127.0.0.1:9200"]
        index => "otosaas_qlllog-%{+YYYY.MM.dd}"
    }
  }
}

使配置生效,我这边是使用守护进程重启的,具体守护进程的安装方式详见

https://blog.csdn.net/wu2700222/article/details/85044117

supervisorctl restart  logstash

 

四、数据展现

进入kibana的web界面,进入系统管理建立索引

测试,自己写了一个python的程序,进行全链路的测试,调用接口都是通过kong,

源码在github上有兴趣的朋友可以看看,写的很简单,当初为了快速测试,很多东西没有配置化

https://github.com/wu2700222/djangotest/tree/dev/djangotest

ELK 系列八、全链路监控

ELK 系列八、全链路监控

然后我们进入kibana搜索一下rid=f20fdb3cef9c4a7286befd669f2af071

ELK 系列八、全链路监控

ok,大功告成