From d7e59113d5ac8740bc48a784bf186cada6978813 Mon Sep 17 00:00:00 2001 From: Yuelin Zheng <52862365+Firstsawyou@users.noreply.github.com> Date: Fri, 27 Nov 2020 18:33:02 +0800 Subject: [PATCH] feat: How to distinguish whether the 5xx status code (eg 500) comes from upstream or apisix (#2817) close #2501 --- apisix/init.lua | 20 ++ doc/README.md | 1 + doc/debug-function.md | 151 +++++++++++++ doc/zh-cn/README.md | 1 + doc/zh-cn/debug-function.md | 151 +++++++++++++ t/lib/server.lua | 5 + t/node/upstream-status-5xx.t | 419 +++++++++++++++++++++++++++++++++++ 7 files changed, 748 insertions(+) create mode 100644 doc/debug-function.md create mode 100644 doc/zh-cn/debug-function.md create mode 100644 t/node/upstream-status-5xx.t diff --git a/apisix/init.lua b/apisix/init.lua index 629e7507226d..7333dd14936f 100644 --- a/apisix/init.lua +++ b/apisix/init.lua @@ -36,6 +36,7 @@ local type = type local ngx_now = ngx.now local str_byte = string.byte local str_sub = string.sub +local tonumber = tonumber local load_balancer local local_conf local dns_resolver @@ -626,9 +627,28 @@ local function common_phase(phase_name) end +local function set_resp_upstream_status(up_status) + core.response.set_header("X-APISIX-Upstream-Status", up_status) + core.log.info("X-APISIX-Upstream-Status: ", up_status) +end + + function _M.http_header_filter_phase() core.response.set_header("Server", ver_header) + local up_status = get_var("upstream_status") + if up_status and #up_status == 3 + and tonumber(up_status) >= 500 + and tonumber(up_status) <= 599 + then + set_resp_upstream_status(up_status) + elseif up_status and #up_status > 3 then + local last_status = str_sub(up_status, -3) + if tonumber(last_status) >= 500 and tonumber(last_status) <= 599 then + set_resp_upstream_status(up_status) + end + end + common_phase("header_filter") end diff --git a/doc/README.md b/doc/README.md index f2ea095a451f..00226c84ae39 100644 --- a/doc/README.md +++ b/doc/README.md @@ -35,6 +35,7 @@ * [Changelog](../CHANGELOG.md) * [Benchmark](benchmark.md) * [Code Style](../CODE_STYLE.md) +* [Debug Function](./debug-function.md) * [FAQ](../FAQ.md) ## Plugins diff --git a/doc/debug-function.md b/doc/debug-function.md new file mode 100644 index 000000000000..d292e547e5a6 --- /dev/null +++ b/doc/debug-function.md @@ -0,0 +1,151 @@ + + +[Chinese](zh-cn/debug-function.md) + +## `5xx` response status code + +Similar `5xx` status codes such as 500, 502, 503, etc., are the status codes in response to a server error. When a request has a `5xx` status code; it may come from `APISIX` or `Upstream`. How to identify the source of these response status codes is a very meaningful thing. It can quickly help us determine the problem. + +## How to identify the source of the `5xx` response status code + +In the response header of the request, through the response header of `X-APISIX-Upstream-Status`, we can effectively identify the source of the `5xx` status code. When the `5xx` status code comes from `Upstream`, the response header `X-APISIX-Upstream-Status` can be seen in the response header, and the value of this response header is the response status code. When the `5xx` status code is derived from `APISIX`, there is no response header information of `X-APISIX-Upstream-Status` in the response header. That is, only when the status code of `5xx` is derived from Upstream will the `X-APISIX-Upstream-Status` response header appear. + +## Example + +>Example 1: `502` response status code comes from `Upstream` (IP address is not available) + +```shell +$ curl http://127.0.0.1:9080/apisix/admin/routes/1 -H 'X-API-KEY: edd1c9f034335f136f87ad84b625c8f1' -X PUT -d ' +{ + "methods": ["GET"], + "upstream": { + "nodes": { + "127.0.0.1:1": 1 + }, + "type": "roundrobin" + }, + "uri": "/hello" +}' +``` + +Test: + +```shell +$ curl http://127.0.0.1:9080/hello -v +...... +< HTTP/1.1 502 Bad Gateway +< Date: Wed, 25 Nov 2020 14:40:22 GMT +< Content-Type: text/html; charset=utf-8 +< Content-Length: 154 +< Connection: keep-alive +< Server: APISIX/2.0 +< X-APISIX-Upstream-Status: 502 +< + +502 Bad Gateway + +

502 Bad Gateway

+
openresty
+ + + +``` + +It has a response header of `X-APISIX-Upstream-Status: 502`. + +>Example 2: `502` response status code comes from `APISIX` + +```shell +$ curl http://127.0.0.1:9080/apisix/admin/routes/1 -H 'X-API-KEY: edd1c9f034335f136f87ad84b625c8f1' -X PUT -d ' +{ + "plugins": { + "fault-injection": { + "abort": { + "http_status": 500, + "body": "Fault Injection!\n" + } + } + }, + "uri": "/hello" +}' +``` + +Test: + +```shell +$ curl http://127.0.0.1:9080/hello -v +...... +< HTTP/1.1 500 Internal Server Error +< Date: Wed, 25 Nov 2020 14:50:20 GMT +< Content-Type: text/plain; charset=utf-8 +< Transfer-Encoding: chunked +< Connection: keep-alive +< Server: APISIX/2.0 +< +Fault Injection! +``` + +There is no response header for `X-APISIX-Upstream-Status`. + +>Example 3: `Upstream` has multiple nodes, and all nodes are unavailable + +```shell +$ curl http://127.0.0.1:9080/apisix/admin/upstreams/1 -H 'X-API-KEY: edd1c9f034335f136f87ad84b625c8f1' -X PUT -d ' +{ + "nodes": { + "127.0.0.3:1": 1, + "127.0.0.2:1": 1, + "127.0.0.1:1": 1 + }, + "retries": 2, + "type": "roundrobin" +}' +``` + +```shell +$ curl http://127.0.0.1:9080/apisix/admin/routes/1 -H 'X-API-KEY: edd1c9f034335f136f87ad84b625c8f1' -X PUT -d ' +{ + "uri": "/hello", + "upstream_id": "1" +}' +``` + +Test: + +```shell +$ curl http://127.0.0.1:9080/hello -v +< HTTP/1.1 502 Bad Gateway +< Date: Wed, 25 Nov 2020 15:07:34 GMT +< Content-Type: text/html; charset=utf-8 +< Content-Length: 154 +< Connection: keep-alive +< Server: APISIX/2.0 +< X-APISIX-Upstream-Status: 502, 502, 502 +< + +502 Bad Gateway + +

502 Bad Gateway

+
openresty
+ + +``` + +It has a response header of `X-APISIX-Upstream-Status: 502, 502, 502`. diff --git a/doc/zh-cn/README.md b/doc/zh-cn/README.md index 5d83b0bdec5d..53250ab9a4b6 100644 --- a/doc/zh-cn/README.md +++ b/doc/zh-cn/README.md @@ -34,6 +34,7 @@ * [变更日志](../../CHANGELOG_CN.md) * [压力测试](benchmark.md) * [代码风格](../../CODE_STYLE_CN.md) +* [调试功能](./debug-function.md) * [常见问答](../../FAQ_CN.md) ## 插件 diff --git a/doc/zh-cn/debug-function.md b/doc/zh-cn/debug-function.md new file mode 100644 index 000000000000..b096c03c6d2c --- /dev/null +++ b/doc/zh-cn/debug-function.md @@ -0,0 +1,151 @@ + + +[English](../debug-function.md) + +## `5xx` 响应状态码 + +500、502、503等类似的 `5xx` 状态码,是由于服务器错误而响应的状态码,当一个请求出现 `5xx` 状态码时;它可能来源于 `APISIX` 或 `Upstream` 。如何识别这些响应状态码的来源,是一件很有意义的事,它能够快速的帮助我们确定问题的所在。 + +## 如何识别 `5xx` 响应状态码的来源 + +在请求的响应头中,通过 `X-APISIX-Upstream-Status` 这个响应头,我们可以有效的识别 `5xx` 状态码的来源。当 `5xx` 状态码来源于 `Upstream` 时,在响应头中可以看到 `X-APISIX-Upstream-Status` 这个响应头,并且这个响应头的值为响应的状态码。当 `5xx` 状态码来源于 `APISIX` 时,响应头中没有 `X-APISIX-Upstream-Status` 的响应头信息。也就是只有 `5xx` 状态码来源于 `Upstream` 时,才会有 `X-APISIX-Upstream-Status` 响应头。 + +## 示例 + +示例1:`502` 响应状态码来源于 `Upstream` (IP地址不可用) + +```shell +$ curl http://127.0.0.1:9080/apisix/admin/routes/1 -H 'X-API-KEY: edd1c9f034335f136f87ad84b625c8f1' -X PUT -d ' +{ + "methods": ["GET"], + "upstream": { + "nodes": { + "127.0.0.1:1": 1 + }, + "type": "roundrobin" + }, + "uri": "/hello" +}' +``` + +测试: + +```shell +$ curl http://127.0.0.1:9080/hello -v +...... +< HTTP/1.1 502 Bad Gateway +< Date: Wed, 25 Nov 2020 14:40:22 GMT +< Content-Type: text/html; charset=utf-8 +< Content-Length: 154 +< Connection: keep-alive +< Server: APISIX/2.0 +< X-APISIX-Upstream-Status: 502 +< + +502 Bad Gateway + +

502 Bad Gateway

+
openresty
+ + + +``` + +具有 `X-APISIX-Upstream-Status: 502` 的响应头。 + +示例2: `502` 响应状态码来源于 `APISIX` + +```shell +$ curl http://127.0.0.1:9080/apisix/admin/routes/1 -H 'X-API-KEY: edd1c9f034335f136f87ad84b625c8f1' -X PUT -d ' +{ + "plugins": { + "fault-injection": { + "abort": { + "http_status": 500, + "body": "Fault Injection!\n" + } + } + }, + "uri": "/hello" +}' +``` + +测试: + +```shell +$ curl http://127.0.0.1:9080/hello -v +...... +< HTTP/1.1 500 Internal Server Error +< Date: Wed, 25 Nov 2020 14:50:20 GMT +< Content-Type: text/plain; charset=utf-8 +< Transfer-Encoding: chunked +< Connection: keep-alive +< Server: APISIX/2.0 +< +Fault Injection! +``` + +没有 `X-APISIX-Upstream-Status` 的响应头。 + +示例3:`Upstream` 具有多节点,并且所有节点不可用 + +```shell +$ curl http://127.0.0.1:9080/apisix/admin/upstreams/1 -H 'X-API-KEY: edd1c9f034335f136f87ad84b625c8f1' -X PUT -d ' +{ + "nodes": { + "127.0.0.3:1": 1, + "127.0.0.2:1": 1, + "127.0.0.1:1": 1 + }, + "retries": 2, + "type": "roundrobin" +}' +``` + +```shell +$ curl http://127.0.0.1:9080/apisix/admin/routes/1 -H 'X-API-KEY: edd1c9f034335f136f87ad84b625c8f1' -X PUT -d ' +{ + "uri": "/hello", + "upstream_id": "1" +}' +``` + +测试: + +```shell +$ curl http://127.0.0.1:9080/hello -v +< HTTP/1.1 502 Bad Gateway +< Date: Wed, 25 Nov 2020 15:07:34 GMT +< Content-Type: text/html; charset=utf-8 +< Content-Length: 154 +< Connection: keep-alive +< Server: APISIX/2.0 +< X-APISIX-Upstream-Status: 502, 502, 502 +< + +502 Bad Gateway + +

502 Bad Gateway

+
openresty
+ + +``` + +具有 `X-APISIX-Upstream-Status: 502, 502, 502` 的响应头。 diff --git a/t/lib/server.lua b/t/lib/server.lua index 57ea46a087f5..f3ca00f5b5a6 100644 --- a/t/lib/server.lua +++ b/t/lib/server.lua @@ -365,4 +365,9 @@ function _M.log() end +function _M.server_error() + error("500 Internal Server Error") +end + + return _M diff --git a/t/node/upstream-status-5xx.t b/t/node/upstream-status-5xx.t new file mode 100644 index 000000000000..7f42bec98745 --- /dev/null +++ b/t/node/upstream-status-5xx.t @@ -0,0 +1,419 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +use t::APISIX 'no_plan'; + +repeat_each(1); +log_level('info'); +worker_connections(256); +no_root_location(); +no_shuffle(); + +run_tests(); + +__DATA__ + +=== TEST 1: set route(id: 1) and available upstream +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/routes/1', + ngx.HTTP_PUT, + [[{ + "methods": ["GET"], + "upstream": { + "nodes": { + "127.0.0.1:1980": 1 + }, + "type": "roundrobin" + }, + "uri": "/hello" + }]] + ) + + if code >= 300 then + ngx.status = code + end + ngx.say(body) + } + } +--- request +GET /t +--- response_body +passed +--- no_error_log +[error] + + + +=== TEST 2: hit the route and $upstream_status is 200 +--- request +GET /hello +--- response_body +hello world +--- no_error_log +[error] +--- grep_error_log eval +qr/X-APISIX-Upstream-Status: 200/ +--- grep_error_log_out + + + +=== TEST 3: set route(id: 1) and set the timeout field +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/routes/1', + ngx.HTTP_PUT, + [[{ + "methods": ["GET"], + "upstream": { + "nodes": { + "127.0.0.1:1980": 1 + }, + "type": "roundrobin", + "timeout": { + "connect": 0.5, + "send": 0.5, + "read": 0.5 + } + }, + "uri": "/sleep1" + }]] + ) + + if code >= 300 then + ngx.status = code + end + ngx.say(body) + } + } +--- request +GET /t +--- response_body +passed +--- no_error_log +[error] + + + +=== TEST 4: hit routes (timeout) and $upstream_status is 504 +--- request +GET /sleep1 +--- error_code: 504 +--- response_body eval +qr/504 Gateway Time-out/ +--- error_log +X-APISIX-Upstream-Status: 504 + + + +=== TEST 5: set route(id: 1), upstream service is not available +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/routes/1', + ngx.HTTP_PUT, + [[{ + "methods": ["GET"], + "upstream": { + "nodes": { + "127.0.0.1:1": 1 + }, + "type": "roundrobin" + }, + "uri": "/hello" + }]] + ) + + if code >= 300 then + ngx.status = code + end + ngx.say(body) + } + } +--- request +GET /t +--- response_body +passed +--- no_error_log +[error] + + + +=== TEST 6: hit routes and $upstream_status is 502 +--- request +GET /hello +--- error_code: 502 +--- response_body eval +qr/502 Bad Gateway/ +--- error_log +X-APISIX-Upstream-Status: 502 + + + +=== TEST 7: set route(id: 1) and uri is `/server_error` +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/routes/1', + ngx.HTTP_PUT, + [[{ + "methods": ["GET"], + "upstream": { + "nodes": { + "127.0.0.1:1980": 1 + }, + "type": "roundrobin" + }, + "uri": "/server_error" + }]] + ) + + if code >= 300 then + ngx.status = code + end + ngx.say(body) + } + } +--- request +GET /t +--- response_body +passed +--- no_error_log +[error] + + + +=== TEST 8: hit routes and $upstream_status is 500 +--- request +GET /server_error +--- error_code: 500 +--- response_body eval +qr/500 Internal Server Error/ +--- error_log +X-APISIX-Upstream-Status: 500 + + + +=== TEST 9: set upstream(id: 1, retries = 2), has available upstream +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/upstreams/1', + ngx.HTTP_PUT, + [[{ + "nodes": { + "127.0.0.2:1": 1, + "127.0.0.1:1980": 1 + }, + "retries": 2, + "type": "roundrobin" + }]] + ) + + if code >= 300 then + ngx.status = code + end + ngx.say(body) + } + } +--- request +GET /t +--- response_body +passed +--- no_error_log +[error] + + + +=== TEST 10: set route(id: 1) and bind the upstream_id +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/routes/1', + ngx.HTTP_PUT, + [[{ + "uri": "/hello", + "upstream_id": "1" + }]] + ) + + if code >= 300 then + ngx.status = code + end + ngx.say(body) + } + } +--- request +GET /t +--- response_body +passed +--- no_error_log +[error] + + + +=== TEST 11: hit routes and $upstream_status is `502, 200` +--- request +GET /hello +--- response_body +hello world +--- grep_error_log eval +qr/X-APISIX-Upstream-Status: 502, 200/ +--- grep_error_log_out + + + +=== TEST 12: set upstream(id: 1, retries = 2), all upstream nodes are unavailable +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/upstreams/1', + ngx.HTTP_PUT, + [[{ + "nodes": { + "127.0.0.3:1": 1, + "127.0.0.2:1": 1, + "127.0.0.1:1": 1 + + }, + "retries": 2, + "type": "roundrobin" + }]] + ) + + if code >= 300 then + ngx.status = code + end + ngx.say(body) + } + } +--- request +GET /t +--- response_body +passed +--- no_error_log +[error] + + + +=== TEST 13: hit routes, retry between upstream failed, $upstream_status is `502, 502, 502` +--- request +GET /hello +--- error_code: 502 +--- error_log eval +qr/X-APISIX-Upstream-Status: 502, 502, 502/ + + + +=== TEST 14: return 500 status code from APISIX +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/routes/1', + ngx.HTTP_PUT, + [[{ + "plugins": { + "fault-injection": { + "abort": { + "http_status": 500, + "body": "Fault Injection!\n" + } + } + }, + "uri": "/hello" + }]] + ) + + if code >= 300 then + ngx.status = code + end + ngx.say(body) + } + } +--- request +GET /t +--- response_body +passed +--- no_error_log +[error] + + + +=== TEST 15: hit routes, status code is 500 +--- request +GET /hello +--- error_code: 500 +--- response_body +Fault Injection! +--- grep_error_log eval +qr/X-APISIX-Upstream-Status: 500/ +--- grep_error_log_out + + + +=== TEST 16: return 200 status code from APISIX +--- config + location /t { + content_by_lua_block { + local t = require("lib.test_admin").test + local code, body = t('/apisix/admin/routes/1', + ngx.HTTP_PUT, + [[{ + "plugins": { + "fault-injection": { + "abort": { + "http_status": 200, + "body": "Fault Injection!\n" + } + } + }, + "uri": "/hello" + }]] + ) + + if code >= 300 then + ngx.status = code + end + ngx.say(body) + } + } +--- request +GET /t +--- response_body +passed +--- no_error_log +[error] + + + +=== TEST 17: hit routes, status code is 200 +--- request +GET /hello +--- response_body +Fault Injection! +--- grep_error_log eval +qr/X-APISIX-Upstream-Status: 200/ +--- grep_error_log_out