From d7e59113d5ac8740bc48a784bf186cada6978813 Mon Sep 17 00:00:00 2001
From: Yuelin Zheng <52862365+Firstsawyou@users.noreply.github.com>
Date: Fri, 27 Nov 2020 18:33:02 +0800
Subject: [PATCH] feat: How to distinguish whether the 5xx status code (eg 500)
comes from upstream or apisix (#2817)
close #2501
---
apisix/init.lua | 20 ++
doc/README.md | 1 +
doc/debug-function.md | 151 +++++++++++++
doc/zh-cn/README.md | 1 +
doc/zh-cn/debug-function.md | 151 +++++++++++++
t/lib/server.lua | 5 +
t/node/upstream-status-5xx.t | 419 +++++++++++++++++++++++++++++++++++
7 files changed, 748 insertions(+)
create mode 100644 doc/debug-function.md
create mode 100644 doc/zh-cn/debug-function.md
create mode 100644 t/node/upstream-status-5xx.t
diff --git a/apisix/init.lua b/apisix/init.lua
index 629e7507226d..7333dd14936f 100644
--- a/apisix/init.lua
+++ b/apisix/init.lua
@@ -36,6 +36,7 @@ local type = type
local ngx_now = ngx.now
local str_byte = string.byte
local str_sub = string.sub
+local tonumber = tonumber
local load_balancer
local local_conf
local dns_resolver
@@ -626,9 +627,28 @@ local function common_phase(phase_name)
end
+local function set_resp_upstream_status(up_status)
+ core.response.set_header("X-APISIX-Upstream-Status", up_status)
+ core.log.info("X-APISIX-Upstream-Status: ", up_status)
+end
+
+
function _M.http_header_filter_phase()
core.response.set_header("Server", ver_header)
+ local up_status = get_var("upstream_status")
+ if up_status and #up_status == 3
+ and tonumber(up_status) >= 500
+ and tonumber(up_status) <= 599
+ then
+ set_resp_upstream_status(up_status)
+ elseif up_status and #up_status > 3 then
+ local last_status = str_sub(up_status, -3)
+ if tonumber(last_status) >= 500 and tonumber(last_status) <= 599 then
+ set_resp_upstream_status(up_status)
+ end
+ end
+
common_phase("header_filter")
end
diff --git a/doc/README.md b/doc/README.md
index f2ea095a451f..00226c84ae39 100644
--- a/doc/README.md
+++ b/doc/README.md
@@ -35,6 +35,7 @@
* [Changelog](../CHANGELOG.md)
* [Benchmark](benchmark.md)
* [Code Style](../CODE_STYLE.md)
+* [Debug Function](./debug-function.md)
* [FAQ](../FAQ.md)
## Plugins
diff --git a/doc/debug-function.md b/doc/debug-function.md
new file mode 100644
index 000000000000..d292e547e5a6
--- /dev/null
+++ b/doc/debug-function.md
@@ -0,0 +1,151 @@
+
+
+[Chinese](zh-cn/debug-function.md)
+
+## `5xx` response status code
+
+Similar `5xx` status codes such as 500, 502, 503, etc., are the status codes in response to a server error. When a request has a `5xx` status code; it may come from `APISIX` or `Upstream`. How to identify the source of these response status codes is a very meaningful thing. It can quickly help us determine the problem.
+
+## How to identify the source of the `5xx` response status code
+
+In the response header of the request, through the response header of `X-APISIX-Upstream-Status`, we can effectively identify the source of the `5xx` status code. When the `5xx` status code comes from `Upstream`, the response header `X-APISIX-Upstream-Status` can be seen in the response header, and the value of this response header is the response status code. When the `5xx` status code is derived from `APISIX`, there is no response header information of `X-APISIX-Upstream-Status` in the response header. That is, only when the status code of `5xx` is derived from Upstream will the `X-APISIX-Upstream-Status` response header appear.
+
+## Example
+
+>Example 1: `502` response status code comes from `Upstream` (IP address is not available)
+
+```shell
+$ curl http://127.0.0.1:9080/apisix/admin/routes/1 -H 'X-API-KEY: edd1c9f034335f136f87ad84b625c8f1' -X PUT -d '
+{
+ "methods": ["GET"],
+ "upstream": {
+ "nodes": {
+ "127.0.0.1:1": 1
+ },
+ "type": "roundrobin"
+ },
+ "uri": "/hello"
+}'
+```
+
+Test:
+
+```shell
+$ curl http://127.0.0.1:9080/hello -v
+......
+< HTTP/1.1 502 Bad Gateway
+< Date: Wed, 25 Nov 2020 14:40:22 GMT
+< Content-Type: text/html; charset=utf-8
+< Content-Length: 154
+< Connection: keep-alive
+< Server: APISIX/2.0
+< X-APISIX-Upstream-Status: 502
+<
+
+
502 Bad Gateway
+
+502 Bad Gateway
+
openresty
+
+
+
+```
+
+It has a response header of `X-APISIX-Upstream-Status: 502`.
+
+>Example 2: `502` response status code comes from `APISIX`
+
+```shell
+$ curl http://127.0.0.1:9080/apisix/admin/routes/1 -H 'X-API-KEY: edd1c9f034335f136f87ad84b625c8f1' -X PUT -d '
+{
+ "plugins": {
+ "fault-injection": {
+ "abort": {
+ "http_status": 500,
+ "body": "Fault Injection!\n"
+ }
+ }
+ },
+ "uri": "/hello"
+}'
+```
+
+Test:
+
+```shell
+$ curl http://127.0.0.1:9080/hello -v
+......
+< HTTP/1.1 500 Internal Server Error
+< Date: Wed, 25 Nov 2020 14:50:20 GMT
+< Content-Type: text/plain; charset=utf-8
+< Transfer-Encoding: chunked
+< Connection: keep-alive
+< Server: APISIX/2.0
+<
+Fault Injection!
+```
+
+There is no response header for `X-APISIX-Upstream-Status`.
+
+>Example 3: `Upstream` has multiple nodes, and all nodes are unavailable
+
+```shell
+$ curl http://127.0.0.1:9080/apisix/admin/upstreams/1 -H 'X-API-KEY: edd1c9f034335f136f87ad84b625c8f1' -X PUT -d '
+{
+ "nodes": {
+ "127.0.0.3:1": 1,
+ "127.0.0.2:1": 1,
+ "127.0.0.1:1": 1
+ },
+ "retries": 2,
+ "type": "roundrobin"
+}'
+```
+
+```shell
+$ curl http://127.0.0.1:9080/apisix/admin/routes/1 -H 'X-API-KEY: edd1c9f034335f136f87ad84b625c8f1' -X PUT -d '
+{
+ "uri": "/hello",
+ "upstream_id": "1"
+}'
+```
+
+Test:
+
+```shell
+$ curl http://127.0.0.1:9080/hello -v
+< HTTP/1.1 502 Bad Gateway
+< Date: Wed, 25 Nov 2020 15:07:34 GMT
+< Content-Type: text/html; charset=utf-8
+< Content-Length: 154
+< Connection: keep-alive
+< Server: APISIX/2.0
+< X-APISIX-Upstream-Status: 502, 502, 502
+<
+
+502 Bad Gateway
+
+502 Bad Gateway
+
openresty
+
+
+```
+
+It has a response header of `X-APISIX-Upstream-Status: 502, 502, 502`.
diff --git a/doc/zh-cn/README.md b/doc/zh-cn/README.md
index 5d83b0bdec5d..53250ab9a4b6 100644
--- a/doc/zh-cn/README.md
+++ b/doc/zh-cn/README.md
@@ -34,6 +34,7 @@
* [变更日志](../../CHANGELOG_CN.md)
* [压力测试](benchmark.md)
* [代码风格](../../CODE_STYLE_CN.md)
+* [调试功能](./debug-function.md)
* [常见问答](../../FAQ_CN.md)
## 插件
diff --git a/doc/zh-cn/debug-function.md b/doc/zh-cn/debug-function.md
new file mode 100644
index 000000000000..b096c03c6d2c
--- /dev/null
+++ b/doc/zh-cn/debug-function.md
@@ -0,0 +1,151 @@
+
+
+[English](../debug-function.md)
+
+## `5xx` 响应状态码
+
+500、502、503等类似的 `5xx` 状态码,是由于服务器错误而响应的状态码,当一个请求出现 `5xx` 状态码时;它可能来源于 `APISIX` 或 `Upstream` 。如何识别这些响应状态码的来源,是一件很有意义的事,它能够快速的帮助我们确定问题的所在。
+
+## 如何识别 `5xx` 响应状态码的来源
+
+在请求的响应头中,通过 `X-APISIX-Upstream-Status` 这个响应头,我们可以有效的识别 `5xx` 状态码的来源。当 `5xx` 状态码来源于 `Upstream` 时,在响应头中可以看到 `X-APISIX-Upstream-Status` 这个响应头,并且这个响应头的值为响应的状态码。当 `5xx` 状态码来源于 `APISIX` 时,响应头中没有 `X-APISIX-Upstream-Status` 的响应头信息。也就是只有 `5xx` 状态码来源于 `Upstream` 时,才会有 `X-APISIX-Upstream-Status` 响应头。
+
+## 示例
+
+示例1:`502` 响应状态码来源于 `Upstream` (IP地址不可用)
+
+```shell
+$ curl http://127.0.0.1:9080/apisix/admin/routes/1 -H 'X-API-KEY: edd1c9f034335f136f87ad84b625c8f1' -X PUT -d '
+{
+ "methods": ["GET"],
+ "upstream": {
+ "nodes": {
+ "127.0.0.1:1": 1
+ },
+ "type": "roundrobin"
+ },
+ "uri": "/hello"
+}'
+```
+
+测试:
+
+```shell
+$ curl http://127.0.0.1:9080/hello -v
+......
+< HTTP/1.1 502 Bad Gateway
+< Date: Wed, 25 Nov 2020 14:40:22 GMT
+< Content-Type: text/html; charset=utf-8
+< Content-Length: 154
+< Connection: keep-alive
+< Server: APISIX/2.0
+< X-APISIX-Upstream-Status: 502
+<
+
+502 Bad Gateway
+
+502 Bad Gateway
+
openresty
+
+
+
+```
+
+具有 `X-APISIX-Upstream-Status: 502` 的响应头。
+
+示例2: `502` 响应状态码来源于 `APISIX`
+
+```shell
+$ curl http://127.0.0.1:9080/apisix/admin/routes/1 -H 'X-API-KEY: edd1c9f034335f136f87ad84b625c8f1' -X PUT -d '
+{
+ "plugins": {
+ "fault-injection": {
+ "abort": {
+ "http_status": 500,
+ "body": "Fault Injection!\n"
+ }
+ }
+ },
+ "uri": "/hello"
+}'
+```
+
+测试:
+
+```shell
+$ curl http://127.0.0.1:9080/hello -v
+......
+< HTTP/1.1 500 Internal Server Error
+< Date: Wed, 25 Nov 2020 14:50:20 GMT
+< Content-Type: text/plain; charset=utf-8
+< Transfer-Encoding: chunked
+< Connection: keep-alive
+< Server: APISIX/2.0
+<
+Fault Injection!
+```
+
+没有 `X-APISIX-Upstream-Status` 的响应头。
+
+示例3:`Upstream` 具有多节点,并且所有节点不可用
+
+```shell
+$ curl http://127.0.0.1:9080/apisix/admin/upstreams/1 -H 'X-API-KEY: edd1c9f034335f136f87ad84b625c8f1' -X PUT -d '
+{
+ "nodes": {
+ "127.0.0.3:1": 1,
+ "127.0.0.2:1": 1,
+ "127.0.0.1:1": 1
+ },
+ "retries": 2,
+ "type": "roundrobin"
+}'
+```
+
+```shell
+$ curl http://127.0.0.1:9080/apisix/admin/routes/1 -H 'X-API-KEY: edd1c9f034335f136f87ad84b625c8f1' -X PUT -d '
+{
+ "uri": "/hello",
+ "upstream_id": "1"
+}'
+```
+
+测试:
+
+```shell
+$ curl http://127.0.0.1:9080/hello -v
+< HTTP/1.1 502 Bad Gateway
+< Date: Wed, 25 Nov 2020 15:07:34 GMT
+< Content-Type: text/html; charset=utf-8
+< Content-Length: 154
+< Connection: keep-alive
+< Server: APISIX/2.0
+< X-APISIX-Upstream-Status: 502, 502, 502
+<
+
+502 Bad Gateway
+
+502 Bad Gateway
+
openresty
+
+
+```
+
+具有 `X-APISIX-Upstream-Status: 502, 502, 502` 的响应头。
diff --git a/t/lib/server.lua b/t/lib/server.lua
index 57ea46a087f5..f3ca00f5b5a6 100644
--- a/t/lib/server.lua
+++ b/t/lib/server.lua
@@ -365,4 +365,9 @@ function _M.log()
end
+function _M.server_error()
+ error("500 Internal Server Error")
+end
+
+
return _M
diff --git a/t/node/upstream-status-5xx.t b/t/node/upstream-status-5xx.t
new file mode 100644
index 000000000000..7f42bec98745
--- /dev/null
+++ b/t/node/upstream-status-5xx.t
@@ -0,0 +1,419 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+use t::APISIX 'no_plan';
+
+repeat_each(1);
+log_level('info');
+worker_connections(256);
+no_root_location();
+no_shuffle();
+
+run_tests();
+
+__DATA__
+
+=== TEST 1: set route(id: 1) and available upstream
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "methods": ["GET"],
+ "upstream": {
+ "nodes": {
+ "127.0.0.1:1980": 1
+ },
+ "type": "roundrobin"
+ },
+ "uri": "/hello"
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+ }
+ }
+--- request
+GET /t
+--- response_body
+passed
+--- no_error_log
+[error]
+
+
+
+=== TEST 2: hit the route and $upstream_status is 200
+--- request
+GET /hello
+--- response_body
+hello world
+--- no_error_log
+[error]
+--- grep_error_log eval
+qr/X-APISIX-Upstream-Status: 200/
+--- grep_error_log_out
+
+
+
+=== TEST 3: set route(id: 1) and set the timeout field
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "methods": ["GET"],
+ "upstream": {
+ "nodes": {
+ "127.0.0.1:1980": 1
+ },
+ "type": "roundrobin",
+ "timeout": {
+ "connect": 0.5,
+ "send": 0.5,
+ "read": 0.5
+ }
+ },
+ "uri": "/sleep1"
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+ }
+ }
+--- request
+GET /t
+--- response_body
+passed
+--- no_error_log
+[error]
+
+
+
+=== TEST 4: hit routes (timeout) and $upstream_status is 504
+--- request
+GET /sleep1
+--- error_code: 504
+--- response_body eval
+qr/504 Gateway Time-out/
+--- error_log
+X-APISIX-Upstream-Status: 504
+
+
+
+=== TEST 5: set route(id: 1), upstream service is not available
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "methods": ["GET"],
+ "upstream": {
+ "nodes": {
+ "127.0.0.1:1": 1
+ },
+ "type": "roundrobin"
+ },
+ "uri": "/hello"
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+ }
+ }
+--- request
+GET /t
+--- response_body
+passed
+--- no_error_log
+[error]
+
+
+
+=== TEST 6: hit routes and $upstream_status is 502
+--- request
+GET /hello
+--- error_code: 502
+--- response_body eval
+qr/502 Bad Gateway/
+--- error_log
+X-APISIX-Upstream-Status: 502
+
+
+
+=== TEST 7: set route(id: 1) and uri is `/server_error`
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "methods": ["GET"],
+ "upstream": {
+ "nodes": {
+ "127.0.0.1:1980": 1
+ },
+ "type": "roundrobin"
+ },
+ "uri": "/server_error"
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+ }
+ }
+--- request
+GET /t
+--- response_body
+passed
+--- no_error_log
+[error]
+
+
+
+=== TEST 8: hit routes and $upstream_status is 500
+--- request
+GET /server_error
+--- error_code: 500
+--- response_body eval
+qr/500 Internal Server Error/
+--- error_log
+X-APISIX-Upstream-Status: 500
+
+
+
+=== TEST 9: set upstream(id: 1, retries = 2), has available upstream
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/upstreams/1',
+ ngx.HTTP_PUT,
+ [[{
+ "nodes": {
+ "127.0.0.2:1": 1,
+ "127.0.0.1:1980": 1
+ },
+ "retries": 2,
+ "type": "roundrobin"
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+ }
+ }
+--- request
+GET /t
+--- response_body
+passed
+--- no_error_log
+[error]
+
+
+
+=== TEST 10: set route(id: 1) and bind the upstream_id
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "uri": "/hello",
+ "upstream_id": "1"
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+ }
+ }
+--- request
+GET /t
+--- response_body
+passed
+--- no_error_log
+[error]
+
+
+
+=== TEST 11: hit routes and $upstream_status is `502, 200`
+--- request
+GET /hello
+--- response_body
+hello world
+--- grep_error_log eval
+qr/X-APISIX-Upstream-Status: 502, 200/
+--- grep_error_log_out
+
+
+
+=== TEST 12: set upstream(id: 1, retries = 2), all upstream nodes are unavailable
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/upstreams/1',
+ ngx.HTTP_PUT,
+ [[{
+ "nodes": {
+ "127.0.0.3:1": 1,
+ "127.0.0.2:1": 1,
+ "127.0.0.1:1": 1
+
+ },
+ "retries": 2,
+ "type": "roundrobin"
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+ }
+ }
+--- request
+GET /t
+--- response_body
+passed
+--- no_error_log
+[error]
+
+
+
+=== TEST 13: hit routes, retry between upstream failed, $upstream_status is `502, 502, 502`
+--- request
+GET /hello
+--- error_code: 502
+--- error_log eval
+qr/X-APISIX-Upstream-Status: 502, 502, 502/
+
+
+
+=== TEST 14: return 500 status code from APISIX
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "plugins": {
+ "fault-injection": {
+ "abort": {
+ "http_status": 500,
+ "body": "Fault Injection!\n"
+ }
+ }
+ },
+ "uri": "/hello"
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+ }
+ }
+--- request
+GET /t
+--- response_body
+passed
+--- no_error_log
+[error]
+
+
+
+=== TEST 15: hit routes, status code is 500
+--- request
+GET /hello
+--- error_code: 500
+--- response_body
+Fault Injection!
+--- grep_error_log eval
+qr/X-APISIX-Upstream-Status: 500/
+--- grep_error_log_out
+
+
+
+=== TEST 16: return 200 status code from APISIX
+--- config
+ location /t {
+ content_by_lua_block {
+ local t = require("lib.test_admin").test
+ local code, body = t('/apisix/admin/routes/1',
+ ngx.HTTP_PUT,
+ [[{
+ "plugins": {
+ "fault-injection": {
+ "abort": {
+ "http_status": 200,
+ "body": "Fault Injection!\n"
+ }
+ }
+ },
+ "uri": "/hello"
+ }]]
+ )
+
+ if code >= 300 then
+ ngx.status = code
+ end
+ ngx.say(body)
+ }
+ }
+--- request
+GET /t
+--- response_body
+passed
+--- no_error_log
+[error]
+
+
+
+=== TEST 17: hit routes, status code is 200
+--- request
+GET /hello
+--- response_body
+Fault Injection!
+--- grep_error_log eval
+qr/X-APISIX-Upstream-Status: 200/
+--- grep_error_log_out