diff --git a/README.md b/README.md index 3f23188..ee91a18 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,7 @@ features to help your app run efficiently and safely on the open Internet: - Basic HTTP caching - X-Sendfile support for efficient file serving - Automatic GZIP compression +- Image proxy links to sanitize external image URLs Thruster tries to be as zero-config as possible, so most features are automatically enabled with sensible defaults. @@ -46,6 +47,36 @@ Or with automatic SSL: $ SSL_DOMAIN=myapp.example.com thrust bin/rails server ``` +## Image proxy links + +Applications that allow user-generated content often need a way to sanitize +external image URLs, to guard against the security risks of maliciously crafted +images. + +Thruster includes a minimal image proxy that inspects the content of external +images before serving them. Images will be served if they: + +- Appear to be valid image files +- Are in a permitted format: GIF, JPEG, PNG or WebP +- Do not have an excessive width or height (5000 pixels max, by default) + +External images that do not meet these criteria will be served with a `403 +Forbidden` status. + +To use the image proxy, your application should rewrite external image URLs in +user-generated content to use Thruster's image proxy path. This path is provided +to your application in the `IMAGE_PROXY_PATH` environment variable. Specify the +URL of the image to proxy as a query parameter named `src`. + +Thruster provides a helper method to form these paths for you: + +```ruby +Thruster.image_proxy_path('https://example.com/image.jpg') +``` + +When your application is running outside of Thruster, +`Thruster.image_proxy_path` will return the original URL unchanged. + ## Custom configuration Thruster provides a number of environment variables that can be used to @@ -57,19 +88,21 @@ For example, `SSL_DOMAIN` can also be set as `THRUSTER_SSL_DOMAIN`. 
Whenever a prefixed variable is set, Thruster will use it in preference to the unprefixed version. -| Variable Name | Description | Default Value | -|-----------------------|---------------------------------------------------------------------------------|---------------| -| `SSL_DOMAIN` | The domain name to use for SSL provisioning. If not set, SSL will be disabled. | None | -| `TARGET_PORT` | The port that your Puma server should run on. Thruster will set `PORT` to this when starting your server. | 3000 | -| `CACHE_SIZE` | The size of the HTTP cache in bytes. | 64MB | -| `MAX_CACHE_ITEM_SIZE` | The maximum size of a single item in the HTTP cache in bytes. | 1MB | -| `X_SENDFILE_ENABLED` | Whether to enable X-Sendfile support. Set to `0` or `false` to disable. | Enabled | -| `MAX_REQUEST_BODY` | The maximum size of a request body in bytes. Requests larger than this size will be refused; `0` means no maximum size. | `0` | -| `STORAGE_PATH` | The path to store Thruster's internal state. | `./storage/thruster` | -| `BAD_GATEWAY_PAGE` | Path to an HTML file to serve when the backend server returns a 502 Bad Gateway error. If there is no file at the specific path, Thruster will serve an empty 502 response instead. | `./public/502.html` | -| `HTTP_PORT` | The port to listen on for HTTP traffic. | 80 | -| `HTTPS_PORT` | The port to listen on for HTTPS traffic. | 443 | -| `HTTP_IDLE_TIMEOUT` | The maximum time in seconds that a client can be idle before the connection is closed. | 60 | -| `HTTP_READ_TIMEOUT` | The maximum time in seconds that a client can take to send the request headers. | 30 | -| `HTTP_WRITE_TIMEOUT` | The maximum time in seconds during which the client must read the response. | 30 | -| `DEBUG` | Set to `1` or `true` to enable debug logging. 
| Disabled | +| Variable Name | Description | Default Value | +|-----------------------------|---------------------------------------------------------------------------------|---------------| +| `SSL_DOMAIN` | The domain name to use for SSL provisioning. If not set, SSL will be disabled. | None | +| `TARGET_PORT` | The port that your Puma server should run on. Thruster will set `PORT` to this when starting your server. | 3000 | +| `CACHE_SIZE` | The size of the HTTP cache in bytes. | 64MB | +| `MAX_CACHE_ITEM_SIZE` | The maximum size of a single item in the HTTP cache in bytes. | 1MB | +| `X_SENDFILE_ENABLED` | Whether to enable X-Sendfile support. Set to `0` or `false` to disable. | Enabled | +| `IMAGE_PROXY_ENABLED` | Whether to enable the built-in image proxy. Set to `0` or `false` to disable. | Enabled | +| `IMAGE_PROXY_MAX_DIMENSION` | When using the image proxy, only serve images whose width and height are each no greater than this value, in pixels. | 5000 | +| `MAX_REQUEST_BODY` | The maximum size of a request body in bytes. Requests larger than this size will be refused; `0` means no maximum size. | `0` | +| `STORAGE_PATH` | The path to store Thruster's internal state. | `./storage/thruster` | +| `BAD_GATEWAY_PAGE` | Path to an HTML file to serve when the backend server returns a 502 Bad Gateway error. If there is no file at the specific path, Thruster will serve an empty 502 response instead. | `./public/502.html` | +| `HTTP_PORT` | The port to listen on for HTTP traffic. | 80 | +| `HTTPS_PORT` | The port to listen on for HTTPS traffic. | 443 | +| `HTTP_IDLE_TIMEOUT` | The maximum time in seconds that a client can be idle before the connection is closed. | 60 | +| `HTTP_READ_TIMEOUT` | The maximum time in seconds that a client can take to send the request headers. | 30 | +| `HTTP_WRITE_TIMEOUT` | The maximum time in seconds during which the client must read the response. | 30 | +| `DEBUG` | Set to `1` or `true` to enable debug logging. 
| Disabled | diff --git a/go.mod b/go.mod index f96705c..0bd12ef 100644 --- a/go.mod +++ b/go.mod @@ -6,6 +6,7 @@ require ( github.com/klauspost/compress v1.17.4 github.com/stretchr/testify v1.8.4 golang.org/x/crypto v0.17.0 + golang.org/x/image v0.15.0 ) require ( diff --git a/go.sum b/go.sum index aea6d8a..699f7df 100644 --- a/go.sum +++ b/go.sum @@ -15,6 +15,8 @@ github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcU github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= golang.org/x/crypto v0.17.0 h1:r8bRNjWL3GshPW3gkd+RpvzWrZAwPS49OmTGZ/uhM4k= golang.org/x/crypto v0.17.0/go.mod h1:gCAAfMLgwOJRpTjQ2zCCt2OcSfYMTeZVSRtQlPC7Nq4= +golang.org/x/image v0.15.0 h1:kOELfmgrmJlw4Cdb7g/QGuB3CvDrXbqEIww/pNtNBm8= +golang.org/x/image v0.15.0/go.mod h1:HUYqC05R2ZcZ3ejNQsIHQDQiwWM4JBqmm6MKANTp4LE= golang.org/x/net v0.17.0 h1:pVaXccu2ozPjCXewfr1S7xza/zcXTity9cCdXQYSjIM= golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE= golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ= diff --git a/internal/config.go b/internal/config.go index 36aece4..5ce4beb 100644 --- a/internal/config.go +++ b/internal/config.go @@ -20,8 +20,9 @@ const ( defaultMaxCacheItemSizeBytes = 1 * MB defaultMaxRequestBody = 0 - defaultStoragePath = "./storage/thruster" - defaultBadGatewayPage = "./public/502.html" + defaultStoragePath = "./storage/thruster" + defaultBadGatewayPage = "./public/502.html" + defaultImageProxyMaxDimension = 5000 defaultHttpPort = 80 defaultHttpsPort = 443 @@ -37,10 +38,12 @@ type Config struct { UpstreamCommand string UpstreamArgs []string - CacheSizeBytes int - MaxCacheItemSizeBytes int - XSendfileEnabled bool - MaxRequestBody int + CacheSizeBytes int + MaxCacheItemSizeBytes int + XSendfileEnabled bool + ImageProxyEnabled bool + ImageProxyMaxDimension int + MaxRequestBody int SSLDomain string StoragePath string @@ -70,10 +73,12 @@ func NewConfig() (*Config, error) { 
UpstreamCommand: os.Args[1], UpstreamArgs: os.Args[2:], - CacheSizeBytes: getEnvInt("CACHE_SIZE", defaultCacheSize), - MaxCacheItemSizeBytes: getEnvInt("MAX_CACHE_ITEM_SIZE", defaultMaxCacheItemSizeBytes), - XSendfileEnabled: getEnvBool("X_SENDFILE_ENABLED", true), - MaxRequestBody: getEnvInt("MAX_REQUEST_BODY", defaultMaxRequestBody), + CacheSizeBytes: getEnvInt("CACHE_SIZE", defaultCacheSize), + MaxCacheItemSizeBytes: getEnvInt("MAX_CACHE_ITEM_SIZE", defaultMaxCacheItemSizeBytes), + XSendfileEnabled: getEnvBool("X_SENDFILE_ENABLED", true), + ImageProxyEnabled: getEnvBool("IMAGE_PROXY_ENABLED", true), + ImageProxyMaxDimension: getEnvInt("IMAGE_PROXY_MAX_DIMENSION", defaultImageProxyMaxDimension), + MaxRequestBody: getEnvInt("MAX_REQUEST_BODY", defaultMaxRequestBody), SSLDomain: getEnvString("SSL_DOMAIN", ""), StoragePath: getEnvString("STORAGE_PATH", defaultStoragePath), diff --git a/internal/fixtures/image.gif b/internal/fixtures/image.gif new file mode 100644 index 0000000..c9d04d7 Binary files /dev/null and b/internal/fixtures/image.gif differ diff --git a/internal/fixtures/image.jpg b/internal/fixtures/image.jpg index 9b7cb60..6b593f4 100644 Binary files a/internal/fixtures/image.jpg and b/internal/fixtures/image.jpg differ diff --git a/internal/fixtures/image.png b/internal/fixtures/image.png new file mode 100644 index 0000000..92ba3c4 Binary files /dev/null and b/internal/fixtures/image.png differ diff --git a/internal/fixtures/image.svg b/internal/fixtures/image.svg new file mode 100644 index 0000000..fd297ff --- /dev/null +++ b/internal/fixtures/image.svg @@ -0,0 +1,4 @@ + + + + diff --git a/internal/fixtures/image.webp b/internal/fixtures/image.webp new file mode 100644 index 0000000..5dfa3ef Binary files /dev/null and b/internal/fixtures/image.webp differ diff --git a/internal/handler.go b/internal/handler.go index bd17e3f..7f814d5 100644 --- a/internal/handler.go +++ b/internal/handler.go @@ -15,9 +15,12 @@ type HandlerOptions struct { maxRequestBody 
int targetUrl *url.URL xSendfileEnabled bool + imageProxyEnabled bool } func NewHandler(options HandlerOptions) http.Handler { + mux := http.NewServeMux() + handler := NewProxyHandler(options.targetUrl, options.badGatewayPage) handler = NewCacheHandler(options.cache, options.maxCacheableResponseBody, handler) handler = NewSendfileHandler(options.xSendfileEnabled, handler) @@ -25,5 +28,11 @@ func NewHandler(options HandlerOptions) http.Handler { handler = NewMaxRequestBodyHandler(options.maxRequestBody, handler) handler = NewLoggingMiddleware(slog.Default(), handler) - return handler + if options.imageProxyEnabled { + RegisterNewImageProxyHandler(mux) + } + + mux.Handle("/", handler) + + return mux } diff --git a/internal/image_proxy_handler.go b/internal/image_proxy_handler.go new file mode 100644 index 0000000..652055c --- /dev/null +++ b/internal/image_proxy_handler.go @@ -0,0 +1,123 @@ +package internal + +import ( + "bytes" + "image" + "io" + "log/slog" + "net/http" + "net/url" + "slices" + "time" + + _ "image/gif" + _ "image/jpeg" + _ "image/png" + + _ "golang.org/x/image/webp" +) + +var allowedFormats = []string{"gif", "jpeg", "png", "webp"} + +const ( + imageProxyHandlerPath = "/_t/image" + imageProxyMaxDimension = 5000 +) + +type ImageProxyHandler struct { + httpClient *http.Client +} + +func RegisterNewImageProxyHandler(mux *http.ServeMux) { + handler := &ImageProxyHandler{ + httpClient: &http.Client{ + Timeout: 10 * time.Second, + }, + } + + mux.Handle("GET "+imageProxyHandlerPath, handler) +} + +// ServeHTTP fetches the image named by the "src" query parameter and relays it +// to the client if sanitizeImage accepts it. A missing or non-http(s) src yields +// 404, a fetch error yields 502, a non-200 upstream status is passed through +// with the upstream headers, and a rejected image yields 403. +func (h *ImageProxyHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { + remoteURL := h.extractRemoteURL(r) + if remoteURL == nil { + http.Error(w, "invalid url", http.StatusNotFound) + return + } + + resp, err := h.httpClient.Get(remoteURL.String()) + if err != nil { + http.Error(w, "error fetching remote image", http.StatusBadGateway) + return + } + // Always release the upstream body, on every return path, so the connection is not leaked. + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + h.copyHeaders(w, resp) + w.WriteHeader(resp.StatusCode) + 
return + } + + imageReader := h.sanitizeImage(resp.Body) + if imageReader == nil { + http.Error(w, "invalid image", http.StatusForbidden) + return + } + + slog.Info("Proxying remote image", "url", remoteURL) + + h.copyHeaders(w, resp) + w.WriteHeader(http.StatusOK) + io.Copy(w, imageReader) +} + +// Private + +func (h *ImageProxyHandler) extractRemoteURL(r *http.Request) *url.URL { + urlString := r.URL.Query().Get("src") + if urlString == "" { + return nil + } + + remoteURL, err := url.Parse(urlString) + if err != nil || (remoteURL.Scheme != "http" && remoteURL.Scheme != "https") { + return nil + } + + return remoteURL +} + +func (h *ImageProxyHandler) copyHeaders(w http.ResponseWriter, resp *http.Response) { + for k, v := range resp.Header { + w.Header()[k] = v + } +} + +func (h *ImageProxyHandler) sanitizeImage(f io.Reader) io.Reader { + var buf bytes.Buffer + reader := io.TeeReader(f, &buf) + + cfg, format, err := image.DecodeConfig(reader) + if err != nil { + slog.Debug("ImageProxy: image format not valid", "err", err) + return nil + } + + if !slices.Contains(allowedFormats, format) { + slog.Debug("ImageProxy: image format not allowed", "format", format) + return nil + } + + if cfg.Width > imageProxyMaxDimension || cfg.Height > imageProxyMaxDimension { + slog.Debug("ImageProxy: image too large", "width", cfg.Width, "height", cfg.Height) + return nil + } + + slog.Debug("ImageProxy: image acceptable", "format", format, "width", cfg.Width, "height", cfg.Height) + return io.MultiReader(&buf, f) +} diff --git a/internal/image_proxy_handler_test.go b/internal/image_proxy_handler_test.go new file mode 100644 index 0000000..d067937 --- /dev/null +++ b/internal/image_proxy_handler_test.go @@ -0,0 +1,61 @@ +package internal + +import ( + "image" + "net/http" + "net/http/httptest" + "net/url" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestImageProxy_serving_valid_images(t *testing.T) { + tests := 
map[string]struct { + filename string + statusCode int + }{ + "valid gif": {"image.gif", http.StatusOK}, + "valid jpg": {"image.jpg", http.StatusOK}, + "valid png": {"image.png", http.StatusOK}, + "valid webp": {"image.webp", http.StatusOK}, + "valid svg": {"image.svg", http.StatusForbidden}, + "not an image": {"loremipsum.txt", http.StatusForbidden}, + "missing file": {"doesnotexist.txt", http.StatusNotFound}, + } + + for name, tc := range tests { + t.Run(name, func(t *testing.T) { + remoteServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if !fixtureExists(tc.filename) { + w.WriteHeader(http.StatusNotFound) + return + } + + w.Write(fixtureContent(tc.filename)) + })) + defer remoteServer.Close() + + mux := http.NewServeMux() + RegisterNewImageProxyHandler(mux) + localServer := httptest.NewServer(mux) + defer localServer.Close() + + imageURL, _ := url.Parse(localServer.URL + imageProxyHandlerPath) + params := url.Values{} + params.Add("src", remoteServer.URL) + imageURL.RawQuery = params.Encode() + + resp, err := http.Get(imageURL.String()) + + require.NoError(t, err) + assert.Equal(t, tc.statusCode, resp.StatusCode) + + if tc.statusCode == http.StatusOK { + _, _, err = image.Decode(resp.Body) + require.NoError(t, err) + } + }) + } +} diff --git a/internal/service.go b/internal/service.go index 1abc18c..dcb331e 100644 --- a/internal/service.go +++ b/internal/service.go @@ -23,6 +23,7 @@ func (s *Service) Run() int { xSendfileEnabled: s.config.XSendfileEnabled, maxCacheableResponseBody: s.config.MaxCacheItemSizeBytes, badGatewayPage: s.config.BadGatewayPage, + imageProxyEnabled: s.config.ImageProxyEnabled, } handler := NewHandler(handlerOptions) @@ -56,4 +57,9 @@ func (s *Service) targetUrl() *url.URL { func (s *Service) setEnvironment() { // Set PORT to be inherited by the upstream process. 
os.Setenv("PORT", fmt.Sprintf("%d", s.config.TargetPort)) + + // Set IMAGE_PROXY_PATH, if enabled + if s.config.ImageProxyEnabled { + os.Setenv("IMAGE_PROXY_PATH", imageProxyHandlerPath) + } } diff --git a/internal/testing.go b/internal/testing.go index 121f2d6..64b3885 100644 --- a/internal/testing.go +++ b/internal/testing.go @@ -10,6 +10,16 @@ func fixturePath(name string) string { return path.Join("fixtures", name) } +func fixtureExists(name string) bool { + f, err := os.Open(fixturePath(name)) + if err != nil { + return false + } + defer f.Close() + + return true +} + func fixtureContent(name string) []byte { result, _ := os.ReadFile(fixturePath(name)) return result diff --git a/lib/thruster.rb b/lib/thruster.rb index c6ffb73..053882a 100644 --- a/lib/thruster.rb +++ b/lib/thruster.rb @@ -2,3 +2,4 @@ module Thruster end require_relative "thruster/version" +require_relative "thruster/helpers" diff --git a/lib/thruster/helpers.rb b/lib/thruster/helpers.rb new file mode 100644 index 0000000..6dddd62 --- /dev/null +++ b/lib/thruster/helpers.rb @@ -0,0 +1,9 @@ +module Thruster + def self.image_proxy_path(src) + proxy_path = ENV["IMAGE_PROXY_PATH"] + return src if proxy_path.nil? + + query = URI.encode_www_form({ src: src }) + "#{proxy_path}?#{query}" + end +end