diff options
author | bnewbold <bnewbold@robocracy.org> | 2025-02-28 13:09:48 -0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-02-28 16:09:48 -0500 |
commit | 462708e207627971f68b603e61ad70c5d6953cff (patch) | |
tree | c82cc9811bd6ddf681ac71c1a3b8e07b651be4f5 /bskyweb | |
parent | 36a7a8d8c68fc1d3d3fd396dd9f705c966552fcb (diff) | |
download | voidsky-462708e207627971f68b603e61ad70c5d6953cff.tar.zst |
bskyweb: add robots disallow and 'canonical' config flags (#4760)
Diffstat (limited to 'bskyweb')
-rw-r--r-- | bskyweb/cmd/bskyweb/main.go | 14 | ||||
-rw-r--r-- | bskyweb/cmd/bskyweb/server.go | 26 | ||||
-rw-r--r-- | bskyweb/static/robots-disallow-all.txt | 3 |
3 files changed, 36 insertions, 7 deletions
diff --git a/bskyweb/cmd/bskyweb/main.go b/bskyweb/cmd/bskyweb/main.go index 5c46af418..da8a30953 100644 --- a/bskyweb/cmd/bskyweb/main.go +++ b/bskyweb/cmd/bskyweb/main.go @@ -94,6 +94,20 @@ func run(args []string) { Value: "", EnvVars: []string{"STATIC_CDN_HOST"}, }, + &cli.BoolFlag{ + Name: "bsky-canonical-instance", + Usage: "Enable if this is the canonical deployment (bsky.app)", + Value: false, + Required: false, + EnvVars: []string{"BSKY_CANONICAL_INSTANCE"}, + }, + &cli.BoolFlag{ + Name: "robots-disallow-all", + Usage: "Crawling is allowed by default. Enable this flag to Disallow all", + Value: false, + Required: false, + EnvVars: []string{"ROBOTS_DISALLOW_ALL"}, + }, }, }, } diff --git a/bskyweb/cmd/bskyweb/server.go b/bskyweb/cmd/bskyweb/server.go index 6543b33a8..73fa71019 100644 --- a/bskyweb/cmd/bskyweb/server.go +++ b/bskyweb/cmd/bskyweb/server.go @@ -63,6 +63,8 @@ func serve(cctx *cli.Context) error { corsOrigins := cctx.StringSlice("cors-allowed-origins") staticCDNHost := cctx.String("static-cdn-host") staticCDNHost = strings.TrimSuffix(staticCDNHost, "/") + canonicalInstance := cctx.Bool("bsky-canonical-instance") + robotsDisallowAll := cctx.Bool("robots-disallow-all") // Echo e := echo.New() @@ -204,13 +206,23 @@ func serve(cctx *cli.Context) error { return http.FS(fsys) }()) - e.GET("/robots.txt", echo.WrapHandler(staticHandler)) - e.GET("/ips-v4", echo.WrapHandler(staticHandler)) - e.GET("/ips-v6", echo.WrapHandler(staticHandler)) - e.GET("/.well-known/*", echo.WrapHandler(staticHandler)) - e.GET("/security.txt", func(c echo.Context) error { - return c.Redirect(http.StatusMovedPermanently, "/.well-known/security.txt") - }) + // enable some special endpoints for the "canonical" deployment (bsky.app). 
not having these enabled should *not* impact regular operation + if canonicalInstance { + e.GET("/ips-v4", echo.WrapHandler(staticHandler)) + e.GET("/ips-v6", echo.WrapHandler(staticHandler)) + e.GET("/security.txt", func(c echo.Context) error { + return c.Redirect(http.StatusMovedPermanently, "/.well-known/security.txt") + }) + e.GET("/.well-known/*", echo.WrapHandler(staticHandler)) + } + + // default to permissive, but Disallow all if flag set + if robotsDisallowAll { + e.File("/robots.txt", "static/robots-disallow-all.txt") + } else { + e.GET("/robots.txt", echo.WrapHandler(staticHandler)) + } + e.GET("/iframe/youtube.html", echo.WrapHandler(staticHandler)) e.GET("/static/*", echo.WrapHandler(http.StripPrefix("/static/", staticHandler)), func(next echo.HandlerFunc) echo.HandlerFunc { return func(c echo.Context) error { diff --git a/bskyweb/static/robots-disallow-all.txt b/bskyweb/static/robots-disallow-all.txt new file mode 100644 index 000000000..65400a08c --- /dev/null +++ b/bskyweb/static/robots-disallow-all.txt @@ -0,0 +1,3 @@ +# This is a development or self-hosted instance of the bsky web app, and crawling has been disallowed by the operator team. +User-Agent: * +Disallow: / |