author    bnewbold <bnewbold@robocracy.org>  2025-02-28 13:09:48 -0800
committer GitHub <noreply@github.com>        2025-02-28 16:09:48 -0500
commit    462708e207627971f68b603e61ad70c5d6953cff (patch)
tree      c82cc9811bd6ddf681ac71c1a3b8e07b651be4f5 /bskyweb
parent    36a7a8d8c68fc1d3d3fd396dd9f705c966552fcb (diff)
bskyweb: add robots disallow and 'canonical' config flags (#4760)
Diffstat (limited to 'bskyweb')
-rw-r--r--  bskyweb/cmd/bskyweb/main.go             14
-rw-r--r--  bskyweb/cmd/bskyweb/server.go           26
-rw-r--r--  bskyweb/static/robots-disallow-all.txt   3
3 files changed, 36 insertions, 7 deletions
diff --git a/bskyweb/cmd/bskyweb/main.go b/bskyweb/cmd/bskyweb/main.go
index 5c46af418..da8a30953 100644
--- a/bskyweb/cmd/bskyweb/main.go
+++ b/bskyweb/cmd/bskyweb/main.go
@@ -94,6 +94,20 @@ func run(args []string) {
 					Value:    "",
 					EnvVars:  []string{"STATIC_CDN_HOST"},
 				},
+				&cli.BoolFlag{
+					Name:     "bsky-canonical-instance",
+					Usage:    "Enable if this is the canonical deployment (bsky.app)",
+					Value:    false,
+					Required: false,
+					EnvVars:  []string{"BSKY_CANONICAL_INSTANCE"},
+				},
+				&cli.BoolFlag{
+					Name:     "robots-disallow-all",
+					Usage:    "Crawling is allowed by default. Enable this flag to Disallow all",
+					Value:    false,
+					Required: false,
+					EnvVars:  []string{"ROBOTS_DISALLOW_ALL"},
+				},
 			},
 		},
 	}
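
A brief aside, not part of the commit: the two BoolFlags above follow the usual urfave/cli v2 pattern, where a flag can be set either on the command line or via the listed environment variable and is read back with cctx.Bool. A minimal standalone sketch of that pattern (the flag name is reused here purely for illustration; this is not the bskyweb command wiring):

package main

import (
	"fmt"
	"log"
	"os"

	"github.com/urfave/cli/v2"
)

func main() {
	app := &cli.App{
		Name: "flag-demo",
		Flags: []cli.Flag{
			&cli.BoolFlag{
				// Same shape as the flags added in main.go above.
				Name:    "robots-disallow-all",
				Usage:   "Serve a Disallow-all robots.txt",
				Value:   false, // permissive by default
				EnvVars: []string{"ROBOTS_DISALLOW_ALL"},
			},
		},
		Action: func(cctx *cli.Context) error {
			// Settable as --robots-disallow-all or via ROBOTS_DISALLOW_ALL=true.
			fmt.Println("robots-disallow-all:", cctx.Bool("robots-disallow-all"))
			return nil
		},
	}
	if err := app.Run(os.Args); err != nil {
		log.Fatal(err)
	}
}
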
diff --git a/bskyweb/cmd/bskyweb/server.go b/bskyweb/cmd/bskyweb/server.go
index 6543b33a8..73fa71019 100644
--- a/bskyweb/cmd/bskyweb/server.go
+++ b/bskyweb/cmd/bskyweb/server.go
@@ -63,6 +63,8 @@ func serve(cctx *cli.Context) error {
 	corsOrigins := cctx.StringSlice("cors-allowed-origins")
 	staticCDNHost := cctx.String("static-cdn-host")
 	staticCDNHost = strings.TrimSuffix(staticCDNHost, "/")
+	canonicalInstance := cctx.Bool("bsky-canonical-instance")
+	robotsDisallowAll := cctx.Bool("robots-disallow-all")
 
 	// Echo
 	e := echo.New()
@@ -204,13 +206,23 @@ func serve(cctx *cli.Context) error {
 		return http.FS(fsys)
 	}())
 
-	e.GET("/robots.txt", echo.WrapHandler(staticHandler))
-	e.GET("/ips-v4", echo.WrapHandler(staticHandler))
-	e.GET("/ips-v6", echo.WrapHandler(staticHandler))
-	e.GET("/.well-known/*", echo.WrapHandler(staticHandler))
-	e.GET("/security.txt", func(c echo.Context) error {
-		return c.Redirect(http.StatusMovedPermanently, "/.well-known/security.txt")
-	})
+	// enable some special endpoints for the "canonical" deployment (bsky.app). not having these enabled should *not* impact regular operation
+	if canonicalInstance {
+		e.GET("/ips-v4", echo.WrapHandler(staticHandler))
+		e.GET("/ips-v6", echo.WrapHandler(staticHandler))
+		e.GET("/security.txt", func(c echo.Context) error {
+			return c.Redirect(http.StatusMovedPermanently, "/.well-known/security.txt")
+		})
+		e.GET("/.well-known/*", echo.WrapHandler(staticHandler))
+	}
+
+	// default to permissive, but Disallow all if flag set
+	if robotsDisallowAll {
+		e.File("/robots.txt", "static/robots-disallow-all.txt")
+	} else {
+		e.GET("/robots.txt", echo.WrapHandler(staticHandler))
+	}
+
 	e.GET("/iframe/youtube.html", echo.WrapHandler(staticHandler))
 	e.GET("/static/*", echo.WrapHandler(http.StripPrefix("/static/", staticHandler)), func(next echo.HandlerFunc) echo.HandlerFunc {
 		return func(c echo.Context) error {
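
A rough standalone sketch of the gating pattern introduced above, assuming labstack/echo v4 and using plain file paths instead of bskyweb's embedded static handler: canonical-only routes are registered conditionally, and /robots.txt is bound to one of two files depending on the flag. Paths and port are illustrative, not taken from the commit.

package main

import (
	"log"
	"net/http"

	"github.com/labstack/echo/v4"
)

func buildServer(canonicalInstance, robotsDisallowAll bool) *echo.Echo {
	e := echo.New()

	// Endpoints that only make sense on the canonical deployment (bsky.app).
	if canonicalInstance {
		e.GET("/security.txt", func(c echo.Context) error {
			return c.Redirect(http.StatusMovedPermanently, "/.well-known/security.txt")
		})
		e.Static("/.well-known", "static/.well-known")
	}

	// robots.txt: permissive by default, Disallow-all when the flag is set.
	if robotsDisallowAll {
		e.File("/robots.txt", "static/robots-disallow-all.txt")
	} else {
		e.File("/robots.txt", "static/robots.txt")
	}
	return e
}

func main() {
	log.Fatal(buildServer(false, true).Start(":8080"))
}
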
diff --git a/bskyweb/static/robots-disallow-all.txt b/bskyweb/static/robots-disallow-all.txt
new file mode 100644
index 000000000..65400a08c
--- /dev/null
+++ b/bskyweb/static/robots-disallow-all.txt
@@ -0,0 +1,3 @@
+# This is a development or self-hosted instance of the bsky web app, and crawling has been disallowed by the operator team.
+User-Agent: *
+Disallow: /
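
For completeness, a quick self-contained check (again a sketch, not part of the commit) of what a crawler sees when the Disallow-all flag is active; the handler inlines the rules from the file above rather than reading it from disk, so it runs without the bskyweb static assets.

package main

import (
	"fmt"
	"net/http"
	"net/http/httptest"

	"github.com/labstack/echo/v4"
)

func main() {
	e := echo.New()
	// Inline equivalent of static/robots-disallow-all.txt.
	e.GET("/robots.txt", func(c echo.Context) error {
		return c.String(http.StatusOK, "User-Agent: *\nDisallow: /\n")
	})

	req := httptest.NewRequest(http.MethodGet, "/robots.txt", nil)
	rec := httptest.NewRecorder()
	e.ServeHTTP(req, rec)

	fmt.Println(rec.Code)        // 200
	fmt.Print(rec.Body.String()) // User-Agent: * / Disallow: /
}
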