Skip to content

Add Bot Spoofing Protection #590

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
3 changes: 3 additions & 0 deletions end2end/server/src/handlers/lists.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ const {
getBlockedIPAddresses,
getBlockedUserAgents,
getAllowedIPAddresses,
getBotSpoofingData,
getMonitoredUserAgents,
getMonitoredIPAddresses,
getUserAgentDetails,
Expand All @@ -15,6 +16,7 @@ module.exports = function lists(req, res) {
const blockedIps = getBlockedIPAddresses(req.app);
const blockedUserAgents = getBlockedUserAgents(req.app);
const allowedIps = getAllowedIPAddresses(req.app);
const botSpoofingData = getBotSpoofingData(req.app);
const monitoredUserAgents = getMonitoredUserAgents(req.app);
const monitoredIps = getMonitoredIPAddresses(req.app);
const userAgentDetails = getUserAgentDetails(req.app);
Expand Down Expand Up @@ -47,6 +49,7 @@ module.exports = function lists(req, res) {
},
]
: [],
botSpoofingProtection: botSpoofingData,
monitoredIPAddresses:
monitoredIps.length > 0
? monitoredIps
Expand Down
12 changes: 8 additions & 4 deletions end2end/server/src/handlers/updateLists.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ const {
updateBlockedIPAddresses,
updateBlockedUserAgents,
updateAllowedIPAddresses,
updateBotSpoofingData,
updateMonitoredUserAgents,
updateMonitoredIPAddresses,
updateUserAgentDetails,
Expand Down Expand Up @@ -49,6 +50,12 @@ module.exports = function updateIPLists(req, res) {
updateAllowedIPAddresses(req.app, req.body.allowedIPAddresses);
}

if (
req.body.botSpoofingProtection &&
Array.isArray(req.body.botSpoofingProtection)
) {
updateBotSpoofingData(req.app, req.body.botSpoofingProtection);
}
if (
req.body.monitoredUserAgents &&
typeof req.body.monitoredUserAgents === "string"
Expand All @@ -63,10 +70,7 @@ module.exports = function updateIPLists(req, res) {
updateMonitoredIPAddresses(req.app, req.body.monitoredIPAddresses);
}

if (
req.body.userAgentDetails &&
Array.isArray(req.body.userAgentDetails)
) {
if (req.body.userAgentDetails && Array.isArray(req.body.userAgentDetails)) {
updateUserAgentDetails(req.app, req.body.userAgentDetails);
}

Expand Down
27 changes: 27 additions & 0 deletions end2end/server/src/zen/config.js
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ function updateAppConfig(app, newConfig) {
const blockedIPAddresses = [];
const blockedUserAgents = [];
const allowedIPAddresses = [];
const botSpoofingData = [];
const monitoredUserAgents = [];
const monitoredIPAddresses = [];
const userAgentDetails = [];
Expand Down Expand Up @@ -116,6 +117,20 @@ function getBlockedUserAgents(app) {
return "";
}

/**
 * Stores the bot spoofing data for the given app, replacing whatever was
 * stored for the same serviceId, and then calls updateAppConfig with an
 * empty config to bump lastUpdatedAt.
 *
 * @param app  Object whose `serviceId` identifies the stored entry.
 * @param data Value to store for that service (kept as-is, not copied).
 */
function updateBotSpoofingData(app, data) {
  const existing = botSpoofingData.find(
    (item) => item.serviceId === app.serviceId
  );

  if (!existing) {
    // First time this service sends data: register a fresh entry
    botSpoofingData.push({ serviceId: app.serviceId, data: data });
  } else {
    existing.data = data;
  }

  // Bump lastUpdatedAt
  updateAppConfig(app, {});
}

function updateMonitoredUserAgents(app, uas) {
let entry = monitoredUserAgents.find((e) => e.serviceId === app.serviceId);

Expand All @@ -130,6 +145,16 @@ function updateMonitoredUserAgents(app, uas) {
updateAppConfig(app, {});
}

/**
 * Looks up the bot spoofing data stored for the given app.
 *
 * @param app Object whose `serviceId` identifies the stored entry.
 * @returns The stored data for that service, or a new empty array when no
 *          entry exists for it.
 */
function getBotSpoofingData(app) {
  const match = botSpoofingData.find(
    (item) => item.serviceId === app.serviceId
  );

  return match ? match.data : [];
}

function getMonitoredUserAgents(app) {
const entry = monitoredUserAgents.find((e) => e.serviceId === app.serviceId);

Expand Down Expand Up @@ -197,6 +222,8 @@ module.exports = {
getBlockedUserAgents,
getAllowedIPAddresses,
updateAllowedIPAddresses,
updateBotSpoofingData,
getBotSpoofingData,
updateMonitoredUserAgents,
getMonitoredUserAgents,
updateMonitoredIPAddresses,
Expand Down
148 changes: 148 additions & 0 deletions end2end/tests/hono-sqlite3-bot-spoofing.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
const t = require("tap");
const { spawn } = require("child_process");
const { resolve } = require("path");
const timeout = require("../timeout");

// Absolute path to the hono-sqlite3 sample app entry point that the test spawns
const pathToApp = resolve(
__dirname,
"../../sample-apps/hono-sqlite3",
"app.js"
);
// Base URL of the test server: used below to create an app token and to push
// firewall lists, and handed to the spawned app as AIKIDO_ENDPOINT
const testServerUrl = "http://localhost:5874";

// Token of the app created for the current test; set in beforeEach and passed
// to the spawned sample app as AIKIDO_TOKEN
let token;

// Before every test: create a fresh app on the test server, remember its
// token, and upload firewall lists that contain a single bot spoofing rule.
t.beforeEach(async () => {
  const appResponse = await fetch(`${testServerUrl}/api/runtime/apps`, {
    method: "POST",
  });
  const createdApp = await appResponse.json();
  token = createdApp.token;

  // One protected bot: matched by user agent, allowed from the listed
  // IPs/ranges and hostnames
  const spoofingRules = [
    {
      key: "google_test",
      uaPattern: "Googlebot|GoogleStoreBot",
      ips: ["1.2.3.4/24", "4.3.2.1"],
      hostnames: ["google.com", "googlebot.com"],
    },
  ];

  const payload = JSON.stringify({
    allowedIPAddresses: [],
    blockedIPAddresses: [],
    blockedUserAgents: "",
    botSpoofingProtection: spoofingRules,
  });

  const listsResponse = await fetch(
    `${testServerUrl}/api/runtime/firewall/lists`,
    {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: token,
      },
      body: payload,
    }
  );

  t.same(listsResponse.status, 200);
});

// Spawns the hono-sqlite3 sample app on port 4012 with blocking enabled and
// checks which requests carrying a protected bot user agent are rejected.
// The test ends when the spawned server process closes.
t.test("it blocks spoofed bots", (t) => {
  const appUrl = "http://127.0.0.1:4012/";
  const safariUserAgent =
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.4 Safari/605.1.15";

  const server = spawn(`node`, [pathToApp, "4012"], {
    env: {
      ...process.env,
      AIKIDO_DEBUG: "true",
      AIKIDO_BLOCK: "true",
      AIKIDO_TOKEN: token,
      AIKIDO_ENDPOINT: testServerUrl,
    },
  });

  server.on("close", () => {
    t.end();
  });

  server.on("error", (err) => {
    t.fail(err.message);
  });

  // Collect the child's output; the listeners also keep both pipes drained
  let stdout = "";
  server.stdout.on("data", (chunk) => {
    stdout += chunk.toString();
  });

  let stderr = "";
  server.stderr.on("data", (chunk) => {
    stderr += chunk.toString();
  });

  // Sends a GET to the sample app with the given user agent and client IP
  // (via x-forwarded-for), aborting after 5 seconds.
  const requestAs = (userAgent, forwardedFor) =>
    fetch(appUrl, {
      headers: {
        "user-agent": userAgent,
        "x-forwarded-for": forwardedFor,
      },
      signal: AbortSignal.timeout(5000),
    });

  // Requests that must get through, in the order they are sent
  const allowedRequests = [
    ["Googlebot", "127.0.0.1"], // localhost is not blocked
    [safariUserAgent, "1.1.1.1"], // not a protected bot
    ["Googlebot", "66.249.90.77"], // Real Googlebot ip
    ["Googlebot", "1.2.3.4"], // whitelisted ip
    ["Googlebot", "4.3.2.1"], // whitelisted ip
  ];

  // Wait for the server to start
  timeout(4000)
    .then(async () => {
      // Protected bot user agent coming from an IP that is not listed
      const spoofed = await requestAs("Googlebot", "1.1.1.1");
      t.same(spoofed.status, 403);
      t.same(
        await spoofed.text(),
        "You are not allowed to access this resource."
      );

      for (const [userAgent, forwardedFor] of allowedRequests) {
        const response = await requestAs(userAgent, forwardedFor);
        t.same(response.status, 200);
      }
    })
    .catch((error) => {
      t.fail(error.message);
    })
    .finally(() => {
      server.kill();
    });
});
1 change: 1 addition & 0 deletions library/agent/Agent.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@ wrap(fetch, "fetch", function mock() {
pattern: "Bytespider",
},
],
botSpoofingProtection: [],
} satisfies Response),
};
};
Expand Down
2 changes: 2 additions & 0 deletions library/agent/Agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -387,13 +387,15 @@ export class Agent {
blockedIPAddresses,
blockedUserAgents,
allowedIPAddresses,
botSpoofingProtection,
monitoredIPAddresses,
monitoredUserAgents,
userAgentDetails,
} = await fetchBlockedLists(this.token);
this.serviceConfig.updateBlockedIPAddresses(blockedIPAddresses);
this.serviceConfig.updateBlockedUserAgents(blockedUserAgents);
this.serviceConfig.updateAllowedIPAddresses(allowedIPAddresses);
this.serviceConfig.updateBotSpoofingData(botSpoofingProtection);
this.serviceConfig.updateMonitoredIPAddresses(monitoredIPAddresses);
this.serviceConfig.updateMonitoredUserAgents(monitoredUserAgents);
this.serviceConfig.updateUserAgentDetails(userAgentDetails);
Expand Down
39 changes: 38 additions & 1 deletion library/agent/ServiceConfig.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,20 @@ import { IPMatcher } from "../helpers/ip-matcher/IPMatcher";
import { LimitedContext, matchEndpoints } from "../helpers/matchEndpoints";
import { isPrivateIP } from "../vulnerabilities/ssrf/isPrivateIP";
import type { Endpoint, EndpointConfig } from "./Config";
import { IPList, UserAgentDetails } from "./api/fetchBlockedLists";
import {
BotSpoofingData,
IPList,
UserAgentDetails,
} from "./api/fetchBlockedLists";
import { safeCreateRegExp } from "./safeCreateRegExp";

/**
 * A bot spoofing entry in the form ServiceConfig stores it: the user agent
 * pattern is held as a RegExp and the IPs as an IPMatcher.
 */
export type ServiceConfigBotSpoofingData = {
// Identifier of the entry
key: string;
// Regular expression applied to user agents
uaPattern: RegExp;
// Matcher over the entry's IP list; the type also permits undefined
ips: IPMatcher | undefined;
// Hostnames associated with the entry
hostnames: string[];
};

export class ServiceConfig {
private blockedUserIds: Map<string, string> = new Map();
// IP addresses that are allowed to bypass rate limiting, attack blocking, etc.
Expand All @@ -23,6 +34,7 @@ export class ServiceConfig {
allowlist: IPMatcher;
description: string;
}[] = [];
private botSpoofingData: ServiceConfigBotSpoofingData[] = [];
private monitoredIPAddresses: { list: IPMatcher; key: string }[] = [];
private monitoredUserAgentRegex: RegExp | undefined;
private userAgentDetails: { pattern: RegExp; key: string }[] = [];
Expand Down Expand Up @@ -278,4 +290,29 @@ export class ServiceConfig {
hasReceivedAnyStats() {
return this.receivedAnyStats;
}

/**
 * Replaces the stored bot spoofing data with the given entries.
 *
 * Entries that have neither IPs nor hostnames are skipped, and so are
 * entries for which safeCreateRegExp does not yield a RegExp for the
 * user agent pattern.
 *
 * @param data Bot spoofing entries as received from the API.
 */
updateBotSpoofingData(data: BotSpoofingData[]) {
  this.botSpoofingData = [];

  for (const { key, uaPattern: rawPattern, ips, hostnames } of data) {
    // Skip empty
    const hasNoSources = ips.length === 0 && hostnames.length === 0;
    if (hasNoSources) {
      continue;
    }

    // The pattern is compiled with the "i" flag
    const compiledPattern = safeCreateRegExp(rawPattern, "i");
    if (!compiledPattern) {
      continue;
    }

    this.botSpoofingData.push({
      key,
      uaPattern: compiledPattern,
      ips: new IPMatcher(ips),
      hostnames,
    });
  }
}

/**
 * Returns the bot spoofing entries currently stored on this config.
 * The internal array itself is returned, not a copy.
 */
getBotSpoofingData() {
return this.botSpoofingData;
}
}
13 changes: 13 additions & 0 deletions library/agent/api/fetchBlockedLists.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,13 @@ export type IPList = {
ips: string[];
};

// A bot spoofing protection entry as it appears in the blocked lists response
export type BotSpoofingData = {
key: string; // e.g. "google_test"
uaPattern: string; // e.g. "Googlebot|GoogleStoreBot" (the regex pattern)
ips: string[]; // e.g. ["1.2.3.4/24", "4.3.2.1"]
hostnames: string[]; // e.g. ["google.com", "googlebot.com"]
};

export type UserAgentDetails = {
key: string; // e.g. "claudebot"
pattern: string; // e.g. "ClaudeBot" (the regex pattern)
Expand All @@ -20,6 +27,7 @@ export type Response = {
allowedIPAddresses: IPList[];
monitoredIPAddresses: IPList[];
blockedUserAgents: string;
botSpoofingProtection: BotSpoofingData[];
monitoredUserAgents: string;
// `monitoredUserAgents` and `blockedUserAgents` are one big regex pattern
// If we want to collect stats about the individual user agents,
Expand Down Expand Up @@ -54,6 +62,7 @@ export async function fetchBlockedLists(token: Token): Promise<Response> {
allowedIPAddresses: IPList[];
monitoredIPAddresses: IPList[];
blockedUserAgents: string;
botSpoofingProtection: BotSpoofingData[];
monitoredUserAgents: string;
userAgentDetails: UserAgentDetails[];
} = JSON.parse(body);
Expand All @@ -76,6 +85,10 @@ export async function fetchBlockedLists(token: Token): Promise<Response> {
result && typeof result.blockedUserAgents === "string"
? result.blockedUserAgents
: "",
botSpoofingProtection:
result && Array.isArray(result.botSpoofingProtection)
? result.botSpoofingProtection
: [],
// Monitored user agents are stored as a string pattern for usage in a regex (e.g. "ClaudeBot|ChatGPTBot")
monitoredUserAgents:
result && typeof result.monitoredUserAgents === "string"
Expand Down
1 change: 1 addition & 0 deletions library/sources/HTTPServer.stats.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ wrap(fetchBlockedLists, "fetchBlockedLists", function fetchBlockedLists() {
return {
allowedIPAddresses: [],
blockedIPAddresses: [],
botSpoofingProtection: [],
monitoredIPAddresses: [
{
key: "known_threat_actors/public_scanners",
Expand Down
Loading