srctree

Gregory Mullen parent 01126b1a 2aed4319
add real bot detection subsystem

google bot is so well behaved it's just noise, and I'd like to be able to ignore its requests in my logs :)

inlinesplit
src/bot-detection.zig added: 62, removed: 11, total 51
@@ -22,6 +22,7 @@ const default_malicious: BotDetection = .{
};
 
pub const ANOMALY_MAX: f16 = 0.5;
/// Score at or above which a request that identifies itself as a bot is
/// flagged as malicious by `init`.
pub const BOT_DEVIANCE: f16 = 0.2;
 
pub fn init(r: *const Request) BotDetection {
if (r.user_agent == null) return .default_malicious;
@@ -36,9 +37,12 @@ pub fn init(r: *const Request) BotDetection {
switch (ua.resolved) {
.bot => {
bot.bot = true;
inline for (rules.bots) |rule| {
inline for (rules.bot) |rule| {
rule(ua, r, &bot.score) catch @panic("not implemented");
}
// the score of something actively identifying itself as a bot
// is only related to its malfeasance
bot.malicious = bot.score >= BOT_DEVIANCE;
},
.browser => |browser| {
inline for (rules.browser) |rule| {
@@ -66,6 +70,7 @@ pub fn init(r: *const Request) BotDetection {
 
/// Errors a detection rule may return. `init` currently panics when a bot
/// rule returns any of them.
const RuleError = error{
Generic,
/// Returned by `bots.Rules.knownSubnet` when the user agent did not
/// resolve to a bot.
NotABot,
};

/// Signature shared by the detection rules: a rule receives the user agent,
/// the request, and a pointer to the running score it may adjust.
const RuleFn = fn (UA, *const Request, *f16) RuleError!void;
@@ -78,8 +83,8 @@ const rules = struct {
browsers.Rules.protocolVer,
browsers.Rules.acceptStr,
};
const bots = [_]RuleFn{
//
const bot = [_]RuleFn{
bots.Rules.knownSubnet,
};
};
 
 
src/bot-detection/bots.zig added: 62, removed: 11, total 51
@@ -7,6 +7,18 @@ pub const Rules = struct {
_ = score;
if (false) {}
}
 
/// Checks a self-identified bot against the networks recorded for it.
///
/// Returns `error.NotABot` when `ua` did not resolve to a bot. When the bot
/// has no recorded network, or `r.remote_addr` starts with one of the
/// recorded prefixes, `score` is left untouched. Otherwise, if the network
/// list is marked `exaustive`, `score` is raised to at least 1.0.
pub fn knownSubnet(ua: UA, r: *const Request, score: *f16) !void {
    const claimed_name = switch (ua.resolved) {
        .bot => |claimed| claimed.name,
        else => return error.NotABot,
    };

    // Nothing to verify against when no network is recorded for this bot.
    const known = bots.get(claimed_name).network orelse return;

    const from_known_net = for (known.nets) |prefix| {
        if (startsWith(u8, r.remote_addr, prefix)) break true;
    } else false;

    // An address outside a complete list contradicts the claimed identity.
    if (!from_known_net and known.exaustive) score.* = @max(score.*, 1.0);
}
};
 
pub const TxtRules = struct {
@@ -15,5 +27,42 @@ pub const TxtRules = struct {
delay: bool = false,
};
 
/// This isn't the final implementation, I'm just demoing some ideas
///
/// Describes the addresses a bot is expected to send requests from.
pub const Network = struct {
/// Textual address prefixes; `Rules.knownSubnet` compares each one against
/// the start of the request's `remote_addr`.
nets: []const []const u8,
/// When true, `Rules.knownSubnet` raises the score to at least 1.0 for an
/// address matching none of `nets`; when false a miss is ignored.
exaustive: bool = true,
};
 
/// What is recorded about a single bot: one entry of the `bots` table.
pub const Identity = struct {
/// The tag this entry describes.
bot: Bots,
/// Networks recorded for the bot; when null, `Rules.knownSubnet` performs
/// no address check.
network: ?Network,
};
 
/// Names of the bots that have an entry in the `bots` table.
pub const Bots = enum {
googlebot,
unknown,
 
/// Comptime field information for this enum.
pub const fields = @typeInfo(Bots).@"enum".fields;
/// Number of tags; used as the length of the `bots` table's value array.
pub const len = fields.len;
};
 
/// Identity table holding one entry per `Bots` tag.
///
/// Entries are keyed by tag name through `EnumArray.init`, so the table does
/// not depend on the declaration order of `Bots`, and a tag without an entry
/// fails to compile.
pub const bots: std.EnumArray(Bots, Identity) = .init(.{
    .googlebot = .{
        .bot = .googlebot,
        .network = Network{
            // Yes, I know strings are the stupid way of doing this, this is
            // "temporary"
            .nets = &[_][]const u8{
                "66.249",
            },
        },
    },
    .unknown = .{ .bot = .unknown, .network = null },
});
 
const UA = @import("../user-agent.zig");
const Request = @import("../request.zig");
const std = @import("std");
const startsWith = std.mem.startsWith;
 
src/user-agent.zig added: 62, removed: 11, total 51
@@ -183,12 +183,9 @@ test Resolved {
}
 
pub const Bot = struct {
name: Name = .unknown,
name: Bots = .unknown,
 
pub const Name = enum {
googlebot,
unknown,
};
pub const Bots = BotDetection.bots.Bots;
 
pub const unknown: Bot = .{ .name = .unknown };
};