File:Enwiki LLM blocks.svg

Summary

Description
English: English Wikipedia non-IP Accounts Blocked as LLMs
Date
Source Own work (Quarry query)
Author TestUser345
Permission
(Reusing this file)
CC0 public domain
Other versions File:Enwiki LLM blocks.jpg

Licensing

I, the copyright holder of this work, hereby publish it under the following license:
Creative Commons CC-Zero This file is made available under the Creative Commons CC0 1.0 Universal Public Domain Dedication.
The person who associated a work with this deed has dedicated the work to the public domain by waiving all of their rights to the work worldwide under copyright law, including all related and neighboring rights, to the extent allowed by law. You can copy, modify, distribute and perform the work, even for commercial purposes, all without asking permission.

Category:CC-Zero Category:Self-published work

Python source code

import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import datetime

# Load CSV
# Read the Quarry export, parse the month column as datetimes and order the
# rows chronologically in a single chained pipeline.
csv_path = "quarry-97983-llm-editor-blocks-on-enwiki-with-edit-counts-by-month.csv"
df = (
    pd.read_csv(csv_path)
    .assign(log_month=lambda frame: pd.to_datetime(frame['log_month']))
    .sort_values('log_month')
)

# Per-month series used by the fits and the plot; counts are cast to float.
months = df['log_month']
block_counts = df['block_count'].astype(float)
total_edits = df['total_edits_of_blocked_users'].astype(float)

# Axes alignment
# The left (linear) axis is later limited to [0, Lmax] and the right (log)
# axis to [Rmin, Rmax].  With a = log10(Rmax) / (Lmax - 1) and Rmin = 10**(-a),
# a left-axis value y lines up with 10**(a * (y - 1)) on the right axis, so
# left 1 matches right 10^0 and left Lmax matches right Rmax.
#
# Lmax is the largest block count plus 10% headroom, rounded up.  The floor of
# 2 keeps (Lmax - 1) >= 1, so the division below can never be by zero; no
# separate "Lmax <= 1" guard is needed.
Lmax = max(2, int(math.ceil(block_counts.max() * 1.1)))
Rmax = 5 * 10**4
a = np.log10(Rmax) / (Lmax - 1)  # decades on the right axis per unit on the left
Rmin = 10 ** (-a)                # right-axis value aligned with left-axis 0

# Fits with covariance
# Both regressions use the month dates converted to matplotlib date numbers
# as the independent variable.
x_num = mdates.date2num(months)

# Blocks: first-degree polynomial fit on the raw counts.  The [0, 0] entry of
# the covariance matrix is the variance of the slope coefficient.
coef_b, cov_b = np.polyfit(x_num, block_counts, deg=1, cov=True)
slope_b, intercept_b = coef_b
slope_b_se = float(np.sqrt(cov_b[0, 0]))

# Edits: first-degree fit on the natural log of the totals, restricted to
# months with a strictly positive total (log is undefined otherwise).
valid = total_edits > 0
log_edits = np.log(total_edits[valid])
coef_e, cov_e = np.polyfit(x_num[valid], log_edits, deg=1, cov=True)
slope_e, intercept_e = coef_e
slope_e_se = float(np.sqrt(cov_e[0, 0]))

# Callable polynomials for drawing the fitted trends.
p_blocks = np.poly1d([slope_b, intercept_b])
p_edits = np.poly1d([slope_e, intercept_e])

# Multiplier applied to a standard error to get the half-width of the
# intervals that the legend labels as "95% CI".
Z = 1.96

def dt_and_ci_from_slope_lin(mean_level, slope, slope_se):
    """Return the doubling time of a linear trend and its confidence interval.

    The doubling time is taken as ``mean_level / slope``: how long the
    fitted line needs to rise by one ``mean_level``.  Its standard error is
    the first-order propagation of ``slope_se`` (``mean_level`` is treated
    as exact).

    Args:
        mean_level: Reference level that the trend has to gain.
        slope: Fitted slope (level per unit of x).
        slope_se: Standard error of ``slope``.

    Returns:
        ``(dt, (lower, upper))`` where the interval is ``dt -/+ Z * se`` and
        ``lower`` is clamped at 0.  When ``slope <= 0`` every element is
        ``np.inf``.
    """
    never = (np.inf, (np.inf, np.inf))
    if slope <= 0:
        return never

    doubling = mean_level / slope
    doubling_se = abs(mean_level) * slope_se / (slope**2)
    margin = Z * doubling_se

    lower = max(doubling - margin, 0)
    upper = doubling + margin
    return doubling, (lower, upper)

def dt_and_ci_from_slope_exp(slope, slope_se):
    """Return the doubling time of an exponential trend and its interval.

    ``slope`` is the growth rate of a fit done in natural-log space, so the
    doubling time is ``ln(2) / slope``.  Its standard error is the
    first-order propagation of ``slope_se``.

    Args:
        slope: Fitted slope of log(value) against x.
        slope_se: Standard error of ``slope``.

    Returns:
        ``(dt, (lower, upper))`` where the interval is ``dt -/+ Z * se`` and
        ``lower`` is clamped at 0.  When ``slope <= 0`` every element is
        ``np.inf``.
    """
    if slope <= 0:
        never = (np.inf, (np.inf, np.inf))
        return never

    ln2 = np.log(2)
    doubling = ln2 / slope
    # ln(2) is positive, so no absolute value is needed here.
    doubling_se = ln2 * slope_se / (slope**2)
    margin = Z * doubling_se
    return doubling, (max(doubling - margin, 0), doubling + margin)

# Doubling time and confidence interval for each series: blocks use the linear
# fit scaled by the mean monthly block count, edits use the log-space fit.
dt_b, ci_dt_b = dt_and_ci_from_slope_lin(
    mean_level=block_counts.mean(),
    slope=slope_b,
    slope_se=slope_b_se,
)
dt_e, ci_dt_e = dt_and_ci_from_slope_exp(slope=slope_e, slope_se=slope_e_se)

def fmt_ci(dt, ci):
    """Format a doubling time and its confidence interval for the legend.

    Finite values are rounded to whole days.  Non-finite values (the
    ``np.inf`` that the doubling-time helpers return for a non-positive
    slope, or NaN) are rendered as text such as ``inf`` instead of being
    passed to ``int()``, which would raise ``OverflowError``/``ValueError``.

    Args:
        dt: Doubling time in days.
        ci: ``(lower, upper)`` bounds of the interval, in days.

    Returns:
        A string of the form ``"<dt> days, 95% CI: [<lower>, <upper>]"``.
    """
    def _days(value):
        # int(round(inf)) and int(round(nan)) both raise, so only finite
        # values go through the integer conversion.
        if math.isfinite(value):
            return str(int(round(value)))
        return f"{value:g}"

    return f"{_days(dt)} days, 95% CI: [{_days(ci[0])}, {_days(ci[1])}]"

# Plot
# Grouped bar chart: block counts on the left (linear) axis and total edits on
# the right (log) axis, with both fitted trends overlaid as dashed lines.
fig, ax1 = plt.subplots(figsize=(8, 6))
width_days = 10  # bar width in days
magenta_dark, teal_dark = '#8B008B', '#008080'

# Block-count bars are shifted half a bar width before each month date so they
# sit next to, not on top of, the edit bars drawn below.
ax1.bar(months - pd.Timedelta(days=width_days/2), block_counts, width=width_days, color=magenta_dark)
ax1.set_ylabel('Block Count', color=magenta_dark)
ax1.tick_params(axis='y', labelcolor=magenta_dark)
ax1.set_ylim(0, Lmax)

# Second y-axis sharing the same x-axis; edit bars are shifted half a bar
# width after each month date.
ax2 = ax1.twinx()
ax2.bar(months + pd.Timedelta(days=width_days/2), total_edits, width=width_days, color=teal_dark)
ax2.set_ylabel('Total Edits (Log Scale)', color=teal_dark)
ax2.tick_params(axis='y', labelcolor=teal_dark)
ax2.set_yscale('log')
ax2.set_ylim(Rmin, Rmax)

# Explicit decade ticks from 10^0 to 10^4 on the log axis.
ax2.set_yticks([10**i for i in range(0, 5)])
ax2.set_yticklabels([r'$10^0$', r'$10^1$', r'$10^2$', r'$10^3$', r'$10^4$'])

# X range: fixed start at 2022-03-01, end one month after the last data month.
xmin = mdates.date2num(datetime.datetime(2022, 3, 1))
xmax = mdates.date2num((months.max() + pd.DateOffset(months=1)).to_pydatetime())
ax1.set_xlim(xmin, xmax)
ax1.set_xlabel('Month')
ax1.set_title('English Wikipedia non-IP Accounts Blocked as LLMs')
# One major tick every three months, labelled as YYYY-MM and rotated.
ax1.xaxis.set_major_locator(mdates.MonthLocator(interval=3))
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
plt.setp(ax1.get_xticklabels(), rotation=45, ha="right")

# NOTE: layout is computed here, before the trend lines and legend are added.
fig.tight_layout()

# Fitted trends sampled at 1000 points across the visible x range.  The edits
# polynomial was fitted to np.log(total_edits), so it is exponentiated to get
# back to edit counts.
x_line = np.linspace(xmin, xmax, 1000)
line_blocks, = ax1.plot(x_line, p_blocks(x_line), "--", color=magenta_dark, linewidth=2.5)
line_edits, = ax2.plot(x_line, np.exp(p_edits(x_line)), "--", color=teal_dark, linewidth=2.5)

# Legend entries: the first bar patch of each axes stands in for its bar
# series, followed by the corresponding trend line.
handles = [ax1.patches[0], line_blocks, ax2.patches[0], line_edits]
labels = [
    "Block Count",
    f"Blocks double in {fmt_ci(dt_b, ci_dt_b)}",
    "Total Edits",
    f"Edits double in {fmt_ci(dt_e, ci_dt_e)}"
]
# Figure-level legend anchored at the upper-left corner of ax1 (axes coordinates).
fig.legend(handles, labels, loc="upper left", bbox_to_anchor=(0, 1), bbox_transform=ax1.transAxes)

fig.savefig('wikipedia_block_stats_latest_ci.svg', format='svg')
Category:English Wikipedia editor statistics Category:English Wikipedia edit counts Category:Large language models Category:Charts showing data through 2025 Category:Wikimedia projects and large language models Category:Created with Matplotlib
Category:CC-Zero Category:Charts showing data through 2025 Category:Created with Matplotlib Category:English Wikipedia edit counts Category:English Wikipedia editor statistics Category:Large language models Category:Self-published work Category:Wikimedia projects and large language models