File:Enwiki LLM blocks.svg
Summary
| Description |
English: English Wikipedia non-IP Accounts Blocked as LLMs |
| Date | |
| Source | Own work (Quarry query) |
| Author | TestUser345 |
| Permission (Reusing this file) |
CC0 public domain |
| Other versions | File:Enwiki LLM blocks.jpg |
Licensing
I, the copyright holder of this work, hereby publish it under the following license:
| This file is made available under the Creative Commons CC0 1.0 Universal Public Domain Dedication. | |
| The person who associated a work with this deed has dedicated the work to the public domain by waiving all of their rights to the work worldwide under copyright law, including all related and neighboring rights, to the extent allowed by law. You can copy, modify, distribute and perform the work, even for commercial purposes, all without asking permission.
|
Python source code
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import datetime
# Load CSV
# Read the monthly Quarry export, parse the month column as datetimes and put
# the rows in chronological order before pulling out the plotted series.
csv_path = "quarry-97983-llm-editor-blocks-on-enwiki-with-edit-counts-by-month.csv"
df = (
    pd.read_csv(csv_path)
    .assign(log_month=lambda frame: pd.to_datetime(frame['log_month']))
    .sort_values('log_month')
)
months = df['log_month']
# Both numeric series are cast to float for the fits and the plotting below.
block_counts, total_edits = (
    df[column].astype(float)
    for column in ('block_count', 'total_edits_of_blocked_users')
)
# Axes alignment
# The left axis is linear over [0, Lmax] and the right axis is logarithmic
# over [Rmin, Rmax]. With a = log10(Rmax) / (Lmax - 1) and Rmin = 10**(-a),
# the right-hand tick 10^0 sits at the same height as 1 on the left axis
# (both at fraction 1/Lmax of the axis height).
# Lmax gets 10% headroom above the tallest bar; the floor of 2 guarantees
# Lmax - 1 > 0, so no further guard is needed before the division below.
Lmax = max(2, int(math.ceil(block_counts.max() * 1.1)))
Rmax = 5 * 10**4
a = np.log10(Rmax) / (Lmax - 1)  # decades of the right axis per unit of the left axis
Rmin = 10 ** (-a)
# Fits with covariance
Z = 1.96  # critical value used for the 95% intervals shown in the legend

# Both regressions use Matplotlib date numbers as the numeric x variable.
x_num = mdates.date2num(months)

# Blocks: straight-line fit of the monthly block counts.
coef_b, cov_b = np.polyfit(x_num, block_counts, deg=1, cov=True)
slope_b, intercept_b = coef_b
p_blocks = np.poly1d([slope_b, intercept_b])
# Entry [0, 0] of the covariance matrix is the variance of the slope.
slope_b_se = float(np.sqrt(cov_b[0, 0]))

# Edits: straight-line fit of log(edits), i.e. an exponential trend. Months
# with zero edits are excluded because their logarithm is undefined.
valid = total_edits > 0
log_edits = np.log(total_edits[valid])
coef_e, cov_e = np.polyfit(x_num[valid], log_edits, deg=1, cov=True)
slope_e, intercept_e = coef_e
p_edits = np.poly1d([slope_e, intercept_e])
slope_e_se = float(np.sqrt(cov_e[0, 0]))
def dt_and_ci_from_slope_lin(mean_level, slope, slope_se):
    """Time for a linear trend to rise by ``mean_level``, with an interval.

    Args:
        mean_level: Level the trend has to gain (the caller passes the mean
            block count, so the result reads as a doubling time).
        slope: Fitted slope, in level units per x unit.
        slope_se: Standard error of ``slope``.

    Returns:
        ``(dt, (lower, upper))`` where ``dt = mean_level / slope`` and the
        bounds are ``dt -/+ Z * se(dt)``, the lower one clamped at 0. When
        ``slope`` is zero or negative every value is ``np.inf``.
    """
    if slope <= 0:
        # A flat or falling trend never reaches the target level.
        never = np.inf
        return never, (never, never)

    estimate = mean_level / slope
    # Delta method: se(dt) = |d(mean_level / slope) / d(slope)| * se(slope).
    half_width = Z * (abs(mean_level) * slope_se / (slope**2))
    lower = max(estimate - half_width, 0)
    upper = estimate + half_width
    return estimate, (lower, upper)
def dt_and_ci_from_slope_exp(slope, slope_se):
    """Doubling time of an exponential trend, with an interval.

    Args:
        slope: Fitted slope of the natural log of the series per x unit.
        slope_se: Standard error of ``slope``.

    Returns:
        ``(dt, (lower, upper))`` where ``dt = ln(2) / slope`` and the bounds
        are ``dt -/+ Z * se(dt)``, the lower one clamped at 0. When ``slope``
        is zero or negative every value is ``np.inf``.
    """
    if slope <= 0:
        # Without growth the series never doubles.
        never = np.inf
        return never, (never, never)

    ln2 = np.log(2)
    estimate = ln2 / slope
    # Delta method: se(dt) = |d(ln 2 / slope) / d(slope)| * se(slope).
    half_width = Z * (ln2 * slope_se / (slope**2))
    lower = max(estimate - half_width, 0)
    upper = estimate + half_width
    return estimate, (lower, upper)
# Doubling-time estimates and their intervals for the two legend entries:
# blocks use the linear fit relative to the mean monthly count, edits use
# the exponential (log-linear) fit.
dt_b, ci_dt_b = dt_and_ci_from_slope_lin(
    mean_level=block_counts.mean(),
    slope=slope_b,
    slope_se=slope_b_se,
)
dt_e, ci_dt_e = dt_and_ci_from_slope_exp(
    slope=slope_e,
    slope_se=slope_e_se,
)
def fmt_ci(dt, ci):
    """Format a doubling time and its 95% interval for the legend.

    Args:
        dt: Point estimate, in days.
        ci: Indexable pair ``(lower, upper)`` of interval bounds, in days.

    Returns:
        Text such as ``"120 days, 95% CI: [90, 150]"``. Finite values are
        rounded to whole numbers; non-finite values are written out as
        ``inf`` / ``nan``.
    """
    def _whole(value):
        # The doubling-time helpers return np.inf when the fitted slope is
        # not positive; int(round(inf)) raises OverflowError (and NaN raises
        # ValueError), so non-finite values are rendered as text instead.
        if math.isfinite(value):
            return str(int(round(value)))
        return str(float(value))

    return f"{_whole(dt)} days, 95% CI: [{_whole(ci[0])}, {_whole(ci[1])}]"
# Plot
# Two bar series share the x-axis: block counts on the left (linear) axis and
# total edits on the right (log) axis, each overlaid with its fitted trend.
fig, ax1 = plt.subplots(figsize=(8, 6))
width_days = 10
magenta_dark, teal_dark = '#8B008B', '#008080'
# Block bars are shifted half a bar width before the month timestamp (edit
# bars are shifted the same amount after it) so each pair sits side by side.
ax1.bar(months - pd.Timedelta(days=width_days/2), block_counts, width=width_days, color=magenta_dark)
ax1.set_ylabel('Block Count', color=magenta_dark)
ax1.tick_params(axis='y', labelcolor=magenta_dark)
ax1.set_ylim(0, Lmax)
# Second y-axis on the same x-axis for the edit totals.
ax2 = ax1.twinx()
ax2.bar(months + pd.Timedelta(days=width_days/2), total_edits, width=width_days, color=teal_dark)
ax2.set_ylabel('Total Edits (Log Scale)', color=teal_dark)
ax2.tick_params(axis='y', labelcolor=teal_dark)
ax2.set_yscale('log')
# Rmin/Rmax come from the axes-alignment step so that 10^0 on this axis
# lines up with 1 on the left axis.
ax2.set_ylim(Rmin, Rmax)
# One labelled tick per decade from 10^0 to 10^4.
ax2.set_yticks([10**i for i in range(0, 5)])
ax2.set_yticklabels([r'$10^0$', r'$10^1$', r'$10^2$', r'$10^3$', r'$10^4$'])
# The x-range starts at a fixed date (2022-03-01) and ends one calendar month
# after the last month in the data.
xmin = mdates.date2num(datetime.datetime(2022, 3, 1))
xmax = mdates.date2num((months.max() + pd.DateOffset(months=1)).to_pydatetime())
ax1.set_xlim(xmin, xmax)
ax1.set_xlabel('Month')
ax1.set_title('English Wikipedia non-IP Accounts Blocked as LLMs')
# A major tick every third month, labelled as YYYY-MM and rotated.
ax1.xaxis.set_major_locator(mdates.MonthLocator(interval=3))
ax1.xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))
plt.setp(ax1.get_xticklabels(), rotation=45, ha="right")
# NOTE: the layout is computed here, before the trend lines and the legend
# are added below.
fig.tight_layout()
# Evaluate both fits on 1000 evenly spaced points across the visible x-range.
x_line = np.linspace(xmin, xmax, 1000)
line_blocks, = ax1.plot(x_line, p_blocks(x_line), "--", color=magenta_dark, linewidth=2.5)
# p_edits was fitted to log(total_edits), so exponentiate to get edit counts.
line_edits, = ax2.plot(x_line, np.exp(p_edits(x_line)), "--", color=teal_dark, linewidth=2.5)
# The first bar patch of each axis stands in for its whole bar series in the
# legend; handles and labels are paired by position.
handles = [ax1.patches[0], line_blocks, ax2.patches[0], line_edits]
labels = [
"Block Count",
f"Blocks double in {fmt_ci(dt_b, ci_dt_b)}",
"Total Edits",
f"Edits double in {fmt_ci(dt_e, ci_dt_e)}"
]
# Figure-level legend, anchored to the upper-left corner of ax1 by using
# ax1's axes coordinates for the anchor point.
fig.legend(handles, labels, loc="upper left", bbox_to_anchor=(0, 1), bbox_transform=ax1.transAxes)
# Write the finished chart as an SVG file in the working directory.
fig.savefig('wikipedia_block_stats_latest_ci.svg', format='svg')