User:Bawolff/nsfw.js

/**
 * Hides NSFW images under a spoiler. Image status determined by P14416 property.
 * This gadget can be used both for Wikimedia Commons and for other wiki projects.
 *
 * For now this will censor any image with P14416. A future version may allow customizing which values get censored.
 * It intentionally does hide the image on its own image description page.
 *
 * Known issue: There is a brief delay before censoring the image. There is an alternative experimental script at [[User:Bawolff/nsfw-sw.js]] which does not have that issue.
 *
 * For best results, use a slightly non-standard install script. This helps reduce the time between page load and blurring. See [[User:Bawolff/nsfw]] for install instructions.
 *
 * There is also [[User:Bawolff/nsfw-old.js]] if you object to having a toolforge dependency, but it takes much longer during load to blur the images.
 * @author putnik, 2019-2020
 * @author bawolff 2026 (Modified to use P14416 instead of depicts)
 * [[Category:Commons user scripts]]
 */


( function ( mw, $ ) {

// We need an implementation of xxHash64 to make this work.
// Adapted from the xxh64 implementation from xxh3-ts project by i404788 which is licensed under the 2-clause BSD license
// https://github.com/i404788/xxh3-ts/blob/master/xxh64.ts

// xxHash64 prime multipliers, written in hexadecimal.
const PRIME64_1 = 0x9E3779B185EBCA87n;
const PRIME64_2 = 0xC2B2AE3D27D4EB4Fn;
const PRIME64_3 = 0x165667B19E3779F9n;
const PRIME64_4 = 0x85EBCA77C2B2AE63n;
const PRIME64_5 = 0x27D4EB2F165667C5n;
// All-ones 64-bit mask; `x & mask64` emulates uint64 wrap-around on BigInt values.
const mask64 = 0xFFFFFFFFFFFFFFFFn;

// Pointer arithmetic stand-in, i.e. `(byte*)buf + offset`: returns a DataView over the
// same underlying bytes as `buf`, starting `offset` bytes further in (0 when omitted).
function getView(buf/*: Buffer*/, offset/*: number = 0*/)/*: Buffer*/ {
    const skip = offset === undefined ? 0 : offset;
    const start = buf.byteOffset + skip;
    const remaining = buf.byteLength - skip;
    return new DataView(buf.buffer, start, remaining);
}

/**
 * Rotate a 64-bit unsigned integer left by `b` bits.
 *
 * The input is first truncated to 64 bits and the result is truncated again, so the
 * return value is always in [0, 2^64). (Previously only the right-shifted half was
 * masked, because `&` binds tighter than `|`, so the result could carry bits above
 * bit 63 and a too-wide input polluted the low bits.)
 *
 * @param {bigint} a Value to rotate; only its low 64 bits are used.
 * @param {bigint} b Rotation amount in bits, expected to be in 0..64.
 * @return {bigint} The rotated 64-bit value.
 */
function Rotl64(a/*: bigint*/, b/*: bigint*/) {
    const value = BigInt.asUintN(64, a);
    return BigInt.asUintN(64, (value << b) | (value >> (64n - b)));
}

// One xxHash64 round: fold `lane` into `acc` (multiply by PRIME64_2 and add,
// rotate left by 31, multiply by PRIME64_1), wrapping to 64 bits after every step.
function round(acc/*: bigint*/, lane/*: bigint*/) {
    const summed = (acc + (lane * PRIME64_2)) & mask64;
    const rotated = Rotl64(summed, 31n) & mask64;
    return (rotated * PRIME64_1) & mask64;
}

// Merge one lane accumulator `accN` into the running hash `acc`:
// xor in round(0, accN), multiply by PRIME64_1, add PRIME64_4 (all modulo 2^64).
function XH64_mergeAccumulator(acc/*: bigint*/, accN/*: bigint*/) {
    const folded = acc ^ round(0n, accN);
    const scaled = (folded * PRIME64_1) & mask64;
    return (scaled + PRIME64_4) & mask64;
}

// Collapse the four lane accumulators into one value: sum the lanes rotated by
// 1, 7, 12 and 18 bits respectively, then merge each lane into that sum in order.
function XH64_convergeAccumulator(accs/*: BigUint64Array*/)/*: bigint*/ {
    const rotations = [1n, 7n, 12n, 18n];
    let merged = 0n;
    for (let lane = 0; lane < rotations.length; lane++) {
        merged += Rotl64(accs[lane], rotations[lane]);
    }
    for (let lane = 0; lane < rotations.length; lane++) {
        merged = XH64_mergeAccumulator(merged, accs[lane]);
    }
    return merged;
}

// Consume the trailing bytes of `data` into `acc`: first as many little-endian
// 8-byte lanes as fit, then at most one little-endian 4-byte lane, then single bytes.
function XH64_accumulateRemainder(data/*: Buffer*/, acc/*: bigint*/)/*: bigint*/ {
    const total = data.byteLength;
    let pos = 0;

    // 8-byte lanes
    for (; total - pos >= 8; pos += 8) {
        const lane64 = data.getBigUint64(pos, true);
        acc ^= round(0n, lane64);
        acc = Rotl64(acc, 27n) * PRIME64_1;
        acc = (acc + PRIME64_4) & mask64;
    }

    // optional 4-byte lane
    if (total - pos >= 4) {
        const lane32 = BigInt(data.getUint32(pos, true));
        acc = (acc ^ (lane32 * PRIME64_1)) & mask64;
        acc = (Rotl64(acc, 23n) * PRIME64_2) & mask64;
        acc = (acc + PRIME64_3) & mask64;
        pos += 4;
    }

    // leftover single bytes
    for (; total - pos >= 1; pos += 1) {
        const lane8 = BigInt(data.getUint8(pos));
        acc = (acc ^ (lane8 * PRIME64_5)) & mask64;
        acc = (Rotl64(acc, 11n) * PRIME64_1) & mask64;
    }
    return acc;
}

/**
 * Hash path for inputs that contain at least one full 32-byte stripe.
 *
 * Every stripe is read as four little-endian 64-bit lanes, each folded into its own
 * accumulator. The accumulators are then converged, the total input length is added,
 * any bytes past the last full stripe are consumed, and the result is avalanched.
 *
 * Fix: the length addition is now wrapped to 64 bits. Before, the sum could exceed
 * 2^64 and the overflow bit leaked into the right-shifts performed by the later steps.
 *
 * @param {DataView} data Bytes to hash.
 * @param {BigUint64Array} accs The four seeded lane accumulators; updated in place.
 * @return {bigint} Whatever XH64_mix returns for the final accumulator.
 */
function XH64_accumulate(data/*: Buffer*/, accs/*: BigUint64Array*/) {
    const STRIPE_BYTES = 32;
    const fullStripes = Math.floor(data.byteLength / STRIPE_BYTES);
    for (let i = 0; i < fullStripes; i++) {
        for (let j = 0; j < 4; j++) {
            const lane = data.getBigUint64(i * STRIPE_BYTES + j * 8, true);
            accs[j] = round(accs[j], lane);
        }
    }

    // uint64 semantics: adding the length must wrap around at 2^64.
    let acc = BigInt.asUintN(64, XH64_convergeAccumulator(accs) + BigInt(data.byteLength));

    // Only call the remainder step when bytes are left after the last full stripe.
    if (data.byteLength % STRIPE_BYTES !== 0) {
        acc = XH64_accumulateRemainder(getView(data, fullStripes * STRIPE_BYTES), acc);
    }

    return XH64_mix(acc);
}

// Final avalanche: three xor-shift steps (33, 29, 32 bits) interleaved with
// multiplications by PRIME64_2 and PRIME64_3, wrapped to 64 bits.
function XH64_mix(acc/*: bigint*/) {
    let h = acc;
    h ^= h >> 33n;
    h = (h * PRIME64_2) & mask64;
    h ^= h >> 29n;
    h = (h * PRIME64_3) & mask64;
    return h ^ (h >> 32n);
}

/**
 * Hash path used by XXH64 for inputs shorter than 32 bytes.
 *
 * Starts from seed + PRIME64_5 + length (modulo 2^64), consumes all bytes through
 * XH64_accumulateRemainder and avalanches the result.
 *
 * Fix: the length is now added with 64-bit wrap-around. Before, a seed close to
 * 2^64 - PRIME64_5 produced a start value wider than 64 bits.
 *
 * @param {DataView} data Bytes to hash.
 * @param {bigint} seed Hash seed.
 * @return {bigint} Whatever XH64_mix returns for the final accumulator.
 */
function XXH64_small(data/*: Buffer*/, seed/*: bigint*/) {
    const start = BigInt.asUintN(64, seed + PRIME64_5 + BigInt(data.byteLength));
    return XH64_mix(XH64_accumulateRemainder(data, start));
}

// Modified to take a string.
// Encodes `input` as UTF-8 and hashes it; a falsy `seed` is treated as 0n.
// Inputs under 32 bytes take the short path, everything else the striped path.
function XXH64(input, seed/*: bigint = n(0)*/) {
    const bytes = new TextEncoder().encode(input);
    const data = new DataView(bytes.buffer);
    seed = seed || 0n;

    if (data.byteLength >= 32) {
        // The typed array wraps each initial lane value to 64 bits.
        const lanes = new BigUint64Array([
            seed + PRIME64_1 + PRIME64_2,
            seed + PRIME64_2,
            seed,
            seed - PRIME64_1
        ]);
        return XH64_accumulate(data, lanes);
    }

    return XXH64_small(data, seed);
}

// end xxh3-ts/xxh64.ts


	/**
	 * XorFilter.
	 * This is adapted from https://github.com/FastFilter/xor_singleheader which is apache licensed
	 *
	 * Probabilistic set membership test with 16-bit fingerprints. A key is hashed to one
	 * slot in each of three consecutive segments of `blocksize` slots; the key is reported
	 * as present when the XOR of those three slots equals the key's fingerprint.
	 *
	 * @param {bigint} seed Seed added to every key hash before mixing.
	 * @param {number} blocksize Number of slots per segment.
	 * @param {Uint16Array} fingerprints Fingerprint table, indexed up to 3 * blocksize - 1.
	 */
	var XorFilter = function ( seed, blocksize, fingerprints ) {
		this.seed = seed; /* bigint */
		this.blocksize = blocksize; /* number */
		this.fingerprints = fingerprints; /* Uint16Array */
	};
	XorFilter.prototype = {
		/**
		 * Test a string key (hashed with XXH64).
		 *
		 * Fix: XXH64() returns a BigInt, and the old `BigInt( "0x" + hash )` re-parsed its
		 * decimal rendering as hexadecimal, so the filter was probed with the wrong value.
		 * A hex-string hash is still accepted in case the hash function is swapped out.
		 *
		 * @param {string} key
		 * @return {Promise<boolean>} Kept asynchronous so existing `await` callers are unaffected.
		 */
		contains: async function ( key ) {
			var hash = XXH64( key );
			if ( typeof hash !== 'bigint' ) {
				hash = BigInt( '0x' + hash );
			}
			return this.contains_hash( hash );
		},
		// The mix function from murmur64.
		hash: function ( h ) {
			h ^= h >> 33n;
			h = ( h * 0xff51afd7ed558ccdn ) & 0xFFFFFFFFFFFFFFFFn;
			h ^= h >> 33n;
			h = ( h * 0xc4ceb9fe1a85ec53n ) & 0xFFFFFFFFFFFFFFFFn;
			h ^= h >> 33n;
			return h;
		},
		/**
		 * Test an already computed 64-bit key hash.
		 *
		 * @param {bigint} hash_in
		 * @return {boolean}
		 */
		contains_hash: function ( hash_in ) {
			var hash = this.mix_split( hash_in ); /* 64 bit */
			var f = this.fingerprint( hash ); /* 16 bit */
			// Three 32-bit values derived from the hash, one per segment.
			var r0 = Number( hash & 0xFFFFFFFFn );
			var r1 = Number( this.rotl64( hash, 21n ) );
			var r2 = Number( this.rotl64( hash, 42n ) );
			var h0 = this.reduce( r0, this.blocksize );
			var h1 = this.reduce( r1, this.blocksize ) + this.blocksize;
			var h2 = this.reduce( r2, this.blocksize ) + this.blocksize * 2;
			return f === ( this.fingerprints[ h0 ] ^ this.fingerprints[ h1 ] ^ this.fingerprints[ h2 ] );
		},
		// 16-bit fingerprint: low 16 bits of the hash xor-folded with its upper half.
		fingerprint: function ( hash ) {
			return Number( ( hash ^ ( hash >> 32n ) ) & 0xFFFFn );
		},
		// Adds the filter seed (modulo 2^64) and runs the murmur64 mix.
		mix_split: function ( hash ) {
			return this.hash( ( hash + this.seed ) & 0xFFFFFFFFFFFFFFFFn );
		},
		// 64-bit left rotation of which only the low 32 bits are returned
		// (that is all contains_hash needs).
		rotl64: function ( n, c ) {
			return ( ( ( n << ( c & 63n ) ) | n >> ( ( -c ) & 63n ) ) ) & 0xFFFFFFFFn;
		},
		// Maps a 32-bit value onto [0, n) without a modulo: (hash * n) >> 32.
		reduce: function ( hash, n ) {
			return Number( ( BigInt( hash ) * BigInt( n ) ) >> 32n );
		},
	};
	// End xorfilter code
	// End xorfilter code

	// Shared gadget state (private to this closure).

	// Entries pushed by loadProp(): { mid, title, contentDescriptor } for every
	// returned entity that has P14416 statements.
	var imagesData = [];
	// jQuery collection of media wrappers; reassigned by findImages() and hideNsfwImages().
	var $images;
	// True when running on Wikimedia Commons itself; selects local vs. foreign API.
	var isCommons = 'commonswiki' === mw.config.get( 'wgDBname' );
	// API client for Commons; created in init().
	var commonsApi;
	// Set to true by the "Reveal all NSFW media" link; findImages() then does nothing.
	var stopProcessing = false;
	// Lets hookHandler() skip the first 'wikipage.content' event for #mw-content-text.
	var firstRun = true;
	// XorFilter instance; assigned once the filter file has been downloaded and parsed.
	var xor;

	// Inject the gadget stylesheet:
	//  - `.nsfw` goes on the media wrapper: blurs the img/video inside it, hides the
	//    TimedMediaHandler play button and draws a centred "NSFW" badge via ::before.
	//  - `.nsfw-gadget-pending` goes directly on an img/video and only blurs it.
	// NOTE(review): this call runs before the mw.loader.using( 'mediawiki.util' ) at the
	// bottom of the file has resolved, so it presumably relies on the install snippet
	// having loaded mediawiki.util already — confirm.
	mw.util.addCSS( `
		.nsfw {
			display: inline-block;
			vertical-align: top;
			position: relative;
			overflow: hidden;
		}

		.nsfw .mw-tmh-play {
			display: none;
		}

		.nsfw img, .nsfw video, .nsfw-gadget-pending {
			filter: blur(12px);
		}

		.nsfw::before {
			content: "NSFW";
			position: absolute;
			left: 50%;
			top: 50%;
			z-index: 1;
			background: #fff;
			border-radius: 2em;
			height: 3em;
			line-height: 3em;
			width: 6em;
			margin-left: -3em;
			margin-top: -1.5em;
			opacity: .6;
			color: #222;
			text-align: center;
			cursor: pointer; 
		}

		.nsfw:hover::before {
			opacity: .8;
		}
	` );

	// Initial pass. May have some false positives. Try and do this as early as possible.
	// This may run before page is fully loaded. The hope is that at least the DOM of the initial paint is loaded.
	//
	// Filter file layout as read below: bytes 0-7 seed (uint64 LE), bytes 8-15 block length
	// (uint64 LE), then 3 * blocklength uint16 fingerprints starting at byte 16.
	//
	// This promise never rejects: the heuristic is only an optimisation, and a rejected
	// input would stop $.when( ... ).done( init ) from ever starting the gadget. If the
	// filter cannot be loaded, `xor` becomes a stand-in that matches nothing.
	var earlyHeuristicPromise = fetch( 'https://commons-nsfw-filter.toolforge.org/filters/p14416.xor16.bin' ).then( ( response ) => {
		if ( !response.ok ) {
			throw new Error( 'HTTP ' + response.status + ' while fetching the NSFW xor filter' );
		}
		return response.arrayBuffer();
	} ).then( ( buffer ) => {
		var dataview = new DataView( buffer );
		var blocksize = Number( dataview.getBigUint64( 8, true ) );
		xor = new XorFilter(
			dataview.getBigUint64( 0, true ),
			blocksize,
			// The table is 6 * blocksize bytes long and starts after the 16-byte header,
			// so it ends at 16 + 6 * blocksize (the old end offset cut off the last 16 bytes).
			new Uint16Array( buffer.slice( 16, 16 + 6 * blocksize ) )
		);
		return xor;
	} ).catch( ( err ) => {
		mw.log.warn( 'nsfw gadget: could not load the xor filter, skipping the early heuristic', err );
		xor = {
			contains: function () {
				return Promise.resolve( false );
			}
		};
		return xor;
	} ).then( async ( filter ) => {
		var $imgs = $( '[typeof^="mw:File"] img, [typeof^="mw:File"] video, .sdms-image-result img, .sdms-video-result img, .searchResultImage-thumbnail img, .searchResultImage-thumbnail video' );
		for ( var i = 0; i < $imgs.length; i++ ) {
			var img = $imgs[ i ];
			var src = img.getAttribute( 'src' ) || img.getAttribute( 'data-src' ) || img.getAttribute( 'data-mwtitle' );
			if ( !src || src.indexOf( 'upload.wikimedia.org' ) === -1 ) {
				continue;
			}
			var filename;
			try {
				filename = parseImageName( src ).replace( / /g, '_' );
			} catch ( e ) {
				// One undecodable name must not abort the pass for the remaining images.
				continue;
			}
			var potentialNSFW = await filter.contains( filename );
			if ( potentialNSFW ) {
				// Probably NSFW, but small chance of being a false positive. Also could be out of date data.
				img.classList.add( 'nsfw-gadget-pending' );
			}
		}
	} ).catch( ( err ) => {
		mw.log.warn( 'nsfw gadget: early heuristic pass failed', err );
	} );

	// Handle new content being added to the page.
	// The very first notification for #mw-content-text is ignored because init() has
	// already scanned the initial page; after that, rescan whenever the added content
	// contains media wrappers not yet marked as NSFW.
	var hookHandler = function ( $elm ) {
		var isInitialContent = firstRun && $elm[ 0 ].id === 'mw-content-text';
		if ( isInitialContent ) {
			// We already did the initial page view
			firstRun = false;
			return;
		}

		var unmarkedSelector = '[typeof^="mw:File"]:not(.nsfw), .sdms-image-result:not(.nsfw), .sdms-video-result:not(.nsfw), .searchResultImage-thumbnail:not(.nsfw)';
		var $unmarked = $elm.find( unmarkedSelector );
		if ( $unmarked.length ) {
			findImages();
		}
	};

	// A bit hacky. See T428302. MediaSearch dynamically loads new images, which we need to get.
	// One MutationObserver watches the direct children of every result list and
	// rescans the page whenever they change.
	var mediaSearchObserver = function () {
		var rescan = function () {
			findImages();
		};
		var observer = new MutationObserver( rescan );
		var watchOptions = { childList: true };
		$( '.sdms-search-results__list' ).each( function () {
			observer.observe( this, watchOptions );
		} );
	};

	// Entry point: create the Commons API client (local on Commons, foreign elsewhere),
	// scan the current page, then subscribe to later content changes.
	var init = function () {
		var apiOptions = { userAgent: 'nsfw-gadget/0.0 (https://commons.wikimedia.org/wiki/User:Bawolff/nsfw.js)' };
		if ( isCommons ) {
			commonsApi = new mw.Api( apiOptions );
		} else {
			commonsApi = new mw.ForeignApi( '//commons.wikimedia.org/w/api.php', apiOptions );
		}

		findImages();
		mw.hook( 'wikipage.content' ).add( hookHandler );
		mediaSearchObserver();
	};

	/**
	 * Collect the media wrappers that are not yet marked `.nsfw`, pre-blur the ones the
	 * xor filter flags, and ask Commons (in batches of 50 titles) which of them really
	 * carry P14416 statements.
	 *
	 * Does nothing once the user has chosen "Reveal all NSFW media".
	 *
	 * Fix: XorFilter.contains() is asynchronous, so testing its return value directly
	 * tested a Promise, which is always truthy, and every image was pre-blurred. The
	 * result is now awaited; Promise.resolve() also accepts a plain boolean. A missing
	 * filter is tolerated as well.
	 */
	var findImages = function () {
		if ( stopProcessing ) {
			return;
		}
		$images = $( '[typeof^="mw:File"]:not(.nsfw), .sdms-image-result:not(.nsfw), .sdms-video-result:not(.nsfw), .searchResultImage-thumbnail:not(.nsfw)' ).filter( function () {
			var $img = $( this ).find( 'img' );
			var $video = $( this ).find( 'video' );
			var imgSrc = $img.attr( 'src' );
			// Keep videos, lazy-loaded images, and Commons-hosted images covering at least 4000 square pixels.
			return $video.attr( 'data-mwtitle' ) || $img.attr( 'data-src' ) || ( $img.width() * $img.height() >= 4000 && imgSrc && imgSrc.indexOf( 'upload.wikimedia.org' ) !== -1 );
		} );

		var imageTitles = $images.map( function () {
			var $wrapper = $( this );
			var src = $wrapper.find( 'img' ).attr( 'src' ) || $wrapper.find( 'img' ).attr( 'data-src' ) || $wrapper.find( 'video' ).attr( 'data-mwtitle' );
			if ( typeof src !== "string" ) {
				// Not clear why this would happen, but users are reporting it.
				return undefined;
			}
			var imgName = parseImageName( src );

			if ( xor ) {
				var $media = $wrapper.find( 'img, video' );
				Promise.resolve( xor.contains( imgName.replace( / /g, '_' ) ) ).then( function ( potentialNSFW ) {
					// Skip if the user asked to reveal everything in the meantime.
					if ( potentialNSFW && !stopProcessing ) {
						$media.addClass( 'nsfw-gadget-pending' );
					}
				} ).catch( function ( err ) {
					mw.log.warn( 'nsfw gadget: xor filter lookup failed', err );
				} );
			}

			return 'File:' + imgName;
		} ).toArray();

		// wbgetentities is queried with at most 50 titles per request.
		for ( var offset = 0; offset < imageTitles.length; offset += 50 ) {
			loadProp( imageTitles.slice( offset, offset + 50 ) );
		}
	};

	/**
	 * Query Commons for the structured data of the given file titles, record every
	 * entity that has P14416 statements in `imagesData`, then apply the spoilers.
	 *
	 * Fix: a statement whose main snak has no datavalue (e.g. "unknown value" / "no
	 * value") used to throw inside the callback, so hideNsfwImages() never ran for the
	 * whole batch. Such statements are now recorded with a `null` id; the file still
	 * counts as having P14416.
	 *
	 * @param {string[]} imageTitles Titles including the "File:" prefix.
	 */
	var loadProp = function ( imageTitles ) {
		commonsApi.get( {
			action: 'wbgetentities',
			props: [ 'info', 'claims' ],
			sites: 'commonswiki',
			titles: imageTitles,
			// Cache for a few minutes to avoid overwhelming servers on repeated requests. Not clear if this still works for logged in users.
			uselang: "en",
			smaxage: "200",
			maxage: "200",
		} ).done( function ( data ) {
			if ( !data || data.entities === undefined ) {
				return;
			}
			Object.keys( data.entities ).forEach( function ( pageMid ) {
				var entity = data.entities[ pageMid ];
				if ( !entity || entity.statements === undefined || entity.statements.P14416 === undefined ) {
					return;
				}
				imagesData.push( {
					mid: pageMid,
					title: entity.title,
					contentDescriptor: entity.statements.P14416.map( function ( statement ) {
						var datavalue = statement.mainsnak && statement.mainsnak.datavalue;
						return datavalue && datavalue.value ? datavalue.value.id : null;
					} ),
				} );
			} );
			hideNsfwImages();
		} );
	};

	/**
	 * Put the `.nsfw` spoiler on every media wrapper whose file is known (from
	 * `imagesData`) to carry P14416 statements, wire up click-to-reveal, offer a
	 * "Reveal all NSFW media" toolbox link, and drop the provisional blur.
	 *
	 * Changes from the previous version:
	 *  - Honours `stopProcessing`: an API response arriving after "Reveal all" no longer
	 *    hides everything again (only the provisional blur is cleared).
	 *  - "Reveal all" also clears the provisional blur.
	 *  - Names are looked up in a Set instead of Array#indexOf inside the loop, and
	 *    `imagesData` is no longer walked with for...in.
	 */
	var hideNsfwImages = function () {
		if ( stopProcessing ) {
			$( '.nsfw-gadget-pending' ).removeClass( 'nsfw-gadget-pending' );
			return;
		}

		var nsfwNames = new Set();
		imagesData.forEach( function ( entry ) {
			if ( entry.contentDescriptor.length > 0 ) {
				nsfwNames.add( entry.title.replace( /^File:/, '' ) );
			}
		} );

		$images = $( '[typeof^="mw:File"], .sdms-image-result, .sdms-video-result, .searchResultImage-thumbnail' ).each( function () {
			var $image = $( this );
			var src = $image.find( 'img' ).attr( 'src' ) || $image.find( 'img' ).attr( 'data-src' ) || $image.find( 'video' ).attr( 'data-mwtitle' );
			if ( !src ) {
				return;
			}
			if ( nsfwNames.has( parseImageName( src ) ) ) {
				$image.addClass( 'nsfw' );
			}
		} );

		// The first click on a spoilered wrapper reveals it instead of following the link.
		var nsfw = $( '[typeof^="mw:File"].nsfw, .sdms-image-result.nsfw, .sdms-video-result.nsfw, .searchResultImage-thumbnail.nsfw' )
			.off( 'click' )
			.one( 'click', onClick );

		if ( nsfw.length >= 1 && !document.querySelector( '#p-nsfw-reveal' ) ) {
			$( mw.util.addPortletLink( 'p-tb', '#', 'Reveal all NSFW media', 'p-nsfw-reveal' ) ).on( 'click', () => {
				stopProcessing = true;
				$( '.nsfw' ).removeClass( 'nsfw' );
				$( '.nsfw-gadget-pending' ).removeClass( 'nsfw-gadget-pending' );
				$( '#p-nsfw-reveal' ).remove();
			} );
		}

		$( '[typeof^="mw:File"].nsfw img, [typeof^="mw:File"].nsfw video, .sdms-image-result.nsfw img, .sdms-video-result.nsfw img, .searchResultImage-thumbnail.nsfw img, .searchResultImage-thumbnail.nsfw video' ).one( 'click', onClickImg );


		// It seemed like there was sometimes a flicker when switching between the classes, so put a tiny delay
		window.setTimeout( () => { $( '.nsfw-gadget-pending' ).removeClass( 'nsfw-gadget-pending' ) }, 25 );
	};
	
	/**
	 * Derive the file name (spaces instead of underscores, no "File:" prefix) from a
	 * thumbnail URL, an original-file URL, or a bare title.
	 *
	 * Changes from the previous version:
	 *  - A name that is not valid percent-encoding (e.g. a raw title such as
	 *    "100%_real.webm") no longer throws URIError; it is returned undecoded.
	 *  - The "pageN-" thumbnail prefix used for paged media is stripped, alongside the
	 *    existing "lossless-pageN-" and "langXX-" prefixes.
	 *
	 * @param {string} src
	 * @return {string}
	 */
	var parseImageName = function ( src ) {
		// Drop the trailing thumbnail segment ("/220px-Name.jpg" and prefixed variants).
		var name = src.replace( /\/(?:(?:lossless-)?page\d+-|lang[a-z]+-)?\d+px--?[^/]+$/, '' );
		// Keep only the last path segment, without any query string.
		name = name.replace( /^.*\/([^/?]+)(?:\?.*)?$/, '$1' );
		name = name.replace( /_/g, ' ' );
		try {
			return decodeURIComponent( name );
		} catch ( e ) {
			return name;
		}
	};

	// Click handler for a spoilered wrapper: if it is still marked `.nsfw`, swallow the
	// click completely and just reveal the media.
	var onClick = function ( e ) {
		var $wrapper = $( this );
		if ( !$wrapper.hasClass( 'nsfw' ) ) {
			return;
		}
		e.preventDefault();
		e.stopPropagation();
		e.stopImmediatePropagation();
		$wrapper.removeClass( 'nsfw' );
	};

	// Click handler for the img/video inside a spoiler: when exactly one `.nsfw`
	// ancestor exists, swallow the click and reveal that ancestor.
	var onClickImg = function ( e ) {
		var $spoilers = $( this ).parents( '.nsfw' );
		if ( $spoilers.length !== 1 ) {
			return;
		}
		e.preventDefault();
		e.stopPropagation();
		e.stopImmediatePropagation();
		$spoilers.removeClass( 'nsfw' );
	};

	// Bootstrap: call init() once all three of these have completed:
	//  - the DOM is ready,
	//  - the early heuristic pass (filter download plus first blur pass),
	//  - the required ResourceLoader modules (the API flavour depends on isCommons).
	// NOTE(review): with .done(), init presumably never runs if any input is rejected
	// (e.g. the filter download fails) — confirm that this is the intended behaviour.
	$.when(
		$.ready,
		earlyHeuristicPromise,
		mw.loader.using( [
			( isCommons ? 'mediawiki.api' : 'mediawiki.ForeignApi' ),
			'mediawiki.util',
		] )
	).done( init );
}( mediaWiki, jQuery ) );
Category:Commons user scripts