Published 15 Sep. 2025
Tags: Web
import hashlib


def h(path, chunk_size=1 << 20):
    """Return the SHA-256 hex digest of the file at ``path``.

    The file is opened in a ``with`` block so the handle is always closed,
    and it is hashed in ``chunk_size``-byte pieces so the whole file is never
    held in memory at once.
    """
    digest = hashlib.sha256()
    with open(path, 'rb') as f:
        # iter() with a sentinel keeps calling f.read() until it returns b'' (EOF)
        for chunk in iter(lambda: f.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()


# equal digests <=> (for all practical purposes) byte-identical files
duplicate = h("cat1.gif") == h("cat2.gif")

# So when cat1.gif has exactly the same contents as cat2.gif, we know they’re duplicates of each other.
from PIL import Image
import imagehash


def sample_frames_evenly(gif_path, num_samples=5):
    """Copy up to ``num_samples`` frames spread evenly across an image file.

    Returns a list of copied frames. A single-frame image yields a one-item
    list. When no frame could be sampled (for example ``num_samples <= 0`` on
    an animated image) the result is ``None``.
    """
    frames = []
    with Image.open(gif_path) as img:
        try:
            frame_count = img.n_frames
        except Exception:
            # best-effort: if the frame count can't be read, treat the file
            # as a still. `Exception` (not a bare `except`) so Ctrl-C and
            # SystemExit still propagate.
            frame_count = 1

        if frame_count == 1:
            # stills...
            frames.append(img.copy())
            return frames

        if frame_count <= num_samples:
            # fewer frames than requested samples: take every frame
            frame_indices = list(range(frame_count))
        elif num_samples <= 0:
            # nothing requested; also avoids dividing by zero below
            frame_indices = []
        else:
            step = frame_count / num_samples
            frame_indices = [int(i * step) for i in range(num_samples)]

        for frame_idx in frame_indices:
            img.seek(frame_idx)
            # copy so the frame stays usable after the file is closed
            frames.append(img.copy())

    return frames if frames else None


def get_gif_frame_hashes(filepath, sample_frames=5):
    """Return ``imagehash.average_hash`` of each sampled frame, as strings.

    Returns ``None`` when ``sample_frames_evenly`` produced no frames.
    """
    frames = sample_frames_evenly(filepath, sample_frames)
    if not frames:
        return None
    return [str(imagehash.average_hash(frame)) for frame in frames]


# The next step in processing these is to store which hashes we’ve already seen as we’re looping through each GIF, and then remove any GIF containing a hash we’ve seen before. And hey presto - no more duplicates!
from transformers import pipeline
import torch

# this example assumes you've got a GPU capable of running the model.
# you should also be able to run it on CPU instead, but invoking it
# would look a bit different.

## as an aside, my graphics card made a coil whine at a pitch I'd
## never heard it make before while taking on this workload.
## was it... enjoying itself?

CLASSIFIER = pipeline(
    "image-classification",
    model="AdamCodd/vit-base-nsfw-detector",
    device=0  # use device=1 if your GPU is not the first one
)


def is_nsfw(gif_path, num_frames=5, threshold=0.4):
    """Return True if any sampled frame's NSFW score reaches ``threshold``.

    Samples up to ``num_frames`` frames with ``sample_frames_evenly`` (from
    the previous example) and runs each through ``CLASSIFIER``. Returns False
    when no frames could be sampled.
    """
    # from the previous example
    frames = sample_frames_evenly(gif_path, num_frames)
    if not frames:
        return False

    for frame in frames:
        # gotta make sure frames are RGB for our classifier
        if frame.mode != 'RGB':
            frame = frame.convert('RGB')

        for result in CLASSIFIER(frame):
            # NOTE(review): compared case-insensitively because the exact
            # casing of the model's labels isn't visible here — confirm
            # against the model's id2label config.
            if result['label'].upper() == "NSFW" and result['score'] >= threshold:
                # one frame over the threshold is enough; skip the rest
                return True

    return False


# Something that this doesn’t catch is NSFW text inside GIFs. For example, there are a few banners in the dataset with BIG letters making declarations like “I LOVE ****”.
?safe=no
query param: gifs.alex.works?safe=no. Don’t say I didn’t warn you!)
const GIF_URL = 'goku.gif'
const BASE_ROW_HEIGHT = 60
const PADDING = 8
// Base scroll speed in pixels per frame, before each row's speedMul is
// applied. draw() reads this constant but the snippet never declared it;
// 1 is a placeholder value, tune to taste.
const PAN_PER_FRAME = 1

let rows = []
let panX = 0
let sourceImg

function preload() {
  sourceImg = loadImage(GIF_URL)
}

function setup() {
  createCanvas(windowWidth, windowHeight)
  buildRows()
  fillInitialCells()
}

// Stack rows from the top of the canvas until its height is covered.
function buildRows() {
  rows = []
  let y = 0
  while (y < height) {
    // add some visual interest by randomising height of the
    // row, as well as its panning speed multiplier
    const h = BASE_ROW_HEIGHT + random(0, 50)
    rows.push({ y, height: h, speedMul: random(1, 2.5), offsetX: 0, cells: [] })
    y += h + PADDING
  }
}

// Append one cell to the row, scaled to the row's height at the image's aspect ratio.
function addCell(row) {
  const aspect = sourceImg.width / sourceImg.height
  const w = Math.floor(row.height * aspect)
  row.cells.push({ width: w, img: sourceImg })
}

function fillInitialCells() {
  // fill a little beyond screen width for smoother start
  rows.forEach(row => {
    while (rowWidth(row) < width * 1.2) addCell(row)
  })
}

// Total width of a row's cells, with PADDING between (not before) cells.
function rowWidth(row) {
  return row.cells.reduce((sum, c, i) => sum + c.width + (i > 0 ? PADDING : 0), 0)
}

function draw() {
  background(0)
  if (!sourceImg) return

  panX += PAN_PER_FRAME

  rows.forEach(row => {
    const rowPan = panX * row.speedMul

    // need another cell appearing on the right?
    if (row.offsetX + rowWidth(row) < width + rowPan) {
      addCell(row)
      removeOffscreen(row, rowPan)
    }

    push()
    translate(-rowPan, row.y)
    let x = row.offsetX
    row.cells.forEach(cell => {
      image(cell.img, x, 0, cell.width, row.height)
      x += cell.width + PADDING
    })
    pop()
  })
}

function removeOffscreen(row, rowPan) {
  // recycle cells fully scrolled past the left edge
  while (row.cells.length) {
    const first = row.cells[0]
    const firstRight = row.offsetX + first.width
    if (firstRight < rowPan) {
      // advance offsetX so the remaining cells keep their on-screen position
      row.offsetX += first.width + PADDING
      row.cells.shift()
    } else {
      break
    }
  }
}

// Run this on p5.js’s online editor.
sketch.js
// file to see how this works.

const GIF_URL = 'goku.gif'

let gifImg
let buffer, crt

function preload() {
  gifImg = loadImage(GIF_URL)
}

function setup() {
  createCanvas(windowWidth, windowHeight)
  initBuffer()
}

// Create the offscreen WEBGL buffer and the CRT filter shader bound to it.
function initBuffer() {
  // create a buffer at a max width of 1920 for our draws. we don't
  // want to exceed this width because otherwise too many GIFs will
  // be loaded at once and we'll tank performance.
  const bw = min(windowWidth, 1920)
  const scale = windowWidth / bw
  // shrink the height by the same factor as the width
  const bh = Math.floor(windowHeight / scale)
  buffer = createGraphics(bw, bh, WEBGL)
  buffer.pixelDensity(1)

  // instantiate the shader
  crt = buffer.createFilterShader(CRT_SHADER_SRC)
}

function draw() {
  buffer.background(0)

  // tile the gif to fill the buffer for a prettier example
  if (gifImg) {
    const tileW = gifImg.width
    const tileH = gifImg.height
    buffer.push()
    buffer.imageMode(CORNER)
    // note: WEBGL origin is center, so iterate from -width/2,-height/2
    const startX = -buffer.width / 2
    const startY = -buffer.height / 2
    for (let ty = startY; ty < buffer.height / 2; ty += tileH) {
      for (let tx = startX; tx < buffer.width / 2; tx += tileW) {
        buffer.image(gifImg, tx, ty, tileW, tileH)
      }
    }
    buffer.pop()
  }

  // apply the shader
  if (crt) buffer.filter(crt)

  background('black')
  // draw the image back to the main buffer (the onscreen canvas)
  // and scale it so it fits
  image(buffer, 0, 0, width, height)
}

// https://babylonjs.medium.com/retro-crt-shader-a-post-processing-effect-study-1cb3f783afbc
const CRT_SHADER_SRC = `
precision highp float;

uniform sampler2D tex0;
varying vec2 vTexCoord;

vec2 curveRemapUV(vec2 uv) {
  // as we near the edge of our screen apply greater distortion using a cubic function
  uv = 2.0 * uv - 1.0;
  vec2 curvature = vec2(6.0);
  vec2 offset = abs(uv.yx) / curvature;
  uv = uv + uv * offset * offset;
  uv = uv * 0.5 + 0.5;
  return uv;
}

vec4 adjBrightness(vec2 inUV, vec4 clr) {
  float r = 0.5;
  vec2 cornerUV = min(2.0 * (0.5 - abs(inUV - vec2(0.5))) + r, 1.0);
  float br = pow(cornerUV.x * cornerUV.y, 2.2) + 0.45;
  br = clamp(br * br * br * br + 0.55, 0.0, 1.0);
  return clr * br;
}

void main() {
  vec2 remappedUV = curveRemapUV(vTexCoord);
  vec4 baseColor = texture2D(tex0, remappedUV);
  if (remappedUV.x < 0.0 || remappedUV.y < 0.0 || remappedUV.x > 1.0 || remappedUV.y > 1.0) {
    gl_FragColor = vec4(0.0, 0.0, 0.0, 1.0);
  } else {
    gl_FragColor = adjBrightness(vTexCoord, baseColor);
  }
  gl_FragColor *= abs(sin(remappedUV.y * 1024.0));
  gl_FragColor.a = 1.0;
}
`

// Run this on p5.js’s online editor.
Use the `?shader=no` query param on the site if you want to see what it looks like without the shader: gifs.alex.works?shader=no
let stars = []

function setup() {
  createCanvas(windowWidth, windowHeight)
  initStars()
}

function draw() {
  background('black')
  drawStars()
}

// Populate `stars` with randomly placed points; the count scales with canvas
// area and is capped at maxStars.
function initStars() {
  const maxStars = 2000
  const density = 1000 // bigger = fewer stars
  const target = Math.min((width * height) / density, maxStars)
  for (let i = 0; i < target; i++) {
    stars.push({
      x: random(0, width),
      y: random(0, height),
      speed: random(0.1, 0.5),
      size: random(0.5, 3),
    })
  }
}

// Move every star left by its speed and draw all of them as one POINTS shape.
function drawStars() {
  stroke(255, 255, 255, 150)
  strokeWeight(2)
  beginShape(POINTS)
  stars.forEach(s => {
    s.x -= s.speed
    if (s.x < 0) {
      // wrap to the right edge at a fresh random height
      s.x = width
      s.y = random(0, height)
    }
    vertex(s.x, s.y)
  })
  endShape()
}

// Run this on p5.js’s online editor.
let data;
let i = 0; // index of the next URL to check (also the count started so far)
let ok = 0; // images that loaded
let bad = 0; // images that failed to load

function preload() {
  data = loadJSON('load from gifs api'); // Replace with actual API call
}

function setup() {
  createCanvas(600, 200);
  next();
}

// Try to load the next URL, tally the outcome, then queue the following check.
// Only one image is in flight at a time.
function next() {
  if (!data || i >= data.urls.length) return;
  const url = data.urls[i++];
  loadImage(
    url,
    () => {
      ok++;
      schedule();
    },
    _err => {
      bad++;
      // here's where i made a request to backend
      // to mark gif as invalid
      schedule();
    }
  );
}

// Queue the next check after a 10 ms timeout.
function schedule() {
  setTimeout(next, 10);
}

function draw() {
  if (!data) return;
  background(0);
  fill(255);
  textAlign(CENTER, CENTER);
  const total = data.urls.length;
  text(`Checked ${i}/${total}`, width / 2, height / 2 - 20);
  text(`valid ${ok} invalid ${bad}`, width / 2, height / 2 + 10);
}

// Run this on p5.js’s online editor.
Leave a comment and tell me what you think of my totally rad page!