Skip to content

Instantly share code, notes, and snippets.

@flipphillips
Forked from unrealwill/collisionLSH.py
Created August 9, 2021 01:31
Show Gist options
  • Save flipphillips/ca863d377741e1043f94f9c7d2fcc93f to your computer and use it in GitHub Desktop.
Save flipphillips/ca863d377741e1043f94f9c7d2fcc93f to your computer and use it in GitHub Desktop.
Proof of Concept : generating collisions on a neural perceptual hash
import tensorflow as tf #We need tensorflow 2.x
import numpy as np
# Length of the hash in bits. It is also the number of units in the model's
# final Dense layer: each output unit contributes exactly one bit of the hash.
hashLength = 256
def buildModel():
    """Build the stand-in "neural perceptual hash" network used by the demo.

    The model takes a 1000-dimensional input vector, passes it through three
    300-unit SELU layers and ends in a linear Dense layer with ``hashLength``
    outputs. That last layer plays the role of the LSH hyperplanes: the sign
    of each output becomes one bit of the hash.

    Returns:
        A ``tf.keras.Sequential`` model with newly initialised weights.
    """
    # Setting the seed here would simulate a network that is known and does
    # not change between runs, e.g. tf.random.set_seed(42).
    model = tf.keras.Sequential()
    # ``shape`` must be a tuple. ``(1000)`` is just the int 1000, which the
    # documented API does not promise to accept and newer Keras releases
    # reject; ``(1000,)`` is valid everywhere.
    model.add(tf.keras.Input(shape=(1000,)))
    model.add(tf.keras.layers.Dense(300, activation=tf.nn.selu))
    model.add(tf.keras.layers.Dense(300, activation=tf.nn.selu))
    model.add(tf.keras.layers.Dense(300, activation=tf.nn.selu))
    # The last layer contains the LSH random hyperplanes (no activation, so
    # the outputs keep their sign).
    model.add(tf.keras.layers.Dense(hashLength))
    return model
#This uses random-projection LSH (aka "hyperplane LSH") on the features generated by the model
def computeNeuralHash(m, img):
    """Return the hash of ``img`` under model ``m`` as a string of bits.

    The model is called on ``img`` and the first row of its output is
    thresholded at zero: a strictly positive value yields "1", anything else
    (zero or negative) yields "0".

    Args:
        m: Callable model; ``m(img)`` must return an object with a
            ``.numpy()`` method whose result is indexable by row.
        img: Batched input for the model. Only the first row of the output
            is hashed.

    Returns:
        A ``str`` of "0"/"1" characters, one per model output.
    """
    # Named ``features`` rather than ``hash`` so the builtin is not shadowed.
    features = m(img).numpy()[0]
    return "".join("1" if value > 0 else "0" for value in features)
def demo():
    """Search for an input whose hash collides with a random target's hash.

    Steps:
      1. Build the model and hash a random 1000-dimensional target input.
      2. Derive a per-bit sign vector (``flip``) from the target hash.
      3. Start from a second, unrelated random input and run plain gradient
         descent on ``distanceBetweenHashes`` until every output is on the
         target side of zero.
      4. Print both inputs' hashes and whether they are equal.

    Everything is reported through ``print``; nothing is returned.
    """
    m = buildModel()
    # Seeding NumPy here (e.g. np.random.seed(340)) makes the run repeatable.
    targetimg = np.expand_dims(np.random.randn(1000), 0)
    print(targetimg.shape)
    targetstringhash = computeNeuralHash(m, targetimg)
    print("targetstringhash : ")
    print(targetstringhash)

    # +1 where the target bit is "0" (output must go negative), -1 where it
    # is "1" (output must go positive). output * flip is then negative exactly
    # when the bit matches the target.
    flip = [1.0 if x == "0" else -1.0 for x in targetstringhash]
    print("flip : ")
    print(flip)

    img = np.expand_dims(np.random.randn(1000), 0)

    # To make the hash more stable we add a gap:
    # for a target bit of 1 the output is pushed into [gap, +infinity],
    # for a target bit of 0 the output is pushed into [-infinity, -gap];
    # anything else is penalised.
    gap = 0.1

    # Standard gradient descent. Possible improvements: an L-BFGS-B optimizer
    # with bound constraints, an extra loss term to stay close to a provided
    # image, or a GAN loss to make the result look "natural".
    learning_rate = 1e-2

    # tf.nn.l2_loss returns sum(t ** 2) / 2. A single bit on the wrong side of
    # zero contributes at least gap ** 2 / 2, so the hashes are only
    # guaranteed to match once the loss is strictly below that value.
    # (Stopping at gap ** 2 could leave one bit wrong.)
    collision_threshold = gap * gap / 2

    loss = distanceBetweenHashes(m, img, flip, gap).numpy()
    print("loss : ")
    print(loss)
    while loss >= collision_threshold:
        grad = gradient(m, img, flip, gap)
        img -= learning_rate * grad
        # Re-evaluate after every step so the loop exits on the loss of the
        # image that is actually hashed below.
        loss = distanceBetweenHashes(m, img, flip, gap).numpy()
        print("loss : ")
        print(loss)

    imgstringhash = computeNeuralHash(m, img)
    print("img : ")
    print(img)
    # This is not zero: we have found a totally different image.
    print("targetimg - img : ")
    print(targetimg - img)
    print("targetstringhash : ")
    print(targetstringhash)
    print("imgstringhash : ")
    print(imgstringhash)
    # We should get True if a collision has been successfully produced.
    print("targetstringhash == imgstringhash : ")
    print(targetstringhash == imgstringhash)
def distanceBetweenHashes(model, input, flip, gap):
    """Hinge-style loss measuring how far ``input``'s hash is from the target.

    Each model output is multiplied by the matching entry of ``flip`` and
    shifted by ``gap``; only positive results (violations) survive the ReLU,
    and ``tf.nn.l2_loss`` reduces them to a single scalar.

    Args:
        model: Callable model producing the pre-hash outputs.
        input: Input passed to ``model``.
        flip: Per-output sign multipliers.
        gap: Margin added before the ReLU.

    Returns:
        The scalar loss returned by ``tf.nn.l2_loss``.
    """
    signed_margin = model(input) * flip + gap
    violations = tf.nn.relu(signed_margin)
    return tf.nn.l2_loss(violations)
def gradient(model, x, flip, gap):
    """Return d(distanceBetweenHashes)/d(x) as a NumPy array.

    Args:
        model: Callable model producing the pre-hash outputs.
        x: Input to differentiate with respect to; converted to a float32
            tensor before the forward pass.
        flip: Per-output sign multipliers forwarded to the loss.
        gap: Margin forwarded to the loss.

    Returns:
        The gradient of the loss with respect to ``x``, via ``.numpy()``.
    """
    x_tensor = tf.convert_to_tensor(x, dtype=tf.float32)
    with tf.GradientTape() as tape:
        # A plain tensor is not tracked automatically, so watch it explicitly.
        tape.watch(x_tensor)
        loss_value = distanceBetweenHashes(model, x_tensor, flip, gap)
    grad_tensor = tape.gradient(loss_value, x_tensor)
    return grad_tensor.numpy()
# Run the collision demo only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment