Commit 5b11049d authored by Steve Tjoa's avatar Steve Tjoa

cleaned up lsh

parent 06787859
{
"metadata": {
"name": "",
"signature": "sha256:5d2db016d8cce3a76c015e44ee4f8e4c94a94e3dbfbc27ad7e4bab24b8b01c31"
"signature": "sha256:d387e173a98616453a4425699fda77d47ae87ed7864fdc28e3bbb8283182abea"
},
"nbformat": 3,
"nbformat_minor": 0,
......@@ -34,8 +34,7 @@
"cell_type": "code",
"collapsed": false,
"input": [
"from sklearn.random_projection import GaussianRandomProjection\n",
"import essentia.standard as ess\n",
"import librosa\n",
"import os\n",
"import os.path"
],
......@@ -92,8 +91,8 @@
"cell_type": "code",
"collapsed": false,
"input": [
"def hash_func(vecs, model):\n",
" bools = model.transform(vecs) > 0\n",
"def hash_func(vecs, projections):\n",
" bools = dot(vecs, projections.T) > 0\n",
" return [bool2int(bool_vec) for bool_vec in bools]"
],
"language": "python",
......@@ -120,8 +119,9 @@
"cell_type": "code",
"collapsed": false,
"input": [
"model = GaussianRandomProjection(n_components=3)\n",
"model.fit(randn(10000, 8))"
"X = randn(10,100)\n",
"P = randn(3,100)\n",
"hash_func(X, P)"
],
"language": "python",
"metadata": {},
......@@ -131,33 +131,12 @@
"output_type": "pyout",
"prompt_number": 6,
"text": [
"GaussianRandomProjection(eps=0.1, n_components=3, random_state=None)"
"[5, 2, 1, 3, 5, 1, 7, 2, 4, 4]"
]
}
],
"prompt_number": 6
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"X = randn(12, 8)\n",
"hash_func(X, model)"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 7,
"text": [
"[3, 5, 2, 6, 0, 0, 6, 1, 0, 2, 1, 7]"
]
}
],
"prompt_number": 7
},
{
"cell_type": "heading",
"level": 3,
......@@ -175,10 +154,7 @@
" def __init__(self, hash_size, dim):\n",
" self.table = dict()\n",
" self.hash_size = hash_size\n",
" self.dim = dim # TODO is this necessary?\n",
" #self.projections = randn(self.hash_size, self.dim)\n",
" self.projections = GaussianRandomProjection(n_components=hash_size)\n",
" self.projections.fit(randn(10000, dim))\n",
" self.projections = randn(self.hash_size, dim)\n",
"\n",
" def add(self, vecs, label):\n",
" entry = {'label': label}\n",
......@@ -200,39 +176,7 @@
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 8
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"t = Table(2, 8)\n",
"t.add(randn(5, 8), '1')\n",
"t.add(randn(5, 8), '2')\n",
"t.query(randn(2,8))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"metadata": {},
"output_type": "pyout",
"prompt_number": 9,
"text": [
"[{'label': '1'},\n",
" {'label': '1'},\n",
" {'label': '1'},\n",
" {'label': '2'},\n",
" {'label': '2'},\n",
" {'label': '1'},\n",
" {'label': '1'},\n",
" {'label': '1'},\n",
" {'label': '2'},\n",
" {'label': '2'}]"
]
}
],
"prompt_number": 9
"prompt_number": 7
},
{
"cell_type": "code",
......@@ -243,10 +187,9 @@
" def __init__(self, dim):\n",
" self.num_tables = 4\n",
" self.hash_size = 8\n",
" self.dim = dim\n",
" self.tables = list()\n",
" for i in range(self.num_tables):\n",
" self.tables.append(Table(self.hash_size, self.dim))\n",
" self.tables.append(Table(self.hash_size, dim))\n",
" \n",
" def add(self, vecs, label):\n",
" for table in self.tables:\n",
......@@ -265,33 +208,7 @@
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 10
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"lsh = LSH(8)\n",
"lsh.add(randn(10,8), '1')\n",
"lsh.add(randn(10,8), '2')\n",
"lsh.describe()\n",
"#lsh.query(randn(2,8))"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "stream",
"stream": "stdout",
"text": [
"{110: [{'label': '2'}], 3: [{'label': '2'}], 36: [{'label': '1'}], 133: [{'label': '1'}, {'label': '2'}], 106: [{'label': '2'}], 151: [{'label': '2'}], 205: [{'label': '1'}], 78: [{'label': '1'}], 176: [{'label': '1'}, {'label': '1'}], 145: [{'label': '2'}], 180: [{'label': '2'}], 46: [{'label': '1'}], 87: [{'label': '2'}], 185: [{'label': '2'}], 250: [{'label': '1'}], 5: [{'label': '1'}], 74: [{'label': '2'}], 149: [{'label': '1'}]}\n",
"{163: [{'label': '1'}], 9: [{'label': '2'}], 113: [{'label': '2'}], 40: [{'label': '2'}], 201: [{'label': '1'}], 74: [{'label': '1'}], 203: [{'label': '1'}, {'label': '2'}, {'label': '2'}], 44: [{'label': '2'}], 45: [{'label': '2'}], 142: [{'label': '2'}], 48: [{'label': '2'}], 209: [{'label': '1'}], 18: [{'label': '2'}], 147: [{'label': '1'}], 84: [{'label': '1'}], 215: [{'label': '1'}], 92: [{'label': '1'}], 247: [{'label': '1'}]}\n",
"{240: [{'label': '2'}], 69: [{'label': '2'}], 71: [{'label': '2'}], 43: [{'label': '2'}], 110: [{'label': '1'}], 79: [{'label': '2'}], 144: [{'label': '1'}], 177: [{'label': '1'}], 178: [{'label': '2'}], 148: [{'label': '1'}], 86: [{'label': '1'}], 56: [{'label': '2'}], 184: [{'label': '1'}, {'label': '1'}], 84: [{'label': '2'}], 26: [{'label': '2'}], 111: [{'label': '2'}], 156: [{'label': '1'}], 94: [{'label': '1'}], 127: [{'label': '1'}]}\n",
"{65: [{'label': '2'}], 99: [{'label': '1'}, {'label': '1'}], 132: [{'label': '1'}], 214: [{'label': '1'}], 231: [{'label': '1'}], 72: [{'label': '2'}], 75: [{'label': '1'}], 140: [{'label': '1'}], 45: [{'label': '1'}], 168: [{'label': '2'}], 149: [{'label': '2'}], 150: [{'label': '1'}, {'label': '2'}], 87: [{'label': '2'}], 88: [{'label': '2'}], 164: [{'label': '2'}], 186: [{'label': '2'}], 86: [{'label': '1'}, {'label': '2'}]}\n"
]
}
],
"prompt_number": 11
"prompt_number": 8
},
{
"cell_type": "code",
......@@ -302,30 +219,23 @@
" def __init__(self, training_files):\n",
" self.frame_size = 4096\n",
" self.hop_size = 4000\n",
" self.fv_size = 1000\n",
" self.fv_size = 12\n",
" self.lsh = LSH(self.fv_size)\n",
" self.training_files = training_files\n",
" self.num_features_in_file = dict()\n",
" for f in self.training_files:\n",
" self.num_features_in_file[f] = 0\n",
" \n",
" def get_features(self, frame):\n",
" hamming_window = ess.Windowing(type='hamming')\n",
" spectrum = ess.Spectrum()\n",
" return spectrum(hamming_window(frame))[:self.fv_size]\n",
" \n",
" def train(self):\n",
" for filepath in self.training_files:\n",
" x = ess.MonoLoader(filename=filepath)()\n",
" features = [self.get_features(frame) \n",
" for frame in ess.FrameGenerator(x, frameSize=self.frame_size, hopSize=self.hop_size)]\n",
" x, fs = librosa.load(filepath)\n",
" features = librosa.feature.chromagram(x, fs, n_fft=self.frame_size, hop_length=self.hop_size).T\n",
" self.lsh.add(features, filepath)\n",
" self.num_features_in_file[filepath] += len(features)\n",
" \n",
" def query(self, filepath):\n",
" x = ess.MonoLoader(filename=filepath)()\n",
" features = [self.get_features(frame) \n",
" for frame in ess.FrameGenerator(x, frameSize=self.frame_size, hopSize=self.hop_size)]\n",
" x, fs = librosa.load(filepath)\n",
" features = librosa.feature.chromagram(x, fs, n_fft=self.frame_size, hop_length=self.hop_size).T\n",
" results = self.lsh.query(features)\n",
" print 'num results', len(results)\n",
"\n",
......@@ -342,7 +252,7 @@
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 12
"prompt_number": 9
},
{
"cell_type": "markdown",
......@@ -361,7 +271,7 @@
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 13
"prompt_number": 10
},
{
"cell_type": "markdown",
......@@ -384,11 +294,11 @@
"output_type": "stream",
"stream": "stdout",
"text": [
"num results 69490\n"
"num results 82209\n"
]
}
],
"prompt_number": 27
"prompt_number": 11
},
{
"cell_type": "markdown",
......@@ -411,72 +321,63 @@
"output_type": "stream",
"stream": "stdout",
"text": [
"../train/bach_s3_3_szeryng_02.wav 16.9279279279\n",
"../train/bach_s3_3_szeryng_04.wav 14.2702702703\n",
"../train/Beethoven_vln_sonata5_Zukerman_03.wav 13.2526315789\n",
"../train/bach_s3_3_szeryng_03.wav 12.6036036036\n",
"../train/Beethoven_vln_sonata5_Oistrakh_02.wav 12.2755905512\n",
"../train/bach_s3_3_szeryng_01.wav 11.8018018018\n",
"../train/Beethoven_vln_sonata5_Zukerman_02.wav 11.7111111111\n",
"../train/Beethoven_vln_sonata5_Oistrakh_03.wav 11.6477272727\n",
"../train/Beethoven_vln_sonata5_Zukerman_04.wav 11.5739130435\n",
"../train/moonlight.wav 10.0018181818\n",
"../train/Beethoven_vln_sonata5_Oistrakh_04.wav 9.66071428571\n",
"../train/brahms_s1_1_perlman_06.wav 8.68468468468\n",
"../train/Beethoven_vln_sonata5_Francescatti_03.wav 8.59036144578\n",
"../train/brahms_s1_1_perlman_01.wav 8.45945945946\n",
"../train/Beethoven_vln_sonata5_Francescatti_02.wav 8.4375\n",
"../train/Beethoven_vln_sonata5_Francescatti_04.wav 8.30188679245\n",
"../train/Beethoven_vln_sonata5_Zukerman_01.wav 8.12264150943\n",
"../train/Beethoven_vln_sonata5_Oistrakh_01.wav 8.06140350877\n",
"../train/brandenburg3_01.wav 7.99497487437\n",
"../train/brahms_rhapsody_01.wav 7.61144578313\n",
"../train/Bach Vln Sonata1 - Fischbach 2004 - 02.wav 7.51626898048\n",
"../train/Beethoven_vln_sonata5_Zukerman_05.wav 7.45138888889\n",
"../train/Beethoven_vln_sonata5_Francescatti_01.wav 7.36538461538\n",
"../train/Bach Vln Partita3 - Milstein 1955 - 03.wav 7.24712643678\n",
"../train/brahms_rhapsody_02.wav 7.2\n",
"../train/brahms_s1_1_perlman_04.wav 6.90990990991\n",
"../train/dont_stop_believin.wav 6.76938369781\n",
"../train/konstantine.wav 6.60836501901\n",
"../train/Bach Vln Sonata1 - Milstein 1954 - 02.wav 6.54965357968\n",
"../train/brahms_s1_1_perlman_03.wav 6.43243243243\n",
"../train/brahms_s1_1_perlman_05.wav 6.10810810811\n",
"../train/lady_madonna_crop.wav 6.0625\n",
"../train/brahms_s1_1_perlman_02.wav 5.85585585586\n",
"../train/bach_s3_3_szeryng_05.wav 5.85585585586\n",
"../train/Beethoven_vln_sonata5_Francescatti_05.wav 5.81034482759\n",
"../train/bach_p3_1_perlman_04.wav 5.7027027027\n",
"../train/lady_madonna.wav 5.40662650602\n",
"../train/Bach Vln Partita3 - Fischbach 2004 - 03.wav 5.37709497207\n",
"../train/bach_p3_1_heifetz_04.wav 5.34234234234\n",
"../train/bach_p3_1_perlman_05.wav 5.27027027027\n",
"../train/Beethoven_vln_sonata5_Oistrakh_05.wav 5.1954887218\n",
"../train/bach_p3_1_heifetz_05.wav 5.10810810811\n",
"../train/bach_p3_1_perlman_06.wav 5.07207207207\n",
"../train/bach_p3_1_heifetz_01.wav 4.90990990991\n",
"../train/bach_s3_3_szeryng_06.wav 4.8018018018\n",
"../train/bach_p3_1_perlman_02.wav 4.55855855856\n",
"../train/bach_p3_1_perlman_01.wav 4.54954954955\n",
"../train/bach_p3_1_heifetz_02.wav 4.38738738739\n",
"../train/Bach Vln Partita3 - Fischbach 2004 - 01.wav 4.38580931264\n",
"../train/office_theme.wav 4.33734939759\n",
"../train/bach_p3_1_perlman_03.wav 3.99099099099\n",
"../train/Bach Vln Partita3 - Milstein 1955 - 01.wav 3.89974293059\n",
"../train/bach_p3_1_heifetz_03.wav 3.73873873874\n"
"../train/bach_s3_3_szeryng_03.wav 27.5178571429\n",
"../train/lady_madonna.wav 25.9036144578\n",
"../train/bach_s3_3_szeryng_01.wav 24.6964285714\n",
"../train/lady_madonna_crop.wav 24.6785714286\n",
"../train/bach_s3_3_szeryng_02.wav 24.0892857143\n",
"../train/bach_s3_3_szeryng_04.wav 23.1428571429\n",
"../train/brahms_s1_1_perlman_01.wav 23.0535714286\n",
"../train/Beethoven_vln_sonata5_Oistrakh_01.wav 22.7543859649\n",
"../train/konstantine.wav 22.0839694656\n",
"../train/bach_s3_3_szeryng_06.wav 21.8571428571\n",
"../train/Beethoven_vln_sonata5_Francescatti_02.wav 21.828125\n",
"../train/Beethoven_vln_sonata5_Oistrakh_03.wav 21.7272727273\n",
"../train/Beethoven_vln_sonata5_Zukerman_01.wav 21.1320754717\n",
"../train/Beethoven_vln_sonata5_Francescatti_04.wav 20.7924528302\n",
"../train/Beethoven_vln_sonata5_Francescatti_03.wav 20.7380952381\n",
"../train/brahms_s1_1_perlman_02.wav 20.6785714286\n",
"../train/Beethoven_vln_sonata5_Oistrakh_04.wav 20.5714285714\n",
"../train/Bach Vln Sonata1 - Fischbach 2004 - 02.wav 20.3939393939\n",
"../train/brahms_s1_1_perlman_06.wav 20.3571428571\n",
"../train/Beethoven_vln_sonata5_Oistrakh_02.wav 20.3492063492\n",
"../train/bach_s3_3_szeryng_05.wav 19.9464285714\n",
"../train/Beethoven_vln_sonata5_Zukerman_03.wav 19.9166666667\n",
"../train/Beethoven_vln_sonata5_Zukerman_02.wav 19.776119403\n",
"../train/Bach Vln Sonata1 - Milstein 1954 - 02.wav 19.1203703704\n",
"../train/brahms_rhapsody_01.wav 19.0361445783\n",
"../train/Beethoven_vln_sonata5_Oistrakh_05.wav 17.9090909091\n",
"../train/Beethoven_vln_sonata5_Zukerman_04.wav 17.6551724138\n",
"../train/brahms_rhapsody_02.wav 17.5058823529\n",
"../train/Beethoven_vln_sonata5_Francescatti_05.wav 17.1034482759\n",
"../train/Beethoven_vln_sonata5_Francescatti_01.wav 17.0\n",
"../train/office_theme.wav 16.9236947791\n",
"../train/Bach Vln Partita3 - Milstein 1955 - 01.wav 16.8820512821\n",
"../train/Beethoven_vln_sonata5_Zukerman_05.wav 16.3194444444\n",
"../train/brahms_s1_1_perlman_05.wav 16.0714285714\n",
"../train/bach_p3_1_heifetz_03.wav 15.8928571429\n",
"../train/Bach Vln Partita3 - Milstein 1955 - 03.wav 15.2528735632\n",
"../train/bach_p3_1_perlman_03.wav 14.8214285714\n",
"../train/moonlight.wav 14.8145454545\n",
"../train/bach_p3_1_perlman_04.wav 13.0535714286\n",
"../train/brahms_s1_1_perlman_03.wav 12.125\n",
"../train/Bach Vln Partita3 - Fischbach 2004 - 01.wav 12.0444444444\n",
"../train/brahms_s1_1_perlman_04.wav 11.6071428571\n",
"../train/bach_p3_1_heifetz_04.wav 11.2321428571\n",
"../train/Bach Vln Partita3 - Fischbach 2004 - 03.wav 10.9273743017\n",
"../train/dont_stop_believin.wav 9.9246031746\n",
"../train/bach_p3_1_heifetz_02.wav 9.89285714286\n",
"../train/brandenburg3_01.wav 8.89\n",
"../train/bach_p3_1_perlman_02.wav 7.60714285714\n",
"../train/bach_p3_1_heifetz_01.wav 7.55357142857\n",
"../train/bach_p3_1_heifetz_05.wav 7.10714285714\n",
"../train/bach_p3_1_perlman_01.wav 6.89285714286\n",
"../train/bach_p3_1_perlman_06.wav 6.55357142857\n",
"../train/bach_p3_1_perlman_05.wav 5.17857142857\n"
]
}
],
"prompt_number": 28
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 15
"prompt_number": 12
}
],
"metadata": {}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment