Skip to content

Instantly share code, notes, and snippets.

@twobob
Created September 23, 2024 01:32
Show Gist options
  • Save twobob/be412b69060fb49296554623060d8792 to your computer and use it in GitHub Desktop.
Save twobob/be412b69060fb49296554623060d8792 to your computer and use it in GitHub Desktop.
creating meaningful categories based on classifications parsed from a song name
from collections import Counter
# Standard-library helpers used to locate the track files
import os
import glob
import re

# Directory that holds the audio tracks whose file names carry the tags
dir_path = r"E:\Dubstep_diffusion\tracks"

# Every .wav and .mp3 file directly inside dir_path (glob does not recurse here)
text = glob.glob(dir_path + '/*.wav') + glob.glob(dir_path + '/*.mp3')

# Alias kept from the original script: one entry per track path
lines = text

# Retained from the original script; nothing in this script populates it
all_words = set()


def extract_title_words(path):
    """Return the upper-cased tag words contained in one track file name.

    Only the final path component is tokenised. Tokenising the whole path
    would glue the first tag of every title to the directory name, and that
    fused token would never count as a category.

    Args:
        path: Path (or bare file name) of a track; both ``/`` and ``\\`` are
            accepted as separators regardless of the running platform.

    Returns:
        A list of upper-cased words, in the order they appear in the title.
    """
    # Keep the file name only, whichever separator style the path uses
    title = re.split(r"[\\/]", path)[-1]
    # Remove the file extension and the key at the end (the part after '=')
    title = title.rsplit('.', 1)[0].rsplit('=', 1)[0]
    # Removes (, ), [, ], commas, and periods
    title = re.sub(r"[()\[\],.]", "", title)
    # Underscores act as word separators; split() also collapses repeated
    # whitespace, so no separate normalisation pass is needed
    return title.replace('_', ' ').upper().split()


# Counter to store word occurrences across all track titles
word_count = Counter()
for line in lines:
    word_count.update(extract_title_words(line))
# Function to load stopwords from a file
def load_stopwords(filename):
    """Read a stopword list, one entry per line, into an upper-cased set.

    Args:
        filename: Path of the text file to read.

    Returns:
        A set holding every non-blank line, stripped of surrounding
        whitespace and converted to upper case.

    Raises:
        OSError: If the file cannot be opened.
    """
    # utf-8-sig reads plain ASCII/UTF-8 and also discards a leading byte-order
    # mark, so the first entry is not corrupted by editors that write one
    with open(filename, 'r', encoding='utf-8-sig') as file:
        stripped = (line.strip() for line in file)
        # Blank lines would otherwise add a meaningless '' entry
        return {word.upper() for word in stripped if word}
# Stopwords (upper-cased) that must never be reported as categories
stopwords = load_stopwords('title_parse_stopwords.txt')

# Tempo tags such as "140BPM" are not categories either
bpm_tag = re.compile(r'^\d+BPM$')

# Keep a word only when it occurs at least twice, has at least three
# characters, contains no backslash (i.e. is not a path fragment), is not a
# stopword and is not a tempo tag
filtered_words = {}
for word, count in word_count.items():
    if count <= 1 or len(word) <= 2:
        continue
    if '\\' in word or word in stopwords:
        continue
    if bpm_tag.match(word):
        continue
    filtered_words[word] = count

# Most frequent first; words with equal counts keep their first-seen order
sorted_filtered_words = sorted(filtered_words.items(), key=lambda pair: pair[1], reverse=True)

# Report how many categories survived the filter
print(len(sorted_filtered_words))
# Save sorted_filtered_words to lexicon.txt, one "WORD: count" entry per line.
# The encoding is pinned to UTF-8 so a word containing non-ASCII characters
# (words come from file names) cannot fail to encode under a legacy platform
# default code page.
with open('lexicon.txt', 'w', encoding='utf-8') as file:
    file.writelines(f"{word}: {count}\n" for word, count in sorted_filtered_words)
print("Lexicon has been saved to 'lexicon.txt'")
# Optional: Display the contents of the file
#print("\nContents of lexicon.txt:")
#with open('lexicon.txt', 'r') as file:
# print(file.read())
from wordcloud import WordCloud
import matplotlib.pyplot as plt
# Word -> count mapping built from the (word, count) pairs
word_freq = {word: count for word, count in sorted_filtered_words}

# Configure a 4096x2048 white-background cloud limited to 600 words, then
# lay it out from the frequencies
renderer = WordCloud(
    width=4096,
    height=2048,
    max_words=600,
    background_color='white',
)
wordcloud = renderer.generate_from_frequencies(word_freq)

# The image is only written to disk; to preview it interactively instead,
# display it with plt.imshow(wordcloud, interpolation='bilinear')

# Save the word cloud as an image file
wordcloud.to_file("wordcloud.png")
2020
ABM
AGAIN
ALL
ALWAYS
AND
APOCALYPTIC
ARE
BARRY
BENT
BOTTOM
BUILDS
CAN
CANDY
CATCH
CAR
COLD
COMPLETE
COOL
CUT
CUTS
DEEPER
DESTROYER
DETAIL
DONT
EDIT
FEAT
FEEL
FING
FIREFLIES
FOR
FOREVER
FRAGMENTS
GENERAL
GUEST
HAVE
HEAR
HEAT
HEBETUDE
HITHAT
HOME
HOW
JOHN
KIND
KNOW
LIFE
LIKE
MAKE
MIND
MORE
MORNING
MOTION
NAVSTA
NEW
NGOSI
NISTRUM
NOW
ONLY
ORB
OUR
PAX
ROD
SAID
SCARRZ
SECTION
SPAGHETTI
STUFF
SUBSYST
SUGAR
SUN
SUNCASTLE
SUNSHINE
SUPER
SUPPLY
TAKE
TEXTUREDGROOVES
THAT
THE
TILL
TIME
TWO
TWOBOB
TWOBOBS
VAST
VIBRO
WAY
WITH
WITHOUT
WHAT
WHEN
WHERE
WHO
#WORD
WORLD
YOU
YOUR
YOURE
ZENITH
#
END
CREEPY
CRYPT
DOUBLEKICK
PURE
WORD
THREEFOUR
EIGTH
PROG
THEATRICAL
MANGA
AUDIO
LISTENING
MACHINES
BAND
SPACES
BIG
BOUNCY
ROBOT
CUE
DRIVEN
TUNE
WASTED
OFFBEATS
CLOCKWORK
DRIFTING
TARANTINO
HIGHNOON
STANDOFF
STYLISED
DAWN
LOTR
RELIGIOUS
RESOLUTION
REMIXED
MURDER
BENDY
STAR
BLAZING
BLING
POETRY
BOSON
SPIN
ROOM
VIP
COLORFUL
SHINING
RAFIQI
SHORT
STATIS
SUBMARINE
DEVRAS
PLEXI
COMPRESS
DJINTRO
VIRTUSO
EVERYTHING
SONG
ANDREWS
BEEFHEART
SIGNALS
QUIVER
INDUCTIVEONE
LAWLER
NAVSTA
REMASTERED
NUMBERSINTHEDREAM
SPAGHETTIWESTERN
FORWARD
MAD
WAITING
TIRED
@twobob
Copy link
Author

twobob commented Sep 23, 2024

results
511 polytonic classes, with their frequencies, extracted from 472 WAV titles containing tags

  1. NOVOCAL: 208
  2. BEAT: 188
  3. VOCAL: 186
  4. BASS: 184
  5. DANCE: 171
  6. FEMALE: 160
  7. UNDERGROUND: 160
  8. CLUB: 159
  9. HOUSE: 158
  10. SYNTH: 116
  11. 4TTF: 84
  12. PERCUSSION: 83
  13. SOUNDDESIGN: 79
  14. INSTRUMENTAL: 73
  15. DARK: 70
  16. DRUM: 69
  17. GUITAR: 67
  18. EXPERIMENTAL: 66
  19. DRUMS: 64
  20. GROOVE: 63
  21. MALE: 62
  22. KICK: 56
  23. AMBIENT: 54
  24. SUB: 54
  25. ORGANIC: 53
  26. BUILD: 51
  27. STRING: 47
  28. PIANO: 46
  29. TECHNO: 45
  30. BREAKBEAT: 42
  31. STRINGS: 38
  32. MIX: 37
  33. CATCHY: 37
  34. CHOIR: 36
  35. ORCHESTRAL: 35
  36. ANTHEM: 34
  37. BREAKS: 34
  38. DROP: 33
  39. EPIC: 33
  40. BRASS: 32
  41. BACKTOMINE: 30
  42. PRODUCTION: 30
  43. FUNKY: 29
  44. VOCALISATION: 29
  45. BREAK: 28
  46. INDUSTRIAL: 28
  47. FILTER: 28
  48. CHILL: 27
  49. POWERFUL: 25
  50. DELAY: 25
  51. HARMONY: 23
  52. DISTORTION: 23
  53. GROOVY: 23
  54. KAY: 22
  55. SULTRY: 22
  56. SEXY: 22
  57. DRIVING: 22
  58. MAINROOM: 22
  59. ACOUSTIC: 21
  60. EDM: 21
  61. RISE: 21
  62. SPECIAL: 21
  63. PARTY: 21
  64. TB303: 20
  65. FLUTE: 20
  66. MOTIF: 20
  67. COUNTERPOINT: 20
  68. SOLO: 19
  69. SLOW: 19
  70. MELODIC: 19
  71. FILMSCORE: 19
  72. INTENSE: 19
  73. OFFBEAT: 18
  74. IBIZA: 18
  75. ATEMPORAL: 18
  76. VIOLIN: 18
  77. PERCUSSIVE: 17
  78. ELECTRO: 17
  79. FUSION: 17
  80. BEAUTIFUL: 16
  81. HARD: 16
  82. JAZZ: 15
  83. DRUMROLL: 15
  84. THEMETUNE: 15
  85. WARM: 15
  86. SUMMER: 15
  87. SOUNDESIGN: 15
  88. KEYS: 15
  89. ATMOSPHERIC: 14
  90. FILL: 14
  91. BRIGHT: 14
  92. ROCK: 14
  93. CONGA: 14
  94. DANCEFLOOR: 14
  95. AFTERPARTY: 14
  96. ELECTRONIC: 13
  97. HAPPY: 13
  98. SLOWBURN: 13
  99. SWEEP: 13
  100. OVERTONE: 13
  101. RISER: 12
  102. CLASSIC: 12
  103. ABSTRACT: 12
  104. POP: 11
  105. CLASSICAL: 11
  106. SYNTHWAVE: 11
  107. HIPHOP: 11
  108. SPACE: 11
  109. HITS: 11
  110. DEEP: 11
  111. BANG: 11
  112. ACID: 10
  113. HARMONIC: 10
  114. SOUNDSCAPE: 10
  115. SOUNDTRACK: 10
  116. TRAP: 10
  117. CADENCE: 10
  118. APHEX: 10
  119. BEND: 10
  120. LEFTFIELD: 10
  121. EMOTIONAL: 10
  122. ARPEGGIO: 10
  123. CHART: 10
  124. LEAD: 9
  125. PROGRESSIVE: 9
  126. WIND: 9
  127. BRUSH: 9
  128. FUNK: 9
  129. SOUND: 9
  130. DESIGN: 9
  131. SOULFUL: 9
  132. REVERB: 9
  133. CHROMATIC: 9
  134. MOODY: 9
  135. ATONAL: 9
  136. CELLO: 8
  137. LIVE: 8
  138. SLAP: 8
  139. HOT: 8
  140. TENSION: 8
  141. HAUNTING: 8
  142. PHASE: 8
  143. DUB: 8
  144. PLAYFUL: 8
  145. FOLK: 8
  146. XYLOPHONE: 8
  147. GLITCH: 8
  148. 303: 8
  149. IMPROV: 8
  150. ENERGY: 8
  151. AUGMENT: 8
  152. DIMINISH: 8
  153. CRUSH: 8
  154. EASYLISTENING: 8
  155. RIFF: 7
  156. CYMBAL: 7
  157. PRETTY: 7
  158. SWING: 7
  159. NOISE: 7
  160. CALM: 7
  161. SKANK: 7
  162. BRUSHES: 7
  163. ACCELERANDO: 7
  164. ORGAN: 7
  165. WONK: 7
  166. DISTORT: 7
  167. TRANCE: 7
  168. MADNESS: 7
  169. SAXOPHONE: 7
  170. CRESCENDO: 7
  171. MASTER: 7
  172. BOUNCE: 7
  173. VIRTUOSO: 7
  174. DYNAMIC: 6
  175. LICK: 6
  176. SNARE: 6
  177. SPLASH: 6
  178. RELAXING: 6
  179. SYNTHS: 6
  180. HUGE: 6
  181. MINIMAL: 6
  182. ROLLS: 6
  183. TRIPHOP: 6
  184. WINDS: 6
  185. DRAMATIC: 6
  186. LIGHT: 6
  187. SHRED: 6
  188. MYSTERIOUS: 6
  189. ROLLING: 6
  190. MAJESTIC: 6
  191. CHANT: 6
  192. VOCALISATIONS: 6
  193. EMOTION: 6
  194. DISTORTED: 6
  195. SWELL: 6
  196. BREAKDOWN: 6
  197. SPOOKY: 6
  198. DRUMANDBASS: 6
  199. HARP: 6
  200. FANTASY: 6
  201. BACKBEAT: 6
  202. GLISSANDO: 6
  203. TECHNICAL: 6
  204. REMAX: 6
  205. DRIVE: 6
  206. SPOKEN: 5
  207. GIRL: 5
  208. BIRKENSHAW: 5
  209. BLUES: 5
  210. HORROR: 5
  211. THRASH: 5
  212. CRASH: 5
  213. HALFTIME: 5
  214. RIDE: 5
  215. TECH: 5
  216. MASSIVE: 5
  217. DELICATE: 5
  218. SWEET: 5
  219. SCARY: 5
  220. STICKS: 5
  221. BUILDUP: 5
  222. RAVE: 5
  223. PEACE: 5
  224. FIGHTING: 5
  225. DARKNESS: 5
  226. SIDECHAIN: 5
  227. STABS: 5
  228. EARWORM: 5
  229. GENTLE: 5
  230. PENSIVE: 5
  231. MODERN: 5
  232. SOARING: 5
  233. MASSIVEATTACK: 5
  234. PULSING: 5
  235. ELECTRIC: 5
  236. AFRICAN: 5
  237. POWER: 5
  238. MANIC: 5
  239. INSANE: 5
  240. MYSTERY: 5
  241. DOUBLEBASS: 5
  242. BELLS: 5
  243. JAZZY: 5
  244. SANFRAN: 5
  245. INDIE: 5
  246. CLIMAX: 5
  247. HUMP: 5
  248. DRUMLESS: 5
  249. REMIX: 4
  250. SAMBA: 4
  251. ROLL: 4
  252. SLEEP: 4
  253. TOM: 4
  254. ORIENTAL: 4
  255. ETHEREAL: 4
  256. PSYCHEDELIC: 4
  257. KIT: 4
  258. DOWN: 4
  259. MOOD: 4
  260. DIRTY: 4
  261. MELLOW: 4
  262. ALARM: 4
  263. VIOLINS: 4
  264. MAGIC: 4
  265. EVOLVING: 4
  266. AIRY: 4
  267. DISSONANCE: 4
  268. THEME: 4
  269. DIMINISHED: 4
  270. SPANISH: 4
  271. FIGHT: 4
  272. FLAMENCO: 4
  273. MEXICAN: 4
  274. MAGICAL: 4
  275. DETECTIVE: 4
  276. HEARTBREAK: 4
  277. CONTEMPORARY: 4
  278. FILM: 4
  279. DRONE: 4
  280. GAMEOFTHRONES: 4
  281. WHITE: 4
  282. UPLIFTING: 4
  283. UPLIFT: 4
  284. REVERSE: 4
  285. FILLS: 4
  286. BANGER: 4
  287. BEACH: 4
  288. CHILLOUT: 4
  289. INTIMATE: 4
  290. DANGER: 4
  291. JOY: 4
  292. BEATLESS: 4
  293. PRECUSSION: 4
  294. SCIFI: 4
  295. WHOLETONE: 4
  296. GAMEMUSIC: 4
  297. DESTRUCTION: 4
  298. TRANSPOSE: 4
  299. DISORIENT: 4
  300. GLOCK: 4
  301. UPRIGHT: 4
  302. CELTIC: 4
  303. PERSONAL: 4
  304. INTRICATE: 4
  305. PITCHDROP: 4
  306. REBIRTH: 4
  307. OUTSIDE: 4
  308. LOVE: 4
  309. SLAM: 4
  310. CYMBALS: 4
  311. STEELPAN: 4
  312. VOCALHIT: 3
  313. SINISTER: 3
  314. BACKGROUND: 3
  315. MILITARY: 3
  316. STREET: 3
  317. SYNTHPOP: 3
  318. NIGHT: 3
  319. TRIPLET: 3
  320. ATMOS: 3
  321. GOOD: 3
  322. ELECTRONICA: 3
  323. MARCH: 3
  324. GROOVES: 3
  325. CHORAL: 3
  326. PAD: 3
  327. MINOR: 3
  328. MOTIFS: 3
  329. GLASTO: 3
  330. MUSICBOX: 3
  331. TRIBAL: 3
  332. CONTRAPUNTAL: 3
  333. BEATS: 3
  334. MESMERIC: 3
  335. SUPERSTAR: 3
  336. BUSY: 3
  337. ALIEN: 3
  338. HEAVY: 3
  339. SAD: 3
  340. BATTLE: 3
  341. MEDIEVAL: 3
  342. CEREMONY: 3
  343. URHU: 3
  344. CHINESE: 3
  345. BRAAAM: 3
  346. BIRDSONG: 3
  347. STICK: 3
  348. SWEEPS: 3
  349. RUBATO: 3
  350. ORIGINAL: 3
  351. LUSH: 3
  352. EXPLOSIVE: 3
  353. SUSPENSE: 3
  354. QUIRKY: 3
  355. BOND: 3
  356. PLACE: 3
  357. PACHA: 3
  358. GOLDEN: 3
  359. MIDNIGHT: 3
  360. EVIL: 3
  361. RADIO: 3
  362. RESONATOR: 3
  363. AMEN: 3
  364. CABARET: 3
  365. CONCERT: 3
  366. WINK: 3
  367. SPACEDRUM: 3
  368. HORNS: 3
  369. DEMONIC: 3
  370. FREQUENCIES: 3
  371. DOUBLEUP: 3
  372. CRAZY: 3
  373. EXTREME: 3
  374. AUTOTUNE: 3
  375. FALLS: 3
  376. ROUSING: 3
  377. LANGUID: 3
  378. COMPOSITION: 3
  379. THUMP: 3
  380. UPBEAT: 3
  381. DETUNE: 3
  382. HANDDRUM: 3
  383. RAP: 3
  384. JACKING: 3
  385. BELL: 3
  386. SIMPLE: 3
  387. OLDSCHOOL: 3
  388. VOICE: 2
  389. OSTINATO: 2
  390. SITAR: 2
  391. ALLEGRO: 2
  392. ACAPELLA: 2
  393. POLYRHYTHM: 2
  394. MEDITATION: 2
  395. CHIPTUNE: 2
  396. DISCO: 2
  397. TAIKO: 2
  398. JAPAN: 2
  399. FEMALEVOCAL: 2
  400. TRANSIENT: 2
  401. ORGANICDRUMS: 2
  402. ECHO: 2
  403. DUBSTEP: 2
  404. WESTERN: 2
  405. MICROPERCUSSION: 2
  406. SADNESS: 2
  407. MISERY: 2
  408. WONKY: 2
  409. ORCHESTRA: 2
  410. 3OVER4: 2
  411. ROMANTIC: 2
  412. MEDITATIVE: 2
  413. TRIUMPHANT: 2
  414. SEASIDE: 2
  415. 8BIT: 2
  416. ASIAN: 2
  417. SLOWER: 2
  418. RELENTLESS: 2
  419. SPARSE: 2
  420. SURREAL: 2
  421. CINEMATIC: 2
  422. HOOK: 2
  423. GRAND: 2
  424. OMINOUS: 2
  425. KEYBOARD: 2
  426. CASTANET: 2
  427. HARMONICS: 2
  428. BOUNCING: 2
  429. BREATHING: 2
  430. 808: 2
  431. MINIMALISM: 2
  432. DRUMSOLO: 2
  433. LOVELY: 2
  434. ANCIENT: 2
  435. TOUCH: 2
  436. SHAKER: 2
  437. TOMS: 2
  438. TROPICAL: 2
  439. FILMMUSIC: 2
  440. THOUGHTFUL: 2
  441. DRUMROLLS: 2
  442. CHASE: 2
  443. MORSE: 2
  444. RUNNING: 2
  445. NIGHTMARE: 2
  446. HEARTWARMING: 2
  447. SYMPHONIC: 2
  448. MASTERPIECE: 2
  449. MENACING: 2
  450. SYMPHONY: 2
  451. CRAZED: 2
  452. BLUESY: 2
  453. DRAMA: 2
  454. HIT: 2
  455. MAJORMINOR: 2
  456. SINE: 2
  457. FALLING: 2
  458. BLUE: 2
  459. EFFECTS: 2
  460. PADS: 2
  461. JAM: 2
  462. EVOLVE: 2
  463. SCREAMS: 2
  464. BUMPING: 2
  465. HUMPING: 2
  466. RETRO: 2
  467. CAFEDELMAR: 2
  468. SLEAZY: 2
  469. FUTURE: 2
  470. CONCRETE: 2
  471. FUN: 2
  472. BITCRUSH: 2
  473. FEEDBACK: 2
  474. BROKEN: 2
  475. FIRE: 2
  476. FREEJAZZ: 2
  477. MUSIC: 2
  478. SOUL: 2
  479. PICKING: 2
  480. FILTERED: 2
  481. JOURNEY: 2
  482. BURNING: 2
  483. DIMINSHED: 2
  484. LAIDBACK: 2
  485. HANDCLAP: 2
  486. ANALOG: 2
  487. HORN: 2
  488. PROFOUND: 2
  489. RALLENTANDO: 2
  490. MODULAR: 2
  491. RELAX: 2
  492. HARMONICA: 2
  493. TRADITIONAL: 2
  494. INDIAN: 2
  495. BALLAD: 2
  496. BEATBOX: 2
  497. MOUTH: 2
  498. FAST: 2
  499. DYSTOPIAN: 2
  500. BYZANTINE: 2
  501. ARABIC: 2
  502. GYPSY: 2
  503. BUILDING: 2
  504. STAMP: 2
  505. INSISTENT: 2
  506. SYMPATHY: 2
  507. DISCOFALL: 2
  508. THUNDER: 2
  509. WEATHER: 2
  510. ATTITUDE: 2
  511. FALCETTO: 2

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment