Created
March 22, 2016 17:45
-
-
Save mathias-brandewinder/0285af271f0d78e378ef to your computer and use it in GitHub Desktop.
Word2Vec experiment
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Script references. `#I` adds the packages folder to the assembly search
// path, so the `#r` directives below can use package-relative paths.
#I "../packages/"
#r @"FSharp.Data/lib/net40/FSharp.Data.dll"
// StemmersNet and ParallelSeq are not used directly in the visible script;
// presumably Utilities.fs depends on them - TODO confirm.
#r @"StemmersNet/lib/net20/StemmersNet.dll"
#r @"FSharp.Collections.ParallelSeq/lib/net40/FSharp.Collections.ParallelSeq.dll"
// Project-local helpers (opened later in the script as HomeDepot.Utilities).
#load "Utilities.fs"

open FSharp.Data
// Relative locations of the data files. They are declared as [<Literal>]
// so they can be used as static arguments to the CSV type provider.
// All paths use forward slashes so they resolve on every platform.

[<Literal>]
let trainPath = @"../data/train.csv"

[<Literal>]
let testPath = @"../data/test.csv"

[<Literal>]
let attributesPath = @"../data/attributes.csv"

[<Literal>]
let productsPath = @"../data/product_descriptions.csv"

// Folder (not a file) where submission files are meant to be written.
[<Literal>]
let submissionPath = @"../data/"
/// Typed view over the training CSV. The schema override leaves the first
/// four columns inferred and forces the fifth one to `float`.
type Train = CsvProvider<trainPath, Schema = ",,,,float">

/// Typed view over the test CSV, with every column inferred.
type Test = CsvProvider<testPath>

/// Product titles from the training rows followed by those from the test rows.
/// This is a lazy sequence: the CSV samples are only read when it is
/// enumerated, and read again on every new enumeration.
let sample =
    seq {
        yield! Train.GetSample().Rows |> Seq.map (fun x -> x.Product_title)
        yield! Test.GetSample().Rows |> Seq.map (fun x -> x.Product_title)
    }
// Utilities.fs is loaded by the #load directive at the top of the script,
// so it does not need to be loaded a second time here.
open HomeDepot.Utilities

/// Distinct product titles, each passed through `preprocess`, as an array.
/// Deduplication runs before preprocessing, so titles that only differ in
/// ways `preprocess` normalises away may still appear more than once.
let titles =
    sample
    |> Seq.distinct
    |> Seq.map preprocess
    |> Seq.toArray
// Training corpus file: one preprocessed title per line.
// NOTE(review): machine-specific absolute path - adjust before running the
// script on another machine.
let path = @"C:\users\mathias brandewinder\desktop\titles.txt"

// Writes the titles to `path`, replacing any existing file.
// `File.WriteAllLines` returns unit, so `file` carries no value; the binding
// is kept only so that the name remains defined for any later code.
let file = System.IO.File.WriteAllLines(path, titles)
// NOTE(review): machine-specific absolute path to a locally built
// Word2Vec.Net assembly - adjust before running on another machine.
#r @"C:\Users\Mathias Brandewinder\Documents\GitHub\Word2Vec.Net\Word2Vec.Net\bin\Release\Word2Vec.Net.dll"

open Word2Vec

// File the trained model is written to (and later read back from).
let output = @"C:\users\mathias brandewinder\desktop\output.txt"
// File the vocabulary is saved to.
let vocab = @"C:\users\mathias brandewinder\desktop\vocab.txt"

/// Word2Vec trainer configured to read the titles file written above.
/// Despite its name, this holds the result of `.Build()`, not the builder.
/// The option meanings below are presumed from their names - TODO confirm
/// against the Word2Vec.Net documentation.
let builder =
    Word2Vec.Net.Word2VecBuilder
        .Create()
        .WithTrainFile(path)
        .WithOutputFile(output)
        .WithBinary(1)              // presumably: write the model in binary format
        // .WithCBow(1)
        .WithSize(50)               // presumably: vector dimensionality
        .WithSaveVocubFile(vocab)   // (sic) spelling as used by the library API
        .WithWindow(5)              // presumably: context window size
        .Build()

// Runs the training; the model ends up in the `output` file.
builder.TrainModel()
// Query the trained model stored in `output`.
// The results of the two `Search` calls are not bound to a name; when the
// lines are sent to F# Interactive the value is echoed as `it`.

// Presumably returns the words closest to "shower" - TODO confirm.
let distance = Word2Vec.Net.Distance(output)
distance.Search("shower")

// NOTE(review): word-analogy queries conventionally take three words
// (a is to b as c is to ?), but only two are passed here - confirm what
// Word2Vec.Net's WordAnalogy.Search does with a two-word input.
let analogy = Word2Vec.Net.WordAnalogy(output)
analogy.Search("metal stakes")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment