Last active
June 19, 2021 20:10
-
-
Save westnordost/b43d2e5100e913cdd2eb8d2d2eb2d296 to your computer and use it in GitHub Desktop.
OpeningHoursParser test with real data
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import ch.poole.openinghoursparser.* | |
import java.io.ByteArrayInputStream | |
import java.net.HttpURLConnection | |
import java.net.URL | |
import java.nio.charset.StandardCharsets | |
/**
 * Undoes the CSV quoting of a single-column line.
 *
 * A field that is wrapped in double quotes has the enclosing pair removed and every doubled
 * quote (`""`) inside collapsed to a single one. Any other field is returned unchanged.
 * Only the one enclosing pair is stripped, so a value that itself begins or ends with a
 * quote character (e.g. an opening hours comment) keeps it.
 *
 * @param field one raw line of the CSV response
 * @return the field value with the CSV quoting removed
 */
private fun unquoteCsvField(field: String): String =
    if (field.length >= 2 && field.startsWith('"') && field.endsWith('"')) {
        field.substring(1, field.length - 1).replace("\"\"", "\"")
    } else {
        field
    }

// Number of opening hours strings read from the response
var total = 0
// Number of those strings the parser accepted without throwing a ParseException
var parsed = 0

// SPARQL query: select ?opening_hours where {?element osmt:opening_hours ?opening_hours}
val url = URL("https://sophox.org/sparql?query=select%20%3Fopening_hours%20where%20%7B%3Felement%20osmt%3Aopening_hours%20%3Fopening_hours%7D%20")
val connection = url.openConnection() as HttpURLConnection
try {
    connection.setRequestProperty("Accept", "text/csv")
    connection.setRequestProperty("User-Agent", "OpeningHoursParser real data parsing test")
    // NOTE(review): "charset" is not a standard HTTP request header; "Accept-Charset" may
    // have been intended. Left as is so the request stays unchanged.
    connection.setRequestProperty("charset", StandardCharsets.UTF_8.name())
    // No request body is written, so doOutput is deliberately left at its default (false).
    connection.inputStream.bufferedReader().useLines { lines ->
        // The first line of a CSV result is the header row naming the selected variable,
        // not an opening hours value, so it is skipped.
        for (line in lines.drop(1)) {
            total++
            // show a little progress bar because it takes so long
            if (total % 100000 == 0) print(".")
            // NOTE(review): values containing a line break span several lines of the
            // response and are not reassembled here; each physical line is parsed on its own.
            val oh = unquoteCsvField(line)
            try {
                // non-strict parsing (strict = false)
                OpeningHoursParser(ByteArrayInputStream(oh.toByteArray())).rules(false)
                parsed++
            } catch (e: ParseException) {
                // not parseable: counted in total only
            }
        }
    }
} finally {
    connection.disconnect()
}
println()
println("$total opening hours")
// Guard on total (not parsed) so that a run in which nothing parsed still reports 0.0%.
if (total > 0) {
    println("Of these ${100.0 * parsed / total}% are supported.")
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment