library(overpass)
library(sf)
# Overpass QL query text. The first statement selects areas whose name matches
# one of five Russian region names (Adygea, Kabardino-Balkaria,
# Karachay-Cherkessia, Stavropol Krai, Krasnodar Krai). The second collects
# nodes inside those areas whose "place" tag matches city, village, town,
# hamlet or isolated_dwelling, and `out;` emits them.
settlements <- 'area[name~"Адыгея|Кабардино-Балкария|Карачаево-Черкесия|Ставропольский край|Краснодарский край"];
(node["place"~"city|village|town|hamlet|isolated_dwelling"](area););
out;'
# Submit the query via the overpass package and keep the response.
# NOTE(review): the class of the returned object depends on the installed
# overpass version -- confirm before converting it with sf.
query_result <- overpass_query(settlements)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import wave | |
from deepspeech import Model, version | |
import numpy as np | |
import sys | |
import subprocess | |
from pipes import quote | |
import shlex | |
import pympi | |
# This code is essentially taken from DeepSpeech native_client repository |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This is an example from Niko Partanen how to use Copius transliterator from Python. | |
# The idea is to replicate the form that the website uses, and send that information | |
# there directly. For the website it essentially looks like someone would be just using | |
# it normally. | |
import requests | |
from lxml import html | |
def run_copius_transliterator(language, text, direction): | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(tidyverse)
library(lingtypology)

# Read the settlement table from the langdoc/kpv-geography repository.
# The columns used below are language, dialect, settlement, latitude and
# longitude.
kpv <- read_csv("https://raw.githubusercontent.com/langdoc/kpv-geography/master/kpv.csv")

# Draw a map with one point per row: points are grouped by dialect, labelled
# with the settlement name, and placed at the coordinates given in the table
# rather than at lingtypology's default location for the language.
map.feature(languages = kpv$language,
            features = kpv$dialect,
            label = kpv$settlement,
            latitude = kpv$latitude,
            longitude = kpv$longitude)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import xml.etree.cElementTree as ET | |
from PIL import Image | |
import numpy as np | |
from google.protobuf.json_format import MessageToJson | |
import matplotlib.pyplot as plt | |
import matplotlib.patches as patches | |
from pathlib import Path | |
import sklearn as sk |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pathlib import Path | |
import xml.etree.cElementTree as ET | |
import re | |
# Tieto noista ylä- ja alaindekseistä on merkitty tällaisiin elementteihin: | |
# textStyle {offset:13; length:1;superscript:true;} | |
# Niistä pitää poimia milloin niitä on, ja miten pitkiä ne ovat | |
def get_offset_info(offsets): | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Reconfigure, rebuild and install the language directory
# /mnt/data/trunk/langs/<lang>, where <lang> is the first script argument.
lang=$1

# Without an argument every path below would collapse to the parent "langs"
# directory, so stop early instead of building in the wrong place.
if [ -z "$lang" ]; then
    echo "usage: $0 LANG" >&2
    exit 1
fi

# Export GTLANG_<lang> pointing at the language directory.
export "GTLANG_$lang=/mnt/data/trunk/langs/$lang"

# Never run the build commands outside the intended directory.
cd "/mnt/data/trunk/langs/$lang" || exit 1

# `make clean` can fail on a tree that has not been configured yet; that is
# harmless, so its exit status is deliberately not checked.
make clean

# Each remaining step only makes sense if the previous one succeeded; in
# particular, do not install after a failed build.
./autogen.sh &&
./configure --with-hfst --without-xfst --enable-tokenisers --enable-reversed-intersect --enable-alignment --enable-apertium --enable-dicts --enable-morpher &&
make &&
make install
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### This is Niko Partanen's example R script that splits the National | |
### Library of Finland's dataset OCR Ground Truth Pages (Swedish Fraktur) | |
### into line-image–text-file pairs that can be used with training | |
### models with Tesseract. Same approach works easily also with Ocropy | |
### Data source: | |
### https://digi.kansalliskirjasto.fi/opendata | |
library(tidyverse) | |
library(xml2) | |
library(measurements) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# -*- coding: utf-8 -*- | |
# http://stackoverflow.com/questions/5574702/how-to-print-to-stderr-in-python | |
from __future__ import print_function | |
import sys | |
def eprint(*args, **kwargs):
    """Print to standard error instead of standard output.

    Accepts the same positional and keyword arguments as the ``print``
    function (for example ``sep`` and ``end``) and forwards them unchanged.
    The ``file`` keyword is always set to ``sys.stderr``; passing ``file``
    explicitly raises ``TypeError`` because the keyword would be given twice.
    """
    print(*args, file=sys.stderr, **kwargs)
import errno |
NewerOlder