# Source: GitHub gist agricolamz/770463f4ba5ca34c07c1b30cbe9bafc3
# Author: @agricolamz (created October 21, 2024 14:30)
library(tidyverse)
library(tidytext)
# Load the Avar dictionary. The rest of the script reads its `lemma` and
# `meaning_ru` columns.
avar <- read_csv("avar_dict.csv")

# Exploratory check: split every Avar lemma into single characters and show
# the sorted set of distinct ones, to see which symbols need normalising.
# NOTE(review): unnest_characters() is called with its defaults, which
# presumably lower-case the text and drop non-alphanumeric symbols, so
# punctuation may not show up in this inventory -- confirm in tidytext docs.
avar["lemma"] |>
  unnest_characters(output = "chars", input = "lemma") |>
  distinct(chars) |>
  arrange(chars) |>
  pull(chars)
# Normalise dictionary lemmas so that entries from different dictionaries can
# be compared as plain strings.
#
# Steps, in order: lower-case; delete every match of `strip_pattern`; delete
# "/"; replace accented vowels with plain ones; spell out "я"/"ё" as
# "йа"/"йо"; turn Latin "i" and Cyrillic "і" into "I".
#
# Args:
#   lemma:         character vector of raw lemmas.
#   strip_pattern: regular expression whose matches are deleted outright.
#
# Returns:
#   Character vector of the same length as `lemma`.
normalize_lemma <- function(lemma, strip_pattern) {
  lemma |>
    tolower() |>
    str_remove_all(strip_pattern) |>
    str_remove_all("/") |>
    str_replace_all("á", "а") |>
    str_replace_all("é", "е") |>
    str_replace_all("ó", "о") |>
    str_replace_all("ý", "у") |>
    str_replace_all("ѝ", "и") |>
    # The accented "я́" must be handled before the plain "я": otherwise the
    # plain rule consumes the letter first and the accent mark is left behind
    # on the replacement text.
    str_replace_all("я́", "йа") |>
    str_replace_all("я", "йа") |>
    str_replace_all("ё", "йо") |>
    # tolower() above also lower-cases any "I" in the lemma; these two steps
    # map Latin "i" and Cyrillic "і" to an upper-case "I".
    # NOTE(review): presumably "I" stands for the palochka -- confirm.
    str_replace_all("i", "I") |>
    str_replace_all("і", "I")
}

# Avar word list: normalised lemma, Russian gloss, and a constant `borrowing`
# marker that flags matched rows after the join below.
avar_word_list <- avar |>
  mutate(
    borrowing = "avar",
    lemma = normalize_lemma(lemma, "[~-]")
  ) |>
  select(lemma, meaning_ru, borrowing)

andi <- read_csv("andic_dicts.csv")

# Keep the Andic entries whose normalised lemma AND Russian gloss both occur
# in the Avar word list, restricted to two glottocodes, and export them.
andi |>
  mutate(
    # The hyphen sits at the end of the class so it is a literal "-". Written
    # as `'-<` it forms a range that also deletes digits and "()*+,.:;".
    lemma = normalize_lemma(lemma, "[̄̀́̅~'<=|-]")
  ) |>
  select(lemma, morphology, meaning_ru, glottocode) |>
  left_join(
    avar_word_list,
    # Explicit keys: these are the two columns the tables have in common.
    by = join_by(lemma, meaning_ru),
    relationship = "many-to-many"
  ) |>
  filter(
    # `borrowing` is non-NA only for rows that found an Avar match.
    !is.na(borrowing),
    # NOTE(review): presumably Chamalal and Andi -- confirm against Glottolog.
    glottocode %in% c("cham1309", "andi1255")
  ) |>
  select(-borrowing) |>
  writexl::write_xlsx("4jesse.xlsx")
# Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment