Created
July 27, 2022 12:52
-
-
Save mikisvaz/73f0300c50978b569ba01214c5e2908a to your computer and use it in GitHub Desktop.
How signs are calculated in rbbt decoupler sandbox
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
### This loop body is run for each pair
#
# Names read from the enclosing scope (not defined in this fragment):
#   values                       - per-pair fields; after reversing and zipping,
#                                  each tuple is (";"-separated PMIDs, sign)
#   controversies                - running counter, incremented here
#   negative_evidence_proportion - threshold parameter
#   strict_negative              - threshold parameter
#   support_evidence_max         - number of PMIDs that saturates support_weight
#   sign_support_balance         - mixing factor between the two weights
#
# Result: `sign` ("+" or "-") and `weight` (negative when the sign is "-").

# sign_evidence_pmids holds the list of PMIDs supporting each sign: +,
# -, and ~ (unknown or not specified)
sign_evidence_pmids = {"+" => [], "-" => [], "~" => []}

Misc.zip_fields(values.reverse).each do |e, s|
  # A missing or empty sign means "not specified", so it belongs in the "~"
  # bucket; otherwise it would be excluded from the "~" clean-up and from the
  # non-contradictory count below.
  s = "~" if s.to_s.empty?

  # Append rather than assign, so that several entries reporting the same sign
  # all contribute their PMIDs instead of the last one overwriting the others.
  # `to_s` guards against a nil evidence field.
  (sign_evidence_pmids[s] ||= []).concat(e.to_s.split(";"))
end

# NOTE: The lists of PMIDs for each sign might include repetitions, so
# if two databases reviewed the same PMID and reported it, it will be
# counted twice. Perhaps we can change that.

# Here we just count the pairs in which the same PMID is used as
# evidence for contradicting signs (& intersects the two lists).
# Not used in scoring.
if (sign_evidence_pmids["-"] & sign_evidence_pmids["+"]).any?
  controversies += 1
end

# Remove from the ~ list every PMID that also appears with an explicit sign:
# maybe two databases report the same PMID but one did not care to indicate
# the sign. In that case we assume the PMID does indicate the sign and ignore
# the entry that doesn't reflect that.
sign_evidence_pmids["~"] -= sign_evidence_pmids["+"] + sign_evidence_pmids["-"]

# Turn the lists of PMIDs into counts, which is just the length of the list
# for each sign
sign_evidence = Hash.new(0)
sign_evidence_pmids.each do |s, l|
  sign_evidence[s] = l.length
end

# Placeholder variables for clarity. They are turned into floats (.to_f) so
# that the divisions below are not truncated to integers.
positive_evidence = sign_evidence["+"].to_f
negative_evidence = sign_evidence["-"].to_f
signed_evidence = Misc.sum(sign_evidence.values_at("+", "-")).to_f
non_negative_evidence = Misc.sum(sign_evidence.values_at("+", "~")).to_f
non_positive_evidence = Misc.sum(sign_evidence.values_at("-", "~")).to_f

# This is the total number of articles
total_evidence = Misc.sum(sign_evidence.values).to_f

# Decide if the negative evidence is sufficient to deem the interaction
# negative: more negative articles than positive, while also the negative
# evidence divided by negative + unknown must be more than
# <negative_evidence_proportion>, one of the parameters.
# NOTE(review): the original author suggests the denominator should probably
# be total_evidence rather than non_positive_evidence, otherwise it is not
# really a proportion. That has deliberately not been changed here.
negative = negative_evidence > positive_evidence &&
  negative_evidence / non_positive_evidence > negative_evidence_proportion

# As a final rule, the pair is negative if the proportion of negative
# evidence among all the signed evidence (i.e. ignoring the unknown) is larger
# than <strict_negative>, another parameter. The explicit guard skips the
# 0.0 / 0.0 case when there is no signed evidence at all.
negative = true if signed_evidence > 0 && (negative_evidence / signed_evidence) > strict_negative

# The sign_weight is the proportion of evidence supporting the chosen sign,
# which is positive unless we deemed it negative. This means that a pair with
# totally unknown sign will be positive by default, with a sign_weight of 0.
if negative
  sign = "-"
  supporting_evidence = negative_evidence
else
  sign = "+"
  supporting_evidence = positive_evidence
end

# With no evidence at all the division would be 0.0 / 0.0 (NaN) and would
# poison the final weight; use 0.0 as the comment above promises.
sign_weight = total_evidence.zero? ? 0.0 : supporting_evidence / total_evidence

# The support_weight measures how close we get to having
# <support_evidence_max> (another parameter) PMIDs not contradicting our sign.
# So if support_evidence_max is 10 and we have 2 positive, 1 negative and 2
# unknown evidences, then that is a positive relationship with 4
# non-contradictory evidences, and the support_weight will be 4 / 10.
non_contradictory_evidence = Misc.sum(sign_evidence.values_at(sign, "~"))

# Force float division: with two Integers, 4 / 10 would truncate to 0.
support_weight = [1, non_contradictory_evidence.to_f / support_evidence_max].min

# The final weight combines sign_weight and support_weight, mixed according
# to <sign_support_balance>
weight = (sign_weight * sign_support_balance) + (support_weight * (1 - sign_support_balance))

# The final weight is made negative when the sign is negative because
# this is how it is encoded in decoupler
weight = -weight if sign == "-"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment