-
-
Save mbadolato/8253004 to your computer and use it in GitHub Desktop.
<?php | |
/* | |
* (c) Mark Badolato <[email protected]> | |
* | |
* This content is released under the {@link http://www.opensource.org/licenses/MIT MIT License.} | |
*/ | |
namespace Bado; | |
/**
 * Scores an item by the lower bound of the Wilson score confidence interval
 * for the proportion of positive votes it received.
 */
class WilsonConfidenceIntervalCalculator
{
    /**
     * Default z value used when the caller does not supply a confidence.
     *
     * These values were computed using Ruby's Statistics2.pnormaldist function
     * 1.959964 = 95.0% confidence
     * 2.241403 = 97.5% confidence
     */
    private const CONFIDENCE = 2.241403;

    /**
     * Returns the score for an item with the given vote counts.
     *
     * An item without any votes scores 0.0.
     *
     * @param int   $positiveVotes Number of positive votes; must be between 0 and $totalVotes
     * @param int   $totalVotes    Total number of votes cast; must not be negative
     * @param float $confidence    z value of the desired confidence level
     *
     * @return float Lower bound of the Wilson score interval
     *
     * @throws \InvalidArgumentException When a count is negative or $positiveVotes exceeds $totalVotes
     */
    public function getScore(int $positiveVotes, int $totalVotes, float $confidence = self::CONFIDENCE) : float
    {
        if ($positiveVotes < 0 || $totalVotes < 0) {
            throw new \InvalidArgumentException('Vote counts must not be negative.');
        }

        // A proportion above 1 would make the square-root argument negative and yield NAN.
        if ($positiveVotes > $totalVotes) {
            throw new \InvalidArgumentException('Positive votes must not exceed total votes.');
        }

        // No votes: nothing to estimate, and the formula would divide by zero.
        if ($totalVotes === 0) {
            return 0.0;
        }

        return $this->lowerBound($positiveVotes, $totalVotes, $confidence);
    }

    /**
     * Evaluates numerator / denominator of the interval's lower bound.
     *
     * Expects $totalVotes to be non-zero; getScore() guarantees that.
     */
    private function lowerBound(int $positiveVotes, int $totalVotes, float $confidence) : float
    {
        // Observed fraction of positive votes.
        $phat = $positiveVotes / $totalVotes;

        $numerator   = $this->calculationNumerator($totalVotes, $confidence, $phat);
        $denominator = $this->calculationDenominator($totalVotes, $confidence);

        return $numerator / $denominator;
    }

    /**
     * Denominator term: 1 + z^2 / n.
     */
    private function calculationDenominator(int $total, float $z) : float
    {
        return 1 + $z * $z / $total;
    }

    /**
     * Numerator term: phat + z^2 / (2n) - z * sqrt((phat * (1 - phat) + z^2 / (4n)) / n).
     */
    private function calculationNumerator(int $total, float $z, float $phat) : float
    {
        $centre = $phat + $z * $z / (2 * $total);
        $spread = $z * sqrt(($phat * (1 - $phat) + $z * $z / (4 * $total)) / $total);

        return $centre - $spread;
    }
}
<?php | |
/* | |
* (c) Mark Badolato <[email protected]> | |
* | |
* This content is released under the {@link http://www.opensource.org/licenses/MIT MIT License.} | |
*/ | |
namespace Bado\Tests\ScoreCalculator; | |
use Bado\WilsonConfidenceIntervalCalculator; | |
use PHPUnit\Framework\TestCase; | |
/**
 * Checks the calculator's scores against pre-calculated reference values.
 */
class WilsonConfidenceIntervalCalculatorTest extends TestCase
{
    /**
     * Largest absolute difference tolerated between expected and calculated scores.
     * The reference values are only given to six decimal places.
     */
    private const DELTA = 0.000001;

    /**
     * @test
     * @dataProvider ratingsProvider
     *
     * @param int   $positiveVotes
     * @param int   $totalVotes
     * @param float $expectedScore
     */
    public function it_can_calculate_scores_properly(int $positiveVotes, int $totalVotes, float $expectedScore) : void
    {
        $calculator = new WilsonConfidenceIntervalCalculator();

        $calculatedScore = $calculator->getScore($positiveVotes, $totalVotes);

        // assertEquals() no longer accepts a delta argument in current PHPUnit
        // releases; assertEqualsWithDelta() is the dedicated replacement.
        self::assertEqualsWithDelta($expectedScore, $calculatedScore, self::DELTA);
    }

    /**
     * Pre-calculated score results using the known Ruby implementation.
     *
     * Declared static because newer PHPUnit releases require static data providers.
     *
     * @return array Each dataset is [$positiveVotes, $totalVotes, $expectedScore]
     */
    public static function ratingsProvider() : array
    {
        return [
            'no votes'        => [0, 0, 0.0],
            '0 of 10 votes'   => [0, 10, 0.0],
            '1 of 2 votes'    => [1, 2, 0.077136],
            '10 of 10 votes'  => [10, 10, 0.665607],
            '10 of 20 votes'  => [10, 20, 0.275967],
            '52 of 76 votes'  => [52, 76, 0.556480],
        ];
    }
}
@flyingbaba This class is not made for that use case. Please read the page listed in the class for an explanation.
@mbadolato well it's written at the end that yelp uses it so it must be usable and as I saw on another ruby version it's used on a 5 stars rating system!
https://github.com/instacart/wilson_score#star-ratings
#5 Stars Ratings
average_rating = 4.5
total_ratings = 2
rating_range = 1..5 # 1 to 5 stars
WilsonScore.rating_lower_bound(average_rating, total_ratings, rating_range)
and check this as well
http://www.goproblems.com/test/wilson/wilson.php?v1=0&v2=0&v3=3&v4=0&v5=0
@capensisma It's quite possible that they modified it. The original article linked in the source talks about adding other things to it. In my case, I was adding metrics like downloads, making $n of them count as a positive vote etc. The wilson score star ratings you linked to does look like it was heavily modified to take it into account. I'm personally not interested in porting that over at this time, but if you port it over, I'm sure some people would find it useful! 😄
@capensisma @mbadolato From a quick look, it appears as if they've added the ability to account for ranges via the following method:
Get average, subtract the minimum vote (1 for a 5 star rating system), multiply that by the total number of votes, divide that by the range interval (max - min, so a 1-5 star rating would have an interval of 4), and use that final result for the number of "upvotes".
So, if we posit an example of 10 votes, half 1's and half 5's, then that's basically equivalent to 5 upvotes and 5 downvotes. The average in such a spread would be 3 stars. Using their math: (3 -1) * 10 / 4 = 5 upvotes, which means 5 downvotes.
If we now posit an example of 10 votes, half 3's and half 5's, average is 4. Using their math: (4 - 1) * 10 / 4 = 7.5 upvotes, which would mean 2.5 downvotes.
Basically, 1 star in their system is a full downvote, 5 stars is a full upvote. Any other rating is partially a downvote and partially an upvote, only instead of counting all the votes up individually, they rely on figuring out the final spread based on the average rating. This works fine if your average value is accurate, but causes there to be variance if your average is actually a rounded value. So, make sure you use enough significant digits on your average number for it to be accurate. None of this rounding to 2 digits stuff.
To apply this to this PHP library, you could add something like this:
/**
 * Scores an item rated on a numeric scale (e.g. 1 to 5 stars).
 *
 * Meant to be added as a method of the calculator class. The average rating is
 * converted into an equivalent number of positive votes: the lowest rating
 * counts as a full downvote, the highest as a full upvote, and anything in
 * between as a proportional mix. That count is then passed to getScore().
 *
 * @param float $average    Average rating; use the unrounded value, since rounding skews the vote count
 * @param int   $totalVotes Number of ratings received
 * @param float $minRange   Lowest rating on the scale
 * @param float $maxRange   Highest rating on the scale; must be greater than $minRange
 * @param float $confidence z value of the desired confidence level
 *
 * @return float Score returned by getScore() for the equivalent vote counts
 *
 * @throws \InvalidArgumentException When $maxRange is not greater than $minRange
 */
function getRatingScore(float $average, int $totalVotes, float $minRange = 1, float $maxRange = 5, float $confidence = self::CONFIDENCE) : float
{
    // A zero-width (or inverted) scale would divide by zero below.
    if ($maxRange <= $minRange) {
        throw new \InvalidArgumentException('$maxRange must be greater than $minRange.');
    }

    $positiveVotes = (($average - $minRange) * $totalVotes) / ($maxRange - $minRange);

    // getScore() takes an integer vote count. Convert explicitly (to the nearest
    // whole vote) instead of relying on implicit float-to-int conversion of a
    // fractional value.
    return $this->getScore((int) round($positiveVotes), $totalVotes, $confidence);
}
Whether this is mathematically sound or not, no idea. Just basing it on that Ruby library listed above.
Updates to PHP 7
How can I build a 5-star rating system and calculate its score using this class?