Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(491)

Unified Diff: tools/statistics-for-json.R

Issue 1659043002: Add an R script that does some statistical tests on benchmark results (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: Format to 80 chars per lines Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: tools/statistics-for-json.R
diff --git a/tools/statistics-for-json.R b/tools/statistics-for-json.R
new file mode 100644
index 0000000000000000000000000000000000000000..254c479e84ae65e524cd33a0928d5e61a0348bc4
--- /dev/null
+++ b/tools/statistics-for-json.R
@@ -0,0 +1,124 @@
+# Copyright 2016 the V8 project authors. All rights reserved.
Michael Achenbach 2016/02/02 15:53:10 Could you add a new, short copyright header? E.g.
sigurds 2016/02/02 16:02:02 Done.
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+# * Neither the name of Google Inc. nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# Do statistical tests on benchmark results
+#
+# usage: Rscript statistics-for-json.R patched-results.json \
+#            unpatched-results.json output-directory
+#
+# Both JSON files must provide a `traces` list. For every trace the script
+# reads `graphs[[2]]` (used as the benchmark name) and `results` (the
+# scores, coerced to integers). Per benchmark it prints the outcome of a
+# Shapiro-Wilk test for each sample, a Wilcoxon rank-sum test and a Welch
+# t-test, and writes one histogram per sample into the output directory.
+# Finally it writes a plot of the estimated effect sizes of all benchmarks.
+
+suppressMessages(library("rjson")) # for fromJSON
+suppressMessages(library("R.utils")) # for printf
+suppressMessages(library("ggplot2")) # for plotting
+suppressMessages(library("data.table")) # less broken than data.frame
+
+# Write a 50-bin histogram of `scores` to `filename`, using `label` as the
+# title of the x axis.
+saveHistogram <- function(scores, label, filename) {
+  histogram <- ggplot(data = data.frame(x = scores), aes(x)) +
+    theme_bw() +
+    geom_histogram(bins = 50) +
+    ylab("Points") +
+    xlab(label)
+  ggsave(filename = filename, plot = histogram, width = 7, height = 7)
+}
+
+args <- commandArgs(TRUE)
+if (length(args) != 3) {
+  # All three arguments are required; the third one is the directory that
+  # receives the generated plots.
+  printf(paste("usage: Rscript %%this_script patched-results.json",
+               "unpatched-results.json output-directory\n"))
+} else {
+  patch <- fromJSON(file = args[1])
+  nopatch <- fromJSON(file = args[2])
+  outputPath <- args[3]
+  # One row per analysed benchmark: confidence interval [L, R] and estimate
+  # E of the Wilcoxon test, its p-value, the benchmark name yL and whether
+  # the p-value is below 0.05.
+  df <- data.table(L = numeric(), R = numeric(), E = numeric(),
+                   p.value = numeric(), yL = character(),
+                   p.value.sig = logical())
+
+  # Traces of the two files are paired by position.
+  # NOTE(review): presumably both files list the same benchmarks in the
+  # same order -- confirm against the tool that produces them.
+  for (i in seq_along(patch$traces)) {
+    testName <- patch$traces[[i]]$graphs[[2]]
+    printf("%s\n", testName)
+
+    nopatch_res <- as.integer(nopatch$traces[[i]]$results)
+    patch_res <- as.integer(patch$traces[[i]]$results)
+    # shapiro.test() rejects samples with fewer than 3 values, so traces
+    # that are too short on either side are skipped.
+    if (min(length(patch_res), length(nopatch_res)) >= 3) {
+      patch_norm <- shapiro.test(patch_res)
+      nopatch_norm <- shapiro.test(nopatch_res)
+
+      # The Shapiro-Wilk test checks the hypothesis that a sample was drawn
+      # from a normal distribution. The p-value is the probability of seeing
+      # a sample at least this far from normal if that hypothesis holds, so
+      # the smaller p, the more likely the sample was not drawn from a
+      # normal distribution. See [wikipedia:Shapiro-Wilk-Test].
+      printf(" Patched scores look %s distributed (W=%.4f, p=%.4f)\n",
+             ifelse(patch_norm$p.value < 0.05, "not normally", "normally"),
+             patch_norm$statistic, patch_norm$p.value)
+      printf(" Unpatched scores look %s distributed (W=%.4f, p=%.4f)\n",
+             ifelse(nopatch_norm$p.value < 0.05, "not normally", "normally"),
+             nopatch_norm$statistic, nopatch_norm$p.value)
+
+      saveHistogram(patch_res, testName,
+                    sprintf("%s/%s.svg", outputPath, testName))
+      saveHistogram(nopatch_res, testName,
+                    sprintf("%s/%s-before.svg", outputPath, testName))
+
+      # The Wilcoxon rank-sum test
+      mww <- wilcox.test(patch_res, nopatch_res, conf.int = TRUE,
+                         exact = TRUE)
+      printf(paste(" Wilcoxon U-test W=%.4f, p=%.4f,",
+                   "confidence interval [%.1f, %.1f],",
+                   "est. effect size %.1f \n"),
+             mww$statistic, mww$p.value,
+             mww$conf.int[1], mww$conf.int[2], mww$estimate)
+      df <- rbind(df, list(mww$conf.int[1], mww$conf.int[2],
+                           unname(mww$estimate), unname(mww$p.value),
+                           testName, mww$p.value < 0.05))
+      # t-test
+      welch <- t.test(patch_res, nopatch_res, paired = FALSE)
+      printf(paste(" Welch t-test t=%.4f, df = %.2f, p=%.4f,",
+                   "confidence interval [%.1f, %.1f], mean diff %.1f \n"),
+             welch$statistic, welch$parameter, welch$p.value,
+             welch$conf.int[1], welch$conf.int[2],
+             welch$estimate[1] - welch$estimate[2])
+    }
+  }
+
+  if (nrow(df) == 0) {
+    # Nothing was analysed, so there is nothing to summarise.
+    printf("No benchmark with enough results, skipping summary plot\n")
+  } else {
+    # Benchmarks are plotted in order of increasing estimated effect.
+    df2 <- cbind(x = seq_len(nrow(df)), df[order(E), ])
+    # The n in the axis title is the sample count of the last trace read by
+    # the loop above.
+    # NOTE(review): presumably every benchmark has the same number of runs.
+    speedup <- ggplot(df2, aes(x = x, y = E, colour = p.value.sig)) +
+      # L is the lower and R the upper bound of the confidence interval.
+      geom_errorbar(aes(ymin = L, ymax = R), colour = "black") +
+      geom_point(size = 4) +
+      scale_x_discrete(limits = df2$yL,
+                       name = paste("Benchmark, n=", length(patch_res))) +
+      theme_bw() +
+      geom_hline(yintercept = 0) +
+      ylab("Est. Effect Size in Points") +
+      theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
+      # Fixed limits keep red/"not significant" tied to FALSE and
+      # green/"significant" tied to TRUE even if only one class occurs.
+      scale_colour_manual(name = "Statistical Significance (MWW, p < 0.05)",
+                          limits = c("FALSE", "TRUE"),
+                          values = c("red", "green"),
+                          labels = c("not significant", "significant")) +
+      theme(legend.justification = c(0, 1), legend.position = c(0, 1))
+    print(speedup)
+    ggsave(filename = sprintf("%s/speedup-estimates.svg", outputPath),
+           plot = speedup, width = 7, height = 7)
+  }
+}
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698