tools/statistics-for-json.R - Issue 1659043002: Add an R script that does some statistical tests on benchmark results

Side by Side Diff: tools/statistics-for-json.R

Issue 1659043002: Add an R script that does some statistical tests on benchmark results (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master

Patch Set: Format to 80 chars per lines Created 4 years, 10 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 # Copyright 2016 the V8 project authors. All rights reserved.
	Michael Achenbach 2016/02/02 15:53:10 Could you add a new, short copyright header? E.g. like: https://code.google.com/p/chromium/codesearch#chromium/src/v8/tools/run_perf.... sigurds 2016/02/02 16:02:02 Done. Show quoted text On 2016/02/02 15:53:10, Michael Achenbach wrote: > Could you add a new, short copyright header? E.g. like: > https://code.google.com/p/chromium/codesearch#chromium/src/v8/tools/run_perf.... Done.
	2 # Redistribution and use in source and binary forms, with or without

	3 # modification, are permitted provided that the following conditions are

	4 # met:

	5 #

	6 # * Redistributions of source code must retain the above copyright

	7 # notice, this list of conditions and the following disclaimer.

	8 # * Redistributions in binary form must reproduce the above

	9 # copyright notice, this list of conditions and the following

	10 # disclaimer in the documentation and/or other materials provided

	11 # with the distribution.

	12 # * Neither the name of Google Inc. nor the names of its

	13 # contributors may be used to endorse or promote products derived

	14 # from this software without specific prior written permission.

	15 #

	16 # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

	17 # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

	18 # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

	19 # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

	20 # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

	21 # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

	22 # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

	23 # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

	24 # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

	25 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

	26 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

	27

	28 # Do statistical tests on benchmark results

	29

	30 suppressMessages(library("rjson")) # for fromJson

	31 suppressMessages(library("R.utils")) # for printf

	32 suppressMessages(library("ggplot2")) # for plotting

	33 suppressMessages(library("data.table")) # less broken than data.frame

	34

	# Compares two benchmark result files (patched vs. unpatched build) trace by
	# trace. For every trace it prints a Shapiro-Wilk normality check, a
	# Wilcoxon rank-sum test and a Welch t-test, and writes one histogram per
	# sample as SVG. Finally it writes a summary plot of the estimated effect
	# sizes (with confidence intervals) for all compared traces.
	#
	# Usage: Rscript statistics-for-json.R patched-results.json
	#          unpatched-results.json output-directory

	# shapiro.test() only accepts samples with at least three values, so
	# smaller samples are skipped instead of aborting the whole run.
	min_sample_size <- 3L

	# Prints the Shapiro-Wilk verdict for one sample.
	#
	# label:  text used at the start of the printed line ("Patched"/"Unpatched").
	# scores: integer vector of benchmark scores.
	#
	# Shapiro-Wilk tests the null hypothesis that the sample was drawn from a
	# normal distribution. The p-value is the probability of seeing a W statistic
	# at least this extreme if that hypothesis holds, so a small p is evidence
	# against normality. Returns the test result invisibly, or NULL if the test
	# could not be computed (e.g. all scores identical).
	report_normality <- function(label, scores) {
	  norm <- tryCatch(shapiro.test(scores), error = function(e) e)
	  if (inherits(norm, "error")) {
	    printf(" %s scores: Shapiro-Wilk test not applicable (%s)\n",
	           label, conditionMessage(norm))
	    return(invisible(NULL))
	  }
	  verdict <- if (norm$p.value < 0.05) "not normally" else "normally"
	  printf(" %s scores look %s distributed (W=%.4f, p=%.4f)\n",
	         label, verdict, norm$statistic, norm$p.value)
	  invisible(norm)
	}

	# Writes a 50-bin histogram of `scores` to the SVG file `path`, using
	# `label` as the x-axis title.
	save_histogram <- function(scores, label, path) {
	  histogram <- ggplot(data = data.frame(x = scores), aes(x)) +
	    theme_bw() +
	    geom_histogram(bins = 50) +
	    ylab("Points") +
	    xlab(label)
	  ggsave(filename = path, plot = histogram, width = 7, height = 7)
	}

	args <- commandArgs(TRUE)
	if (length(args) != 3) {
	  # stop() makes Rscript exit with a failure status on a usage error.
	  stop("usage: Rscript statistics-for-json.R patched-results.json ",
	       "unpatched-results.json output-directory", call. = FALSE)
	}

	patch <- fromJSON(file = args[1])
	nopatch <- fromJSON(file = args[2])
	outputPath <- args[3]
	# ggsave() does not create missing directories.
	dir.create(outputPath, showWarnings = FALSE, recursive = TRUE)

	# Traces are paired by position; this assumes both files list the same
	# benchmarks in the same order (not verified here beyond the count).
	trace_count <- length(patch$traces)
	if (length(nopatch$traces) < trace_count) {
	  stop("unpatched results contain fewer traces than patched results",
	       call. = FALSE)
	}

	# One summary row per compared trace; skipped traces stay NULL.
	rows <- vector("list", trace_count)
	# Sample size shown in the summary plot: that of the last compared trace.
	sample_size <- 0L

	for (i in seq_len(trace_count)) {
	  testName <- patch$traces[[i]]$graphs[[2]]
	  printf("%s\n", testName)

	  nopatch_res <- as.integer(nopatch$traces[[i]]$results)
	  patch_res <- as.integer(patch$traces[[i]]$results)
	  if (min(length(patch_res), length(nopatch_res)) < min_sample_size) {
	    printf(" Skipped: fewer than %d results in one of the samples\n",
	           min_sample_size)
	    next
	  }
	  sample_size <- length(patch_res)

	  report_normality("Patched", patch_res)
	  report_normality("Unpatched", nopatch_res)

	  save_histogram(patch_res, testName,
	                 sprintf("%s/%s.svg", outputPath, testName))
	  save_histogram(nopatch_res, testName,
	                 sprintf("%s/%s-before.svg", outputPath, testName))

	  # The Wilcoxon rank-sum test. With conf.int = TRUE it also reports an
	  # estimate of the location shift (patched minus unpatched) and its
	  # confidence interval.
	  mww <- wilcox.test(patch_res, nopatch_res, conf.int = TRUE, exact = TRUE)
	  printf(paste(" Wilcoxon U-test W=%.4f, p=%.4f,",
	               "confidence interval [%.1f, %.1f],",
	               "est. effect size %.1f \n"),
	         mww$statistic, mww$p.value,
	         mww$conf.int[1], mww$conf.int[2], mww$estimate)
	  # L/R are the lower/upper confidence bounds, E the estimated effect size.
	  rows[[i]] <- list(L = mww$conf.int[1], R = mww$conf.int[2],
	                    E = unname(mww$estimate),
	                    p.value = unname(mww$p.value),
	                    yL = testName,
	                    p.value.sig = unname(mww$p.value) < 0.05)

	  # Welch t-test; t.test() signals an error for (nearly) constant data, which
	  # is reported without aborting the remaining benchmarks.
	  welch <- tryCatch(t.test(patch_res, nopatch_res, paired = FALSE),
	                    error = function(e) e)
	  if (inherits(welch, "error")) {
	    printf(" Welch t-test not applicable (%s)\n", conditionMessage(welch))
	  } else {
	    printf(paste(" Welch t-test t=%.4f, df = %.2f, p=%.4f,",
	                 "confidence interval [%.1f, %.1f], mean diff %.1f \n"),
	           welch$statistic, welch$parameter, welch$p.value,
	           welch$conf.int[1], welch$conf.int[2],
	           welch$estimate[1] - welch$estimate[2])
	  }
	}

	rows <- Filter(Negate(is.null), rows)
	if (length(rows) == 0) {
	  printf("No benchmark could be compared; no summary plot written\n")
	} else {
	  df <- rbindlist(rows)
	  # Order benchmarks by estimated effect size; x is the plot position.
	  df2 <- cbind(x = seq_len(nrow(df)), df[order(E), ])
	  # A factor with both levels fixed keeps colours and legend labels tied to
	  # the right class even when only one of the two classes occurs.
	  df2$significance <- factor(df2$p.value.sig, levels = c(FALSE, TRUE),
	                             labels = c("not significant", "significant"))

	  speedup <- ggplot(df2, aes(x = x, y = E, colour = significance)) +
	    geom_errorbar(aes(ymin = L, ymax = R), colour = "black") +
	    geom_point(size = 4) +
	    scale_x_discrete(limits = df2$yL,
	                     name = paste("Benchmark, n=", sample_size)) +
	    theme_bw() +
	    geom_hline(yintercept = 0) +
	    ylab("Est. Effect Size in Points") +
	    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
	    scale_colour_manual(name = "Statistical Significance (MWW, p < 0.05)",
	                        values = c("not significant" = "red",
	                                   "significant" = "green"),
	                        drop = FALSE) +
	    theme(legend.justification = c(0, 1), legend.position = c(0, 1))
	  # Render on the active graphics device in addition to saving the SVG.
	  print(speedup)
	  ggsave(filename = sprintf("%s/speedup-estimates.svg", outputPath),
	         plot = speedup, width = 7, height = 7)
	}

OLD	NEW

« no previous file with comments | « no previous file | no next file » | no next file with comments »