OLD | NEW |
| (Empty) |
1 # Copyright 2012 Google Inc. All Rights Reserved. | |
2 # | |
3 # Licensed under the Apache License, Version 2.0 (the "License"); | |
4 # you may not use this file except in compliance with the License. | |
5 # You may obtain a copy of the License at | |
6 # | |
7 # http://www.apache.org/licenses/LICENSE-2.0 | |
8 # | |
9 # Unless required by applicable law or agreed to in writing, software | |
10 # distributed under the License is distributed on an "AS IS" BASIS, | |
11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
12 # See the License for the specific language governing permissions and | |
13 # limitations under the License. | |
14 | |
15 """Contains the perfdiag gsutil command.""" | |
16 | |
17 import calendar | |
18 from collections import defaultdict | |
19 import contextlib | |
20 import datetime | |
21 import json | |
22 import math | |
23 import multiprocessing | |
24 import os | |
25 import re | |
26 import socket | |
27 import string | |
28 import subprocess | |
29 import tempfile | |
30 import time | |
31 | |
32 import boto.gs.connection | |
33 | |
34 from gslib.command import Command | |
35 from gslib.command import COMMAND_NAME | |
36 from gslib.command import COMMAND_NAME_ALIASES | |
37 from gslib.command import CONFIG_REQUIRED | |
38 from gslib.command import FILE_URIS_OK | |
39 from gslib.command import MAX_ARGS | |
40 from gslib.command import MIN_ARGS | |
41 from gslib.command import PROVIDER_URIS_OK | |
42 from gslib.command import SUPPORTED_SUB_ARGS | |
43 from gslib.command import URIS_START_ARG | |
44 from gslib.commands import config | |
45 from gslib.exception import CommandException | |
46 from gslib.help_provider import HELP_NAME | |
47 from gslib.help_provider import HELP_NAME_ALIASES | |
48 from gslib.help_provider import HELP_ONE_LINE_SUMMARY | |
49 from gslib.help_provider import HELP_TEXT | |
50 from gslib.help_provider import HELP_TYPE | |
51 from gslib.help_provider import HelpType | |
52 from gslib.util import IS_LINUX | |
53 from gslib.util import MakeBitsHumanReadable | |
54 from gslib.util import MakeHumanReadable | |
55 from gslib.util import Percentile | |
56 | |
# Help text rendered by gsutil's help subsystem; <B>...</B> tags are
# interpreted by the help formatter for emphasis.
_detailed_help_text = ("""
<B>SYNOPSIS</B>
  gsutil perfdiag [-i in.json] [-o out.json]
       [-n iterations] [-c concurrency] [-s size] [-t tests] uri...


<B>DESCRIPTION</B>
  The perfdiag command runs a suite of diagnostic tests for a given Google
  Storage bucket.

  The 'uri' parameter must name an existing bucket (e.g. gs://foo) to which
  the user has write permission. Several test files will be uploaded to and
  downloaded from this bucket. All test files will be deleted at the completion
  of the diagnostic if it finishes successfully.

  gsutil performance can be impacted by many factors at the client, server,
  and in-between, such as: CPU speed; available memory; the access path to the
  local disk; network bandwidth; contention and error rates along the path
  between gsutil and Google; operating system buffering configuration; and
  firewalls and other network elements. The perfdiag command is provided so
  that customers can run a known measurement suite when troubleshooting
  performance problems.


<B>PROVIDING DIAGNOSTIC OUTPUT TO GOOGLE CLOUD STORAGE TEAM</B>
  If the Google Cloud Storage Team asks you to run a performance diagnostic
  please use the following command, and email the output file (output.json)
  to gs-team@google.com:

    gsutil perfdiag -o output.json gs://your-bucket


<B>OPTIONS</B>
  -n          Sets the number of iterations performed when downloading and
              uploading files during latency and throughput tests. Defaults to
              5.

  -c          Sets the level of concurrency to use while running throughput
              experiments. The default value of 1 will only run a single read
              or write operation concurrently.

  -s          Sets the size (in bytes) of the test file used to perform read
              and write throughput tests. The default is 1 MiB.

  -t          Sets the list of diagnostic tests to perform. The default is to
              run all diagnostic tests. Must be a comma-separated list
              containing one or more of the following:

              lat: Runs N iterations (set with -n) of writing the file,
                   retrieving its metadata, reading the file, and deleting
                   the file. Records the latency of each operation.

              rthru: Runs N (set with -n) read operations, with at most C
                     (set with -c) reads outstanding at any given time.

              wthru: Runs N (set with -n) write operations, with at most C
                     (set with -c) writes outstanding at any given time.

  -o          Writes the results of the diagnostic to an output file. The output
              is a JSON file containing system information and performance
              diagnostic results. The file can be read and reported later using
              the -i option.

  -i          Reads the JSON output file created using the -o command and prints
              a formatted description of the results.


<B>NOTE</B>
  The perfdiag command collects system information. It collects your IP address,
  executes DNS queries to Google servers and collects the results, and collects
  network statistics information from the output of netstat -s. None of this
  information will be sent to Google unless you choose to send it.
""")
130 | |
131 | |
class PerfDiagCommand(Command):
  """Implementation of gsutil perfdiag command.

  Runs latency and read/write throughput diagnostics against a user-supplied
  bucket and optionally serializes the results (plus collected system
  information) to a JSON file for later display or for sharing with the
  Google Cloud Storage team.
  """

  # Command specification (processed by parent class).
  command_spec = {
      # Name of command.
      COMMAND_NAME: 'perfdiag',
      # List of command name aliases.
      COMMAND_NAME_ALIASES: ['diag', 'diagnostic', 'perf', 'performance'],
      # Min number of args required by this command.
      MIN_ARGS: 0,
      # Max number of args required by this command, or NO_MAX.
      MAX_ARGS: 1,
      # Getopt-style string specifying acceptable sub args.
      SUPPORTED_SUB_ARGS: 'n:c:s:t:i:o:',
      # True if file URIs acceptable for this command.
      FILE_URIS_OK: False,
      # True if provider-only URIs acceptable for this command.
      PROVIDER_URIS_OK: False,
      # Index in args of first URI arg.
      URIS_START_ARG: 0,
      # True if must configure gsutil before running command.
      CONFIG_REQUIRED: True,
  }
  help_spec = {
      # Name of command or auxiliary help info for which this help applies.
      HELP_NAME: 'perfdiag',
      # List of help name aliases.
      HELP_NAME_ALIASES: [],
      # Type of help:
      HELP_TYPE: HelpType.COMMAND_HELP,
      # One line summary of this help.
      HELP_ONE_LINE_SUMMARY: 'Run performance diagnostic',
      # The full help text.
      HELP_TEXT: _detailed_help_text,
  }

  # Byte sizes to use for testing files (one temp file per size is created
  # for the latency test; see _SetUp).
  # TODO: Consider letting the user specify these sizes with a configuration
  # parameter.
  test_file_sizes = (
      0,  # 0 bytes
      1024,  # 1 KB
      102400,  # 100 KB
      1048576,  # 1MB
  )

  # List of all diagnostic tests; also the default for the -t option.
  ALL_DIAG_TESTS = ('rthru', 'wthru', 'lat')

  # Google Cloud Storage API endpoint host.
  GOOGLE_API_HOST = boto.gs.connection.GSConnection.DefaultHost
184 | |
185 def _WindowedExec(self, cmd, n, w, raise_on_error=True): | |
186 """Executes a command n times with a window size of w. | |
187 | |
188 Up to w instances of the command will be executed and left outstanding at a | |
189 time until n instances of the command have completed. | |
190 | |
191 Args: | |
192 cmd: List containing the command to execute. | |
193 n: Number of times the command will be executed. | |
194 w: Window size of outstanding commands being executed. | |
195 raise_on_error: See _Exec. | |
196 | |
197 Raises: | |
198 Exception: If raise_on_error is set to True and any process exits with a | |
199 non-zero return code. | |
200 """ | |
201 if self.debug: | |
202 print 'Running command:', cmd | |
203 devnull_f = open(os.devnull, 'w') | |
204 num_finished = 0 | |
205 running = [] | |
206 while len(running) or num_finished < n: | |
207 # Fires off new commands that can be executed. | |
208 while len(running) < w and num_finished + len(running) < n: | |
209 print 'Starting concurrent command: %s' % (' '.join(cmd)) | |
210 p = subprocess.Popen(cmd, stdout=devnull_f, stderr=devnull_f) | |
211 running.append(p) | |
212 | |
213 # Checks for finished commands. | |
214 prev_running = running | |
215 running = [] | |
216 for p in prev_running: | |
217 retcode = p.poll() | |
218 if retcode is None: | |
219 running.append(p) | |
220 elif raise_on_error and retcode: | |
221 raise CommandException("Received non-zero return code (%d) from " | |
222 "subprocess '%s'." % (retcode, ' '.join(cmd))) | |
223 else: | |
224 num_finished += 1 | |
225 | |
226 def _Exec(self, cmd, raise_on_error=True, return_output=False, | |
227 mute_stderr=False): | |
228 """Executes a command in a subprocess. | |
229 | |
230 Args: | |
231 cmd: List containing the command to execute. | |
232 raise_on_error: Whether or not to raise an exception when a process exits | |
233 with a non-zero return code. | |
234 return_output: If set to True, the return value of the function is the | |
235 stdout of the process. | |
236 mute_stderr: If set to True, the stderr of the process is not printed to | |
237 the console. | |
238 | |
239 Returns: | |
240 The return code of the process or the stdout if return_output is set. | |
241 | |
242 Raises: | |
243 Exception: If raise_on_error is set to True and any process exits with a | |
244 non-zero return code. | |
245 """ | |
246 if self.debug: | |
247 print 'Running command:', cmd | |
248 stderr = subprocess.PIPE if mute_stderr else None | |
249 p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=stderr) | |
250 (stdoutdata, stderrdata) = p.communicate() | |
251 if raise_on_error and p.returncode: | |
252 raise CommandException("Received non-zero return code (%d) from " | |
253 "subprocess '%s'." % (p.returncode, ' '.join(cmd))) | |
254 return stdoutdata if return_output else p.returncode | |
255 | |
256 def _GsUtil(self, cmd, raise_on_error=True, return_output=False, | |
257 mute_stderr=False): | |
258 """Executes a gsutil command in a subprocess. | |
259 | |
260 Args: | |
261 cmd: A list containing the arguments to the gsutil program, e.g. ['ls', | |
262 'gs://foo']. | |
263 raise_on_error: see _Exec. | |
264 return_output: see _Exec. | |
265 mute_stderr: see _Exec. | |
266 | |
267 Returns: | |
268 The return code of the process or the stdout if return_output is set. | |
269 """ | |
270 cmd = self.gsutil_exec_list + cmd | |
271 return self._Exec(cmd, raise_on_error=raise_on_error, | |
272 return_output=return_output, mute_stderr=mute_stderr) | |
273 | |
274 def _SetUp(self): | |
275 """Performs setup operations needed before diagnostics can be run.""" | |
276 | |
277 # Stores test result data. | |
278 self.results = {} | |
279 # List of test files in a temporary location on disk for latency ops. | |
280 self.latency_files = [] | |
281 # Maps each test file path to its size in bytes. | |
282 self.file_sizes = {} | |
283 # Maps each test file to its contents as a string. | |
284 self.file_contents = {} | |
285 | |
286 def _MakeFile(file_size): | |
287 """Creates a temporary file of the given size and returns its path.""" | |
288 fd, fpath = tempfile.mkstemp(suffix='.bin', prefix='gsutil_test_file', | |
289 text=False) | |
290 self.file_sizes[fpath] = file_size | |
291 f = os.fdopen(fd, 'wb') | |
292 f.write(os.urandom(file_size)) | |
293 f.close() | |
294 f = open(fpath, 'rb') | |
295 self.file_contents[fpath] = f.read() | |
296 f.close() | |
297 return fpath | |
298 | |
299 # Create files for latency tests. | |
300 for file_size in self.test_file_sizes: | |
301 fpath = _MakeFile(file_size) | |
302 self.latency_files.append(fpath) | |
303 | |
304 # Local file on disk for write throughput tests. | |
305 self.thru_local_file = _MakeFile(self.thru_filesize) | |
306 # Remote file to write/read from during throughput tests. | |
307 self.thru_remote_file = (str(self.bucket_uri) + | |
308 os.path.basename(self.thru_local_file)) | |
309 | |
310 def _TearDown(self): | |
311 """Performs operations to clean things up after performing diagnostics.""" | |
312 for fpath in self.latency_files + [self.thru_local_file]: | |
313 try: | |
314 os.remove(fpath) | |
315 except OSError: | |
316 pass | |
317 | |
318 self._GsUtil(['rm', self.thru_remote_file], raise_on_error=False, | |
319 mute_stderr=True) | |
320 | |
321 @contextlib.contextmanager | |
322 def _Time(self, key, bucket): | |
323 """A context manager that measures time. | |
324 | |
325 A context manager that prints a status message before and after executing | |
326 the inner command and times how long the inner command takes. Keeps track of | |
327 the timing, aggregated by the given key. | |
328 | |
329 Args: | |
330 key: The key to insert the timing value into a dictionary bucket. | |
331 bucket: A dictionary to place the timing value in. | |
332 | |
333 Yields: | |
334 For the context manager. | |
335 """ | |
336 print key, 'starting...' | |
337 t0 = time.time() | |
338 yield | |
339 t1 = time.time() | |
340 bucket[key].append(t1 - t0) | |
341 print key, 'done.' | |
342 | |
  def _RunLatencyTests(self):
    """Runs latency tests.

    For each iteration and each latency test file, uploads the file, fetches
    its metadata, downloads it, and deletes it, recording each operation's
    wall-clock time in self.results['latency'] under a key of the form
    '<OP>_<size_in_bytes>'.
    """
    # Stores timing information for each category of operation.
    self.results['latency'] = defaultdict(list)

    for i in range(self.num_iterations):
      print
      print 'Running latency iteration %d...' % (i+1)
      for fpath in self.latency_files:
        basename = os.path.basename(fpath)
        gsbucket = str(self.bucket_uri)
        gsuri = gsbucket + basename
        file_size = self.file_sizes[fpath]
        readable_file_size = MakeHumanReadable(file_size)

        print
        print ("File of size %(size)s located on disk at '%(fpath)s' being "
               "diagnosed in the cloud at '%(gsuri)s'."
               % {'size': readable_file_size,
                  'fpath': fpath,
                  'gsuri': gsuri})

        # Operations go through boto directly (not a gsutil subprocess) so
        # only the request round-trips are timed, not process startup.
        k = self.bucket.key_class(self.bucket)
        k.key = basename

        with self._Time('UPLOAD_%d' % file_size, self.results['latency']):
          k.set_contents_from_string(self.file_contents[fpath])
        with self._Time('METADATA_%d' % file_size, self.results['latency']):
          k.exists()
        with self._Time('DOWNLOAD_%d' % file_size, self.results['latency']):
          k.get_contents_as_string()
        with self._Time('DELETE_%d' % file_size, self.results['latency']):
          k.delete()
376 | |
  def _RunReadThruTests(self):
    """Runs read throughput tests.

    Downloads the throughput test file num_iterations times and records the
    elapsed time, total bytes transferred, and derived bytes/second under
    self.results['read_throughput'].
    """
    self.results['read_throughput'] = {'file_size': self.thru_filesize,
                                       'num_times': self.num_iterations,
                                       'concurrency': self.concurrency}

    # Copy the file to remote location before reading.
    self._GsUtil(['cp', self.thru_local_file, self.thru_remote_file])

    if self.concurrency == 1:
      # Serial case: read via boto directly so process startup cost is not
      # included in the measurement.
      k = self.bucket.key_class(self.bucket)
      k.key = os.path.basename(self.thru_local_file)
      # Warm up the TCP connection by transferring a couple times first.
      for i in range(2):
        k.get_contents_as_string()
      t0 = time.time()
      for i in range(self.num_iterations):
        k.get_contents_as_string()
      t1 = time.time()
    else:
      # Concurrent case: shell out to 'gsutil cp', discarding the downloaded
      # bytes to os.devnull, with at most self.concurrency outstanding.
      cmd = self.gsutil_exec_list + ['cp', self.thru_remote_file, os.devnull]
      t0 = time.time()
      self._WindowedExec(cmd, self.num_iterations, self.concurrency)
      t1 = time.time()

    time_took = t1 - t0
    total_bytes_copied = self.thru_filesize * self.num_iterations
    bytes_per_second = total_bytes_copied / time_took

    self.results['read_throughput']['time_took'] = time_took
    self.results['read_throughput']['total_bytes_copied'] = total_bytes_copied
    self.results['read_throughput']['bytes_per_second'] = bytes_per_second
409 | |
  def _RunWriteThruTests(self):
    """Runs write throughput tests.

    Uploads the throughput test file num_iterations times and records the
    elapsed time, total bytes transferred, and derived bytes/second under
    self.results['write_throughput'].
    """
    self.results['write_throughput'] = {'file_size': self.thru_filesize,
                                        'num_copies': self.num_iterations,
                                        'concurrency': self.concurrency}

    if self.concurrency == 1:
      # Serial case: upload via boto from the in-memory contents captured in
      # _SetUp, so disk reads and process startup are excluded.
      k = self.bucket.key_class(self.bucket)
      k.key = os.path.basename(self.thru_local_file)
      # Warm up the TCP connection by transferring a couple times first.
      for i in range(2):
        k.set_contents_from_string(self.file_contents[self.thru_local_file])
      t0 = time.time()
      for i in range(self.num_iterations):
        k.set_contents_from_string(self.file_contents[self.thru_local_file])
      t1 = time.time()
    else:
      # Concurrent case: shell out to 'gsutil cp' with at most
      # self.concurrency uploads outstanding at once.
      cmd = self.gsutil_exec_list + ['cp', self.thru_local_file,
                                     self.thru_remote_file]
      t0 = time.time()
      self._WindowedExec(cmd, self.num_iterations, self.concurrency)
      t1 = time.time()

    time_took = t1 - t0
    total_bytes_copied = self.thru_filesize * self.num_iterations
    bytes_per_second = total_bytes_copied / time_took

    self.results['write_throughput']['time_took'] = time_took
    self.results['write_throughput']['total_bytes_copied'] = total_bytes_copied
    self.results['write_throughput']['bytes_per_second'] = bytes_per_second
440 | |
441 def _GetDiskCounters(self): | |
442 """Retrieves disk I/O statistics for all disks. | |
443 | |
444 Adapted from the psutil module's psutil._pslinux.disk_io_counters: | |
445 http://code.google.com/p/psutil/source/browse/trunk/psutil/_pslinux.py | |
446 | |
447 Originally distributed under under a BSD license. | |
448 Original Copyright (c) 2009, Jay Loden, Dave Daeschler, Giampaolo Rodola. | |
449 | |
450 Returns: | |
451 A dictionary containing disk names mapped to the disk counters from | |
452 /disk/diskstats. | |
453 """ | |
454 # iostat documentation states that sectors are equivalent with blocks and | |
455 # have a size of 512 bytes since 2.4 kernels. This value is needed to | |
456 # calculate the amount of disk I/O in bytes. | |
457 sector_size = 512 | |
458 | |
459 partitions = [] | |
460 with open('/proc/partitions', 'r') as f: | |
461 lines = f.readlines()[2:] | |
462 for line in lines: | |
463 _, _, _, name = line.split() | |
464 if name[-1].isdigit(): | |
465 partitions.append(name) | |
466 | |
467 retdict = {} | |
468 with open('/proc/diskstats', 'r') as f: | |
469 for line in f: | |
470 values = line.split()[:11] | |
471 _, _, name, reads, _, rbytes, rtime, writes, _, wbytes, wtime = values | |
472 if name in partitions: | |
473 rbytes = int(rbytes) * sector_size | |
474 wbytes = int(wbytes) * sector_size | |
475 reads = int(reads) | |
476 writes = int(writes) | |
477 rtime = int(rtime) | |
478 wtime = int(wtime) | |
479 retdict[name] = (reads, writes, rbytes, wbytes, rtime, wtime) | |
480 return retdict | |
481 | |
482 def _GetTcpStats(self): | |
483 """Tries to parse out TCP packet information from netstat output. | |
484 | |
485 Returns: | |
486 A dictionary containing TCP information | |
487 """ | |
488 # netstat return code is non-zero for -s on Linux, so don't raise on error. | |
489 netstat_output = self._Exec(['netstat', '-s'], return_output=True, | |
490 raise_on_error=False) | |
491 netstat_output = netstat_output.strip().lower() | |
492 found_tcp = False | |
493 tcp_retransmit = None | |
494 tcp_received = None | |
495 tcp_sent = None | |
496 for line in netstat_output.split('\n'): | |
497 # Header for TCP section is "Tcp:" in Linux/Mac and | |
498 # "TCP Statistics for" in Windows. | |
499 if 'tcp:' in line or 'tcp statistics' in line: | |
500 found_tcp = True | |
501 | |
502 # Linux == "segments retransmited" (sic), Mac == "retransmit timeouts" | |
503 # Windows == "segments retransmitted". | |
504 if (found_tcp and tcp_retransmit is None and | |
505 ('segments retransmited' in line or 'retransmit timeouts' in line or | |
506 'segments retransmitted' in line)): | |
507 tcp_retransmit = ''.join(c for c in line if c in string.digits) | |
508 | |
509 # Linux+Windows == "segments received", Mac == "packets received". | |
510 if (found_tcp and tcp_received is None and | |
511 ('segments received' in line or 'packets received' in line)): | |
512 tcp_received = ''.join(c for c in line if c in string.digits) | |
513 | |
514 # Linux == "segments send out" (sic), Mac+Windows == "packets sent". | |
515 if (found_tcp and tcp_sent is None and | |
516 ('segments send out' in line or 'packets sent' in line or | |
517 'segments sent' in line)): | |
518 tcp_sent = ''.join(c for c in line if c in string.digits) | |
519 | |
520 result = {} | |
521 try: | |
522 result['tcp_retransmit'] = int(tcp_retransmit) | |
523 result['tcp_received'] = int(tcp_received) | |
524 result['tcp_sent'] = int(tcp_sent) | |
525 except (ValueError, TypeError): | |
526 result['tcp_retransmit'] = None | |
527 result['tcp_received'] = None | |
528 result['tcp_sent'] = None | |
529 | |
530 return result | |
531 | |
  def _CollectSysInfo(self):
    """Collects system information.

    Gathers local host details (IP address, temp dir, timestamp), DNS routing
    information for the Google API endpoint, CPU and memory statistics, and
    the gsutil configuration, storing everything in self.results['sysinfo'].
    Note: runs external 'nslookup' commands and performs DNS lookups.
    """
    sysinfo = {}

    # Get the local IP address from socket lib.
    sysinfo['ip_address'] = socket.gethostbyname(socket.gethostname())
    # Record the temporary directory used since it can affect performance, e.g.
    # when on a networked filesystem.
    sysinfo['tempdir'] = tempfile.gettempdir()

    # Produces an RFC 2822 compliant GMT timestamp.
    sysinfo['gmt_timestamp'] = time.strftime('%a, %d %b %Y %H:%M:%S +0000',
                                             time.gmtime())

    # Execute a CNAME lookup on Google DNS to find what Google server
    # it's routing to.
    cmd = ['nslookup', '-type=CNAME', self.GOOGLE_API_HOST]
    nslookup_cname_output = self._Exec(cmd, return_output=True)
    # First label of the CNAME target identifies the serving frontend.
    m = re.search(r' = (?P<googserv>[^.]+)\.', nslookup_cname_output)
    sysinfo['googserv_route'] = m.group('googserv') if m else None

    # Look up IP addresses for Google Server.
    (hostname, aliaslist, ipaddrlist) = socket.gethostbyname_ex(
        self.GOOGLE_API_HOST)
    sysinfo['googserv_ips'] = ipaddrlist

    # Reverse lookup the hostnames for the Google Server IPs.
    sysinfo['googserv_hostnames'] = []
    for googserv_ip in ipaddrlist:
      (hostname, aliaslist, ipaddrlist) = socket.gethostbyaddr(googserv_ip)
      sysinfo['googserv_hostnames'].append(hostname)

    # Query o-o to find out what the Google DNS thinks is the user's IP.
    cmd = ['nslookup', '-type=TXT', 'o-o.myaddr.google.com.']
    nslookup_txt_output = self._Exec(cmd, return_output=True)
    m = re.search(r'text\s+=\s+"(?P<dnsip>[\.\d]+)"', nslookup_txt_output)
    sysinfo['dns_o-o_ip'] = m.group('dnsip') if m else None

    # Try and find the number of CPUs in the system if available.
    try:
      sysinfo['cpu_count'] = multiprocessing.cpu_count()
    except NotImplementedError:
      sysinfo['cpu_count'] = None

    # For *nix platforms, obtain the CPU load.
    try:
      sysinfo['load_avg'] = list(os.getloadavg())
    except (AttributeError, OSError):
      sysinfo['load_avg'] = None

    # Try and collect memory information from /proc/meminfo if possible.
    mem_total = None
    mem_free = None
    mem_buffers = None
    mem_cached = None

    try:
      with open('/proc/meminfo', 'r') as f:
        for line in f:
          # NOTE(review): /proc/meminfo values appear to be kilobytes; the
          # *1000 conversion to bytes is an approximation — confirm.
          if line.startswith('MemTotal'):
            mem_total = (int(''.join(c for c in line if c in string.digits))
                         * 1000)
          elif line.startswith('MemFree'):
            mem_free = (int(''.join(c for c in line if c in string.digits))
                        * 1000)
          elif line.startswith('Buffers'):
            mem_buffers = (int(''.join(c for c in line if c in string.digits))
                           * 1000)
          elif line.startswith('Cached'):
            mem_cached = (int(''.join(c for c in line if c in string.digits))
                          * 1000)
    except (IOError, ValueError):
      # Not on Linux (or unexpected format); leave memory fields as None.
      pass

    sysinfo['meminfo'] = {'mem_total': mem_total,
                          'mem_free': mem_free,
                          'mem_buffers': mem_buffers,
                          'mem_cached': mem_cached}

    # Get configuration attributes from config module.
    sysinfo['gsutil_config'] = {}
    for attr in dir(config):
      attr_value = getattr(config, attr)
      # Filter out multiline strings that are not useful.
      if attr.isupper() and not (isinstance(attr_value, basestring) and
                                 '\n' in attr_value):
        sysinfo['gsutil_config'][attr] = attr_value

    self.results['sysinfo'] = sysinfo
621 | |
622 def _DisplayStats(self, trials): | |
623 """Prints out mean, standard deviation, median, and 90th percentile.""" | |
624 n = len(trials) | |
625 mean = float(sum(trials)) / n | |
626 stdev = math.sqrt(sum((x - mean)**2 for x in trials) / n) | |
627 | |
628 print str(n).rjust(6), '', | |
629 print ('%.1f' % (mean * 1000)).rjust(9), '', | |
630 print ('%.1f' % (stdev * 1000)).rjust(12), '', | |
631 print ('%.1f' % (Percentile(trials, 0.5) * 1000)).rjust(11), '', | |
632 print ('%.1f' % (Percentile(trials, 0.9) * 1000)).rjust(11), '' | |
633 | |
  def _DisplayResults(self):
    """Displays results collected from diagnostic run.

    Prints each result section that is present in self.results (latency,
    write/read throughput, system information), and if -o was given also
    serializes self.results to the output JSON file.
    """
    print
    print '=' * 78
    print 'DIAGNOSTIC RESULTS'.center(78)
    print '=' * 78

    if 'latency' in self.results:
      print
      print '-' * 78
      print 'Latency'.center(78)
      print '-' * 78
      print ('Operation Size Trials Mean (ms) Std Dev (ms) '
             'Median (ms) 90th % (ms)')
      print ('========= ========= ====== ========= ============ '
             '=========== ===========')
      # Keys have the form '<OP>_<size>' (see _RunLatencyTests).
      for key in sorted(self.results['latency']):
        trials = sorted(self.results['latency'][key])
        op, numbytes = key.split('_')
        numbytes = int(numbytes)
        if op == 'METADATA':
          print 'Metadata'.rjust(9), '',
          print MakeHumanReadable(numbytes).rjust(9), '',
          self._DisplayStats(trials)
        if op == 'DOWNLOAD':
          print 'Download'.rjust(9), '',
          print MakeHumanReadable(numbytes).rjust(9), '',
          self._DisplayStats(trials)
        if op == 'UPLOAD':
          print 'Upload'.rjust(9), '',
          print MakeHumanReadable(numbytes).rjust(9), '',
          self._DisplayStats(trials)
        if op == 'DELETE':
          print 'Delete'.rjust(9), '',
          print MakeHumanReadable(numbytes).rjust(9), '',
          self._DisplayStats(trials)

    if 'write_throughput' in self.results:
      print
      print '-' * 78
      print 'Write Throughput'.center(78)
      print '-' * 78
      write_thru = self.results['write_throughput']
      print 'Copied a %s file %d times for a total transfer size of %s.' % (
          MakeHumanReadable(write_thru['file_size']),
          write_thru['num_copies'],
          MakeHumanReadable(write_thru['total_bytes_copied']))
      # bytes/s * 8 converts to bits/s for the human-readable rate.
      print 'Write throughput: %s/s.' % (
          MakeBitsHumanReadable(write_thru['bytes_per_second'] * 8))

    if 'read_throughput' in self.results:
      print
      print '-' * 78
      print 'Read Throughput'.center(78)
      print '-' * 78
      read_thru = self.results['read_throughput']
      print 'Copied a %s file %d times for a total transfer size of %s.' % (
          MakeHumanReadable(read_thru['file_size']),
          read_thru['num_times'],
          MakeHumanReadable(read_thru['total_bytes_copied']))
      print 'Read throughput: %s/s.' % (
          MakeBitsHumanReadable(read_thru['bytes_per_second'] * 8))

    if 'sysinfo' in self.results:
      print
      print '-' * 78
      print 'System Information'.center(78)
      print '-' * 78
      info = self.results['sysinfo']
      print 'IP Address: \n %s' % info['ip_address']
      print 'Temporary Directory: \n %s' % info['tempdir']
      print 'Bucket URI: \n %s' % self.results['bucket_uri']

      if 'gmt_timestamp' in info:
        ts_string = info['gmt_timestamp']
        timetuple = None
        try:
          # Convert RFC 2822 string to Linux timestamp.
          timetuple = time.strptime(ts_string, '%a, %d %b %Y %H:%M:%S +0000')
        except ValueError:
          # Older/foreign result files may have a different format; skip.
          pass

        if timetuple:
          # Converts the GMT time tuple to local Linux timestamp.
          localtime = calendar.timegm(timetuple)
          localdt = datetime.datetime.fromtimestamp(localtime)
          print 'Measurement time: \n %s' % localdt.strftime(
              '%Y-%m-%d %I-%M-%S %p %Z')

      print 'Google Server: \n %s' % info['googserv_route']
      print ('Google Server IP Addresses: \n %s' %
             ('\n '.join(info['googserv_ips'])))
      print ('Google Server Hostnames: \n %s' %
             ('\n '.join(info['googserv_hostnames'])))
      print 'Google DNS thinks your IP is: \n %s' % info['dns_o-o_ip']
      print 'CPU Count: \n %s' % info['cpu_count']
      print 'CPU Load Average: \n %s' % info['load_avg']
      try:
        print ('Total Memory: \n %s' %
               MakeHumanReadable(info['meminfo']['mem_total']))
        # Free memory is really MemFree + Buffers + Cached.
        print 'Free Memory: \n %s' % MakeHumanReadable(
            info['meminfo']['mem_free'] +
            info['meminfo']['mem_buffers'] +
            info['meminfo']['mem_cached'])
      except TypeError:
        # meminfo fields are None when /proc/meminfo was unavailable.
        pass

      # Report the change in TCP counters over the test run; TypeError means
      # one of the netstat snapshots failed to parse (values are None).
      netstat_after = info['netstat_end']
      netstat_before = info['netstat_start']
      for tcp_type in ('sent', 'received', 'retransmit'):
        try:
          delta = (netstat_after['tcp_%s' % tcp_type] -
                   netstat_before['tcp_%s' % tcp_type])
          print 'TCP segments %s during test:\n %d' % (tcp_type, delta)
        except TypeError:
          pass

      if 'disk_counters_end' in info and 'disk_counters_start' in info:
        print 'Disk Counter Deltas:\n',
        disk_after = info['disk_counters_end']
        disk_before = info['disk_counters_start']
        print '', 'disk'.rjust(6),
        for colname in ['reads', 'writes', 'rbytes', 'wbytes', 'rtime',
                        'wtime']:
          print colname.rjust(8),
        print
        for diskname in sorted(disk_after):
          before = disk_before[diskname]
          after = disk_after[diskname]
          (reads1, writes1, rbytes1, wbytes1, rtime1, wtime1) = before
          (reads2, writes2, rbytes2, wbytes2, rtime2, wtime2) = after
          print '', diskname.rjust(6),
          deltas = [reads2-reads1, writes2-writes1, rbytes2-rbytes1,
                    wbytes2-wbytes1, rtime2-rtime1, wtime2-wtime1]
          for delta in deltas:
            print str(delta).rjust(8),
          print

    if self.output_file:
      with open(self.output_file, 'w') as f:
        json.dump(self.results, f, indent=2)
      print
      print "Output file written to '%s'." % self.output_file

    print
780 | |
781 def _ParsePositiveInteger(self, val, msg): | |
782 """Tries to convert val argument to a positive integer. | |
783 | |
784 Args: | |
785 val: The value (as a string) to convert to a positive integer. | |
786 msg: The error message to place in the CommandException on an error. | |
787 | |
788 Returns: | |
789 A valid positive integer. | |
790 | |
791 Raises: | |
792 CommandException: If the supplied value is not a valid positive integer. | |
793 """ | |
794 try: | |
795 val = int(val) | |
796 if val < 1: | |
797 raise CommandException(msg) | |
798 return val | |
799 except ValueError: | |
800 raise CommandException(msg) | |
801 | |
802 def _ParseArgs(self): | |
803 """Parses arguments for perfdiag command.""" | |
804 # From -n. | |
805 self.num_iterations = 5 | |
806 # From -c. | |
807 self.concurrency = 1 | |
808 # From -s. | |
809 self.thru_filesize = 1048576 | |
810 # From -t. | |
811 self.diag_tests = self.ALL_DIAG_TESTS | |
812 # From -o. | |
813 self.output_file = None | |
814 # From -i. | |
815 self.input_file = None | |
816 | |
817 if self.sub_opts: | |
818 for o, a in self.sub_opts: | |
819 if o == '-n': | |
820 self.num_iterations = self._ParsePositiveInteger( | |
821 a, 'The -n parameter must be a positive integer.') | |
822 if o == '-c': | |
823 self.concurrency = self._ParsePositiveInteger( | |
824 a, 'The -c parameter must be a positive integer.') | |
825 if o == '-s': | |
826 self.thru_filesize = self._ParsePositiveInteger( | |
827 a, 'The -s parameter must be a positive integer.') | |
828 if o == '-t': | |
829 self.diag_tests = [] | |
830 for test_name in a.strip().split(','): | |
831 if test_name.lower() not in self.ALL_DIAG_TESTS: | |
832 raise CommandException("List of test names (-t) contains invalid " | |
833 "test name '%s'." % test_name) | |
834 self.diag_tests.append(test_name) | |
835 if o == '-o': | |
836 self.output_file = os.path.abspath(a) | |
837 if o == '-i': | |
838 self.input_file = os.path.abspath(a) | |
839 if not os.path.isfile(self.input_file): | |
840 raise CommandException("Invalid input file (-i): '%s'." % a) | |
841 try: | |
842 with open(self.input_file, 'r') as f: | |
843 self.results = json.load(f) | |
844 print "Read input file: '%s'." % self.input_file | |
845 except ValueError: | |
846 raise CommandException("Could not decode input file (-i): '%s'." % | |
847 a) | |
848 return | |
849 | |
850 if not self.args: | |
851 raise CommandException('Wrong number of arguments for "perfdiag" ' | |
852 'command.') | |
853 self.bucket_uri = self.suri_builder.StorageUri(self.args[0]) | |
854 if not self.bucket_uri.names_bucket(): | |
855 raise CommandException('The perfdiag command requires a URI that ' | |
856 'specifies a bucket.\n"%s" is not ' | |
857 'valid.' % self.bucket_uri) | |
858 self.bucket = self.bucket_uri.get_bucket() | |
859 | |
  # Command entry point.
  def RunCommand(self):
    """Called by gsutil when the command is being invoked.

    Returns:
      0 on success (exceptions from helpers propagate otherwise).
    """
    self._ParseArgs()

    # With -i, just re-display a previously saved results file; no tests run.
    if self.input_file:
      self._DisplayResults()
      return 0

    print 'Number of iterations to run: %d' % self.num_iterations
    print 'Base bucket URI: %s' % self.bucket_uri
    print 'Concurrency level: %d' % self.concurrency
    print 'Throughput file size: %s' % MakeHumanReadable(self.thru_filesize)
    print 'Diagnostics to run: %s' % (', '.join(self.diag_tests))

    try:
      self._SetUp()

      # Collect generic system info.
      self._CollectSysInfo()
      # Collect netstat info and disk counters before tests (and again later),
      # so _DisplayResults can report deltas over the test run.
      self.results['sysinfo']['netstat_start'] = self._GetTcpStats()
      if IS_LINUX:
        self.results['sysinfo']['disk_counters_start'] = self._GetDiskCounters()
      # Record bucket URI.
      self.results['bucket_uri'] = str(self.bucket_uri)

      if 'lat' in self.diag_tests:
        self._RunLatencyTests()
      if 'rthru' in self.diag_tests:
        self._RunReadThruTests()
      if 'wthru' in self.diag_tests:
        self._RunWriteThruTests()

      # Collect netstat info and disk counters after tests.
      self.results['sysinfo']['netstat_end'] = self._GetTcpStats()
      if IS_LINUX:
        self.results['sysinfo']['disk_counters_end'] = self._GetDiskCounters()

      self._DisplayResults()
    finally:
      # Always clean up local temp files and the remote test object, even if
      # a diagnostic raised partway through.
      self._TearDown()

    return 0
OLD | NEW |