| Index: source/libvpx/test/android/get_files.py
 | 
| ===================================================================
 | 
| --- source/libvpx/test/android/get_files.py	(revision 0)
 | 
| +++ source/libvpx/test/android/get_files.py	(revision 0)
 | 
| @@ -0,0 +1,118 @@
 | 
| +# Copyright (c) 2013 The WebM project authors. All Rights Reserved.
 | 
| +#
 | 
| +# Use of this source code is governed by a BSD-style license
 | 
| +# that can be found in the LICENSE file in the root of the source
 | 
| +# tree. An additional intellectual property rights grant can be found
 | 
| +# in the file PATENTS.  All contributing project authors may
 | 
| +# be found in the AUTHORS file in the root of the source tree.
 | 
| +#
 | 
| +# This simple script pulls test files from the webm homepage
 | 
| +# It is intelligent enough to only pull files if
 | 
| +#   1) File / test_data folder does not exist
 | 
| +#   2) SHA mismatch
 | 
| +
 | 
| +import pycurl
 | 
| +import csv
 | 
| +import hashlib
 | 
| +import re
 | 
| +import os.path
 | 
| +import time
 | 
| +import itertools
 | 
| +import sys
 | 
| +import getopt
 | 
| +
 | 
| +#globals
 | 
| +url = ''
 | 
| +file_list_path = ''
 | 
| +local_resource_path = ''
 | 
| +
 | 
| +# Helper functions:
 | 
| +# A simple function which returns the sha hash of a file in hex
 | 
| +def get_file_sha(filename):
 | 
| +  try:
 | 
| +    sha_hash = hashlib.sha1()
 | 
| +    with open(filename, 'rb') as file:
 | 
| +      buf = file.read(HASH_CHUNK)
 | 
| +      while len(buf) > 0:
 | 
| +        sha_hash.update(buf)
 | 
| +        buf = file.read(HASH_CHUNK)
 | 
| +      return sha_hash.hexdigest()
 | 
| +  except IOError:
 | 
| +    print "Error reading " + filename
 | 
| +
 | 
| +# Downloads a file from a url, and then checks the sha against the passed
 | 
| +# in sha
 | 
| +def download_and_check_sha(url, filename, sha):
 | 
| +  path = os.path.join(local_resource_path, filename)
 | 
| +  fp = open(path, "wb")
 | 
| +  curl = pycurl.Curl()
 | 
| +  curl.setopt(pycurl.URL, url + "/" + filename)
 | 
| +  curl.setopt(pycurl.WRITEDATA, fp)
 | 
| +  curl.perform()
 | 
| +  curl.close()
 | 
| +  fp.close()
 | 
| +  return get_file_sha(path) == sha
 | 
| +
 | 
| +#constants
 | 
| +ftp_retries = 3
 | 
| +
 | 
| +SHA_COL = 0
 | 
| +NAME_COL = 1
 | 
| +EXPECTED_COL = 2
 | 
| +HASH_CHUNK = 65536
 | 
| +
 | 
| +# Main script
 | 
| +try:
 | 
| +  opts, args = \
 | 
| +      getopt.getopt(sys.argv[1:], \
 | 
| +                    "u:i:o:", ["url=", "input_csv=", "output_dir="])
 | 
| +except:
 | 
| +  print 'get_files.py -u <url> -i <input_csv> -o <output_dir>'
 | 
| +  sys.exit(2)
 | 
| +
 | 
| +for opt, arg in opts:
 | 
| +  if opt == '-u':
 | 
| +    url = arg
 | 
| +  elif opt in ("-i", "--input_csv"):
 | 
| +    file_list_path = os.path.join(arg)
 | 
| +  elif opt in ("-o", "--output_dir"):
 | 
| +    local_resource_path = os.path.join(arg)
 | 
| +
 | 
| +if len(sys.argv) != 7:
 | 
| +  print "Expects two paths and a url!"
 | 
| +  exit(1)
 | 
| +
 | 
| +if not os.path.isdir(local_resource_path):
 | 
| +  os.makedirs(local_resource_path)
 | 
| +
 | 
| +file_list_csv = open(file_list_path, "rb")
 | 
| +
 | 
| +# Our 'csv' file uses multiple spaces as a delimiter, python's
 | 
| +# csv class only uses single character delimiters, so we convert them below
 | 
| +file_list_reader = csv.reader((re.sub(' +', ' ', line) \
 | 
| +    for line in file_list_csv), delimiter = ' ')
 | 
| +
 | 
| +file_shas = []
 | 
| +file_names = []
 | 
| +
 | 
| +for row in file_list_reader:
 | 
| +  if len(row) != EXPECTED_COL:
 | 
| +      continue
 | 
| +  file_shas.append(row[SHA_COL])
 | 
| +  file_names.append(row[NAME_COL])
 | 
| +
 | 
| +file_list_csv.close()
 | 
| +
 | 
| +# Download files, only if they don't already exist and have correct shas
 | 
| +for filename, sha in itertools.izip(file_names, file_shas):
 | 
| +  path = os.path.join(local_resource_path, filename)
 | 
| +  if os.path.isfile(path) \
 | 
| +      and get_file_sha(path) == sha:
 | 
| +    print path + ' exists, skipping'
 | 
| +    continue
 | 
| +  for retry in range(0, ftp_retries):
 | 
| +    print "Downloading " + path
 | 
| +    if not download_and_check_sha(url, filename, sha):
 | 
| +      print "Sha does not match, retrying..."
 | 
| +    else:
 | 
| +      break
 | 
| 
 |