appengine/monorail/features/spammodel.py - Issue 1868553004: Open Source Monorail

Side by Side Diff

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Keyboard Shortcuts

	File
u :	up to issue
j / k :	jump to file after / before current file
J / K :	jump to next file with a comment after / before current file
	Side-by-side diff
i :	toggle intra-line diffs
e :	expand all comments
c :	collapse all comments
s :	toggle showing all comments
n / p :	next / previous diff chunk or comment
N / P :	next / previous comment
<Up> / <Down> :	next / previous line

	Issue
u :	up to list of issues
j / k :	jump to patch after / before current patch
o / <Enter> :	open current patch in side-by-side view
i :	open current patch in unified diff view

	Issue List
j / k :	jump to issue after / before current issue
o / <Enter> :	open current issue

Side by Side Diff: appengine/monorail/features/spammodel.py

Issue 1868553004: Open Source Monorail (Closed) Base URL: https://chromium.googlesource.com/infra/infra.git@master

Patch Set: Rebase Created 4 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch

OLD	NEW
(Empty)
	1 # Copyright 2016 The Chromium Authors. All rights reserved.

	2 # Use of this source code is governed by a BSD-style

	3 # license that can be found in the LICENSE file or at

	4 # https://developers.google.com/open-source/licenses/bsd

	5 """ Tasks and handlers for maintaining the spam classifier model. These

	6 should be run via cron and task queue rather than manually.

	7 """

	8

	9 import cgi

	10 import csv

	11 import logging

	12 import webapp2

	13 import cloudstorage

	14 import json

	15

	16 from datetime import date

	17 from datetime import datetime

	18 from datetime import timedelta

	19

	20 from framework import servlet

	21 from framework import urls

	22 from google.appengine.api import taskqueue

	23 from google.appengine.api import app_identity

	24 from framework import gcs_helpers

	25

	26 class TrainingDataExport(webapp2.RequestHandler):

	27 """Trigger a training data export task"""

	28 def get(self):

	29 logging.info("Training data export requested.")

	30 taskqueue.add(url=urls.SPAM_DATA_EXPORT_TASK + '.do')

	31

	32 BATCH_SIZE = 100

	33

	34 class TrainingDataExportTask(servlet.Servlet):

	35 """Export any human-labeled ham or spam from the previous day. These

	36 records will be used by a subsequent task to create an updated model.

	37 """

	38 CHECK_SECURITY_TOKEN = False

	39

	40 def ProcessFormData(self, mr, post_data):

	41 logging.info("Training data export initiated.")

	42

	43 bucket_name = app_identity.get_default_gcs_bucket_name()

	44 date_str = date.today().isoformat()

	45 export_target_path = '/' + bucket_name + '/spam_training_data/' + date_str

	46 total_issues = 0

	47

	48 with cloudstorage.open(export_target_path, mode='w',

	49 content_type=None, options=None, retry_params=None) as gcs_file:

	50

	51 csv_writer = csv.writer(gcs_file, delimiter=',', quotechar='"',

	52 quoting=csv.QUOTE_ALL, lineterminator='\n')

	53

	54 since = datetime.now() - timedelta(days=1)

	55

	56 # TODO: Comments, and further pagination

	57 issues, first_comments, _count = (

	58 self.services.spam.GetTrainingIssues(

	59 mr.cnxn, self.services.issue, since, offset=0, limit=BATCH_SIZE))

	60 total_issues += len(issues)

	61 for issue in issues:

	62 # Cloud Prediction API doesn't allow newlines in the training data.

	63 fixed_summary = issue.summary.replace('\r\n', ' ')

	64 fixed_comment = first_comments[issue.issue_id].replace('\r\n', ' ')

	65

	66 csv_writer.writerow([

	67 'spam' if issue.is_spam else 'ham',

	68 fixed_summary, fixed_comment,

	69 ])

	70

	71 self.response.body = json.dumps({

	72 "exported_issue_count": total_issues,

	73 })

	74

OLD	NEW

« no previous file with comments | « appengine/monorail/features/savedqueries_helpers.py ('k') | appengine/monorail/features/stars.py » ('j') | no next file with comments »