| OLD | NEW |
| (Empty) |
| 1 # Copyright (c) 2011, the Dart project authors. Please see the AUTHORS file | |
| 2 # for details. All rights reserved. Use of this source code is governed by a | |
| 3 # BSD-style license that can be found in the LICENSE file. | |
| 4 | |
| 5 #!/usr/bin/env python | |
| 6 # | |
| 7 import re, base64, logging, pickle, httplib2, time, urlparse, urllib2, urllib, S
tringIO, gzip, zipfile | |
| 8 | |
| 9 from google.appengine.ext import webapp, db | |
| 10 | |
| 11 from google.appengine.api import taskqueue, urlfetch, memcache, images, users | |
| 12 from google.appengine.ext.webapp.util import login_required | |
| 13 from google.appengine.ext.webapp import template | |
| 14 | |
| 15 from django.utils import simplejson as json | |
| 16 from django.utils.html import strip_tags | |
| 17 | |
| 18 from oauth2client.appengine import CredentialsProperty | |
| 19 from oauth2client.client import OAuth2WebServerFlow | |
| 20 | |
| 21 import encoder | |
| 22 | |
# TODO(jimhug): Allow client to request desired thumb size.
THUMB_SIZE = (57, 57)  # (width, height) in pixels for generated thumbnails.
READER_API = 'http://www.google.com/reader/api/0'  # Google Reader API base URL.

MAX_SECTIONS = 5   # Upper bound on sections encoded per user reply.
MAX_ARTICLES = 20  # Upper bound on articles cached per feed encoding.
| 29 | |
class UserData(db.Model):
  """Per-user state: OAuth credentials plus an ordered list of Section keys."""
  credentials = CredentialsProperty()
  sections = db.ListProperty(db.Key)

  def getEncodedData(self, articleKeys=None):
    """Serialize up to MAX_SECTIONS of this user's sections.

    If articleKeys is a list, the keys of each encoded feed's top
    articles are appended to it as a side effect.
    """
    # TODO(jimhug): Only return initially visible section in first reply.
    out = encoder.Encoder()
    count = min(MAX_SECTIONS, len(self.sections))
    out.writeInt(count)
    for sec in db.get(self.sections[:count]):
      sec.encode(out, articleKeys)
    return out.getRaw()
| 42 | |
| 43 | |
class Section(db.Model):
  """A named group of feeds, keyed by the Reader category id."""
  title = db.TextProperty()
  feeds = db.ListProperty(db.Key)

  def fixedTitle(self):
    """Display title: everything before the first underscore."""
    return self.title.split('_')[0]

  def encode(self, enc, articleKeys=None):
    """Append this section's header and its feeds' cached encodings to enc.

    When articleKeys is a list, each feed's topArticles keys are
    collected into it.
    """
    # TODO(jimhug): Need to optimize format and support incremental updates.
    enc.writeString(self.key().name())
    enc.writeString(self.fixedTitle())
    enc.writeInt(len(self.feeds))
    for feedEntity in db.get(self.feeds):
      feedEntity.ensureEncodedFeed()
      enc.writeRaw(feedEntity.encodedFeed3)
      if articleKeys is None:
        continue
      articleKeys.extend(feedEntity.topArticles)
| 61 | |
class Feed(db.Model):
  """A single feed plus a cached binary encoding of its newest articles."""
  title = db.TextProperty()
  iconUrl = db.TextProperty()
  lastUpdated = db.IntegerProperty()

  # Cached serialized form (format version 3) and the article keys it holds.
  encodedFeed3 = db.TextProperty()
  topArticles = db.ListProperty(db.Key)

  def ensureEncodedFeed(self, force=False):
    """(Re)build the cached encoding if absent, or unconditionally if force."""
    if not force and self.encodedFeed3 is not None:
      return
    enc = encoder.Encoder()
    keys = []
    self.encode(enc, MAX_ARTICLES, keys)
    logging.info('articleSet length is %s' % len(keys))
    self.topArticles = keys
    self.encodedFeed3 = enc.getRaw()
    self.put()

  def encode(self, enc, maxArticles, articleSet):
    """Write this feed's header and newest articles into enc.

    Appends each encoded article's key to articleSet.
    """
    enc.writeString(self.key().name())
    enc.writeString(self.title)
    enc.writeString(self.iconUrl)

    logging.info('encoding feed: %s' % self.title)
    headers = []
    for art in self.article_set.order('-date').fetch(limit=maxArticles):
      headers.append(art.encodeHeader())
      articleSet.append(art.key())

    enc.writeInt(len(headers))
    enc.writeRaw(''.join(headers))
| 94 | |
| 95 | |
class Article(db.Model):
  """One article belonging to a Feed, with cached snippet and thumbnail."""
  feed = db.ReferenceProperty(Feed)

  title = db.TextProperty()
  author = db.TextProperty()
  content = db.TextProperty()
  snippet = db.TextProperty()
  thumbnail = db.BlobProperty()
  thumbnailSize = db.TextProperty()  # str(THUMB_SIZE) the thumbnail was built at.
  srcurl = db.TextProperty()
  date = db.IntegerProperty()

  def ensureThumbnail(self):
    """Regenerate and persist the thumbnail when THUMB_SIZE has changed."""
    wanted = str(THUMB_SIZE)
    if self.thumbnailSize == wanted:
      return
    self.thumbnail = makeThumbnail(self.content)
    self.thumbnailSize = wanted
    self.put()

  def encodeHeader(self):
    """Return this article's metadata serialized via encoder.Encoder."""
    # TODO(jmesserly): for now always unescape until the crawler catches up
    enc = encoder.Encoder()
    enc.writeString(self.key().name())
    enc.writeString(unescape(self.title))
    enc.writeString(self.srcurl)
    enc.writeBool(self.thumbnail is not None)
    enc.writeString(self.author)
    enc.writeInt(self.date)
    enc.writeString(unescape(self.snippet))
    return enc.getRaw()
| 126 | |
class HtmlFile(db.Model):
  """An uploaded HTML page; stored gzipped when near the 1MB entity cap."""
  # Raw page bytes; gzip-compressed when 'compressed' is True.
  content = db.BlobProperty()
  compressed = db.BooleanProperty()
  filename = db.StringProperty()
  # Automatically stamped with the uploading user and upload time.
  author = db.UserProperty(auto_current_user=True)
  date = db.DateTimeProperty(auto_now_add=True)
| 133 | |
| 134 | |
class UpdateHtml(webapp.RequestHandler):
  """Receives multipart uploads of HTML files and stores them as HtmlFile."""

  def post(self):
    # Reach into webob's MultiDict internals for (name, FieldStorage) pairs.
    upload_files = self.request.POST.multi.__dict__['_items']
    version = self.request.get('version')
    logging.info('files: %r' % upload_files)
    for fieldName, field in upload_files:
      if fieldName != 'files':
        continue
      filename = field.filename
      if version:
        filename = '%s-%s' % (version, filename)
      logging.info('upload: %r' % filename)

      htmlFile = HtmlFile.get_or_insert(filename)
      htmlFile.filename = filename

      # If text > (1MB - 1KB) then gzip text to fit in 1MB space
      text = field.value
      if len(text) > 1024*1023:
        buf = StringIO.StringIO()
        gz = gzip.GzipFile(str(filename), 'wb', fileobj=buf)
        gz.write(text)
        gz.close()
        htmlFile.content = buf.getvalue()
        htmlFile.compressed = True
      else:
        htmlFile.content = text
        htmlFile.compressed = False

      htmlFile.put()

    self.redirect('/')
| 167 | |
class TopHandler(webapp.RequestHandler):
  """Renders the landing page listing recently uploaded HTML files."""

  @login_required
  def get(self):
    user = users.get_current_user()
    # A user without stored prefs must first go authorize Reader access.
    if UserData.get_by_key_name(user.user_id()) is None:
      self.redirect('/update/user')
      return

    recent = HtmlFile.all().order('-date').fetch(limit=30)
    self.response.out.write(template.render('top.html', {'files': recent}))
| 179 | |
| 180 | |
class MainHandler(webapp.RequestHandler):
  """Serves stored HTML pages plus the /dev, /login and /upload pages."""

  @login_required
  def get(self, name):
    """Dispatch on the path suffix; the default serves HtmlFile 'name'."""
    if name == 'dev':
      return self.handleDev()

    elif name == 'login':
      return self.handleLogin()

    elif name == 'upload':
      return self.handleUpload()

    user = users.get_current_user()
    prefs = UserData.get_by_key_name(user.user_id())
    if prefs is None:
      # Unknown user: send them through the OAuth login flow.
      return self.handleLogin()

    html = HtmlFile.get_by_key_name(name)
    if html is None:
      self.error(404)
      return

    self.response.headers['Content-Type'] = 'text/html'

    if html.compressed:
      # Stored gzipped to fit the 1MB entity limit; inflate before serving.
      # TODO(jimhug): This slightly sucks ;-)
      # Can we write directly to the response.out?
      gz = gzip.GzipFile(name, 'rb', fileobj=StringIO.StringIO(html.content))
      self.response.out.write(gz.read())
      gz.close()
    else:
      self.response.out.write(html.content)

    # TODO(jimhug): Include first data packet with html.

  def handleLogin(self):
    """Start the OAuth2 web-server flow for Google Reader access."""
    user = users.get_current_user()
    # TODO(jimhug): Manage secrets for dart.googleplex.com better.
    # TODO(jimhug): Confirm that we need client_secret.
    # NOTE(review): the client secret is hard-coded in source; it should be
    # loaded from configuration rather than version control.
    flow = OAuth2WebServerFlow(
        client_id='267793340506.apps.googleusercontent.com',
        client_secret='5m8H-zyamfTYg5vnpYu1uGMU',
        scope=READER_API,
        user_agent='swarm')

    callback = self.request.relative_url('/oauth2callback')
    authorize_url = flow.step1_get_authorize_url(callback)

    # Stash the flow so /oauth2callback can complete the token exchange.
    memcache.set(user.user_id(), pickle.dumps(flow))

    content = template.render('login.html', {'authorize': authorize_url})
    self.response.out.write(content)

  def handleDev(self):
    """Render the developer page."""
    user = users.get_current_user()
    content = template.render('dev.html', {'user': user})
    self.response.out.write(content)

  def handleUpload(self):
    """Render the upload form page."""
    user = users.get_current_user()
    content = template.render('upload.html', {'user': user})
    self.response.out.write(content)
| 244 | |
| 245 | |
class UploadFeed(webapp.RequestHandler):
  """Receives a JSON feed dump and merges it into its section's feed list."""

  def post(self):
    # Reach into webob's MultiDict internals for (name, FieldStorage) pairs.
    upload_files = self.request.POST.multi.__dict__['_items']
    logging.info('files: %r' % upload_files)
    # Renamed locals: 'file' shadowed the builtin and 'data' was reused for
    # both the form tuple and the parsed JSON.
    for fieldName, field in upload_files:
      if fieldName != 'files':
        continue
      logging.info('upload feed: %r' % field.filename)

      feedData = json.loads(field.value)

      feedId = field.filename
      feed = Feed.get_or_insert(feedId)

      # Find the section to add it to.
      sectionTitle = feedData['section']
      section = findSectionByTitle(sectionTitle)
      if section is not None:
        if feed.key() in section.feeds:
          logging.warn('Already contains feed %s, replacing' % feedId)
          section.feeds.remove(feed.key())

        # Add the feed to the front of the section.
        section.feeds.insert(0, feed.key())
        section.put()

        # Add the articles.
        collectFeed(feed, feedData)
      else:
        logging.error('Could not find section %s to add the feed to' %
                      sectionTitle)

    self.redirect('/')
| 281 | |
# TODO(jimhug): Batch these up and request them more aggressively.
class DataHandler(webapp.RequestHandler):
  """Serves per-article thumbnails/content and whole-user data bundles."""

  def get(self, name):
    """Dispatch on the requested filename's extension or exact name."""
    if name.endswith('.jpg'):
      # Must be a thumbnail
      key = urllib2.unquote(name[:-len('.jpg')])
      article = Article.get_by_key_name(key)
      self.response.headers['Content-Type'] = 'image/jpeg'
      # cache images for 10 hours
      self.response.headers['Cache-Control'] = 'public,max-age=36000'
      article.ensureThumbnail()
      self.response.out.write(article.thumbnail)
    elif name.endswith('.html'):
      # Must be article content
      key = urllib2.unquote(name[:-len('.html')])
      article = Article.get_by_key_name(key)
      self.response.headers['Content-Type'] = 'text/html'
      if article is None:
        content = '<h2>Missing article</h2>'
      else:
        content = article.content
      # cache article content for 10 hours
      self.response.headers['Cache-Control'] = 'public,max-age=36000'
      self.response.out.write(content)
    elif name == 'user.data':
      self.response.out.write(self.getUserData())
    elif name == 'CannedData.dart':
      self.canData()
    elif name == 'CannedData.zip':
      self.canDataZip()
    else:
      self.error(404)

  def getUserData(self, articleKeys=None):
    """Return the current user's encoded section data.

    If articleKeys is a list, article keys are collected into it as a
    side effect of encoding.
    """
    user = users.get_current_user()
    user_id = user.user_id()

    key = 'data_' + user_id
    # need to flush memcache fairly frequently...
    data = memcache.get(key)
    if data is None:
      prefs = UserData.get_or_insert(user_id)
      if prefs is None:
        # TODO(jimhug): Graceful failure for unknown users.
        pass
      data = prefs.getEncodedData(articleKeys)
      # TODO(jimhug): memcache.set(key, data)

    return data

  def canData(self):
    """Emit the user's data as a Dart source file of canned string constants."""
    def makeDartSafe(data):
      # repr() yields a quoted, escaped literal; [1:] drops the leading 'u'
      # of the unicode repr, and '$' must be escaped for Dart strings.
      return repr(unicode(data))[1:].replace('$', '\\$')

    lines = ['// TODO(jimhug): Work out correct copyright for this file.',
             'class CannedData {']

    user = users.get_current_user()
    prefs = UserData.get_by_key_name(user.user_id())
    articleKeys = []
    data = prefs.getEncodedData(articleKeys)
    lines.append('  static final Map<String,String> data = const {')
    for article in db.get(articleKeys):
      key = makeDartSafe(urllib.quote(article.key().name())+'.html')
      lines.append('    %s:%s, ' % (key, makeDartSafe(article.content)))

    lines.append('    "user.data":%s' % makeDartSafe(data))

    lines.append('  };')

    lines.append('}')
    self.response.headers['Content-Type'] = 'application/dart'
    self.response.out.write('\n'.join(lines))

  # Get canned static data
  def canDataZip(self):
    """Emit the user's data and articles as a downloadable zip archive."""
    # We need to zip into an in-memory buffer to get the right string encoding
    # behavior.
    data = StringIO.StringIO()
    result = zipfile.ZipFile(data, 'w')

    articleKeys = []
    result.writestr('data/user.data',
                    self.getUserData(articleKeys).encode('utf-8'))
    logging.info(' adding articles %s' % len(articleKeys))
    # FIX: removed the unused 'images = []' local that was never read.
    for article in db.get(articleKeys):
      article.ensureThumbnail()
      path = 'data/' + article.key().name() + '.html'
      result.writestr(path.encode('utf-8'), article.content.encode('utf-8'))
      if article.thumbnail:
        path = 'data/' + article.key().name() + '.jpg'
        result.writestr(path.encode('utf-8'), article.thumbnail)

    result.close()
    logging.info('writing CannedData.zip')
    self.response.headers['Content-Type'] = 'multipart/x-zip'
    disposition = 'attachment; filename=CannedData.zip'
    self.response.headers['Content-Disposition'] = disposition
    self.response.out.write(data.getvalue())
    data.close()
| 383 | |
| 384 | |
class SetDefaultFeeds(webapp.RequestHandler):
  """Points the current user at a canned set of shared Reader sections."""

  @login_required
  def get(self):
    user = users.get_current_user()
    prefs = UserData.get_or_insert(user.user_id())

    # All default sections live under one shared Reader account.
    prefix = 'user/17857667084667353155/label/'
    labels = ['Top', 'Design', 'Eco', 'Geek', 'Google', 'Seattle', 'Tech',
              'Web']
    prefs.sections = [db.Key.from_path('Section', prefix + label)
                      for label in labels]

    prefs.put()

    self.redirect('/')
| 404 | |
class SetTestFeeds(webapp.RequestHandler):
  """Populates the current user with 3 sections x 4 feeds x 8 test articles."""

  @login_required
  def get(self):
    user = users.get_current_user()
    prefs = UserData.get_or_insert(user.user_id())

    sections = []
    for i in range(3):
      s1 = Section.get_or_insert('Test%d' % i)
      s1.title = 'Section %d' % (i+1)

      feeds = []
      for j in range(4):
        label = '%d_%d' % (i, j)
        f1 = Feed.get_or_insert('Test%s' % label)
        f1.title = 'Feed %s' % label
        f1.iconUrl = getFeedIcon('http://google.com')
        f1.lastUpdated = 0
        f1.put()
        feeds.append(f1.key())

        for k in range(8):
          label = '%d_%d_%d' % (i, j, k)
          a1 = Article.get_or_insert('Test%s' % label)
          # Only fill in articles that were freshly created.
          if a1.title is None:
            a1.feed = f1
            a1.title = 'Article %s' % label
            a1.author = 'anon'
            a1.content = 'Lorem ipsum something or other...'
            a1.snippet = 'Lorem ipsum something or other...'
            a1.thumbnail = None
            a1.srcurl = ''
            a1.date = 0
            # BUG FIX: these field assignments were never persisted, so the
            # freshly-inserted test articles stayed empty in the datastore.
            a1.put()

      s1.feeds = feeds
      s1.put()
      sections.append(s1.key())

    prefs.sections = sections
    prefs.put()

    self.redirect('/')
| 447 | |
| 448 | |
class UserLoginHandler(webapp.RequestHandler):
  """Syncs the user's Reader subscription list into Sections and Feeds."""

  @login_required
  def get(self):
    user = users.get_current_user()
    prefs = UserData.get_or_insert(user.user_id())
    if prefs.credentials:
      http = prefs.credentials.authorize(httplib2.Http())

      response, content = http.request('%s/subscription/list?output=json' %
                                       READER_API)
      self.collectFeeds(prefs, content)
      self.redirect('/')
    else:
      # No stored OAuth credentials yet: send the user to authorize first.
      self.redirect('/login')

  def collectFeeds(self, prefs, content):
    """Parse Reader's subscription list JSON and rebuild the user's sections.

    Also enqueues a priority update task for every subscribed feed.
    """
    data = json.loads(content)

    queue_name = self.request.get('queue_name', 'priority-queue')
    sections = {}
    for feedData in data['subscriptions']:
      feed = Feed.get_or_insert(feedData['id'])
      feed.put()
      category = feedData['categories'][0]
      categoryId = category['id']
      # 'in' replaces the deprecated dict.has_key().
      if categoryId not in sections:
        sections[categoryId] = (category['label'], [])

      # TODO(jimhug): Use Reader preferences to sort feeds in a section.
      sections[categoryId][1].append(feed.key())

      # Kick off a high priority feed update
      taskqueue.add(url='/update/feed', queue_name=queue_name,
                    params={'id': feed.key().name()})

    sectionKeys = []
    for name, (title, feeds) in sections.items():
      section = Section.get_or_insert(name)
      section.feeds = feeds
      section.title = title
      section.put()
      # Forces Top to be the first section
      if title == 'Top': title = '0Top'
      sectionKeys.append( (title, section.key()) )

    # TODO(jimhug): Use Reader preferences API to get users true sort order.
    prefs.sections = [key for t, key in sorted(sectionKeys)]
    prefs.put()
| 498 | |
| 499 | |
class AllFeedsCollector(webapp.RequestHandler):
  """Enqueues an update task for every known feed.

  The previous docstring ("Ensures that a given feed object is locally up
  to date") described FeedCollector's job, not this handler's; this class
  only fans work out to the task queue.
  """
  def post(self): return self.get()

  def get(self):
    queue_name = self.request.get('queue_name', 'background')
    for feed in Feed.all():
      taskqueue.add(url='/update/feed', queue_name=queue_name,
                    params={'id': feed.key().name()})
| 509 | |
UPDATE_COUNT = 4    # The number of articles to request on periodic updates.
INITIAL_COUNT = 40  # The number of articles to get first for a new queue.
SNIPPET_SIZE = 180  # The length of plain-text snippet to extract.
class FeedCollector(webapp.RequestHandler):
  """Fetches one feed's articles from Reader and stores them locally."""

  def post(self): return self.get()

  def get(self):
    feedId = self.request.get('id')
    feed = Feed.get_or_insert(feedId)

    # A feed that has never been updated gets a deeper initial fetch.
    depth = INITIAL_COUNT if feed.lastUpdated is None else UPDATE_COUNT
    self.fetchn(feed, feedId, depth)

    self.response.headers['Content-Type'] = "text/plain"

  def fetchn(self, feed, feedId, n, continuation=None):
    """Fetch up to n articles, optionally resuming from a Reader cursor."""
    # basic pattern is to read by ARTICLE_COUNT until we hit existing.
    if continuation is None:
      apiUrl = '%s/stream/contents/%s?n=%d' % (
          READER_API, feedId, n)
    else:
      apiUrl = '%s/stream/contents/%s?n=%d&c=%s' % (
          READER_API, feedId, n, continuation)

    logging.info('fetching: %s' % apiUrl)
    result = urlfetch.fetch(apiUrl)

    if result.status_code == 200:
      collectFeed(feed, json.loads(result.content), continuation)
    elif result.status_code == 401:
      # Auth failure: surface Reader's error body for debugging.
      self.response.out.write( '<pre>%s</pre>' % result.content)
    else:
      self.response.out.write(result.status_code)
| 546 | |
def findSectionByTitle(title):
  """Return the first Section whose fixedTitle() equals title, else None."""
  for candidate in Section.all():
    if candidate.fixedTitle() == title:
      return candidate
  return None
| 552 | |
def collectFeed(feed, data, continuation=None):
  '''
  Reads a feed from the given JSON object and populates the given feed object
  in the datastore with its data.

  Returns False if an already-seen article was encountered (so callers can
  stop paging), True otherwise.
  '''
  if continuation is None:
    # Only refresh feed-level metadata on the first page of results.
    # 'in' replaces the deprecated dict.has_key().
    if 'alternate' in data:
      feed.iconUrl = getFeedIcon(data['alternate'][0]['href'])
    feed.title = data['title']
    feed.lastUpdated = data['updated']

  articles = data['items']
  logging.info('%d new articles for %s' % (len(articles), feed.title))

  for articleData in articles:
    if not collectArticle(feed, articleData):
      # Hit an existing article: persist what we have and stop early.
      feed.put()
      return False

  if len(articles) > 0 and 'continuation' in data:
    logging.info('would have looked for more articles')
    # TODO(jimhug): Enable this continuation check when more robust
    #self.fetchn(feed, feedId, data['continuation'])

  feed.ensureEncodedFeed(force=True)
  feed.put()
  return True
| 580 | |
def collectArticle(feed, data):
  '''
  Reads an article from the given JSON object and populates the datastore with
  it.

  Returns False when the article already exists with the same publish date
  (signalling the caller to stop paging), True otherwise.
  '''
  if 'title' not in data:
    # Skip articles without titles.
    return True

  articleId = data['id']
  article = Article.get_or_insert(articleId)
  # TODO(jimhug): This aborts too early - at least for one adafruit case.
  if article.date == data['published']:
    logging.info('found existing, aborting: %r, %r' %
                 (articleId, article.date))
    return False

  # Prefer full content; fall back to the summary, then to empty.
  # 'in' replaces the deprecated dict.has_key() throughout.
  if 'content' in data:
    content = data['content']['content']
  elif 'summary' in data:
    content = data['summary']['content']
  else:
    content = ''
    #TODO(jimhug): better summary?
  article.content = content
  article.date = data['published']
  article.title = unescape(data['title'])
  article.snippet = unescape(strip_tags(content)[:SNIPPET_SIZE])

  article.feed = feed

  # TODO(jimhug): make this canonical so UX can change for this state
  article.author = data.get('author', 'anonymous')

  article.ensureThumbnail()

  # Take the last alternate link with an href as the source URL.
  article.srcurl = ''
  if 'alternate' in data:
    for alt in data['alternate']:
      if 'href' in alt:
        article.srcurl = alt['href']
  return True
| 623 | |
def unescape(html):
  """Inverse of Django's utils.html.escape function."""
  if not isinstance(html, basestring):
    html = str(html)
  # Undo entity escaping; '&amp;' must be handled last so it is not
  # re-expanded as the start of another entity.
  for entity, char in (('&#39;', "'"), ('&quot;', '"'), ('&gt;', '>'),
                       ('&lt;', '<'), ('&amp;', '&')):
    html = html.replace(entity, char)
  return html
| 630 | |
def getFeedIcon(url):
  """Return the Google favicon-service URL for the domain of url."""
  domain = urlparse.urlparse(url).netloc
  return 'http://s2.googleusercontent.com/s2/favicons?domain=%s&alt=feed' % domain
| 634 | |
def findImage(text):
  """Best-effort extraction of a representative image URL from HTML text.

  Preference order: jpeg/png <img>, then a YouTube poster frame, then gif.
  Returns None when nothing suitable is found.
  """
  for candidate in (findImgTag(text, 'jpg|jpeg|png'), findVideoTag(text)):
    if candidate is not None:
      return candidate
  return findImgTag(text, 'gif')
| 646 | |
def findImgTag(text, extensions):
  """Return the first http img src in text ending in one of extensions.

  extensions is a regex alternation such as 'jpg|jpeg|png'; an optional
  query string after the extension is tolerated but stripped.
  """
  match = re.search(r'src="(http://\S+\.(%s))(\?.*)?"' % extensions, text)
  return match.group(1) if match else None
| 652 | |
def findVideoTag(text):
  """Return a YouTube poster-frame URL for the first embedded video, or None."""
  # TODO(jimhug): Add other videos beyond youtube.
  match = re.search(r'src="http://www.youtube.com/(\S+)/(\S+)[/|"]', text)
  if match is None:
    return None
  return 'http://img.youtube.com/vi/%s/0.jpg' % match.group(2)
| 660 | |
def makeThumbnail(text):
  """Best-effort thumbnail generation from article HTML; None on failure.

  Thumbnailing is optional, so errors are logged and swallowed — but only
  ordinary exceptions: the previous bare 'except:' also trapped SystemExit
  and KeyboardInterrupt, which should propagate.
  """
  url = None
  try:
    url = findImage(text)
    if url is None:
      return None
    return generateThumbnail(url)
  except Exception:
    logging.info('error decoding: %s' % (url or text))
    return None
| 671 | |
def generateThumbnail(url):
  """Fetch url and return a JPEG thumbnail cropped/scaled to THUMB_SIZE."""
  logging.info('generating thumbnail: %s' % url)
  thumbWidth, thumbHeight = THUMB_SIZE

  img = images.Image(urlfetch.fetch(url).content)

  w, h = img.width, img.height
  aspect = float(w) / h
  thumbAspect = float(thumbWidth) / thumbHeight

  if aspect > thumbAspect:
    # Too wide: crop equally from both sides.
    margin = (w - h * thumbAspect) / (2.0 * w)
    img.crop(margin, 0., 1. - margin, 1. )
  elif aspect < thumbAspect:
    # Too tall: crop off the bottom.
    margin = (h - w / thumbAspect) / h
    img.crop(0., 0., 1., 1. - margin)

  img.resize(thumbWidth, thumbHeight)

  # Chose JPEG encoding because informal experiments showed it generated
  # the best size to quality ratio for thumbnail images.
  encoded = img.execute_transforms(output_encoding=images.JPEG)
  logging.info(' finished thumbnail: %s' % url)

  return encoded
| 701 | |
class OAuthHandler(webapp.RequestHandler):
  """Completes the OAuth2 flow started by MainHandler.handleLogin."""

  @login_required
  def get(self):
    user = users.get_current_user()
    # The flow was stashed in memcache at login time; memcache entries can
    # expire, and pickle.loads(None) raised TypeError here before.  The data
    # is self-written, so unpickling it is safe.
    pickled = memcache.get(user.user_id())
    flow = pickle.loads(pickled) if pickled else None
    if flow:
      prefs = UserData.get_or_insert(user.user_id())
      prefs.credentials = flow.step2_exchange(self.request.params)
      prefs.put()
      self.redirect('/update/user')
    else:
      # TODO: surface an error/retry page instead of silently doing nothing.
      pass
| 715 | |
| 716 | |
def main():
  """Wire up URL routes and run the WSGI application."""
  routes = [
      ('/data/(.*)', DataHandler),

      # This is called periodically from cron.yaml.
      ('/update/allFeeds', AllFeedsCollector),
      ('/update/feed', FeedCollector),
      ('/update/user', UserLoginHandler),
      ('/update/defaultFeeds', SetDefaultFeeds),
      ('/update/testFeeds', SetTestFeeds),
      ('/update/html', UpdateHtml),
      ('/update/upload', UploadFeed),
      ('/oauth2callback', OAuthHandler),

      ('/', TopHandler),
      ('/(.*)', MainHandler),
  ]
  webapp.util.run_wsgi_app(webapp.WSGIApplication(routes, debug=True))

if __name__ == '__main__':
  main()
| OLD | NEW |