From dda68b4bebb5700de6a7dd3c22fbaf8a0de1d211 Mon Sep 17 00:00:00 2001
From: Ben Sima <ben@bsima.me>
Date: Fri, 6 Mar 2020 08:54:27 -0800
Subject: refactor/formatting

---
 scrape | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

(limited to 'scrape')

diff --git a/scrape b/scrape
index 7da2125..fde6b2b 100755
--- a/scrape
+++ b/scrape
@@ -1,10 +1,15 @@
 #!/usr/bin/env python3
 
+"""
+Scrape a webpage for emails or links, or both.
+"""
+
 import argparse
 import re
 import sys
 import urllib.request
 
+
 def get(url):
     page = urllib.request.urlopen(url)
     byts = page.read()
@@ -12,19 +17,20 @@ def get(url):
     page.close()
     return s
 
+
 def extract_links(content):
-    links = re.findall('"((http|ftp)s?://.*?)"', content)
-    return links
+    return re.findall('"((http|ftp)s?://.*?)"', content)
+
 
 def extract_emails(content):
-    emails = re.findall('([\w\.,]+@[\w\.,]+\.\w+)', content)
-    return emails
-
-if __name__ == '__main__':
-    cli = argparse.ArgumentParser(description='Scrape a web page for stuff.')
-    cli.add_argument('url', metavar='URL', type=str, help='url to fetch')
-    cli.add_argument('-e', '--emails', action="store_true", help='get emails')
-    cli.add_argument('-l', '--links', action="store_true", help='get links')
+    return re.findall("([\w\.,]+@[\w\.,]+\.\w+)", content)
+
+
+if __name__ == "__main__":
+    cli = argparse.ArgumentParser(description=__doc__)
+    cli.add_argument("url", metavar="URL", type=str, help="url to fetch")
+    cli.add_argument("-e", "--emails", action="store_true", help="get emails")
+    cli.add_argument("-l", "--links", action="store_true", help="get links")
     args = cli.parse_args()
 
     if not (args.emails or args.links):
-- 
cgit v1.2.3