summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Ben Sima <ben@bsima.me> 2019-05-19 14:16:19 -0700
committer Ben Sima <ben@bsima.me> 2019-05-19 14:16:19 -0700
commit 95a3cb30f61e90fe00cd859f10e323bbc3e62e99 (patch)
tree 6d44b9b5eb04f9279acd31bbaffc95d9d524ae21
parent 517bd3c33f7b34173348141b7128b54e2776a30f (diff)
cleanup textract and rdr
-rwxr-xr-x rdr 5
-rwxr-xr-x textract 19
2 files changed, 12 insertions, 12 deletions
diff --git a/rdr b/rdr
index a258698..184d7d4 100755
--- a/rdr
+++ b/rdr
@@ -2,5 +2,6 @@
#! nix-shell -i bash -p pandoc
textract "$1" \
- | pandoc -f html -t markdown \
- | less
+ | pandoc -f html -t markdown --strip-comments --reference-links \
+ | sed 's/^:::.*$//g' \
+ | sed 's/{.css.*}//g'
diff --git a/textract b/textract
index f80be36..707cd27 100755
--- a/textract
+++ b/textract
@@ -9,13 +9,12 @@ import sys
cli = argparse.ArgumentParser('read a url')
cli.add_argument('url', type=str)
-if __name__ == '__main__':
- args = cli.parse_args()
- try:
- resp = requests.get(args.url)
- doc = Document(resp.text)
- sys.stdout.write(doc.summary())
- sys.exit(0)
- except:
- print("Could not fetch document.")
- sys.exit(1)
+args = cli.parse_args()
+
+try:
+ resp = requests.get(args.url)
+ doc = Document(resp.text)
+ sys.stdout.write(doc.summary())
+except:
+ print("textract: Could not fetch document.")
+ sys.exit(1)