Python web scraping (Finviz pull)

The Python code is below:

#!/usr/bin/env python
from lxml import html
import requests
import csv
import sys
import os
import humanize

# Helper URL
# http://python-docs.readthedocs.io/en/latest/scenarios/scrape.html

# Echo every command-line argument, numbered from 1 (the script path itself
# is therefore reported as "Argument 1").
for curr_arg, arg in enumerate(sys.argv, 1):
    print("Argument " + str(curr_arg) + ": " + arg)


# The script needs exactly one user-supplied argument: the CSV file to write.
if len(sys.argv) < 2:
    print("\nInsufficient arguments! (expected 1)\nUsage: " + os.path.basename(__file__) + " <outputfile.csv>\n")
    # sys.exit() is always available; the bare exit() helper is only injected
    # by the site module and is meant for interactive sessions.
    sys.exit(1)

# Destination CSV path (first user argument) and the page to scrape.
output_filename = sys.argv[1]
input_url = 'http://finviz.com/insidertrading.ashx?tc=7'

print("\nFetching contents from " + input_url)
# requests never times out by default; bound the wait so a stalled
# connection cannot hang the script indefinitely.
buy_tx_page = requests.get(input_url, timeout=30)
# Fail loudly on an HTTP error instead of parsing an error page as data.
buy_tx_page.raise_for_status()
print("\nDownloaded " + humanize.naturalsize(len(buy_tx_page.content), gnu=True) + " bytes.")
# Parse the raw response body into an lxml element tree for XPath queries.
tree = html.fromstring(buy_tx_page.content)

# XPath of each output column, relative to one data row, in CSV column order.
# Columns 1, 2 and 10 hold their text inside a link; the rest are plain text.
cell_paths = (
    'td[1]/a/text()',   # ticker
    'td[2]/a/text()',   # owner
    'td[3]/text()',     # relationship
    'td[4]/text()',     # date
    'td[5]/text()',     # transaction type
    'td[6]/text()',     # cost
    'td[7]/text()',     # number of shares
    'td[8]/text()',     # transaction value
    'td[9]/text()',     # total shares
    'td[10]/a/text()',  # SEC form 4
)

# Select the data rows once (skipping the first, header, row) and read every
# column from the same row. Querying each column independently over the whole
# table yields lists of different lengths as soon as one cell is empty, which
# shifts all later values into the wrong record.
data_rows = tree.xpath('//table[@class="body-table"]/tr[position()>1]')

# One list per column, all exactly len(data_rows) long; a missing or empty
# cell becomes an empty string instead of being dropped.
(ticker, owner, relationship, on_date, transaction_type, cost, num_shares,
 transaction_value, shares_total, sec_form_4) = [
    [''.join(row.xpath(path)) for row in data_rows] for path in cell_paths
]


# Scraped column lists, in the order they appear in the CSV.
columns = (ticker, owner, relationship, on_date, transaction_type, cost, num_shares,
           transaction_value, shares_total, sec_form_4)

# All columns should describe the same rows; warn when they do not, because
# the output below is cut off at the shortest column.
if len(set(len(column) for column in columns)) > 1:
    sys.stderr.write("\nWarning: scraped columns differ in length; "
                     "output is truncated to the shortest column\n")

# The csv module performs its own newline handling: Python 3 wants a text
# file opened with newline='', Python 2 wants a binary file. Plain text mode
# produces blank lines between records on Windows.
if sys.version_info[0] >= 3:
    output = open(output_filename, 'w', newline='')
else:
    output = open(output_filename, 'wb')

i = 0  # number of data records written (stays 0 when nothing was scraped)
with output:  # closes the file even if a write fails
    writer = csv.writer(output)
    writer.writerow(('Ticker', 'Owner', 'Relationship', 'Date', 'Transaction', 'Cost', '#Shares', 'Value ($)',
                     '#Shares Total', 'SEC Form 4'))
    # zip() walks the columns in lockstep and stops at the shortest one, so a
    # short column can no longer raise IndexError half-way through the file.
    for i, (symbol, insider, role, date, tx_type, price, shares, value, total, form) in enumerate(
            zip(*columns), 1):
        # Collapse runs of whitespace in the owner name; trim the cost cell.
        writer.writerow((symbol, ' '.join(insider.split()), role, date, tx_type,
                         price.strip(), shares, value, total, form))

print("\nWrote " + str(i) + " records in " + output_filename)

 

You may also like...

Leave a Reply

Your email address will not be published. Required fields are marked *