上一版结合 pychrome 模块截图,实验中发现了各种问题,于是就有了这篇文章。

代码

pip install pyppeteer

import asyncio
import os
import sys
import time
import xml.etree.ElementTree as ET
from urllib.parse import urlparse

import xmltodict
from pyppeteer import launch
# 从文本文档读取域名列表
# Read the target list from a plain-text file.
def read_txt(txt_file):
    """Return the non-empty, whitespace-stripped lines of *txt_file*.

    Each line is expected to hold one URL / host:port target.  Blank
    lines are skipped — previously they came back as '' and later got
    turned into the bogus URL 'http://'.
    """
    with open(txt_file, encoding='utf-8') as fp:
        return [line.strip() for line in fp if line.strip()]
#从nmap的xml文件读取ip与端口
# Read ip:port targets from an nmap XML scan report.
def nmap_xml(xml_file):
    """Return a list of 'ip:port' strings parsed from an nmap XML report.

    Rewritten on the stdlib XML parser: the previous xmltodict version
    iterated dict keys (and crashed) when the report held a single
    <host>, raised on hosts with several <port> entries, and could hit
    an unbound 'url' when the first port id was empty.  This version
    handles any host/port count and collects every scanned port.
    """
    targets = []
    root = ET.parse(xml_file).getroot()
    for host in root.iter('host'):
        address = host.find('address')
        if address is None:
            continue  # host entry without an address record — nothing to target
        addr = address.get('addr')
        for port in host.iter('port'):
            portid = port.get('portid')
            if portid:
                targets.append(addr + ':' + portid)
    return targets

async def HCS():
    """Screenshot every target URL with headless Chrome.

    Targets come either from a text file (``-txt``) or an nmap XML
    report (``-xml``), per ``sys.argv``.  Each reachable page is saved
    as ``image/<host>_<title-prefix>.png``.  Prints a per-URL progress
    trace and a final count/elapsed-time summary.
    """
    count = 0
    start = time.time()
    mode = sys.argv[1].lower()  # accepts -txt/-TXT/-Txt etc.
    if mode == '-txt':
        url_list = read_txt(sys.argv[2])
    elif mode == '-xml':
        url_list = nmap_xml(sys.argv[2])
    else:
        # Previously an unknown flag left url_list unbound (NameError)
        # with a headless Chrome instance already running.
        print('Unknown option: ' + sys.argv[1])
        return
    # page.screenshot() fails if the output directory does not exist.
    os.makedirs('image', exist_ok=True)
    browser = await launch(headless=True)
    try:
        page = await browser.newPage()
        for url in url_list:
            if '://' not in url:
                # Bare host[:port] targets: guess the scheme from the port.
                url = ('https://' if ':443' in url else 'http://') + url
            print('[+]Getting a domain name:' + url)
            try:
                await page.goto(url)
                await page.waitFor(1000)  # give slow pages a moment to render
                # document.title is '' for pages without a <title>; the old
                # querySelector('title').innerText threw there and skipped
                # the screenshot entirely.
                title = await page.evaluate('() => document.title')
                short_title = title[:4].strip()  # short prefix keeps filenames manageable
                host = urlparse(url).netloc
                shot_path = 'image/' + host + '_' + short_title + '.png'
                await page.screenshot({'path': shot_path})
                count += 1
                print(count)
                print(url)
                print(short_title)
            except Exception as e:
                # Best-effort sweep: log the failure and move on to the next target.
                print(e)
    finally:
        # Always release Chrome, even if the loop blows up.
        await browser.close()
    end = time.time()
    print("共截图:" + str(count) + "张")
    print("用时:" + str(int(end - start)) + "秒")
if __name__ == '__main__':
    # Usage banner shown whenever the argument count is wrong.
    info = '''
    info:
        Use python3 Headless_Chrome_Screenshot.py -txt  domain.txt        -->  The text containing the url  
        Use python3 Headless_Chrome_Screenshot.py -xml  domain_nmap.xml   -->  Xml format nmap scan file
        '''
    if len(sys.argv) == 3:
        # Exactly <mode> <file> supplied — run the screenshot sweep.
        asyncio.get_event_loop().run_until_complete(HCS())
    else:
        print(info)

结果

This is a screenshot.