上市櫃 券商買賣日報表查詢系統 自動化下載 使用問題
pythonist(本帖子的原作者):今天執行自動下載時,Sii 的部分,檔案無法下載,顯示:
Traceback (most recent call last):
File "C:\DL\僅上市下載.py", line 100, in <module>
stuff(])
File "C:\DL\僅上市下載.py", line 79, in get_sii
pagenum = get_sii_pagenum(id)
File "C:\DL\僅上市下載.py", line 48, in get_sii_pagenum
pagenum = etree.HTML(html).xpath('//span[@id="sp_ListCount"]')[0].text
IndexError: list index out of range
請問是證交所又改網址了嗎?該怎麼辦啊!!?求救...
請問前輩原本的這個要怎麼改
感謝再感謝
bsr.twse.com.tw/bshtm/bsMenu.aspx?HiddenField_page=PAGE_BS&HiddenField_spDate=&__EVENTARGUMENT=&__EVENTTARGET=&__EVENTVALIDATION=%2FwEWCALEh%2FLbCwLjpuXcAwKN4Ij0CwLB5ZfoCQLjk6TKBwKY8en5CwLdkpmPAQL6n7vzC85o%2BZOc3CbEnEINewpQOooAAAAA&__VIEWSTATE=%2FwEPDwUKMTczNDk4NzY0Mg9kFgICAQ9kFgwCBQ8WAh4JaW5uZXJodG1sBQoyMDEyLzA4LzAzZAIGDxYCHwAFCDIwMTIwODAzZAIIDw8WBh4JRm9udF9Cb2xkZx4EXyFTQgKEEB4JRm9yZUNvbG9yCj1kZAIKD2QWAgIBDw9kFgIeB09uQ2xpY2sFHGphdmFzY3JpcHQ6YnV0Q2xlYXJfQ2xpY2soKTtkAgwPDxYGHwFoHwIChBAfAwpHZGQCDg8PFgIeB1Zpc2libGVoZGRkrlNAWrt5h0rbFKKwjmcC%2FQAAAAA%3D&btnOK=%E6%9F%A5%E8%A9%A2&hidTASKNO=&txtTASKNO=2330 __EVENTTARGET=&__EVENTARGUMENT=&__VIEWSTATE=%2FwEPDwUKLTQzNzI3ODE3MQ9kFgICAQ9kFg4CBQ8WAh4JaW5uZXJodG1sBQoyMDE0LzA5LzA1ZAIGDxYCHwAFCDIwMTQwOTA1ZAIIDw8WBh4JRm9udF9Cb2xkZx4EXyFTQgKEEB4JRm9yZUNvbG9yCj1kZAIKD2QWAgIBDw9kFgIeB09uQ2xpY2sFHGphdmFzY3JpcHQ6YnV0Q2xlYXJfQ2xpY2soKTtkAgwPDxYGHwFoHwIChBAfAwpHZGQCDg8PFgIeB1Zpc2libGVoZGQCEA8PFgYfAWgfAgKEEB8DCkdkZGSi2zjocmrGJllnPH1VNPbh&__EVENTVALIDATION=%2FwEdAAkJ3iIwU1Mi8o7slM8DAmCjib%2BVrHjO6GeEEDcmd50Vv%2FHinYr2havdthGBI4bn%2FuVafk25GKI%2BxFnm8toEIF08OdRfizmino3LPjd4bEI%2Fa8n%2BshoO65Mgov1Gk1LiWrVFJyI109haYg0KuKnXCrDTo3DdQysje%2Ft8sfeZbMNL%2FQdIuRSdO0Evx9MQYYVhnqHnlqy1Mf5yy3u9S%2Fzb2lLx&HiddenField_spDate=&HiddenField_page=PAGE_BS&txtTASKNO=1101&hidTASKNO=&btnOK=%E6%9F%A5%E8%A9%A2 本帖最後由 randloop 於 14-9-6 00:51 編輯
ayeteng 發表於 14-9-5 23:37 static/image/common/back.gif
__EVENTTARGET=&__EVENTARGUMENT=&__VIEWSTATE=%2FwEPDwUKLTQzNzI3ODE3MQ9kFgICAQ9kFg4CBQ8WAh4JaW5uZXJodG ...
推樓上
下列是自己參考原作者的教學所寫的C#版,有興趣的參考看看囉
2F.3F的程式碼在哪..
我看我原始的程式碼到後半段似乎就對不起來了...?
請問該怎麼改?
pagelink = 'http://bsr.twse.com.tw/bshtm/bsMenu.aspx?HiddenField_page=PAGE_BS&HiddenField_spDate=&__EVENTARGUMENT=&__EVENTTARGET=&__EVENTVALIDATION=%2FwEWCQK8367mBQLjpuXcAwKN4Ij0CwLB5ZfoCQLjk6TKBwKY8en5CwLdkpmPAQL6n7vzCwLAhrvLBT%2F7aqNSnUoGguZ%2FijmP1h0AAAAA&__VIEWSTATE=%2FwEPDwUKLTQzNzI3ODE3MQ9kFgICAQ9kFg4CBQ8WAh4JaW5uZXJodG1sBQoyMDEzLzAyLzAxZAIGDxYCHwAFCDIwMTMwMjAxZAIIDw8WBh4JRm9udF9Cb2xkZx4EXyFTQgKEEB4JRm9yZUNvbG9yCj1kZAIKD2QWAgIBDw9kFgIeB09uQ2xpY2sFHGphdmFzY3JpcHQ6YnV0Q2xlYXJfQ2xpY2soKTtkAgwPDxYGHwFoHwIChBAfAwpHZGQCDg8PFgIeB1Zpc2libGVoZGQCEA8PFgYfAWgfAgKEEB8DCkdkZGSkalbK4kjTU%2Bt1Zxv5QdZoAAAAAA%3D%3D&btnOK=%E6%9F%A5%E8%A9%A2&hidTASKNO=&txtTASKNO=' + id
randloop 發表於 14-9-6 00:48 static/image/common/back.gif
推樓上
下列是自己參考原作者的教學所寫的C#版,有興趣的參考看看囉
請教C#版開啟的軟體要使用什麼?
非常感謝 3F ayeteng 大大。請教:已經將 PAGELINK 部分改成與 3F 大大相同,但還是出錯。
快瘋了..可以請高人指點一下嗎...
from lxml import etree
import cStringIO
import codecs
import contextlib
import csv
import datetime
import glob
import itertools
import operator
import os
import os.path
import re
import urllib
import urllib2
import time
class UnicodeWriter:
    """CSV writer that emits rows of text to a byte stream in a given encoding.

    Each row is first rendered by the standard ``csv`` writer into an
    in-memory UTF-8 queue, then re-encoded into ``encoding`` and written to
    ``f``.  This is the Python 2 recipe for writing unicode through ``csv``,
    which only handles byte strings there.

    :param f: writable file-like object that accepts byte strings.
    :param dialect: csv dialect forwarded to ``csv.writer``.
    :param encoding: name of the output encoding (default ``"utf-8"``).
    :param kwds: extra formatting options forwarded to ``csv.writer``.
    """

    def __init__(self, f, dialect=csv.excel, encoding="utf-8", **kwds):
        # csv output goes to an in-memory queue so it can be re-encoded
        # before reaching the real stream.
        self.queue = cStringIO.StringIO()
        self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
        self.stream = f
        self.encoder = codecs.getincrementalencoder(encoding)()

    def writerow(self, row):
        """Write one row (an iterable of strings) to the target stream."""
        # The row must actually be handed to the csv writer; every cell is
        # encoded to UTF-8 because the queue is decoded as UTF-8 below.
        self.writer.writerow([s.encode("utf-8") for s in row])
        # Pull the UTF-8 rendering of the row out of the queue ...
        data = self.queue.getvalue()
        data = data.decode("utf-8")
        # ... and convert it to the requested output encoding.
        data = self.encoder.encode(data)
        self.stream.write(data)
        # Empty the queue so the next row starts from a clean buffer.
        self.queue.truncate(0)

    def writerows(self, rows):
        """Write every row of ``rows`` via :meth:`writerow`."""
        for row in rows:
            self.writerow(row)
# Pattern for strings made of exactly four digits (``$`` also tolerates a
# single trailing newline).
idfilter = re.compile(r'^\d{4}$')
def get_html(link):
    """Open ``link`` with urllib2 and return the full response body."""
    netfile = urllib2.urlopen(link)
    try:
        return netfile.read()
    finally:
        # Close the response whether or not read() succeeded.
        netfile.close()
def get_sii_pagenum(id):
    """Return the text of the ``sp_ListCount`` span for stock ``id``.

    Requests ``bsMenu.aspx`` with ``id`` as the ``txtTASKNO`` query value and
    reads the ``<span id="sp_ListCount">`` element of the reply.

    :param id: stock code, as a string.
    :returns: the span's text, or ``None`` when the reply contains no such
        span, so that callers can skip the id with a plain truthiness test.
    """
    # ``id`` has to be the value of txtTASKNO.  Appending it after the btnOK
    # value left the hard-coded code 1101 in the query for every request.
    pagelink = ('http://bsr.twse.com.tw/bshtm/bsMenu.aspx?HiddenField_page=PAGE_BS&HiddenField_spDate=&__EVENTTARGET=&__EVENTARGUMENT=&__VIEWSTATE=%2FwEPDwUKLTQzNzI3ODE3MQ9kFgICAQ9kFg4CBQ8WAh4JaW5uZXJodG1sBQoyMDE0LzA5LzA1ZAIGDxYCHwAFCDIwMTQwOTA1ZAIIDw8WBh4JRm9udF9Cb2xkZx4EXyFTQgKEEB4JRm9yZUNvbG9yCj1kZAIKD2QWAgIBDw9kFgIeB09uQ2xpY2sFHGphdmFzY3JpcHQ6YnV0Q2xlYXJfQ2xpY2soKTtkAgwPDxYGHwFoHwIChBAfAwpHZGQCDg8PFgIeB1Zpc2libGVoZGQCEA8PFgYfAWgfAgKEEB8DCkdkZGSi2zjocmrGJllnPH1VNPbh&__EVENTVALIDATION=%2FwEdAAkJ3iIwU1Mi8o7slM8DAmCjib%2BVrHjO6GeEEDcmd50Vv%2FHinYr2havdthGBI4bn%2FuVafk25GKI%2BxFnm8toEIF08OdRfizmino3LPjd4bEI%2Fa8n%2BshoO65Mgov1Gk1LiWrVFJyI109haYg0KuKnXCrDTo3DdQysje%2Ft8sfeZbMNL%2FQdIuRSdO0Evx9MQYYVhnqHnlqy1Mf5yy3u9S%2Fzb2lLx&HiddenField_spDate=&HiddenField_page=PAGE_BS&txtTASKNO='
                + id
                + '&hidTASKNO=&btnOK=%E6%9F%A5%E8%A9%A2')
    html = get_html(pagelink)
    # xpath() returns a list of matches; it is empty when the page does not
    # contain the span (the IndexError reported for this function).
    spans = etree.HTML(html).xpath('//span[@id="sp_ListCount"]')
    if not spans:
        return None
    return spans[0].text
# Parse the report page `html` with lxml and write the collected rows to
# `csvfile` as CSV encoded in cp950.
# NOTE(review): this paste lost its indentation and most bracketed expressions
# (indexing, slices, list comprehensions), so the block is not runnable as
# shown. The notes below mark each damaged line; restore them from the
# original script rather than guessing.
def save_sii_csv(csvfile, html):
result = []
parser = etree.HTMLParser(encoding='utf-8')
tree = etree.parse(cStringIO.StringIO(html), parser)
# NOTE(review): xpath() returns a list, yet the next line calls .xpath on the
# result; an index (presumably [0]) appears to have been lost - confirm.
table1_first = tree.xpath('//table[@id="table1"]')
tds = table1_first.xpath('tr//td[@id or @class]')
# NOTE(review): right-hand side missing (presumably a list of cell texts built
# from `tds`) - confirm.
tds_text =
# NOTE(review): two identical appends; presumably each originally took a
# different slice of tds_text - confirm.
result.append(tds_text)
result.append(tds_text)
# NOTE(review): an index after xpath() may be missing here as well - confirm.
column_title_1 = tree.xpath('//tr[@class="column_title_1"]')
# NOTE(review): right-hand side missing - confirm.
column_title_1_text =
result.append(column_title_1_text)
# NOTE(review): the xpath may have lost a predicate; as written it selects
# every <tr> in the document - confirm.
column_value = tree.xpath('//tr')
# NOTE(review): the per-row expression inside this comprehension is missing.
column_value_text = [ for cv in column_value]
# Keep only rows in which every cell is truthy. The .sort() below needs a
# list, which Python 2's filter() returns.
column_value_text = filter(all, column_value_text)
# NOTE(review): the sort key converts a whole row with int(); an index
# (presumably cvt[0]) appears to have been lost - confirm.
column_value_text.sort(key = lambda cvt: int(cvt))
result.extend(column_value_text)
# Binary mode: UnicodeWriter writes already-encoded byte strings.
with open(csvfile, 'wb') as outfile:
writer = UnicodeWriter(outfile, csv.excel, 'cp950')
writer.writerows(result)
def get_sii(idlist):
    """Fetch the report page for each id in ``idlist`` and save it as ``<id>.csv``.

    An id is skipped when get_sii_pagenum returns a falsy value for it.
    """
    content_prefix = 'http://bsr.twse.com.tw/bshtm/bsContent.aspx?StartNumber='
    for stock_id in idlist:
        page_count = get_sii_pagenum(stock_id)
        if not page_count:
            continue
        report_url = content_prefix + stock_id + '&FocusIndex=All_' + page_count
        report_html = get_html(report_url)
        save_sii_csv(stock_id + '.csv', report_html)
# Driver: read ids from "<market name>.txt", download each one, then delete
# CSV files smaller than 2 bytes.
# NOTE(review): indentation and several bracketed expressions were lost in
# this paste, so the block is not runnable as shown.
# (market index, market name, download function) triples. Both entries use
# 'sii' and get_sii, so sii.txt is read twice.
mkttypes = (('1', 'sii', get_sii),('2', 'sii', get_sii))
id_getfunc_list = []
for mktidx, mktname, getfunc in mkttypes:
with open(mktname + '.txt', 'rb') as infile:
content = infile.readlines()
# NOTE(review): the next three lines are damaged and are not valid Python.
# Presumably they (1) extract an id from each line of the file, (2) filter the
# ids (idfilter is defined in this file but not used anywhere visible), and
# (3) pair each id with getfunc through an empty second iterable - confirm
# against the original script.
idlist = for s in content]
idlist =
id_getfunc_list.append(list(itertools.izip_longest(idlist, , fillvalue=getfunc)))
# Walk the per-market lists in lockstep; izip_longest pads shorter lists with
# None, hence the None check below.
for stuffs in itertools.izip_longest(*id_getfunc_list):
for stuff in stuffs:
if stuff is not None:
# NOTE(review): damaged call. The traceback quoted in this thread shows this
# line entering get_sii, so presumably stuff[1]([stuff[0]]) - confirm.
stuff(])
# Pause one second between requests (the nesting level of this line cannot be
# recovered from the paste).
time.sleep(1)
# Remove CSV files smaller than 2 bytes.
for fn in glob.iglob('*.csv'):
if os.path.getsize(fn) < 2:
os.remove(fn)
tmjftony 發表於 14-9-8 22:09 static/image/common/back.gif
請教已經將PAGELINK部分改成與3F大大相同..但還是出錯
快瘋了..可以請高人指點一下嗎...
/bshtm/bsMenu.aspx?HiddenField_page=PAGE_BS&HiddenField_spDate=&__EVENTTARGET=&__EVENTARGUMENT=&__VIEWSTATE=%2FwEPDwUKLTQzNzI3ODE3MQ9kFgICAQ9kFg4CBQ8WAh4JaW5uZXJodG1sBQoyMDE0LzA5LzA1ZAIGDxYCHwAFCDIwMTQwOTA1ZAIIDw8WBh4JRm9udF9Cb2xkZx4EXyFTQgKEEB4JRm9yZUNvbG9yCj1kZAIKD2QWAgIBDw9kFgIeB09uQ2xpY2sFHGphdmFzY3JpcHQ6YnV0Q2xlYXJfQ2xpY2soKTtkAgwPDxYGHwFoHwIChBAfAwpHZGQCDg8PFgIeB1Zpc2libGVoZGQCEA8PFgYfAWgfAgKEEB8DCkdkZGSi2zjocmrGJllnPH1VNPbh&__EVENTVALIDATION=%2FwEdAAkJ3iIwU1Mi8o7slM8DAmCjib%2BVrHjO6GeEEDcmd50Vv%2FHinYr2havdthGBI4bn%2FuVafk25GKI%2BxFnm8toEIF08OdRfizmino3LPjd4bEI%2Fa8n%2BshoO65Mgov1Gk1LiWrVFJyI109haYg0KuKnXCrDTo3DdQysje%2Ft8sfeZbMNL%2FQdIuRSdO0Evx9MQYYVhnqHnlqy1Mf5yy3u9S%2Fzb2lLx&HiddenField_spDate=&HiddenField_page=PAGE_BS&txtTASKNO='+ id + '&hidTASKNO=&btnOK=%E6%9F%A5%E8%A9%A2'
waevan 發表於 14-9-10 18:56 static/image/common/back.gif
/bshtm/bsMenu.aspx?HiddenField_page=PAGE_BS&HiddenField_spDate=&__EVENTTARGET=&__EVENTARGUMENT=&__ ...
試了,不過還是不行
看起來不只是網址問題
原始網址尚未更改時,一天好像有機會可以自動下載一次,但中途會斷掉
斷掉會顯示
Traceback (most recent call last):
File "C:\DL\僅上市下載.py", line 100, in <module>
stuff(])
File "C:\DL\僅上市下載.py", line 79, in get_sii
pagenum = get_sii_pagenum(id)
File "C:\DL\僅上市下載.py", line 47, in get_sii_pagenum
html = get_html(pagelink)
File "C:\DL\僅上市下載.py", line 41, in get_html
with contextlib.closing(urllib2.urlopen(link)) as netfile:
File "C:\Python27\lib\urllib2.py", line 127, in urlopen
return _opener.open(url, data, timeout)
File "C:\Python27\lib\urllib2.py", line 404, in open
response = self._open(req, data)
File "C:\Python27\lib\urllib2.py", line 422, in _open
'_open', req)
File "C:\Python27\lib\urllib2.py", line 382, in _call_chain
result = func(*args)
File "C:\Python27\lib\urllib2.py", line 1214, in http_open
return self.do_open(httplib.HTTPConnection, req)
File "C:\Python27\lib\urllib2.py", line 1184, in do_open
raise URLError(err)
URLError: <urlopen error getaddrinfo failed>
真不知道到底哪個出錯了= =程式白癡的我
頁:
[1]