How to download a file from webpage using Htmlunit having javascript in anchor tag -
I am trying to click on a link to download a file:
The HTML line for the link I am trying to download is:
<a id="a_file" title="download zip data file" href="javascript:return true;" target="nulldisplay">histdata_com_mt_eurusd_m1_2013.zip</a>
And the Java code is:
// Open the listing page with JavaScript enabled and AJAX calls resynchronized.
WebClient webClient = new WebClient(BrowserVersion.FIREFOX_38);
webClient.getOptions().setJavaScriptEnabled(true);
webClient.setAjaxController(new NicelyResynchronizingAjaxController());
HtmlPage htmlPage = webClient.getPage("http://www.histdata.com/download-free-forex-historical-data/?/metatrader/1-minute-bar-quotes/eurusd/2016/7");

// Scan the anchors for the one whose visible text is the zip file name.
// If nothing matches, `anchor` is left pointing at the last anchor on the page.
List<HtmlAnchor> anchors = htmlPage.getAnchors();
HtmlAnchor anchor = null;
for (int i = 0; i < anchors.size(); ++i) {
    anchor = anchors.get(i);
    String sAnchor = anchor.asText();
    if (sAnchor.equals("histdata_com_mt_eurusd_m1_201607.zip"))
        break;
}

// Clicking evaluates the anchor's href ("javascript:return true;"); per the
// stack trace below, this click() call is where the ScriptException is raised.
Page p = anchor.click();
webClient.waitForBackgroundJavaScript(60000);

// Dump the response of the page returned by the click, byte by byte.
InputStream is = p.getWebResponse().getContentAsStream();
int b = 0;
while ((b = is.read()) != -1) {
    System.out.print((char) b);
}
The error message is:
jul 12, 2016 1:29:57 pm com.gargoylesoftware.htmlunit.javascript.stricterrorreporter error severe: error: message=[invalid return] sourcename=[javascript url] line=[88] linesource=[return true;] lineoffset=[7] exception in thread "main" ======= exception start ======== exception class=[net.sourceforge.htmlunit.corejs.javascript.evaluatorexception] com.gargoylesoftware.htmlunit.scriptexception: invalid return (javascript url#88) @ com.gargoylesoftware.htmlunit.javascript.javascriptengine$htmlunitcontextaction.run(javascriptengine.java:904) @ net.sourceforge.htmlunit.corejs.javascript.context.call(context.java:628) @ net.sourceforge.htmlunit.corejs.javascript.contextfactory.call(contextfactory.java:515) @ com.gargoylesoftware.htmlunit.javascript.javascriptengine.compile(javascriptengine.java:729) @ com.gargoylesoftware.htmlunit.javascript.javascriptengine.compile(javascriptengine.java:694) @ com.gargoylesoftware.htmlunit.javascript.javascriptengine.execute(javascriptengine.java:746) @ com.gargoylesoftware.htmlunit.html.htmlpage.executejavascriptifpossible(htmlpage.java:902) @ com.gargoylesoftware.htmlunit.html.htmlanchor.doclickstateupdate(htmlanchor.java:114) @ com.gargoylesoftware.htmlunit.html.htmlanchor.doclickstateupdate(htmlanchor.java:179) @ com.gargoylesoftware.htmlunit.html.domelement.click(domelement.java:800) @ com.gargoylesoftware.htmlunit.html.domelement.click(domelement.java:747) @ com.gargoylesoftware.htmlunit.html.domelement.click(domelement.java:694) @ clickpage.main(clickpage.java:38) caused by: net.sourceforge.htmlunit.corejs.javascript.evaluatorexception: invalid return (javascript url#88) @ com.gargoylesoftware.htmlunit.javascript.stricterrorreporter.error(stricterrorreporter.java:65) @ net.sourceforge.htmlunit.corejs.javascript.parser.adderror(parser.java:188) @ net.sourceforge.htmlunit.corejs.javascript.parser.adderror(parser.java:167) @ net.sourceforge.htmlunit.corejs.javascript.parser.reporterror(parser.java:255) @ 
net.sourceforge.htmlunit.corejs.javascript.parser.reporterror(parser.java:244) @ net.sourceforge.htmlunit.corejs.javascript.parser.reporterror(parser.java:237) @ net.sourceforge.htmlunit.corejs.javascript.parser.returnoryield(parser.java:1632) @ net.sourceforge.htmlunit.corejs.javascript.parser.statementhelper(parser.java:1022) @ net.sourceforge.htmlunit.corejs.javascript.parser.statement(parser.java:928) @ net.sourceforge.htmlunit.corejs.javascript.parser.parse(parser.java:572) @ net.sourceforge.htmlunit.corejs.javascript.parser.parse(parser.java:492) @ net.sourceforge.htmlunit.corejs.javascript.context.compileimpl(context.java:2660) @ net.sourceforge.htmlunit.corejs.javascript.context.compilestring(context.java:1623) @ com.gargoylesoftware.htmlunit.javascript.htmlunitcontextfactory$timeoutcontext.compilestring(htmlunitcontextfactory.java:172) @ net.sourceforge.htmlunit.corejs.javascript.context.compilestring(context.java:1615) @ com.gargoylesoftware.htmlunit.javascript.javascriptengine$2.dorun(javascriptengine.java:720) @ com.gargoylesoftware.htmlunit.javascript.javascriptengine$htmlunitcontextaction.run(javascriptengine.java:889) ... 
12 more enclosed exception: net.sourceforge.htmlunit.corejs.javascript.evaluatorexception: invalid return (javascript url#88) @ com.gargoylesoftware.htmlunit.javascript.stricterrorreporter.error(stricterrorreporter.java:65) @ net.sourceforge.htmlunit.corejs.javascript.parser.adderror(parser.java:188) @ net.sourceforge.htmlunit.corejs.javascript.parser.adderror(parser.java:167) @ net.sourceforge.htmlunit.corejs.javascript.parser.reporterror(parser.java:255) @ net.sourceforge.htmlunit.corejs.javascript.parser.reporterror(parser.java:244) @ net.sourceforge.htmlunit.corejs.javascript.parser.reporterror(parser.java:237) @ net.sourceforge.htmlunit.corejs.javascript.parser.returnoryield(parser.java:1632) @ net.sourceforge.htmlunit.corejs.javascript.parser.statementhelper(parser.java:1022) @ net.sourceforge.htmlunit.corejs.javascript.parser.statement(parser.java:928) @ net.sourceforge.htmlunit.corejs.javascript.parser.parse(parser.java:572) @ net.sourceforge.htmlunit.corejs.javascript.parser.parse(parser.java:492) @ net.sourceforge.htmlunit.corejs.javascript.context.compileimpl(context.java:2660) @ net.sourceforge.htmlunit.corejs.javascript.context.compilestring(context.java:1623) @ com.gargoylesoftware.htmlunit.javascript.htmlunitcontextfactory$timeoutcontext.compilestring(htmlunitcontextfactory.java:172) @ net.sourceforge.htmlunit.corejs.javascript.context.compilestring(context.java:1615) @ com.gargoylesoftware.htmlunit.javascript.javascriptengine$2.dorun(javascriptengine.java:720) @ com.gargoylesoftware.htmlunit.javascript.javascriptengine$htmlunitcontextaction.run(javascriptengine.java:889) @ net.sourceforge.htmlunit.corejs.javascript.context.call(context.java:628) @ net.sourceforge.htmlunit.corejs.javascript.contextfactory.call(contextfactory.java:515) @ com.gargoylesoftware.htmlunit.javascript.javascriptengine.compile(javascriptengine.java:729) @ com.gargoylesoftware.htmlunit.javascript.javascriptengine.compile(javascriptengine.java:694) @ 
com.gargoylesoftware.htmlunit.javascript.javascriptengine.execute(javascriptengine.java:746) @ com.gargoylesoftware.htmlunit.html.htmlpage.executejavascriptifpossible(htmlpage.java:902) @ com.gargoylesoftware.htmlunit.html.htmlanchor.doclickstateupdate(htmlanchor.java:114) @ com.gargoylesoftware.htmlunit.html.htmlanchor.doclickstateupdate(htmlanchor.java:179) @ com.gargoylesoftware.htmlunit.html.domelement.click(domelement.java:800) @ com.gargoylesoftware.htmlunit.html.domelement.click(domelement.java:747) @ com.gargoylesoftware.htmlunit.html.domelement.click(domelement.java:694) @ clickpage.main(clickpage.java:38) == calling javascript == return true; ======= exception end ========
Please let me know what is wrong in the code, and how to download the file from the given link.
//complete solution //1. open page //2. list urls of page using xpath //3. download file of url . import java.io.file; import java.io.fileoutputstream; import java.io.ioexception; import java.io.inputstream; import java.io.outputstream; import java.net.httpurlconnection; import java.net.url; import java.util.date; import java.util.list; import java.util.map; import com.gargoylesoftware.htmlunit.browserversion; import com.gargoylesoftware.htmlunit.webclient; import com.gargoylesoftware.htmlunit.html.domattr; import com.gargoylesoftware.htmlunit.html.htmlanchor; import com.gargoylesoftware.htmlunit.html.htmlpage; public class crawler { public static void main(string[] args) throws throwable { string baseurl= "enter base http/https url here"; string url1 = baseurl+ "add addational url of main page"; string xpathofdownlaodlinks = "xpath of file url or--> html/body/div/div[3]/a/@href"; string pathtosavefile="d:\\local\\to\\save\\files"; string fileext = ".txt"; webclient webclient = new webclient(browserversion.chrome); webclient.getoptions().setjavascriptenabled(true); htmlpage page = webclient.getpage(url1); list<domattr> links = (list<domattr>) page.getbyxpath(xpathofdownlaodlinks); list<htmlanchor> anchors=page.getanchors(); (domattr object : links) { string link = baseurl+object.getvalue()+""; date d=new date(); downlaodrawfile(link,pathtosavefile +d.gettime() + fileext); } webclient.close(); } public static void downlaodrawfile(string link,string filename) throws ioexception, throwable{ url url = new url( link ); httpurlconnection http = (httpurlconnection)url.openconnection(); map< string, list< string >> header = http.getheaderfields(); while( isredirected( header )) { link = header.get( "location" ).get( 0 ); url = new url( link ); http = (httpurlconnection)url.openconnection(); header = http.getheaderfields(); } inputstream input = http.getinputstream(); byte[] buffer = new byte[4096]; int n = -1; outputstream output = new fileoutputstream( new file( filename 
)); while ((n = input.read(buffer)) != -1) { output.write( buffer, 0, n ); } output.close(); } private static boolean isredirected( map<string, list<string>> header ) { for( string hv : header.get( null )) { if( hv.contains( " 301 " ) || hv.contains( " 302 " )) return true; } return false; } }
Comments
Post a Comment