var page = require('webpage').create(),
system = require('system'),
address,output,csvPath,nodePathFile,outOriginalimg,PCSPuserAgent;
var fs = require("fs");
if (system.args.length < 0) {
console.log('Usage: rasterize.js URL filename');
phantom.exit(1);
} else {
address = system.args[1];// ��ȁE�url
output=system.args[2];
csvPath=system.args[3];// �����csv
PCSPuserAgent=system.args[6];
console.log(PCSPuserAgent);
var BeforecsvPath=csvPath.replace('.csv','_1.csv');
var csvindex=0;
nodePathFile=system.args[4];// ��ȁE�nodepath�ļ�
outOriginalimg=system.args[5];// ��ȡԭͼ
if(PCSPuserAgent.indexOf("iPhone") > -1)
{
page.settings = {
userAgent:PCSPuserAgent,
javascriptEnabled: true,
loadImages: true
};
}
else
{
page.settings = {
javascriptEnabled: true,
loadImages: true
};
}
page.viewportSize = {width:414,height:30};
page.open(address, function (status) {
// fs.write('test.txt', "childNodes Num{_}Node Name{_}NodeIndexPath{_}Width{_}Height{_}x{_}y{_}Dispaly{_}ImgUrlOrBackImgUrl" + "\r\n", 'a');
if(fs.exists(csvPath))
{
fs.remove(csvPath);
}
window.setTimeout(function () {
VisiteHtmlDom("0",BeforecsvPath);
console.log("---------------------FS MOVE------------------------------");
fs.move(BeforecsvPath,csvPath);
}, 40000);
waitFor2(
function csvCreate()
{
if(fs.exists(csvPath)){
if(csvindex<5)
{
csvindex++;
console.log("csvindex Index:"+csvindex);
var csvPathNow=csvPath.replace('.csv','__'+csvindex.toString()+'.csv');
var BeforecsvPathNow=csvPathNow.replace('.csv','_1.csv');
if(fs.exists(csvPathNow))
{
fs.remove(csvPathNow);
}
console.log("csvindex:"+csvindex+","+"csvPathNow:"+csvPathNow+","+"BeforecsvPathNow:"+BeforecsvPathNow);
VisiteHtmlDom("0",BeforecsvPathNow);
fs.move(BeforecsvPathNow,csvPathNow);
}
else
{
return true;
}
}
},
function csvCreate2()
{
console.log("-----------------csvPath2 END---------------");
}
);
waitFor(
function nodePathFileCheck()
{
if(!fs.exists(nodePathFile))
{
console.log('nodepathFile Not Find')
}
else
{
console.log("Find nodePathFile");
return true;
}
} ,
function heheda()
{
window.setTimeout(function () {
console.log("---------------------Capture Original Begin------------------------------");
var scrollheight=page.evaluate(function () {
return document.body.scrollHeight;
});
page.viewportSize = {width:414,height:scrollheight};
page.render(outOriginalimg);
console.log("---------------------Capture Begin------------------------------");
markCapture();
console.log("scrollheight:"+scrollheight);
window.setTimeout(function () {
page.render(output);
page.close();
console.log('render ok');
phantom.exit();
}, 10000);
}, 25000);
}
);
});
}
function VisiteHtmlDom(nodePath,inputcsvPath) {
var nodeinfo = page.evaluate(function (str) {
var root = document.getElementsByTagName('html');
var htmlNode = root[0];
var xpathArr = str.split("/");
for (var i = 1; i < xpathArr.length; i++) {
var index = parseInt(xpathArr[i]);
if((htmlNode.nodeName.indexOf("SCRIPT") > -1) || (htmlNode.nodeName.indexOf("NOSCRIPT") > -1)|| (htmlNode.nodeName.indexOf("HEAD") > -1))
{
htmlNode = htmlNode;
}
else if(htmlNode.nodeName.indexOf("IFRAME") > -1&&window.getComputedStyle(htmlNode).display!="none")
{
var iframdom = htmlNode.contentWindow.document;
var roots = iframdom.getElementsByTagName('html');
htmlNode=roots[0];
}
else
{
htmlNode = htmlNode.childNodes[index];
}
}
if ((htmlNode.nodeName.indexOf("#text") > -1))
{
return htmlNode.childNodes.length + "{_}" + htmlNode.nodeName+ "{_}" + str+"{_}"+"{_}"+"{_}"+"{_}"+"{_}"+"{_}"+"{_}"+ ("{_}" + htmlNode.nodeValue||"").replace(/\r|\n/ig, "");
}
else if((htmlNode.nodeName.indexOf("SCRIPT") > -1) || (htmlNode.nodeName.indexOf("NOSCRIPT") > -1)|| (htmlNode.nodeName.indexOf("HEAD") > -1)|| (htmlNode.nodeName.indexOf("#comment") > -1))
{
return "0"+"{_}"+htmlNode.nodeName + "{_}" + str+"{_}"+"{_}"+"{_}"+"{_}"+"{_}"+"{_}"+"{_}"+"{_}";
}
else {
//
var bgImgUrl= htmlNode.style.getPropertyValue("background-image");
// var bgImgUrl=htmlNode.style.getPropertyValue("background-image").replace(/^(url)\(|\)/g, '');
if(bgImgUrl!=null)
bgImgUrl=bgImgUrl.replace(/^(url)\(|\)/g, '');
var width = window.getComputedStyle(htmlNode).width;
var height = window.getComputedStyle(htmlNode).height;
var left = window.getComputedStyle(htmlNode).left;
var top = window.getComputedStyle(htmlNode).top;
var display = window.getComputedStyle(htmlNode).display;
switch (htmlNode.nodeName) {
case "IMG":
var imgUrl = htmlNode.src; //ȡͼƬ�ĵ�ַ
return htmlNode.childNodes.length + "{_}" + htmlNode.nodeName + "{_}" + str + "{_}" + width + "{_}" + height + "{_}" + left + "{_}" + top + "{_}" + display + "{_}" + imgUrl+ "{_}"+ "{_}"; //img bgurl txt
break;
case "EMBED":
case "#COMMENT":
return htmlNode.childNodes.length + "{_}" +htmlNode.nodeName+"{_}" + str+"{_}"+"{_}"+"{_}"+"{_}"+"{_}"+"{_}"+"{_}"+"{_}";
break;
case "IFRAME":
var iframeUrl = htmlNode.src;
if(window.getComputedStyle(htmlNode).display!="none")
{
return "1" + "{_}" + htmlNode.nodeName + "{_}" + str + "{_}" + width + "{_}" + height + "{_}" + left + "{_}" + top + "{_}" + display + "{_}" + iframeUrl+ "{_}"+ "{_}"; //img bgurl txt
}
else
{
return "0" + "{_}" + htmlNode.nodeName + "{_}" + str + "{_}" + width + "{_}" + height + "{_}" + left + "{_}" + top + "{_}" + display + "{_}" + iframeUrl+ "{_}"+ "{_}"; //img bgurl txt
}
default:
return htmlNode.childNodes.length + "{_}" + htmlNode.nodeName + "{_}" + str + "{_}" + width + "{_}" + height + "{_}" + left + "{_}" + top + "{_}" + display + "{_}" +"{_}"+ bgImgUrl+"{_}";
break;
}
}
}, nodePath);
console.log("create CSV");
fs.write(inputcsvPath, nodeinfo + "\r\n", 'a');
// if(!(nodeinfo.indexOf("undefined")>-1))
var childNodesCount = nodeinfo.split("{_}")[0].split("/")[0];
for (var childIndex = 0; childIndex < childNodesCount;childIndex++) {
var childNodesPath = nodePath + "/" + childIndex.toString();
VisiteHtmlDom(childNodesPath,inputcsvPath);
}
}
function markCapture()
{
var stream = fs.open(nodePathFile, 'r');
while(!stream.atEnd()) {
var line = stream.readLine();
page.evaluate(function (line) {
var root = document.getElementsByTagName('html');//��ȡdom
var htmlNode = root[0];
var xpathArr=line.split("/");
for(var i=1;i<xpathArr.length;i++)
{
var index=parseInt(xpathArr[i].substring(xpathArr[i].length - 3, 3));
htmlNode= htmlNode.childNodes[index];
if(htmlNode.nodeName.indexOf("IFRAME") > -1)
{
var iframdom = htmlNode.contentWindow.document;
var roots = iframdom.getElementsByTagName('html');
htmlNode=roots[0];
i++;
}
}
if (htmlNode.nodeName.indexOf("text")>-1)
{
htmlNode.parentNode.style.boxSizing = "border-box";
htmlNode.parentNode.style.border = "5px solid #ff0000";
}
else
{
htmlNode.style.boxSizing = "border-box";
htmlNode.style.border = "5px solid #ff0000";
}
},line);
}
stream.close();
}
function waitFor(testFx, onReady, timeOutMillis) {
var maxtimeOutMillis = timeOutMillis ? timeOutMillis : 120000, //< Default Max Timout is 3m
start = new Date().getTime(),
condition = false,
interval = setInterval(function() {
if ( (new Date().getTime() - start < maxtimeOutMillis) && !condition ) {
// If not time-out yet and condition not yet fulfilled
condition = (typeof(testFx) === "string" ? eval(testFx) : testFx()); //< defensive code
} else {
if(!condition) {
// If condition still not fulfilled (timeout but condition is 'false')
console.log("'waitFor()' timeout");
phantom.exit(1);
} else {
// Condition fulfilled (timeout and/or condition is 'true')
console.log("'waitFor()' finished in " + (new Date().getTime() - start) + "ms.");
typeof(onReady) === "string" ? eval(onReady) : onReady(); //< Do what it's supposed to do once the condition is fulfilled
clearInterval(interval); //< Stop this interval
}
}
}, 5000); //< repeat 5000ms
};
function waitFor2(testFx, onReady, timeOutMillis) {
var maxtimeOutMillis = timeOutMillis ? timeOutMillis : 120000, //< Default Max Timout is 3m
start = new Date().getTime(),
condition = false,
interval = setInterval(function() {
if ( (new Date().getTime() - start < maxtimeOutMillis) && !condition ) {
// If not time-out yet and condition not yet fulfilled
condition = (typeof(testFx) === "string" ? eval(testFx) : testFx()); //< defensive code
} else {
if(!condition) {
// If condition still not fulfilled (timeout but condition is 'false')
console.log("'waitFor()' timeout");
phantom.exit(1);
} else {
// Condition fulfilled (timeout and/or condition is 'true')
console.log("'waitFor()' finished in " + (new Date().getTime() - start) + "ms.");
if(condition)
{
typeof(onReady) === "string" ? eval(onReady) : onReady(); //< Do what it's supposed to do once the condition is fulfilled
clearInterval(interval); //< Stop this interval
}
}
}
}, 5000); //< repeat 5000ms
};
转载于:https://www.cnblogs.com/c-x-a/p/7267747.html