This post was most recently updated on July 31st, 2024
The Selenium code below crawls every page of a website, collects all of its links (anchors, images and `<link>` resources), and reports which of them are working and which are broken.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 |
package javacode; import java.io.IOException; import java.net.HttpURLConnection; import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; import java.util.List; import java.util.concurrent.TimeUnit; import org.openqa.selenium.By; import org.openqa.selenium.WebDriver; import org.openqa.selenium.WebElement; import org.openqa.selenium.chrome.ChromeDriver; public class Pro { static List<WebElement> sublinkimg = new ArrayList<WebElement>(); static List<WebElement> sublink = new ArrayList<WebElement>(); static List<String> uniquelink = new ArrayList<String>(); static List<String> uniquelink1 = new ArrayList<String>(); static List<String> subimg = new ArrayList<String>(); static List<String> uniquelinkimg = new ArrayList<String>(); static List<WebElement> linktag = new ArrayList<WebElement>(); static List<String> cssurl = new ArrayList<String>(); static List<String> pagecssurl = new ArrayList<String>(); static String parentDomainName; //Find link tag link public static List<String> csslink(WebDriver obj) { linktag=obj.findElements(By.tagName("link")); for(int i=0;i<linktag.size();i++) { String css=linktag.get(i).getAttribute("href"); cssurl.add(css); } return cssurl; } //Find image link public static List<String> imagelink(WebDriver obj) { sublinkimg=obj.findElements(By.tagName("img")); for(int i=0;i<sublinkimg.size();i++) { String srcimg=sublinkimg.get(i).getAttribute("src"); if(!uniquelinkimg.contains(srcimg)) { uniquelinkimg.add(srcimg); } } return uniquelinkimg; } //Find anchor link public static List<String> findlink(String s, WebDriver obj) { obj.get(s); sublink = obj.findElements(By.tagName("a")); for (int i = 0; i < sublink.size(); i++) { String pagelink1 = sublink.get(i).getAttribute("href"); String pagelink = pagelink1.replace("#", ""); String uniquelinkurl=pagelink.toLowerCase(); if (uniquelink.contains(uniquelinkurl)) { continue; } else { uniquelink.add(uniquelinkurl); } } return uniquelink; } //Process link to check its reponse 
public static int processlink(String link) throws MalformedURLException, IOException { int j = 0; if (link.contains("http")) { HttpURLConnection http = (HttpURLConnection) (new URL(link).openConnection()); http.connect(); j = http.getResponseCode(); if (j == 200) { return j; } } return j; } public static void main(String[] args) throws MalformedURLException, IOException { System.setProperty("webdriver.chrome.driver", "path of chrome driver "); WebDriver obj = new ChromeDriver(); obj.manage().timeouts().pageLoadTimeout(50, TimeUnit.SECONDS); obj.manage().timeouts().implicitlyWait(30, TimeUnit.SECONDS); parentDomainName="domainname"; String url = "http://www.DomainName.com/";//Enter the url in the form of http://www.DomainName.com List<String> uniquelink1 = new ArrayList<String>(); uniquelink1.add(url); uniquelink1 = findlink(url, obj); for (int i = 0; i < uniquelink1.size(); i++) { List<String> pageurl = new ArrayList<String>(); String link = uniquelink1.get(i); int k = processlink(link); if (k == 200) { if (link.contains(".jpg") || link.contains(".png") || link.contains(".css")||link.contains(".php")||link.contains(".json")) { System.out.println(link + "|| img link working ||"+k); } else if (link.contains( parentDomainName)) { System.out.println(link + "||internal link working ||" + k); pageurl = findlink(link, obj); subimg =imagelink(obj); pagecssurl=csslink(obj); } else if (!link.contains( parentDomainName)) { System.out.println(link + "|| external link working ||" + k); } } else { System.out.println(link + "||link not working ||" + k); } for (int j = 0; j < pageurl.size(); j++) { String s = pageurl.get(i).replace("#", ""); if (!uniquelink1.contains(s)) { uniquelink1.add(s); } } for (int k1 = 0; k1 < subimg.size(); k1++) { String s = subimg.get(k1); if (!uniquelink1.contains(s)) { uniquelink1.add(s); } } for (int m= 0; m < pagecssurl.size(); m++) { String s = pagecssurl.get(m); if (!uniquelink1.contains(s)) { uniquelink1.add(s); } } } } } |
Output of the above program after entering a valid URL and domain name:
Link Name | Internal / External | Response of link |
http://www.mundrisoft.com/index.html | internal link working | 200 |
http://www.mundrisoft.com/ | internal link working | 200 |
http://www.mundrisoft.com/capabilities.html | internal link working | 200 |
http://www.mundrisoft.com/case-studies.html | internal link working | 200 |