/** * @return * unique document id of the parent page. The parent page is the * page in which the Url of this page is first observed. */ publicintgetParentDocid() { return parentDocid; }
/**
 * Returns the url of the parent page.
 *
 * <p>The parent page is the page in which the Url of this page is first
 * observed.
 *
 * @return url of the parent page
 */
public String getParentUrl() {
    return this.parentUrl;
}
/** * @return * crawl depth at which this Url is first observed. Seed Urls * are at depth 0. Urls that are extracted from seed Urls are at depth 1, etc. */ publicshortgetDepth() { return depth; }
/**
 * Returns the domain of this Url.
 *
 * <p>For {@code http://www.example.com/sample.htm}, the domain will be
 * {@code example.com}.
 *
 * @return domain of this Url
 */
public String getDomain() {
    return this.domain;
}
/**
 * Returns the value stored in the {@code subDomain} field.
 *
 * <p>NOTE(review): presumably this is the part of the host that precedes the
 * domain (e.g. {@code www} for {@code http://www.example.com/sample.htm});
 * confirm against the code that assigns {@code subDomain}.
 *
 * @return sub-domain of this Url
 */
public String getSubDomain() {
    return this.subDomain;
}
/**
 * Returns the path of this Url.
 *
 * <p>For {@code http://www.example.com/sample.htm}, the path will be
 * {@code sample.htm}.
 *
 * <p>NOTE(review): confirm against the code that assigns {@code path} whether
 * the leading {@code /} is part of the stored value.
 *
 * @return path of this Url
 */
public String getPath() {
    return this.path;
}
/**
 * Returns the anchor string of this Url.
 *
 * <p>For example, in {@code <a href="example.com">A sample anchor</a>} the
 * anchor string is {@code A sample anchor}.
 *
 * @return anchor string
 */
public String getAnchor() {
    return this.anchor;
}