C#分析搜索引擎URL得到搜索关键字,并判断页面停留时间以及来源页面

时间:2023-03-09 16:08:38
C#分析搜索引擎URL得到搜索关键字,并判断页面停留时间以及来源页面

前台代码(JavaScript,依赖 jQuery;在页面卸载时把停留时间和来源页面提交到 /ashx/SiteStats.ashx):

 var start;
var end;
var state;
var lasturl = document.referrer;

// Time the visitor entered the page. It is read from the "Date" response header of a
// synchronous request so that a wrong clock on the client machine does not matter.
start = new Date($.ajax({ async: false }).getResponseHeader("Date"));

$(document).ready(function () {
    // Ordered list of path fragments and the action label recorded for each.
    // The first fragment found in the path (at an index greater than 0) wins.
    var sectionActions = [
        { key: "GeRenKuanDai", action: "家庭宽带信息资费查看" },
        { key: "boxdetail", action: "查看大卖系列" },
        { key: "cesu", action: "测速" },
        { key: "government", action: "企业宽带查看" },
        { key: "GwbnNewApply", action: "宽带新开户页面" },
        { key: "wodechangkuan", action: "网上营业厅-用户登录" },
        { key: "xinwen", action: "新闻浏览" }
    ];

    // Maps the current path to its action label; unknown pages get an empty label.
    function resolveAction(pathname) {
        if (pathname == "/default.htm") {
            return "首页访问";
        }
        for (var i = 0; i < sectionActions.length; i++) {
            if (pathname.indexOf(sectionActions[i].key) > 0) {
                return sectionActions[i].action;
            }
        }
        return "";
    }

    // Runs when the page is unloaded: closing the page, following a link or refreshing.
    $(window).unload(function () {
        // Leave time, again taken from the server's "Date" header.
        end = new Date($.ajax({ async: false }).getResponseHeader("Date"));
        // Dwell time in seconds (getTime() is in milliseconds, hence the division by 1000).
        state = (end.getTime() - start.getTime()) / 1000;

        // Send the visit record to the handler, which writes it to the database.
        $.post("/ashx/SiteStats.ashx", {
            start_time: start.Format("yyyy-MM-dd hh:mm:ss.S"),
            state_time: state,
            action: resolveAction(window.location.pathname),
            state_url: window.location.href,
            last_url: lasturl,
            state_title: $("title").html()
        });
    });
});
Date.prototype.Format = function (fmt) {
var o = {
"M+": this.getMonth() + 1, //月份
"d+": this.getDate(), //日
"h+": this.getHours(), //小时
"m+": this.getMinutes(), //分
"s+": this.getSeconds(), //秒
"q+": Math.floor((this.getMonth() + 3) / 3), //季度
"S": this.getMilliseconds() //毫秒
};
if (/(y+)/.test(fmt)) fmt = fmt.replace(RegExp.$1, (this.getFullYear() + "").substr(4 - RegExp.$1.length));
for (var k in o)
if (new RegExp("(" + k + ")").test(fmt)) fmt = fmt.replace(RegExp.$1, (RegExp.$1.length == 1) ? (o[k]) : (("00" + o[k]).substr(("" + o[k]).length)));
return fmt;
}

后台 ashx 页面(SiteStats.ashx,接收前台提交的访问记录并写入数据库):

 <%@ WebHandler Language="C#" Class="SiteStats" %>

 using System;
using System.Collections.Generic;
using System.Text;
using System.Text.RegularExpressions;
using System.Web;
using Newtonsoft.Json;
using Newtonsoft.Json.Linq;
using System.Web.Script.Serialization;
using System.Net;
using System.IO;
/// <summary>
/// HTTP handler that stores one visit record per request: dwell time, page URL and title,
/// referrer URL and title, client IP, a region string for that IP, a visitor cookie id and,
/// when the referrer looks like a search-engine result page, the engine name and keyword.
/// </summary>
public class SiteStats : IHttpHandler
{
    // Names looked for (case-insensitively, as plain substrings) in referrers and user agents.
    // NOTE(review): two names were empty strings in the source this was restored from (all digits
    // had been stripped); "163" and "118114" are assumed - confirm. An empty name must never be
    // listed here because every string contains "".
    // NOTE(review): short names such as "so" also match unrelated hosts - consider host-based matching.
    private static readonly string[] KnownEngineNames = new string[]
    {
        "Google", "Baidu", "yisou", "MSN", "Yahoo", "live", "tom", "163", "TMCrawler", "iask",
        "Sogou", "soso", "youdao", "zhongsou", "so", "openfind", "alltheweb", "lycos", "bing", "118114"
    };

    // Matches literal \uXXXX escape sequences.
    private static readonly Regex UnicodeEscape = new Regex(@"(?i)\\u([0-9a-f]{4})");

    // Upper bound for fetching the referrer page, so a slow site cannot hold the request for long.
    private const int TitleFetchTimeoutMs = 5000;

    string Keyword = String.Empty;
    string Engine = String.Empty;
    Maticsoft.BLL.tb_SiteStats tbSiteStatesBLL = new Maticsoft.BLL.tb_SiteStats();
    Maticsoft.Model.tb_SiteStats tbSiteStatesModle = new Maticsoft.Model.tb_SiteStats();

    /// <summary>
    /// Builds a visit record from the posted fields (last_url, state_time, state_url,
    /// state_title, action) and saves it through the BLL.
    /// </summary>
    /// <param name="context">Current HTTP context.</param>
    public void ProcessRequest(HttpContext context)
    {
        context.Response.ContentType = "text/plain";

        // Referrer as reported by the client script; absent when the page was opened directly.
        string url = context.Request["last_url"] ?? String.Empty;

        if (IsSearchEnginesGet(url))
        {
            string key = SearchKey(url);
            // SearchKey returns its input unchanged when no engine matched; do not store
            // the whole URL as a keyword in that case.
            if (EngineName != "")
            {
                Keyword = key;
                Engine = EngineName;
            }
        }

        string stayRaw = context.Request["state_time"];
        DateTime now = DateTime.Now;

        tbSiteStatesModle.ID = Guid.NewGuid().ToString();
        tbSiteStatesModle.State_time = stayRaw;
        // The visit start is derived from the server clock minus the reported dwell time.
        tbSiteStatesModle.VisitTime = now.AddSeconds(-ParseStaySeconds(stayRaw, now));
        tbSiteStatesModle.State_url = context.Request["state_url"];
        tbSiteStatesModle.State_title = context.Request["state_title"];
        tbSiteStatesModle.UserAction = context.Request["action"];

        string ip = ReadIP();
        tbSiteStatesModle.IpAddress = ip;
        tbSiteStatesModle.Last_title = url.Length == 0 ? "" : GetTitle(url);

        // Region information is optional; the record is still saved when the lookup fails.
        string region = LookupRegion(ip);
        if (region != null)
        {
            tbSiteStatesModle.ExtStr1 = region;
        }

        HttpCookie cookie = context.Request.Cookies["unid"];
        if (cookie == null)
        {
            // No visitor cookie yet: the visitor is new only if the IP has not been seen either.
            bool seenIp = tbSiteStatesBLL.ExistsByIP(ip) == true;
            tbSiteStatesModle.IsNewUser = seenIp ? "老访客" : "新访客";

            cookie = new HttpCookie("unid", Guid.NewGuid().ToString());
            // NOTE(review): the original lifetime argument was lost from the source;
            // one year is assumed - confirm.
            cookie.Expires = DateTime.Now.AddYears(1);
            context.Response.Cookies.Add(cookie);
        }
        else
        {
            tbSiteStatesModle.IsNewUser = "老访客";
        }

        tbSiteStatesModle.Last_url = url;
        tbSiteStatesModle.ExtStr2 = cookie.Value;
        tbSiteStatesModle.VisitWay = Engine;
        tbSiteStatesModle.KeyWords = Keyword;
        tbSiteStatesBLL.Add(tbSiteStatesModle);
    }

    // Parses the client-supplied dwell time; returns 0 for missing, malformed, negative
    // or absurdly large values so that subtracting it from "now" can never throw.
    private static double ParseStaySeconds(string raw, DateTime now)
    {
        double seconds;
        if (!double.TryParse(raw, System.Globalization.NumberStyles.Float,
                System.Globalization.CultureInfo.InvariantCulture, out seconds))
        {
            return 0;
        }
        if (double.IsNaN(seconds) || seconds < 0 || seconds >= (now - DateTime.MinValue).TotalSeconds)
        {
            return 0;
        }
        return seconds;
    }

    // Asks the IP lookup service for region + city + county + ISP of the address.
    // Returns null when the service is unreachable or does not report success.
    // Note: this is a blocking outbound call made on every tracked request.
    private static string LookupRegion(string ip)
    {
        try
        {
            string jsonText;
            using (WebClient wc = new WebClient())
            {
                // The IP can come from a client-controlled header, so it is escaped before use.
                byte[] bytes = wc.DownloadData(
                    "http://ip.taobao.com/service/getIpInfo.php?ip=" + Uri.EscapeDataString(ip));
                jsonText = Encoding.UTF8.GetString(bytes);
            }

            JObject jo = JObject.Parse(jsonText);
            JToken code = jo["code"];
            JToken data = jo["data"];
            // NOTE(review): the success code literal was lost from the source; "0" is assumed - confirm.
            if (code == null || code.ToString() != "0" || data == null || data.Type != JTokenType.Object)
            {
                return null;
            }

            return UnicodeToStr(ReadField(data, "region") + ReadField(data, "city")
                + ReadField(data, "county") + ReadField(data, "isp"));
        }
        catch (WebException)
        {
            return null;
        }
        catch (JsonException)
        {
            return null;
        }
    }

    // Reads one property of a JSON object as text; a missing property yields "".
    private static string ReadField(JToken data, string name)
    {
        JToken value = data[name];
        return value == null ? "" : value.ToString();
    }

    /// <summary>
    /// Reads the client IP address of the current request.
    /// </summary>
    /// <returns>
    /// The first entry of HTTP_X_FORWARDED_FOR when present, otherwise REMOTE_ADDR,
    /// otherwise UserHostAddress, otherwise "0.0.0.0".
    /// </returns>
    public static string ReadIP()
    {
        HttpRequest request = HttpContext.Current.Request;

        // This header is set by proxies and can be forged by the client; it may also hold
        // a comma-separated chain, of which only the first entry is used.
        string IP = request.ServerVariables["HTTP_X_FORWARDED_FOR"];
        if (!string.IsNullOrEmpty(IP))
        {
            int comma = IP.IndexOf(',');
            if (comma >= 0)
            {
                IP = IP.Substring(0, comma);
            }
            IP = IP.Trim();
        }
        if (string.IsNullOrEmpty(IP))
        {
            IP = request.ServerVariables["REMOTE_ADDR"];
        }
        if (string.IsNullOrEmpty(IP))
        {
            IP = request.UserHostAddress;
        }
        if (string.IsNullOrEmpty(IP))
        {
            IP = "0.0.0.0";
        }
        return IP;
    }

    /// <summary>
    /// Downloads a page and extracts the text of its first title element.
    /// </summary>
    /// <param name="Url">Absolute http or https address of the page.</param>
    /// <returns>The title text, or an empty string when the page cannot be fetched or has no title.</returns>
    public string GetTitle(string Url)
    {
        // SECURITY: ProcessRequest passes the client-supplied last_url here, so the server
        // fetches an address chosen by the caller. Only http/https is allowed; restricting
        // the reachable hosts as well should be considered.
        Uri target;
        if (!Uri.TryCreate(Url, UriKind.Absolute, out target)
            || (target.Scheme != Uri.UriSchemeHttp && target.Scheme != Uri.UriSchemeHttps))
        {
            return "";
        }

        try
        {
            WebRequest request = WebRequest.Create(target);
            request.Timeout = TitleFetchTimeoutMs;

            using (WebResponse response = request.GetResponse())
            using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.UTF8))
            {
                string html = reader.ReadToEnd();

                int start = html.IndexOf("<title>", StringComparison.OrdinalIgnoreCase);
                if (start < 0)
                {
                    return "";
                }
                start += "<title>".Length;

                int end = html.IndexOf("</title>", start, StringComparison.OrdinalIgnoreCase);
                if (end < 0)
                {
                    return "";
                }
                return html.Substring(start, end - start);
            }
        }
        catch (WebException)
        {
            return "";
        }
        catch (IOException)
        {
            return "";
        }
    }

    /// <summary>
    /// Replaces every literal \uXXXX escape in the text with the character it denotes;
    /// text without such escapes is returned unchanged.
    /// </summary>
    /// <param name="str">Text that may contain \uXXXX escapes.</param>
    /// <returns>The decoded text; an empty string for null input.</returns>
    public static string UnicodeToStr(string str)
    {
        if (str == null)
        {
            return "";
        }
        return UnicodeEscape.Replace(str, delegate (Match m1)
        {
            return ((char)Convert.ToInt32(m1.Groups[1].Value, 16)).ToString();
        });
    }

    /// <summary>
    /// Converts a GMT time string to local time.
    /// </summary>
    /// <param name="gmt">GMT time in string form.</param>
    /// <returns>The local time, or DateTime.MinValue when the text is null or cannot be parsed.</returns>
    public static DateTime GmtToLocal(string gmt)
    {
        DateTime dt = DateTime.MinValue;
        if (gmt == null)
        {
            return dt;
        }

        try
        {
            string pattern = "";
            if (gmt.IndexOf("+0", StringComparison.Ordinal) != -1)
            {
                // A numeric offset is present: drop the "GMT" marker and parse the offset.
                gmt = gmt.Replace("GMT", "");
                pattern = "ddd, dd MMM yyyy HH':'mm':'ss zzz";
            }
            if (gmt.IndexOf("GMT", StringComparison.OrdinalIgnoreCase) != -1)
            {
                pattern = "ddd, dd MMM yyyy HH':'mm':'ss 'GMT'";
            }

            if (pattern != "")
            {
                dt = DateTime.ParseExact(gmt, pattern, System.Globalization.CultureInfo.InvariantCulture,
                    System.Globalization.DateTimeStyles.AdjustToUniversal);
                dt = dt.ToLocalTime();
            }
            else
            {
                dt = Convert.ToDateTime(gmt);
            }
        }
        catch (FormatException)
        {
            // Best effort: unparseable input yields whatever dt holds (DateTime.MinValue).
        }
        return dt;
    }

    #region -Search engines-
    // Search-engine signatures: { name looked for in the URL, declared encoding, keyword parameter }.
    // NOTE(review): the names "163" and "118114" are assumed (see KnownEngineNames) - confirm.
    private static readonly string[][] _Enginers = new string[][]
    {
        new string[]{"google","utf8","q"},
        new string[]{"baidu", "gb2312", "wd"},
        new string[]{"yahoo","utf8","p"},
        new string[]{"yisou","utf8","search"},
        new string[]{"live","utf8","q"},
        new string[]{"tom","gb2312","word"},
        new string[]{"163","gb2312","q"},
        new string[]{"iask","gb2312","k"},
        new string[]{"soso","gb2312","w"},
        new string[]{"sogou","gb2312","query"},
        new string[]{"zhongsou","gb2312","w"},
        new string[]{"so", "utf8", "q"},
        new string[]{"openfind","utf8","q"},
        new string[]{"alltheweb","utf8","q"},
        new string[]{"lycos","utf8","query"},
        new string[]{"onseek","utf8","q"},
        new string[]{"youdao","utf8","q"},
        new string[]{"bing","utf8","q"},
        new string[]{"118114","gb2312","kw"}
    };

    // Name of the engine matched by the last EngineRegEx call ("" when none).
    private string _EngineName = "";
    public string EngineName
    {
        get
        {
            return _EngineName;
        }
    }

    // Declared encoding of the matched engine.
    private string _Coding = "utf8";
    public string Coding
    {
        get
        {
            return _Coding;
        }
    }

    // Query-string parameter that carries the keyword for the matched engine.
    private string _RegexWord = "";
    public string RegexWord
    {
        get
        {
            return _RegexWord;
        }
    }

    // Keyword-extraction pattern built by EngineRegEx.
    private string _Regex = @"(";

    /// <summary>
    /// Finds the first known engine whose name occurs in the text and prepares
    /// EngineName, Coding, RegexWord and the keyword pattern for it.
    /// </summary>
    /// <param name="myString">Lower-cased referrer URL.</param>
    public void EngineRegEx(string myString)
    {
        // Start from a clean state so a second call on the same instance cannot see
        // leftovers of the first (the pattern used to be appended to, corrupting it).
        _EngineName = "";
        _Coding = "utf8";
        _RegexWord = "";
        _Regex = @"(";

        if (myString == null)
        {
            return;
        }

        for (int i = 0; i < _Enginers.Length; i++)
        {
            if (myString.Contains(_Enginers[i][0]))
            {
                _EngineName = _Enginers[i][0];
                _Coding = _Enginers[i][1];
                _RegexWord = _Enginers[i][2];
                _Regex = @"(" + _EngineName + @".+.*[?/ &]" + _RegexWord + @"[=:])(?<key>[^&]*)";
                break;
            }
        }
    }

    /// <summary>
    /// Extracts the search keyword from a search-engine result URL.
    /// </summary>
    /// <param name="myString">Referrer URL.</param>
    /// <returns>
    /// The decoded keyword ("" when the URL has no keyword parameter). When no known
    /// engine matches, the input is returned unchanged; check EngineName to tell the cases apart.
    /// </returns>
    public string SearchKey(string myString)
    {
        if (string.IsNullOrEmpty(myString))
        {
            return myString;
        }

        EngineRegEx(myString.ToLower());
        if (_EngineName != "")
        {
            Regex myReg = new Regex(_Regex, RegexOptions.IgnoreCase);
            Match matche = myReg.Match(myString);
            myString = matche.Groups["key"].Value;
            // "+" stands for a space in query strings.
            myString = myString.Replace("+", " ");
            if (_Coding == "gb2312")
            {
                // NOTE(review): this overload decodes percent-escapes as UTF-8, not GB2312,
                // despite the engine's declared encoding - kept as in the original.
                myString = System.Web.HttpUtility.UrlDecode(myString);
            }
            else
            {
                myString = Uri.UnescapeDataString(myString);
            }
        }
        return myString;
    }

    /// <summary>
    /// Replaces every "%xx%xx" pair in the text with the character GB2312ToUTF8 returns for it.
    /// </summary>
    /// <param name="myString">Percent-encoded text.</param>
    /// <returns>The text with the pairs replaced.</returns>
    public string GetUTF8String(string myString)
    {
        Regex myReg = new Regex("(?<key>%..%..)", RegexOptions.IgnoreCase);
        MatchCollection matches = myReg.Matches(myString);
        for (int i = 0; i < matches.Count; i++)
        {
            string myWord = matches[i].Groups["key"].Value;
            myString = myString.Replace(myWord, GB2312ToUTF8(myWord));
        }
        return myString;
    }

    /// <summary>
    /// Decodes one "%xx%xx" pair as a two-byte GB2312 character.
    /// </summary>
    /// <param name="myString">Text of the form "%xx%xx" with hexadecimal digits.</param>
    /// <returns>The decoded character(s); the input unchanged when it is not two hex bytes.</returns>
    public string GB2312ToUTF8(string myString)
    {
        if (myString == null)
        {
            return myString;
        }

        // "%D6%D0" splits into { "", "D6", "D0" }.
        string[] myWord = myString.Split('%');
        byte high;
        byte low;
        if (myWord.Length < 3
            || !byte.TryParse(myWord[1], System.Globalization.NumberStyles.HexNumber,
                System.Globalization.CultureInfo.InvariantCulture, out high)
            || !byte.TryParse(myWord[2], System.Globalization.NumberStyles.HexNumber,
                System.Globalization.CultureInfo.InvariantCulture, out low))
        {
            return myString;
        }

        return Encoding.GetEncoding("GB2312").GetString(new byte[] { high, low });
    }

    /// <summary>
    /// Checks whether the text names a known search engine or crawler.
    /// </summary>
    /// <param name="SystemInfo">Text to inspect, e.g. a user-agent string.</param>
    /// <returns>The first matching name from the list, or the string "null" when none matches.</returns>
    public string isCrawler(string SystemInfo)
    {
        if (SystemInfo != null)
        {
            foreach (string Bot in KnownEngineNames)
            {
                if (SystemInfo.IndexOf(Bot, StringComparison.OrdinalIgnoreCase) >= 0)
                {
                    return Bot;
                }
            }
        }
        return "null";
    }

    /// <summary>
    /// Checks whether a URL appears to come from a known search engine.
    /// </summary>
    /// <param name="str">Referrer URL; may be null or empty.</param>
    /// <returns>true when any known engine name occurs in the URL, otherwise false.</returns>
    public bool IsSearchEnginesGet(string str)
    {
        if (string.IsNullOrEmpty(str))
        {
            return false;
        }

        for (int i = 0; i < KnownEngineNames.Length; i++)
        {
            if (str.IndexOf(KnownEngineNames[i], StringComparison.OrdinalIgnoreCase) >= 0)
            {
                return true;
            }
        }
        return false;
    }
    #endregion -Search engines-

    /// <summary>
    /// Always false: the handler keeps per-request state in instance fields,
    /// so an instance must not be reused.
    /// </summary>
    public bool IsReusable
    {
        get
        {
            return false;
        }
    }
}