// C# HTML content-processing helper class
using System;
using System.Text;
using System.Text.RegularExpressions;
using System.Net;
using System.IO;
using System.IO.Compression;
namespace HuaTong.General.Utility
{
/// <summary>
/// Helpers for processing HTML content.
/// </summary>
public static class HtmlHelper
{
/// <summary>
/// Converts plain text to HTML using the default options of the
/// three-parameter overload (both flags passed as <c>false</c>).
/// </summary>
/// <param name="value">The plain text to convert.</param>
/// <returns>Whatever the three-parameter overload returns for <paramref name="value"/>.</returns>
public static string TextToHTML(string value)
{
    // Spell out the defaults so the forwarded flags are self-describing.
    bool emptyRowFlag = false;
    bool lineBreaksAsBr = false;
    return TextToHTML(value, emptyRowFlag, lineBreaksAsBr);
}
/// <summary>
/// Converts plain text to HTML, forwarding <paramref name="isHasEmptyRow"/>
/// to the three-parameter overload and passing <c>false</c> for its last flag.
/// </summary>
/// <param name="value">The plain text to convert.</param>
/// <param name="isHasEmptyRow">Forwarded unchanged to the three-parameter overload.</param>
/// <returns>Whatever the three-parameter overload returns.</returns>
public static string TextToHTML(string value, bool isHasEmptyRow)
{
    bool lineBreaksAsBr = false;
    return TextToHTML(value, isHasEmptyRow, lineBreaksAsBr);
}
/// <summary>
/// Converts plain text to an HTML fragment: markup-significant characters and
/// a few common symbols are replaced by character entities, and line breaks
/// are rendered either as br tags or as p paragraphs.
/// </summary>
/// <param name="value">The plain text to convert. A null value is treated as empty text
/// by the StringBuilder when <paramref name="isHasEmptyRow"/> is false.</param>
/// <param name="isHasEmptyRow">When true, the text is first passed through
/// <c>StringHelper.RemoveEmptyRow</c>; when false it is used as-is.</param>
/// <param name="isOutBr">When true, every line break becomes a br tag; when false,
/// the text is wrapped in p tags and each line break starts a new paragraph.</param>
/// <returns>The resulting HTML fragment.</returns>
public static string TextToHTML(string value, bool isHasEmptyRow, bool isOutBr)
{
    StringBuilder sr = new StringBuilder();
    if (isHasEmptyRow)
    {
        sr.Append(StringHelper.RemoveEmptyRow(value));
    }
    else
    {
        sr.Append(value);
    }

    // "&" must be escaped first; otherwise the ampersands introduced by the
    // entities below would themselves be escaped a second time.
    sr.Replace("&", "&amp;");
    sr.Replace(">", "&gt;");
    sr.Replace("<", "&lt;");
    sr.Replace(" ", "&nbsp;");
    sr.Replace("\"", "&quot;");
    sr.Replace("©", "&copy;");
    sr.Replace("®", "&reg;");
    sr.Replace("×", "&times;");
    sr.Replace("÷", "&divide;");

    if (isOutBr)
    {
        // Handle "\r\n" before the single-character forms so a Windows line
        // ending yields one <br>, not two.
        sr.Replace("\r\n", "<br>");
        sr.Replace("\r", "<br>");
        sr.Replace("\n", "<br>");
    }
    else
    {
        sr.Insert(0, "<p>");
        // Normalize every line-ending style to a lone "\r" so that Windows,
        // old-Mac and Unix line breaks all produce exactly one paragraph break.
        sr.Replace("\r\n", "\r");
        sr.Replace("\n", "\r");
        sr.Replace("\r", "</p>\r\n<p>");
        sr.Append("</p>");
    }
    return sr.ToString();
}
/// <summary>
/// Turns an HTML fragment into JavaScript source that writes it out with
/// <c>document.writeln</c>, one call per non-empty line of the input.
/// </summary>
/// <param name="value">The HTML text. Null is treated as empty text.</param>
/// <returns>
/// One or more <c>document.writeln("...");</c> statements separated by "\r\n".
/// Input with no non-empty lines yields a single <c>document.writeln("");</c>.
/// </returns>
public static string HtmlToScript(string value)
{
    StringBuilder sr = new StringBuilder();
    sr.Append(value);

    // Backslashes go first so the escapes added below are not doubled.
    sr.Replace("\\", "\\\\");
    // "/" becomes "\/", so a sequence such as "</script>" never appears
    // verbatim in the generated script text.
    sr.Replace("/", "\\/");
    sr.Replace("'", "\\'");
    sr.Replace("\"", "\\\"");

    // Split on either line-break character; empty entries (blank lines and
    // the gap inside "\r\n") are dropped.
    string[] lines = sr.ToString().Split(new char[] { '\r', '\n' },
        StringSplitOptions.RemoveEmptyEntries);

    // The format string has a single placeholder, so exactly one argument is passed.
    return String.Format("document.writeln(\"{0}\");",
        String.Join("\");\r\ndocument.writeln(\"", lines));
}
/// <summary>
/// Escapes a string for use inside a JavaScript string literal by
/// backslash-escaping backslashes, single quotes and double quotes.
/// </summary>
/// <param name="value">The text to escape.</param>
/// <returns>The escaped text.</returns>
public static string ScriptStringFormat(string value)
{
    // Backslashes are handled first so the escapes added for the quote
    // characters are not themselves doubled.
    return value
        .Replace("\\", "\\\\")
        .Replace("'", "\\'")
        .Replace("\"", "\\\"");
}
/// <summary>
/// Reduces HTML to plain text by deleting tags and character entities.
/// </summary>
/// <param name="value">The HTML text.</param>
/// <returns>
/// <paramref name="value"/> with every <c>&lt;...&gt;</c> tag and every character
/// entity (named, decimal or hexadecimal) removed. Entities are deleted, not decoded.
/// </returns>
public static string HtmlToText(string value)
{
    // The entity alternative is limited to letters and digits between "&" and ";".
    // An unbounded ".+" would be greedy and swallow all text from the first "&"
    // up to the last ";" on the same line.
    string regexstr = @"(&#?[a-z0-9]+;)|(<[^>]*>)";
    return Regex.Replace(value, regexstr, "", RegexOptions.IgnoreCase);
}
/// <summary>
/// Defangs potentially unsafe HTML by rewriting it with three
/// case-insensitive regular-expression passes.
/// </summary>
/// <param name="value">The HTML text to filter.</param>
/// <returns>The rewritten HTML text.</returns>
public static string HtmlFilter(string value)
{
    const RegexOptions options = RegexOptions.IgnoreCase;

    // Pass 1: for an attribute-like token "o<letters>=" that follows "<" or
    // whitespace (e.g. " onclick="), drop the leading "o".
    string withoutHandlers = Regex.Replace(value, @"(\<|\s+)o([a-z]+\s?=)", "$1$2", options);

    // Pass 2: insert a "." after any listed name that is followed by
    // whitespace, "|", ":" or ">".
    // NOTE(review): "embedlayer" looks like a missing "|" between "embed" and
    // "layer" - confirm before changing the pattern.
    string withoutTags = Regex.Replace(withoutHandlers, @"(select|textarea|input|link|iframe|frameset|frame|form|applet|embedlayer|ilayer|meta|object|script|behavior|style)([\s|:|>])+", "$1.$2", options);

    // Pass 3: delete every occurrence of "javascript" and "eval".
    return Regex.Replace(withoutTags, @"javascript|eval", "", options);
}
/// <summary>
/// Downloads the body of <paramref name="url"/> and decodes it with the named encoding.
/// </summary>
/// <param name="url">The address to request over HTTP.</param>
/// <param name="encoding">An encoding name accepted by <see cref="Encoding.GetEncoding(string)"/>.</param>
/// <returns>
/// The response body as text when the server answers 200 OK; otherwise
/// <see cref="string.Empty"/>. Redirects are not followed, and any failure
/// (bad URL, network error, unknown encoding name, ...) also yields an empty string.
/// </returns>
public static string GetHtmlContent(string url, string encoding)
{
    try
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
        request.Timeout = 10000; // milliseconds
        request.AllowAutoRedirect = false;

        // "using" releases the connection on every exit path.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        {
            if (response.StatusCode != HttpStatusCode.OK)
            {
                return string.Empty;
            }

            Stream body = response.GetResponseStream();
            if (response.ContentEncoding != null && StringHelper.IsEqualString(response.ContentEncoding, "gzip"))
            {
                body = new GZipStream(body, CompressionMode.Decompress);
            }

            // Disposing the reader also disposes the stream(s) it wraps.
            using (StreamReader reader = new StreamReader(body, Encoding.GetEncoding(encoding)))
            {
                return reader.ReadToEnd();
            }
        }
    }
    catch (Exception)
    {
        // Deliberately best-effort: callers receive an empty string on any failure.
    }
    return string.Empty;
}
/// <summary>
/// Requests <paramref name="url"/> and works out the character-set name of the page.
/// </summary>
/// <param name="url">The address to request over HTTP.</param>
/// <returns>
/// In order of preference: the value of the first <c>charset=</c> token found in the
/// response body, the response's <c>CharacterSet</c> when it is non-empty, or
/// <c>Encoding.Default.BodyName</c>. The fallback is also returned when the
/// status is not 200 OK, the reported content length is 1 MiB or more, or any
/// error occurs. Redirects are not followed.
/// </returns>
public static string GetEncoding(string url)
{
    try
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
        request.Timeout = 20000; // milliseconds
        request.AllowAutoRedirect = false;

        // "using" releases the connection on every exit path.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        {
            if (response.StatusCode == HttpStatusCode.OK && response.ContentLength < 1024 * 1024)
            {
                Stream body = response.GetResponseStream();
                if (response.ContentEncoding != null && StringHelper.IsEqualString(response.ContentEncoding, "gzip"))
                {
                    body = new GZipStream(body, CompressionMode.Decompress);
                }

                // The charset token itself is plain ASCII, so ASCII decoding is
                // enough to locate it whatever the page's real encoding is.
                using (StreamReader reader = new StreamReader(body, Encoding.ASCII))
                {
                    string html = reader.ReadToEnd();

                    // Accept charset=utf-8, charset="utf-8" and charset='utf-8', in any
                    // letter case; capture only the name, stopping at the closing
                    // quote, whitespace, ';' or '>'.
                    Match match = Regex.Match(
                        html,
                        @"charset\b\s*=\s*[""']?(?<charset>[a-z0-9_.:\-]+)",
                        RegexOptions.IgnoreCase);
                    if (match.Success)
                    {
                        return match.Groups["charset"].Value;
                    }

                    // Fall back to the charset reported in the response headers;
                    // it can be null as well as empty.
                    if (!string.IsNullOrEmpty(response.CharacterSet))
                    {
                        return response.CharacterSet;
                    }
                }
            }
        }
    }
    catch (Exception)
    {
        // Deliberately best-effort: any failure falls through to the default below.
    }
    return Encoding.Default.BodyName;
}
}
}
// Comment list