文章内容

2017/7/30 17:19:23,作者:黄兵

C# HTML 内容处理类

using System;
using System.Text;
using System.Text.RegularExpressions;
using System.Net;
using System.IO;
using System.IO.Compression;

namespace HuaTong.General.Utility
{
    /// <summary>
    /// html内容处理
    /// </summary>
    public static class HtmlHelper
    {
        /// <summary>
        /// Converts plain text to HTML with both options switched off: the input is
        /// not passed through empty-row removal, and line breaks are rendered as
        /// paragraph boundaries rather than &lt;br&gt; tags.
        /// </summary>
        /// <param name="value">The plain text to convert.</param>
        /// <returns>The HTML produced by the three-argument overload.</returns>
        public static string TextToHTML(string value)
        {
            return TextToHTML(value, isHasEmptyRow: false, isOutBr: false);
        }
        /// <summary>
        /// Converts plain text to HTML, rendering line breaks as paragraph
        /// boundaries rather than &lt;br&gt; tags.
        /// </summary>
        /// <param name="value">The plain text to convert.</param>
        /// <param name="isHasEmptyRow">Forwarded unchanged to the three-argument overload.</param>
        /// <returns>The HTML produced by the three-argument overload.</returns>
        public static string TextToHTML(string value, bool isHasEmptyRow)
        {
            return TextToHTML(value, isHasEmptyRow, isOutBr: false);
        }
        /// <summary>
        /// Converts plain text to HTML: escapes markup-significant characters and a
        /// few common symbols, then turns line breaks into either &lt;br&gt; tags or
        /// &lt;p&gt; paragraphs.
        /// </summary>
        /// <param name="value">The plain text to convert. A null value is treated as empty text.</param>
        /// <param name="isHasEmptyRow">
        /// When true the text is first passed through <c>StringHelper.RemoveEmptyRow</c>
        /// (presumably strips blank lines - confirm against that helper).
        /// </param>
        /// <param name="isOutBr">
        /// When true every line break becomes a &lt;br&gt; tag; when false the whole text
        /// is wrapped in &lt;p&gt;...&lt;/p&gt; and every line break starts a new paragraph.
        /// </param>
        /// <returns>The HTML-encoded text.</returns>
        public static string TextToHTML(string value, bool isHasEmptyRow, bool isOutBr)
        {
            StringBuilder sr = new StringBuilder();
            sr.Append(isHasEmptyRow ? StringHelper.RemoveEmptyRow(value) : value);

            // "&" must be escaped first, otherwise the ampersands introduced by the
            // entities below would themselves be escaped again.
            sr.Replace("&", "&amp;");
            sr.Replace(">", "&gt;");
            sr.Replace("<", "&lt;");
            sr.Replace(" ", "&nbsp;");
            sr.Replace("\"", "&quot;");
            sr.Replace("©", "&copy;");
            sr.Replace("®", "&reg;");
            sr.Replace("×", "&times;");
            sr.Replace("÷", "&divide;");

            // Normalise Windows (\r\n), old Mac (\r) and Unix (\n) line endings to a
            // single "\n" so both output modes treat all three identically. Previously
            // the paragraph mode ignored bare "\n" and never split Unix text.
            sr.Replace("\r\n", "\n");
            sr.Replace("\r", "\n");

            if (isOutBr)
            {
                sr.Replace("\n", "<br>");
            }
            else
            {
                sr.Insert(0, "<p>");
                sr.Replace("\n", "</p>\r\n<p>");
                sr.Append("</p>");
            }

            return sr.ToString();
        }

        /// <summary>
        /// Wraps an HTML fragment in JavaScript <c>document.writeln("...")</c> calls,
        /// one call per non-empty source line.
        /// </summary>
        /// <param name="value">The HTML to emit. A null value is treated as empty text.</param>
        /// <returns>
        /// JavaScript statements separated by CR LF. Backslashes, forward slashes and
        /// both quote characters in the HTML are backslash-escaped.
        /// </returns>
        public static string HtmlToScript(string value)
        {
            StringBuilder sr = new StringBuilder(value);

            // Backslash goes first so the escapes added below are not doubled.
            sr.Replace("\\", "\\\\");
            // Escaping "/" keeps a literal "</script>" in the HTML from closing the
            // surrounding script element.
            sr.Replace("/", "\\/");
            sr.Replace("'", "\\'");
            sr.Replace("\"", "\\\"");

            string[] lines = sr.ToString().Split(new char[] { '\r', '\n' },
                StringSplitOptions.RemoveEmptyEntries);

            // Joining with "close call + newline + open call" and wrapping the result
            // once yields one writeln per line.
            return String.Format("document.writeln(\"{0}\");",
                String.Join("\");\r\ndocument.writeln(\"", lines));
        }

        /// <summary>
        /// Escapes a string so it can be placed inside a single- or double-quoted
        /// JavaScript string literal.
        /// </summary>
        /// <param name="value">The raw text. Null or empty input yields an empty string.</param>
        /// <returns>
        /// The text with backslashes, both quote characters, carriage returns and
        /// line feeds backslash-escaped.
        /// </returns>
        public static string ScriptStringFormat(string value)
        {
            if (string.IsNullOrEmpty(value))
            {
                return string.Empty;
            }

            StringBuilder escaped = new StringBuilder(value);

            // Backslash goes first so the escapes added below are not doubled.
            escaped.Replace("\\", "\\\\");
            escaped.Replace("'", "\\'");
            escaped.Replace("\"", "\\\"");
            // A raw line terminator is not allowed inside a JavaScript string literal.
            escaped.Replace("\r", "\\r");
            escaped.Replace("\n", "\\n");

            return escaped.ToString();
        }

        /// <summary>
        /// Reduces HTML to plain text by deleting every tag and every character
        /// entity. Entities are removed, not decoded.
        /// </summary>
        /// <param name="value">The HTML to strip. Null or empty input yields an empty string.</param>
        /// <returns>The input with all <c>&lt;...&gt;</c> tags and <c>&amp;...;</c> entities removed.</returns>
        public static string HtmlToText(string value)
        {
            if (string.IsNullOrEmpty(value))
            {
                return string.Empty;
            }

            // Match only well-formed entities (named, decimal or hex). The previous
            // greedy "&.+;" swallowed all text between the first "&" and the last ";"
            // on a line.
            string regexstr = @"&(?:#x?[0-9a-f]+|[a-z][a-z0-9]*);|<[^>]*>";
            return Regex.Replace(value, regexstr, "", RegexOptions.IgnoreCase);
        }

        /// <summary>
        /// Neutralises a fixed list of risky HTML constructs by textual rewriting:
        /// drops the leading "o" of attribute-like tokens that start with "o"
        /// (so <c>onclick=</c> becomes <c>nclick=</c>), appends a "." to listed tag
        /// names, and deletes the words "javascript" and "eval".
        /// </summary>
        /// <remarks>
        /// NOTE(review): this is a blacklist, so it is best-effort only and also
        /// rewrites ordinary words that happen to match (for example "form ").
        /// Do not rely on it as the sole defence for untrusted markup.
        /// </remarks>
        /// <param name="value">The HTML to filter. Null or empty input yields an empty string.</param>
        /// <returns>The rewritten HTML.</returns>
        public static string HtmlFilter(string value)
        {
            if (string.IsNullOrEmpty(value))
            {
                return string.Empty;
            }

            // A single pass can be defeated by nesting: removing the inner match of
            // "evevalal" or "oonclick=" re-creates the blocked token. Repeat until a
            // pass changes nothing.
            string previous;
            do
            {
                previous = value;
                value = Regex.Replace(value, @"(\<|\s+)o([a-z]+\s?=)", "$1$2", RegexOptions.IgnoreCase);
                // "embed" and "layer" were previously fused into "embedlayer" (missing "|"),
                // so neither tag was ever matched.
                value = Regex.Replace(value, @"(select|textarea|input|link|iframe|frameset|frame|form|applet|embed|ilayer|layer|meta|object|script|behavior|style)([\s|:|>])+", "$1.$2", RegexOptions.IgnoreCase);
                value = Regex.Replace(value, @"javascript|eval", "", RegexOptions.IgnoreCase);
            }
            while (!string.Equals(previous, value, StringComparison.Ordinal));

            return value;
        }

        /// <summary>
        /// Downloads the body of a URL and decodes it with the named text encoding.
        /// </summary>
        /// <param name="url">The address to request. Redirects are not followed.</param>
        /// <param name="encoding">An encoding name accepted by <see cref="Encoding.GetEncoding(string)"/>.</param>
        /// <returns>
        /// The response body when the status is 200 OK; otherwise an empty string.
        /// Any exception (bad URL, network error, timeout, unknown encoding name)
        /// also results in an empty string.
        /// </returns>
        public static string GetHtmlContent(string url, string encoding)
        {
            try
            {
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
                request.Timeout = 10000; // milliseconds
                request.AllowAutoRedirect = false;

                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                {
                    if (response.StatusCode != HttpStatusCode.OK)
                    {
                        return string.Empty;
                    }

                    Stream body = response.GetResponseStream();
                    if (response.ContentEncoding != null && StringHelper.IsEqualString(response.ContentEncoding, "gzip"))
                    {
                        // The server compressed the body; decompress while reading.
                        body = new GZipStream(body, CompressionMode.Decompress);
                    }

                    // Disposing the reader disposes the stream chain it wraps.
                    using (StreamReader reader = new StreamReader(body, Encoding.GetEncoding(encoding)))
                    {
                        return reader.ReadToEnd();
                    }
                }
            }
            catch (Exception)
            {
                // Deliberate best-effort: callers receive an empty string on any failure.
                return string.Empty;
            }
        }
        /// <summary>
        /// Determines the character set of the page at a URL.
        /// </summary>
        /// <remarks>
        /// Order of preference: a <c>charset=...</c> declaration found in the page
        /// body, then the character set reported by the HTTP response, then the
        /// system default encoding.
        /// </remarks>
        /// <param name="url">The address to request. Redirects are not followed.</param>
        /// <returns>
        /// The detected charset name. The system default encoding's body name is
        /// returned when the status is not 200 OK, when the reported content length
        /// is 1 MiB or more, or when any exception occurs.
        /// </returns>
        public static string GetEncoding(string url)
        {
            try
            {
                HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
                request.Timeout = 20000; // milliseconds
                request.AllowAutoRedirect = false;

                using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
                {
                    if (response.StatusCode != HttpStatusCode.OK || response.ContentLength >= 1024 * 1024)
                    {
                        return Encoding.Default.BodyName;
                    }

                    Stream body = response.GetResponseStream();
                    StreamReader reader;
                    if (response.ContentEncoding != null && StringHelper.IsEqualString(response.ContentEncoding, "gzip"))
                    {
                        reader = new StreamReader(new GZipStream(body, CompressionMode.Decompress));
                    }
                    else
                    {
                        // The charset declaration itself is ASCII, so an ASCII read is
                        // sufficient to locate it whatever the real page encoding is.
                        reader = new StreamReader(body, Encoding.ASCII);
                    }

                    using (reader)
                    {
                        string declared = ExtractCharset(reader.ReadToEnd());
                        if (!string.IsNullOrEmpty(declared))
                        {
                            return declared;
                        }

                        if (!string.IsNullOrEmpty(response.CharacterSet))
                        {
                            return response.CharacterSet;
                        }

                        return Encoding.Default.BodyName;
                    }
                }
            }
            catch (Exception)
            {
                // Deliberate best-effort: fall back to the system default on any failure.
                return Encoding.Default.BodyName;
            }
        }

        /// <summary>
        /// Finds the first <c>charset=NAME</c> declaration in a block of HTML.
        /// </summary>
        /// <param name="html">The page source to search.</param>
        /// <returns>The charset name without surrounding quotes, or null when none is declared.</returns>
        private static string ExtractCharset(string html)
        {
            // Accepts charset=utf-8, charset="utf-8" and charset='utf-8' in any letter
            // case. The old pattern captured "everything up to the next double quote",
            // which returned an empty name for <meta charset="utf-8">.
            Match match = Regex.Match(html, @"charset\s*=\s*[""']?\s*(?<charset>[a-z0-9][\w.:-]*)", RegexOptions.IgnoreCase);
            return match.Success ? match.Groups["charset"].Value : null;
        }
    }
}
分享到:

发表评论

评论列表