 /// <summary>
 /// Removes HTML markup from a string.
 /// </summary>
 /// <param name="strHtml">Source text that contains HTML.</param>
 /// <returns>The text with the HTML markup removed.</returns>
 public static string StripHTML(string strHtml)
 {
     // Converts an HTML fragment to plain text by applying an ordered list of
     // regex rewrite rules, then dropping any angle brackets / CRLF left over.
     //   strHtml : the HTML source; null or empty yields an empty string.
     //   returns : the text with tags, scripts and comments removed and a
     //             small set of character entities decoded.
     if (string.IsNullOrEmpty(strHtml))
     {
         return string.Empty;
     }

     // { pattern, replacement } pairs. ORDER MATTERS: scripts must go before
     // generic tags, named entities before the catch-all numeric rule, and
     // "-->" must be rewritten before the "<!--...\n" rule that relies on it.
     // NOTE: entities are decoded sequentially, so "&amp;lt;" is decoded twice.
     string[,] rules =
     {
         // Whole <script> elements, including bodies that span several lines.
         { @"<script[^>]*?>[\s\S]*?</script>", "" },
         // Opening/closing/declaration tags; quoted attribute values may contain '>'.
         { @"<(?:/\s*)?!?(?:\w+:)?\w+(?:""[^""]*""|'[^']*'|[^'""<>])*>", "" },
         // A line break together with the whitespace that follows it.
         { @"([\r\n])[\s]+", "" },
         { @"&(quot|#34);", "\"" },
         { @"&(amp|#38);", "&" },
         { @"&(lt|#60);", "<" },
         { @"&(gt|#62);", ">" },
         { @"&(nbsp|#160);", " " },
         { @"&(iexcl|#161);", "\u00A1" },
         { @"&(cent|#162);", "\u00A2" },
         { @"&(pound|#163);", "\u00A3" },
         { @"&(copy|#169);", "\u00A9" },
         // Any other numeric character reference is dropped.
         { @"&#(\d+);", "" },
         // Comment handling: terminate the comment with a line break, then
         // delete from "<!--" up to and including that line break.
         { @"-->", "\r\n" },
         { @"<!--.*\n", "" }
     };

     string strOutput = strHtml;
     for (int i = 0; i < rules.GetLength(0); i++)
     {
         // The static Regex.Replace overload reuses cached parsed patterns.
         strOutput = Regex.Replace(strOutput, rules[i, 0], rules[i, 1], RegexOptions.IgnoreCase);
     }

     // string.Replace returns a new string; the result must be assigned.
     strOutput = strOutput.Replace("<", "").Replace(">", "").Replace("\r\n", "");
     return strOutput;
 }
 (编辑:银川站长网) 【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时与联系站长删除相关内容! |