WordAnalyze.cs 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268
  1. using Word = Microsoft.Office.Interop.Word;
  2. using System.Diagnostics;
  3. using Logger = Log.Log;
  4. using System.Text.RegularExpressions;
  5. using System.Collections;
  6. namespace WordAnalyze
  7. {
  /// <summary>
  /// Splits a Word document into several smaller documents, one per question,
  /// by driving Word through COM automation.
  /// </summary>
  /// <remarks>
  /// The constructor creates the Word application object; call
  /// <see cref="Close"/> once the instance is no longer needed.
  /// </remarks>
  public class Analyze
  {
    // Prefixes that mark a top-level ("big") question heading.
    private static readonly string[] BigQuestionPrefixes =
      { "一、", "二、", "三、", "四、", "五、", "六、", "七、", "八、", "九、" };

    // "1." through "199.": prefixes that mark a numbered ("small") question.
    // Only the ASCII full stop is recognised as the delimiter.
    private static readonly string[] SmallQuestionPrefixes = BuildSmallQuestionPrefixes();

    private static readonly Regex Whitespace = new Regex(@"\s");

    // Word application shared by every call on this instance; null after Close().
    private Word.ApplicationClass app;

    /// <summary>Separator marker; not referenced by any member of this class.</summary>
    public static string splitChar = "---";

    /// <summary>
    /// Output directory for the generated documents. When empty, each source
    /// file's own directory is used (see <see cref="Rename"/>).
    /// </summary>
    public string path = "";

    /// <summary>Creates the Word application object used for all later calls.</summary>
    public Analyze()
    {
      app = new Word.ApplicationClass();
    }

    /// <summary>
    /// Quits the Word application without saving any document that is still
    /// open. Calling it more than once is harmless.
    /// </summary>
    public void Close()
    {
      if (app == null)
      {
        return;
      }

      Word.ApplicationClass word = app;
      app = null; // cleared first so a second Close() never touches a dead COM object
      object doNotSave = Word.WdSaveOptions.wdDoNotSaveChanges;
      word.Quit(ref doNotSave);
    }

    /// <summary>Sets the output directory for the generated documents.</summary>
    /// <param name="path">Directory to write to; empty means "next to the source file".</param>
    public void SetPath(string path)
    {
      this.path = path;
    }

    /// <summary>
    /// Opens <paramref name="fileName"/> in Word and splits it with <see cref="dot"/>.
    /// </summary>
    /// <param name="fileName">Path of a .doc or .docx file.</param>
    /// <returns>
    /// An empty string on success, "无效的文件名" when the name fails
    /// <see cref="ValidFileName"/>, otherwise the message of the exception that
    /// stopped processing.
    /// </returns>
    public string AnalyzeFile(string fileName)
    {
      if (!ValidFileName(fileName))
      {
        Logger.D("AnalyzeFile with invalid file name {0}", fileName);
        return "无效的文件名";
      }

      object fn = fileName;
      Word.Document doc = null;
      bool handedOff = false; // true once dot() owns (and will close) the document
      try
      {
        doc = app.Documents.Open(ref fn);
        // dot() stops one paragraph short of the end, so append an empty
        // sentinel paragraph to make sure the last real paragraph is visited.
        doc.Paragraphs.Add();
        handedOff = true;
        return dot(doc, fileName);
      }
      catch (System.Exception e)
      {
        Logger.E("analyze with file name({0}) error ->({1})", fileName, e.Message.ToString());
        return e.Message.ToString();
      }
      finally
      {
        if (!handedOff)
        {
          CloseQuietly(doc, fileName);
        }
      }
    }

    /// <summary>
    /// Checks that a file name ends in ".doc" or ".docx", ignoring case.
    /// </summary>
    /// <param name="fileName">File name or path to check; may be null.</param>
    /// <returns>True when the extension is accepted; false otherwise, including for null or empty input.</returns>
    public bool ValidFileName(string fileName)
    {
      if (string.IsNullOrEmpty(fileName))
      {
        return false;
      }

      return fileName.EndsWith(".doc", System.StringComparison.OrdinalIgnoreCase)
        || fileName.EndsWith(".docx", System.StringComparison.OrdinalIgnoreCase);
    }

    /// <summary>
    /// Builds the path of the <paramref name="index"/>-th output document for a
    /// source file: the index is inserted between the file name and its
    /// extension, e.g. "test.doc" with index 2 becomes "test2.doc".
    /// </summary>
    /// <param name="filename">Path of the source document.</param>
    /// <param name="index">Sequence number of the output document.</param>
    /// <returns>
    /// The output path, placed in <see cref="path"/> when that is set and in the
    /// source file's directory otherwise.
    /// </returns>
    /// <exception cref="System.ArgumentException"><paramref name="filename"/> is null or empty.</exception>
    public string Rename(string filename, int index)
    {
      if (string.IsNullOrEmpty(filename))
      {
        throw new System.ArgumentException("A source file name is required.", "filename");
      }

      // The fallback directory is computed per call and never stored in the
      // path field, so one file's directory cannot leak into the next file.
      string directory = path;
      if (string.IsNullOrEmpty(directory))
      {
        directory = System.IO.Path.GetDirectoryName(filename) ?? "";
      }

      string stem = System.IO.Path.GetFileNameWithoutExtension(filename);
      string extension = System.IO.Path.GetExtension(filename);
      return System.IO.Path.Combine(directory, stem + index + extension);
    }

    /// <summary>
    /// Walks the paragraphs of <paramref name="doc"/> and copies them into new
    /// documents, starting a new output file at question boundaries. Output
    /// files are named by <see cref="Rename"/> with indexes 1, 2, 3, ...
    /// </summary>
    /// <remarks>
    /// <para>
    /// The paragraph at position <c>Paragraphs.Count</c> is never visited;
    /// <see cref="AnalyzeFile"/> appends an empty paragraph beforehand for that reason.
    /// </para>
    /// <para>
    /// <paramref name="doc"/> is always closed, without saving, before this method returns.
    /// </para>
    /// </remarks>
    /// <param name="doc">Open source document; closed by this method.</param>
    /// <param name="filename">Path of the source document, used to derive output names.</param>
    /// <returns>An empty string on success, otherwise the message of the exception that stopped processing.</returns>
    public string dot(Word.Document doc, string filename)
    {
      Word.Document newDoc = null;
      try
      {
        Word.Paragraphs paragraphs = doc.Paragraphs;
        newDoc = app.Documents.Add();

        bool insert = false;   // current output holds content that still has to be saved
        bool bigMark = false;  // a big heading was copied and no numbered question has followed yet
        bool firstBig = false; // at least one numbered question has been seen
        int fileIndex = 1;

        int count = paragraphs.Count;
        for (int i = 1; i < count; i++)
        {
          Word.Paragraph paragraph = paragraphs[i];
          string rangeText = paragraph.Range.Text;
          if (rangeText == null || rangeText.Trim().Length < 1)
          {
            continue;
          }

          Logger.D("dot get index:{0}, message: {1}", i, rangeText);

          if (checkTextStartWithBig(rangeText))
          {
            Logger.D("dot get index: {0}, firstBig: {1}", i, firstBig);
            // A big heading closes the previous output once any numbered
            // question has been seen; the heading opens the next output.
            if (firstBig)
            {
              StartNextPart(ref newDoc, Rename(filename, fileIndex++));
            }

            AppendParagraph(paragraph, newDoc);
            insert = false;
            bigMark = true;
            continue;
          }

          if (checkTextStartWithNum(paragraph.Range.ListFormat.ListString) || checkTextStartWithNum(rangeText))
          {
            // A numbered question starts a new output unless it directly
            // follows a big heading, in which case it joins the heading.
            bool split = !bigMark && insert;
            Logger.D("dot get index: {0}, insert: {1}, bigMark: {2}, condition: {3}, condition2:{4}", i, insert, bigMark, split, split);
            if (split)
            {
              StartNextPart(ref newDoc, Rename(filename, fileIndex++));
            }

            AppendParagraph(paragraph, newDoc);
            insert = true;
            bigMark = false;
            firstBig = true;
            continue;
          }

          // Ordinary paragraph: belongs to whatever output is currently open.
          AppendParagraph(paragraph, newDoc);
          insert = true;
        }

        if (insert)
        {
          SaveAndClose(newDoc, Rename(filename, fileIndex));
          newDoc = null; // already closed; nothing left for the cleanup below
        }
      }
      catch (System.Exception e)
      {
        Logger.E("dot with filename({0}) error ->({1})", filename, e.Message.ToString());
        return e.Message.ToString();
      }
      finally
      {
        // Discard an unsaved output document, then release the source. Closing
        // without saving also drops any change made to the source document.
        CloseQuietly(newDoc, filename);
        CloseQuietly(doc, filename);
      }

      Logger.D("dot with filename({0}) success", filename);
      return "";
    }

    /// <summary>
    /// Tells whether the text, with all whitespace removed, starts with a big
    /// question number such as "一、" or "二、".
    /// </summary>
    /// <param name="text">Paragraph text; may be null.</param>
    /// <returns>True when a big question prefix is found; false otherwise, including for null or empty input.</returns>
    public bool checkTextStartWithBig(string text)
    {
      if (string.IsNullOrEmpty(text))
      {
        return false;
      }

      return StartsWithAny(Whitespace.Replace(text, ""), BigQuestionPrefixes);
    }

    /// <summary>
    /// Tells whether the text, with all whitespace removed, starts with a
    /// question number from "1." to "199.".
    /// </summary>
    /// <param name="text">Paragraph text or list label; may be null.</param>
    /// <returns>True when a numbered prefix is found; false otherwise, including for null or empty input.</returns>
    public bool checkTextStartWithNum(string text)
    {
      if (string.IsNullOrEmpty(text))
      {
        return false;
      }

      return StartsWithAny(Whitespace.Replace(text, ""), SmallQuestionPrefixes);
    }

    // Builds the "1." .. "199." prefix table.
    private static string[] BuildSmallQuestionPrefixes()
    {
      string[] prefixes = new string[199];
      for (int number = 1; number < 200; number++)
      {
        prefixes[number - 1] = number.ToString() + ".";
      }

      return prefixes;
    }

    // Returns true when candidate starts with any of the given prefixes.
    private static bool StartsWithAny(string candidate, string[] prefixes)
    {
      foreach (string prefix in prefixes)
      {
        // CurrentCulture is what the single-argument StartsWith overload uses;
        // it is spelled out here so the matching rules stay exactly as before.
        if (candidate.StartsWith(prefix, System.StringComparison.CurrentCulture))
        {
          return true;
        }
      }

      return false;
    }

    // Copies one source paragraph to the clipboard and pastes it at the
    // current selection of the target document.
    private void AppendParagraph(Word.Paragraph source, Word.Document target)
    {
      source.Range.Copy();
      target.Activate();
      app.Selection.Paste();
    }

    // Saves and closes the current output document, then replaces it with a
    // fresh empty document.
    private void StartNextPart(ref Word.Document current, string targetPath)
    {
      SaveAndClose(current, targetPath);
      current = null; // closed; keeps the caller's cleanup from closing it twice if Add() fails
      current = app.Documents.Add();
    }

    // Saves a document under targetPath and closes it.
    private static void SaveAndClose(Word.Document document, string targetPath)
    {
      object target = targetPath;
      document.SaveAs2(target);
      object doNotSave = Word.WdSaveOptions.wdDoNotSaveChanges;
      document.Close(ref doNotSave);
    }

    // Closes a document without saving it; a failure is logged, never thrown,
    // so the method is safe to call from a finally block.
    private static void CloseQuietly(Word.Document document, string filename)
    {
      if (document == null)
      {
        return;
      }

      try
      {
        object doNotSave = Word.WdSaveOptions.wdDoNotSaveChanges;
        document.Close(ref doNotSave);
      }
      catch (System.Exception e)
      {
        Logger.E("close document for file name({0}) error ->({1})", filename, e.Message);
      }
    }
  }
  248. }