using Word = Microsoft.Office.Interop.Word;
using System.Diagnostics;
using Logger = Log.Log;
using System.Text.RegularExpressions;
using System.Collections;
namespace WordAnalyze
{
public class Analyze
{
// Prefixes that mark a top-level ("big") question heading: Chinese numerals one to nine followed by "、".
private string[] bigQuestionNum = { "一、", "二、", "三、", "四、", "五、", "六、", "七、", "八、", "九、" };
// Prefixes that mark a numbered ("small") question: "1." through "199.".
private ArrayList smallQuestionNum = new ArrayList();
// NOTE(review): filled with exactly the same strings as smallQuestionNum; presumably a different
// suffix was intended for this second list - confirm against the original requirements.
private ArrayList smallQuestionNum2 = new ArrayList();
// The Word automation instance shared by every call on this object.
private Word.ApplicationClass app;
/// <summary>
/// Starts the Word application and builds the numbered-question prefix tables.
/// </summary>
public Analyze()
{
    app = new Word.ApplicationClass();

    // Build "1." .. "199." for both prefix tables.
    for (int number = 1; number <= 199; number++)
    {
        string prefix = number + ".";
        smallQuestionNum.Add(prefix);
        smallQuestionNum2.Add(prefix);
    }
}
/// <summary>
/// Quits the Word application owned by this instance.
/// Safe to call more than once: after the first call the application reference is cleared,
/// so later calls do nothing instead of calling Quit on an application that has already exited.
/// </summary>
public void Close()
{
    if (app == null)
    {
        return;
    }
    try
    {
        app.Quit();
    }
    finally
    {
        // Drop the reference even if Quit throws, so the dead instance is never reused.
        app = null;
    }
}
// Three-dash separator marker; not referenced by any other code visible in this file.
public static string splitChar = "---";
// Output directory used by Rename; an empty string means Rename falls back to the
// directory of the source file.
public string path = string.Empty;
/// <summary>
/// Sets the directory in which the generated documents are placed.
/// </summary>
/// <param name="path">Target directory, stored as given (no validation is performed).</param>
public void SetPath(string path)
{
    this.path = path;
}
/// <summary>
/// Opens the Word document at <paramref name="fileName"/> and hands it to dot, which splits
/// it into numbered output documents.
/// </summary>
/// <param name="fileName">Path of the source document; must pass ValidFileName.</param>
/// <returns>
/// The result of dot (an empty string on success), "无效的文件名" when the file name is rejected,
/// or the exception message when opening or processing the document fails.
/// </returns>
public string AnalyzeFile(string fileName)
{
    if (!ValidFileName(fileName))
    {
        Logger.D("AnalyzeFile with invalid file name {0}", fileName);
        return "无效的文件名";
    }
    object fn = fileName;
    Word.Document doc = null; // source document
    try
    {
        doc = app.Documents.Open(ref fn);
        // dot iterates up to, but not including, the last paragraph, so a trailing paragraph
        // is appended here; dot undoes the change before it closes the document.
        doc.Paragraphs.Add();
        // No scratch document is created here: dot creates (and closes) its own output documents.
        return dot(doc, fileName);
    }
    catch (System.Exception e)
    {
        Logger.E("analyze with file name({0}) error ->({1})", fileName, e.Message.ToString());
        if (doc != null)
        {
            // Reaching this point means dot did not finish closing the source document;
            // close it without saving so it does not stay open inside the Word instance.
            try
            {
                object doNotSave = Word.WdSaveOptions.wdDoNotSaveChanges;
                doc.Close(ref doNotSave);
            }
            catch (System.Exception closeError)
            {
                Logger.E("analyze with file name({0}) close error ->({1})", fileName, closeError.Message.ToString());
            }
        }
        return e.Message.ToString();
    }
}
/// <summary>
/// Checks whether the file name has a .doc or .docx extension.
/// The comparison is ordinal and case-insensitive, so names such as "A.DOCX" are accepted too.
/// </summary>
/// <param name="fileName">File name or path to check; may be null.</param>
/// <returns>true when the name is non-empty and ends with ".doc" or ".docx"; otherwise false.</returns>
public bool ValidFileName(string fileName)
{
    if (string.IsNullOrEmpty(fileName))
    {
        return false;
    }
    return fileName.EndsWith(".doc", System.StringComparison.OrdinalIgnoreCase)
        || fileName.EndsWith(".docx", System.StringComparison.OrdinalIgnoreCase);
}
/// <summary>
/// Builds the path of an output document: the source file name with <paramref name="index"/>
/// inserted before its extension, placed in the configured output directory (the path field).
/// When no output directory is configured, the directory of the source file is used for this
/// call only; the path field is not modified, so each source file keeps its own default.
/// </summary>
/// <param name="filename">Source file path; directories are separated by backslashes.</param>
/// <param name="index">Sequence number inserted before the extension.</param>
/// <returns>For example "C:\dir\test.doc" with index 2 gives "C:\dir\test2.doc".</returns>
public string Rename(string filename, int index)
{
    int separatorIndex = filename.LastIndexOf('\\');
    // Without a directory part the whole input is the file name.
    string name = filename;
    string sourceDirectory = "";
    if (-1 != separatorIndex)
    {
        name = filename.Substring(separatorIndex + 1);
        sourceDirectory = filename.Substring(0, separatorIndex);
    }

    string targetDirectory = string.IsNullOrEmpty(path) ? sourceDirectory : path;

    // Look for the extension in the file name only, so a dot inside a directory name is never
    // mistaken for the extension separator.
    int extensionIndex = name.LastIndexOf('.');
    string numberedName;
    if (-1 != extensionIndex)
    {
        numberedName = string.Format("{0}{1}{2}", name.Substring(0, extensionIndex), index, name.Substring(extensionIndex));
    }
    else
    {
        numberedName = string.Format("{0}{1}", name, index);
    }

    if (targetDirectory.Length == 0)
    {
        // No directory is known at all: return a relative name rather than one rooted at "\".
        return numberedName;
    }
    return targetDirectory + "\\" + numberedName;
}
/// <summary>
/// Walks the paragraphs of <paramref name="doc"/> and copies them, via the clipboard, into
/// freshly created documents that are saved under names produced by Rename.
/// A new output document is started when a top-level heading (see checkTextStartWithBig) or a
/// numbered question (see checkTextStartWithNum) is reached, subject to the flags described
/// below. The source document is always undone one step and closed before returning.
/// </summary>
/// <param name="doc">
/// Open source document. The loop stops before the last paragraph, so the caller is expected
/// to have appended one trailing paragraph (AnalyzeFile does this).
/// </param>
/// <param name="filename">Source file path, used to derive the output file names.</param>
/// <returns>An empty string on success; otherwise the message of the exception that occurred.</returns>
public string dot(Word.Document doc, string filename)
{
    Word.Paragraphs paragraphs = doc.Paragraphs;
    Word.Document newDoc = app.Documents.Add();
    // insert: something other than a top-level heading has been pasted since the last heading.
    bool insert = false;
    // bigMark: a top-level heading has been pasted and no numbered question has followed it yet.
    bool bigMark = false;
    // firstBig: at least one numbered question has been seen (despite the name).
    bool firstBig = false;
    int fileIndex = 1;
    try
    {
        for (int i = 1; i < paragraphs.Count; i++)
        {
            Word.Paragraph paragraph = paragraphs[i];
            string rangeText = paragraph.Range.Text.ToString();
            if (rangeText.Trim().Length < 1)
            {
                continue;
            }
            string listNum = paragraph.Range.ListFormat.ListString;
            Logger.D("dot get index:{0}, message: {1}", i, rangeText);
            if (checkTextStartWithBig(rangeText))
            {
                Logger.D("dot get index: {0}, firstBig: {1}", i, firstBig);
                if (firstBig)
                {
                    // A heading after earlier questions closes the current output document.
                    newDoc = SaveAndStartNewDocument(newDoc, filename, fileIndex++);
                }
                CopyParagraphToNewDocument(paragraph);
                insert = false;
                bigMark = true;
                continue;
            }
            if (checkTextStartWithNum(listNum) || checkTextStartWithNum(rangeText))
            {
                Logger.D("dot get index: {0}, insert: {1}, bigMark: {2}, condition: {3}, condition2:{4}", i, insert, bigMark, (!bigMark && insert), (!bigMark) && insert);
                if (!bigMark && insert)
                {
                    // A new question that does not directly belong to a fresh heading
                    // closes the previous question's document.
                    newDoc = SaveAndStartNewDocument(newDoc, filename, fileIndex++);
                }
                CopyParagraphToNewDocument(paragraph);
                insert = true;
                bigMark = false;
                firstBig = true;
                continue;
            }
            // Any other non-empty paragraph belongs to the current output document.
            CopyParagraphToNewDocument(paragraph);
            insert = true;
        }
        if (insert)
        {
            object file = Rename(filename, fileIndex++);
            newDoc.SaveAs2(file);
        }
        newDoc.Close();
        // Mark the output document as handled so the cleanup below leaves it alone.
        newDoc = null;
    }
    catch (System.Exception e)
    {
        Logger.E("dot with filename({0}) error ->({1})", filename, e.Message.ToString());
        return e.Message.ToString();
    }
    finally
    {
        if (newDoc != null)
        {
            // Only reached after a failure: discard the partially filled output document
            // instead of leaving it open inside the Word instance.
            try
            {
                object doNotSave = Word.WdSaveOptions.wdDoNotSaveChanges;
                newDoc.Close(ref doNotSave);
            }
            catch (System.Exception closeError)
            {
                Logger.E("dot with filename({0}) cleanup error ->({1})", filename, closeError.Message.ToString());
            }
        }
        if (doc != null)
        {
            // Presumably reverts the trailing paragraph appended by the caller - confirm.
            doc.Undo();
            doc.Close();
        }
    }
    Logger.D("dot with filename({0}) success", filename);
    return "";
}

// Copies one source paragraph through the clipboard into the document at index 1 of
// app.Documents. NOTE(review): this assumes index 1 is the most recently added document,
// i.e. the current output document - confirm against Word's Documents ordering.
private void CopyParagraphToNewDocument(Word.Paragraph paragraph)
{
    paragraph.Range.Select();
    app.Selection.Copy();
    app.Documents[1].Activate();
    app.Selection.Paste();
}

// Saves the current output document under the next numbered name, closes it and returns a
// newly added empty document to continue with.
private Word.Document SaveAndStartNewDocument(Word.Document current, string filename, int index)
{
    object file = Rename(filename, index);
    current.SaveAs2(file);
    current.Close();
    return app.Documents.Add();
}
/// <summary>
/// Checks whether the text starts with a top-level question marker such as "一、" or "二、".
/// All whitespace is removed from the text before the comparison.
/// </summary>
/// <param name="text">Paragraph text; null or empty yields false.</param>
/// <returns>true when the whitespace-free text starts with one of the bigQuestionNum prefixes.</returns>
public bool checkTextStartWithBig(string text)
{
    // Regex.Replace rejects null input, so handle it up front.
    if (string.IsNullOrEmpty(text))
    {
        return false;
    }
    string textEscapeSpace = Regex.Replace(text, @"\s", "").Trim();
    foreach (string prefix in bigQuestionNum)
    {
        if (textEscapeSpace.StartsWith(prefix))
        {
            return true;
        }
    }
    return false;
}
/// <summary>
/// Checks whether the text starts with a numbered-question marker such as "1.", "2." or "3.".
/// All whitespace is removed from the text before the comparison.
/// </summary>
/// <param name="text">Paragraph text or list label; null or empty yields false.</param>
/// <returns>
/// true when the whitespace-free text starts with an entry of smallQuestionNum or smallQuestionNum2.
/// </returns>
public bool checkTextStartWithNum(string text)
{
    // The list label read from Word may be missing, so null is treated like an empty string.
    if (string.IsNullOrEmpty(text))
    {
        return false;
    }
    string textEscapeSpace = Regex.Replace(text, @"\s", "").Trim();
    return StartsWithAnyPrefix(textEscapeSpace, smallQuestionNum)
        || StartsWithAnyPrefix(textEscapeSpace, smallQuestionNum2);
}

// Returns true when candidate starts with the string form of any entry in prefixes.
private static bool StartsWithAnyPrefix(string candidate, ArrayList prefixes)
{
    foreach (var entry in prefixes)
    {
        if (candidate.StartsWith(entry.ToString()))
        {
            return true;
        }
    }
    return false;
}
}
}