by shinichi_wtn
2008-12-19 10:07
为了实现查询中的模糊音匹配,需要为词库表word中的每个词语建立拼音序列,而现在仅有每个单字的读音表wordlist,所以最简单的思路就是:
1)遍历word表中每个词语
2)对特定的词语分割为单字A[i],并在wordlist表里找到每个单字的读音B[i]
3)合并字符串B[i]得到读音序列B并添加至该词语的语音列
这个直接用SQL实现比较复杂,所以我们采用C#编程实现,编程思想就是如上的3条,最后发现执行效率非常高,创建近6万字的拼音序列不到3分钟,程序如下:
using System;
using System.Collections.Generic;
using System.Text;
using System.Data;
using System.Data.OleDb;
namespace JoinPinYin
{
/// <summary>
/// Console tool that fills the [语音] column of the <c>word</c> table in
/// <c>chdict.accdb</c>: every entry in [词语] is split into single characters,
/// each character's [sound] is looked up in the <c>wordlist</c> table, and the
/// concatenated sounds are written back to the entry's row.
/// </summary>
class Program
{
    /// <summary>
    /// Returns the [sound] of the first <c>wordlist</c> row whose [word] equals
    /// <paramref name="ch"/>, or an empty string when no row matches.
    /// </summary>
    /// <param name="cn">Open connection to the dictionary database.</param>
    /// <param name="cache">Memo of earlier lookups; <c>wordlist</c> is never modified by this program, so cached answers stay valid.</param>
    /// <param name="ch">The single-character string to look up.</param>
    private static string LookupSound(OleDbConnection cn, Dictionary<string, string> cache, string ch)
    {
        string sound;
        if (cache.TryGetValue(ch, out sound))
        {
            return sound;
        }

        // OleDb parameters are positional ('?'); the parameter name is only a label.
        using (OleDbCommand cmd = new OleDbCommand("select [sound] from wordlist where [word]=?", cn))
        {
            cmd.Parameters.AddWithValue("@ch", ch);
            using (OleDbDataReader reader = cmd.ExecuteReader())
            {
                sound = reader.Read() ? reader["sound"].ToString() : "";
            }
        }

        cache[ch] = sound;
        return sound;
    }

    /// <summary>
    /// Writes <paramref name="sound"/> into the [语音] column of every <c>word</c>
    /// row whose [词语] equals <paramref name="word"/>.
    /// </summary>
    /// <param name="cn">Open connection to the dictionary database.</param>
    /// <param name="word">The [词语] value exactly as it was read from the table.</param>
    /// <param name="sound">The concatenated sound sequence to store.</param>
    private static void SaveSound(OleDbConnection cn, string word, string sound)
    {
        using (OleDbCommand cmd = new OleDbCommand("update word set [语音]=? where [词语]=?", cn))
        {
            // Order matters: parameters are bound by position, not by name.
            cmd.Parameters.AddWithValue("@sound", sound);
            cmd.Parameters.AddWithValue("@word", word);
            cmd.ExecuteNonQuery();
        }
    }

    /// <summary>
    /// Entry point: builds the sound sequence for every entry of the <c>word</c>
    /// table, reporting progress every 1000 entries and the total elapsed time.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        Console.WriteLine("正在执行词语拼音连接,该过程需要一定时间,请耐心等待");
        // Stopwatch is monotonic, unlike DateTime.Now which can jump with clock changes.
        System.Diagnostics.Stopwatch timer = System.Diagnostics.Stopwatch.StartNew();

        OleDbConnectionStringBuilder wtnStrBuilder = new OleDbConnectionStringBuilder();
        wtnStrBuilder.Provider = "Microsoft.ACE.OLEDB.12.0";
        wtnStrBuilder.DataSource = "chdict.accdb";

        Dictionary<string, string> soundCache = new Dictionary<string, string>();
        int i = 0;

        // 'using' guarantees the connection, command and reader are released
        // even when a query throws.
        using (OleDbConnection cn = new OleDbConnection(wtnStrBuilder.ConnectionString))
        {
            cn.Open();
            using (OleDbCommand myCommand = new OleDbCommand("select [词语] from word", cn))
            using (OleDbDataReader myReader = myCommand.ExecuteReader())
            {
                while (myReader.Read())
                {
                    string rawWord = myReader["词语"].ToString();

                    // An empty entry has no characters to look up, so nothing is written.
                    if (rawWord.Length > 0)
                    {
                        // Trim returns a new string; the stored value (rawWord) is
                        // still used as the key of the UPDATE so the row is matched.
                        string word = rawWord.Trim();

                        StringBuilder ss = new StringBuilder();
                        for (int j = 0; j < word.Length; j++)
                        {
                            ss.Append(LookupSound(cn, soundCache, word.Substring(j, 1)));
                        }

                        // One write per entry, after the whole sequence is known.
                        SaveSound(cn, rawWord, ss.ToString());
                    }

                    i++;
                    if (i % 1000 == 0)
                    {
                        Console.WriteLine("已经连接好{0}个词语", i);
                    }
                }
            }
        }

        Console.WriteLine("连接完成,总共用时" + timer.Elapsed.ToString());
        Console.ReadLine();
    }
}
}