估计是从 OCR 取得的数据。彻底的解决办法是做“相似数据库”,通过不断完善这个库的数据来反复过滤——反正是机器做,做多少次都不累人。

授人以渔,不授人以鱼。
程序代码:
* Demo: list pairs of records whose name AND province both score 4 with
* DIFFERENCE(). Each record is compared against every record after it.
* There is no "already matched" marker here, so a record reported under
* one reference record can be reported again under a later one.
* NOTE(review): DIFFERENCE() is a phonetic comparison; confirm how it
* behaves on double-byte (Chinese) text before relying on the grouping.
CLEAR
CREATE CURSOR TMP (姓名 C(10), 省份 C(10))

* Sample data: the same provinces written with and without suffix/prefix.
INSERT INTO TMP (姓名, 省份) VALUES ("张小三", "北京")
INSERT INTO TMP (姓名, 省份) VALUES ("张小四", "北京市")
INSERT INTO TMP (姓名, 省份) VALUES ("王小二", "天津")
INSERT INTO TMP (姓名, 省份) VALUES ("王小四", "河北")
INSERT INTO TMP (姓名, 省份) VALUES ("张三", "河北省")
INSERT INTO TMP (姓名, 省份) VALUES ("王小三", "天津市")
INSERT INTO TMP (姓名, 省份) VALUES ("赵四", "上海")
INSERT INTO TMP (姓名, 省份) VALUES ("陈小小", "云南")
INSERT INTO TMP (姓名, 省份) VALUES ("陈一二", "广西")
INSERT INTO TMP (姓名, 省份) VALUES ("陈小二", "云南省")
INSERT INTO TMP (姓名, 省份) VALUES ("李五", "中国 北京")
INSERT INTO TMP (姓名, 省份) VALUES ("张小五", "北京市")

FOR nRef = 1 TO RECCOUNT()
    * Remember the reference record's values, then step past it so the
    * inner scan only sees the records that follow.
    GO nRef
    cRefName = 姓名
    cRefProv = 省份
    lHeaderShown = .F.
    SKIP

    SCAN REST
        * Guard clause: ignore records where either field scores below 4.
        IF DIFFERENCE(cRefName, 姓名) <> 4 OR DIFFERENCE(cRefProv, 省份) <> 4
            LOOP
        ENDIF

        * Print a blank line and the reference record once, before its
        * first match.
        IF NOT lHeaderShown
            ?
            ? cRefName, cRefProv
            lHeaderShown = .T.
        ENDIF

        ? 姓名, 省份
    ENDSCAN
ENDFOR
程序代码:
* Demo: group records whose name AND province both score 4 with
* DIFFERENCE(), reporting each record in at most one group.
* The logical column 标志 is set to .T. on every record that has been
* printed as a match; flagged records are neither matched again nor used
* as the reference record of a new group.
* NOTE(review): DIFFERENCE() is a phonetic comparison; confirm how it
* behaves on double-byte (Chinese) text before relying on the grouping.
CLEAR
CREATE CURSOR TMP (姓名 C(10), 省份 C(10), 标志 L)

* Sample data: the same provinces written with and without suffix/prefix.
* Every record starts unflagged.
INSERT INTO TMP (姓名, 省份, 标志) VALUES ("张小三", "北京", .F.)
INSERT INTO TMP (姓名, 省份, 标志) VALUES ("张小四", "北京市", .F.)
INSERT INTO TMP (姓名, 省份, 标志) VALUES ("王小二", "天津", .F.)
INSERT INTO TMP (姓名, 省份, 标志) VALUES ("王小四", "河北", .F.)
INSERT INTO TMP (姓名, 省份, 标志) VALUES ("张三", "河北省", .F.)
INSERT INTO TMP (姓名, 省份, 标志) VALUES ("王小三", "天津市", .F.)
INSERT INTO TMP (姓名, 省份, 标志) VALUES ("赵四", "上海", .F.)
INSERT INTO TMP (姓名, 省份, 标志) VALUES ("陈小小", "云南", .F.)
INSERT INTO TMP (姓名, 省份, 标志) VALUES ("陈一二", "广西", .F.)
INSERT INTO TMP (姓名, 省份, 标志) VALUES ("陈小二", "云南省", .F.)
INSERT INTO TMP (姓名, 省份, 标志) VALUES ("李五", "中国 北京", .F.)
INSERT INTO TMP (姓名, 省份, 标志) VALUES ("张小五", "北京市", .F.)

FOR i = 1 TO RECCOUNT()
    GO i

    * A record already reported as a match belongs to an earlier group:
    * do not let it head a group of its own, and skip the pointless scan.
    IF 标志
        LOOP
    ENDIF

    * Remember the reference record's values, then step past it so the
    * inner scan only sees the records that follow.
    c姓名 = 姓名
    c省份 = 省份
    bTag = .F.      && .T. once the reference record has been printed
    SKIP

    * Only records not yet assigned to a group are candidates.
    SCAN REST FOR !标志
        IF (DIFFERENCE(c姓名, 姓名) == 4) AND (DIFFERENCE(c省份, 省份) == 4)
            * Print a blank line and the reference record once, before
            * its first match.
            IF !bTag
                ?
                ? c姓名, c省份
                bTag = .T.
            ENDIF
            * Mark the match so later passes ignore it.
            REPLACE 标志 WITH .T.
            ? 姓名, 省份
        ENDIF
    ENDSCAN
ENDFOR