You've already forked macports-ports
mirror of
https://github.com/encounter/macports-ports.git
synced 2026-03-30 11:29:27 -07:00
ab16d51d56
git-svn-id: https://svn.macports.org/repository/macports/trunk/dports@90590 d073be05-634f-4543-b044-5fe20cf6d1d6
33 lines
1.6 KiB
Diff
33 lines
1.6 KiB
Diff
--- core/src/dualist/pipes/DocumentPipe.java.orig 2012-02-11 05:07:28.000000000 +0900
|
|
+++ core/src/dualist/pipes/DocumentPipe.java 2012-02-22 22:44:45.000000000 +0900
|
|
@@ -13,6 +13,8 @@
|
|
import cc.mallet.pipe.TokenSequenceRemoveStopwords;
|
|
import cc.mallet.types.Instance;
|
|
|
|
+import dualist.pipes.SimpleMecabPipe;
|
|
+
|
|
public class DocumentPipe extends Pipe {
|
|
|
|
private Pipe myPipe = new SerialPipes(new Pipe[] {
|
|
@@ -24,6 +26,9 @@
|
|
new CharSequenceReplace(Pattern.compile("&(.*?);"), ""),
|
|
new CharSequenceReplace(Pattern.compile("[0-9]+"), "00"),
|
|
new CharSequenceLowercase(),
|
|
+ (System.getProperty("dualist.lang") != null &&
|
|
+ System.getProperty("dualist.lang").equals("ja")) ?
|
|
+ new SimpleMecabPipe() :
|
|
// new CharSequence2TokenSequence(CharSequenceLexer.LEX_WORD_CLASSES),
|
|
new CharSequence2TokenSequence("[\\p{L}\\p{Mn}]+"),
|
|
new TokenSequenceRemoveStopwords(),
|
|
--- build.xml.orig 2012-03-08 23:07:56.000000000 +0900
|
|
+++ build.xml 2012-03-09 09:32:14.000000000 +0900
|
|
@@ -26,7 +26,7 @@
|
|
<target name="compile">
|
|
<mkdir dir="${classes.dir}"/>
|
|
<!-- <javac srcdir="${src.dir}" destdir="${classes.dir}" classpathref="classpath"/> -->
|
|
- <javac debug="true" debuglevel="lines,vars,source" srcdir="${src.dir}" destdir="${classes.dir}" classpathref="classpath"/>
|
|
+ <javac debug="true" debuglevel="lines,vars,source" srcdir="${src.dir}" destdir="${classes.dir}" classpathref="classpath" encoding="UTF-8"/>
|
|
</target>
|
|
|
|
<target name="jar" depends="compile">
|