To find similar words (lines) in two files

I need to check the similarity of word 1 in file 1 with word 2 in file 2, etc. if word 1 (file 1). equal to word 2 (file 2), then file 3 will be displayed on the True and False screen. Below is the encoding, but I get stuck when there is no error, but does not give a way out. I am new to JAVA.

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Scanner;

public class test2 {

    private static ArrayList<String> load(String f1) throws FileNotFoundException {
        Scanner reader = new Scanner(new File(f1));
        ArrayList<String> out = new ArrayList<String>();
        while (reader.hasNext()) {
            String temp = reader.nextLine();
            String[] sts = temp.split(" ");
            for (int i = 0; i < sts.length; i++) {
                if (sts[i].equals("") && sts[i].equals(" ") && sts[i].equals("\n")) {
                    out.add(sts[i]);
                }
            }
        }
        return out;
    }

    private static void write(ArrayList<String> out, String fname) throws IOException {
        FileWriter writer = new FileWriter(new File("out_test2.txt"));
        for (int i = 0; i < out.size(); i++) {
            writer.write(out.get(i) + "\n");
        }
        writer.close();
    }

    public static void main(String[] args) throws IOException {
        ArrayList<String> file1;
        ArrayList<String> file2;
        ArrayList<String> out = new ArrayList<String>();
        file1 = load("IbanDict.txt");
        file2 = load("AFF_outVal.txt");

        for (int i = 0; i < file1.size(); i++) {
            String word1 = file1.get(i);
            for (int z = 0; z < file2.size(); z++) {
                if (word1.equalsIgnoreCase(file2.get(z))) {
                    boolean already = false;
                    for (int q = 0; q < out.size(); q++) {
                        if (out.get(q).equalsIgnoreCase(file1.get(i))) {
                            already = true;
                        }
                    }
                    if (already == false) {
                        out.add(file1.get(i));
                    }
                }
            }
        }
        write(out, "out_test2.txt");
    }

}
+5
source share
4 answers

First, it Scannerwill fake your string for you. There is no need to read in a line and tokenize using the method String.split; here .

Secondly, there seems to be a logical error here:

for (int i = 0; i < sts.length; i++) {
    if (sts[i].equals("") && sts[i].equals(" ")
            && sts[i].equals("\n"))
       out.add(sts[i]);
}

(if I understand what you are trying to do), it should be:

for (int i = 0; i < sts.length; i++) {
    if (!(sts[i].equals("") && sts[i].equals(" ") && sts[i]
           .equals("\n")))
       out.add(sts[i]);
}

.

. (); , awk Python ( , Java). Java, FilterReader/Writer, .

+2

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileWriter;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class Test {

  private static final Pattern WORD_PATTERN = Pattern.compile("[\\w']+");

  private static Map<String, Integer> load(final String f1) throws FileNotFoundException {
    Scanner reader = new Scanner(new File(f1));
    Map<String, Integer> out = new HashMap<String, Integer>();
    while (reader.hasNext()) {
      String tempLine = reader.nextLine();
      if (tempLine != null && tempLine.trim().length() > 0) {
        Matcher matcher = WORD_PATTERN.matcher(tempLine);
        while (matcher.find()) {
          out.put(matcher.group().toLowerCase(), 0);
        }
      }
    }

    return out;
  }

  private static void write(final Map<String, Integer> out, final String fname) throws IOException {
    FileWriter writer = new FileWriter(new File(fname));
    for (Map.Entry<String, Integer> word : out.entrySet()) {
      if (word.getValue() == 1) {
        writer.write(word.getKey() + "\n");
      }
    }
    writer.close();
  }

  public static void main(final String[] args) throws IOException {
    Map<String, Integer> file1 = load("file1.txt");
    Map<String, Integer> file2 = load("file2.txt");

    // below for loop will run just one time, so it is much faster
    for (Map.Entry<String, Integer> file1Word : file1.entrySet()) {
      if (file2.containsKey(file1Word.getKey())) {
        file1.put(file1Word.getKey(), 1);
        file2.put(file1Word.getKey(), 1);
      }
    }

    write(file1, "test1.txt");
    write(file2, "test2.txt");
  }

}
+2

, . wulfgar.pro.

, Scanner , file1 " " "", file2 " ".

, Sets, , , , . for-each ( generics, ).

, while load:

private static final Pattern PUNCTUATION_PATTERN = Pattern.compile("[\\w']+");

private static Set<String> load(String f1) throws FileNotFoundException {
    Scanner reader = new Scanner(new File(f1));
    Set<String> out = new HashSet<String>();
    while (reader.hasNext()) {
        String tempLine = reader.nextLine();
        if (tempLine != null
                && tempLine.trim().length() > 0) {
            Matcher matcher = PUNCTUATION_PATTERN.matcher(tempLine);
            while (matcher.find()) {
                out.add(tempLine.substring(matcher.start(), matcher.end()));
            }
        }
    }
    return out;
}

for main :

public static void main(String[] args) throws IOException {
    Set<String> out = new HashSet<String>();
    Set<String> file1 = load("IbanDict.txt");
    Set<String> file2 = load("AFF_outVal.txt");

    for (String word1 : file1) {
        for (String word2 : file2) {
            if (word1.equalsIgnoreCase(word2)) {
                boolean already = false;
                for (String outStr : out) {
                    if (outStr.equalsIgnoreCase(word1)) {
                        already = true;
                    }
                }
                if (!already) {
                    out.add(word1);
                }
            }
        }
    }
    write(out, "out_test2.txt");
}

write File.separator :

private static void write(Iterable<String> out, String fname) throws IOException {
    OutputStreamWriter writer = new FileWriter(new File(fname));
    for (String s : out) {
        writer.write(s + File.separator);
    }
    writer.close();
}
+1

, , 2 1. , true, false.

, , , , , 1. 2 , .

. sts, , "" . , ArrayList. , .

Scanner reader = new Scanner(new File(f1));
ArrayList<String> out = new ArrayList<String>();
while (reader.hasNext()) {
  String temp = reader.nextLine();    
  String[] sts = temp.split(" ");
  for (int i = 0; i < sts.length; i++) {
    if (sts[i].equals("") && sts[i].equals(" ") && sts[i].equals("\n")) {
      out.add(sts[i]);
    }
  }
}

, , arraylist

while (reader.hasNext()) {
 out.add(reader.next());
}

, , .

, 2 ,

dictionary.contains(file2.get(i))

equals ArrayList, , .

, , 2 . , 2 Scanner.

. , hasNextLine() hasNext() , hasNextLine() , .

line = reader.nextLine();

, true false + ,

String[] splitLine = line.split(" "); 
for(String token: splitLine){    
  writer.write(dictionary.contains(file2.get(i))+" ");
}

, .

:

public class Test{

  private static List<String> loadDictionary(String fileName) throws FileNotFoundException {
    Scanner reader = new Scanner(new File(fileName));
    List<String> out = new ArrayList<String>();
    while (reader.hasNext()) {
      out.add(reader.next());
    }
    reader.close();
    return out;
  }

  public static void main(String[] args) throws IOException {
    List<String> dictionary;
    dictionary = loadDictionary("IbanDict.txt");

    Scanner reader = new Scanner(new File("AFF_outVal.txt"));
    OutputStreamWriter writer = new FileWriter(new File("out_test2.txt"));

    while(reader.hasNextLine()){
      String line = reader.nextLine();
      String[] tokens = line.split(" ");
      for(String token: tokens){
        writer.write(dictionary.contains(token)+" ");
      }
      writer.write(System.getProperty("line.separator"));
    }
    writer.close();
    reader.close();
  }
}
0

All Articles