Tuesday, 26 November 2013

Working example - check if a word exists - using a wiki URL for checking

Here is a working example of how to check whether a word really exists.
We will use a wiki URL for this (the code below queries the Polish Wiktionary). We put our word into the URL, and if the page opens, it means the word exists.
However, if we do this in a browser, for example Firefox, we will see text such as "we do not have much information about this, do you want to add some?" or something like that.

If we do it from code, the response is quite different: we get an HTTP response code, which is very helpful.

/**
 * Checks which words from a word list exist on the Polish Wiktionary.
 *
 * <p>Every line of the input file is treated as one word. The word is appended
 * to the wiki base URL and requested over HTTP; a {@code 200 OK} answer means
 * the page (and therefore the word) exists, anything else (typically
 * {@code 404}) means it does not. Existing words are echoed to standard output
 * and appended to a result file.
 *
 * <p>Created with IntelliJ IDEA by Michall, 23.11.13 17:58.
 */
public class WikiBig {

    /**
     * Base address the word is appended to. HTTPS is used because, as far as
     * known, the wiki redirects plain HTTP to HTTPS and
     * {@link HttpURLConnection} does not follow redirects that switch protocol,
     * so an "http://" address would never yield 200.
     */
    private static final String WIKI_BASE_URL = "https://pl.wiktionary.org/wiki/";

    /**
     * Number of leading input lines to skip. Raise it to resume an interrupted
     * run (the original code had a hard-coded resume point of 12229 here).
     */
    private static final int LINES_TO_SKIP = 0;

    /** How many times a word is requested before a network failure is given up on. */
    private static final int MAX_ATTEMPTS = 3;

    /** Connect and read timeout for one request, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 10000;

    /** 1-based number of the input line currently being processed. */
    private int cc = 0;

    public static void main(String[] args) {
        new WikiBig().doStuff();
    }

    /**
     * Installs an accept-all cookie handler and processes the word list found
     * at the fixed input path.
     */
    private void doStuff() {
        CookieHandler.setDefault(new CookieManager(null, CookiePolicy.ACCEPT_ALL));

        // One word per line, for example (Polish):
        //   dom
        //   kot
        //   drzew
        //   13921hx9x1   <- does not exist, so the wiki answers with a non-200
        //                   code and the word is not appended to the result file
        File file = new File("C:\\temp\\words.txt");
        loadTextFromFIle(file.getAbsolutePath());
    }

    /**
     * Reads a whole stream into a string using the platform default charset.
     * Unused by this class; handy for turning {@code con.getInputStream()}
     * into text. The stream is not closed.
     *
     * @param is stream to read
     * @return the stream content, or an empty string if the stream is empty
     */
    static String convertStreamToString(java.io.InputStream is) {
        // "\\A" matches only the start of input, so the first token is everything.
        java.util.Scanner s = new java.util.Scanner(is).useDelimiter("\\A");
        return s.hasNext() ? s.next() : "";
    }

    /**
     * Builds the page address for a word, percent-encoding it as UTF-8 so that
     * diacritics, spaces and reserved characters cannot corrupt the URL.
     *
     * @param word the word to look up
     * @return the absolute URL of the word's wiki page
     */
    static String buildWordUrl(String word) {
        try {
            // URLEncoder produces form encoding ("+" for a space); inside a URL
            // path a space has to be "%20". A literal "+" is already "%2B" here.
            String encoded = java.net.URLEncoder.encode(word, "UTF-8").replace("+", "%20");
            return WIKI_BASE_URL + encoded;
        } catch (UnsupportedEncodingException e) {
            // Every Java platform is required to support UTF-8.
            throw new IllegalStateException("UTF-8 is not supported", e);
        }
    }

    /**
     * Reads the word file (cp1250 encoded) line by line and checks every line
     * against the wiki. Words that exist are appended to the result file.
     *
     * @param path path of the word list file
     */
    private void loadTextFromFIle(String path) {
        BufferedReader br = null;
        try {
            br = new BufferedReader(new InputStreamReader(new FileInputStream(path), "cp1250"));

            String strLine;
            while ((strLine = br.readLine()) != null) {
                cc++; // keep the line counter current so LINES_TO_SKIP works
                askWordWiki(strLine);
            }
        } catch (IOException e) {
            // Also covers FileNotFoundException and UnsupportedEncodingException.
            System.err.println("Could not read word list " + path + ": " + e);
        } finally {
            if (br != null) {
                try {
                    br.close();
                } catch (IOException e) {
                    System.err.println("Could not close word list " + path + ": " + e);
                }
            }
        }
    }

    /**
     * Asks the wiki whether a page for the given word exists and, if so,
     * prints the word and appends it to the result file. Lines up to
     * {@link #LINES_TO_SKIP} and blank lines are ignored. Network failures are
     * retried at most {@link #MAX_ATTEMPTS} times.
     *
     * @param strLine one line of the word list
     */
    private void askWordWiki(final String strLine) {
        if (cc <= LINES_TO_SKIP) {
            return;
        }
        String word = strLine.trim();
        if (word.length() == 0) {
            return; // a blank line would query the bare "/wiki/" path
        }

        URL url;
        try {
            url = new URL(buildWordUrl(word));
        } catch (MalformedURLException e) {
            // Deterministic failure: retrying can never succeed.
            System.err.println("Skipping word with invalid URL: " + word);
            return;
        }

        System.out.print(".");
        if (!pageExists(url, word)) {
            return;
        }

        System.out.println(word);
        try {
            writeAppendToFile(word);
        } catch (IOException e) {
            System.err.println("Could not save word " + word + ": " + e);
        }
    }

    /**
     * Requests the URL and reports whether the server answered 200 OK,
     * retrying a bounded number of times on I/O errors such as timeouts.
     */
    private boolean pageExists(URL url, String word) {
        for (int attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
            HttpURLConnection con = null;
            try {
                con = (HttpURLConnection) url.openConnection();
                con.setRequestMethod("GET");
                con.setConnectTimeout(TIMEOUT_MILLIS);
                con.setReadTimeout(TIMEOUT_MILLIS);
                return con.getResponseCode() == HttpURLConnection.HTTP_OK;
            } catch (IOException e) {
                System.err.println("Request for " + word + " failed (attempt "
                        + attempt + " of " + MAX_ATTEMPTS + "): " + e);
            } finally {
                if (con != null) {
                    con.disconnect();
                }
            }
        }
        return false;
    }

    /**
     * Appends one word followed by a line separator to the result file.
     *
     * @param strLine the word to store
     * @throws IOException if the result file cannot be opened or written
     */
    private void writeAppendToFile(String strLine) throws IOException {
        BufferedWriter bw = new BufferedWriter(new FileWriter("C:\\temp\\foundwords.txt", true));
        try {
            bw.append(strLine);
            bw.newLine();
        } finally {
            bw.close();
        }
    }
}