/*
 * Decompiled with CFR 0.152.
 */
package com.googlecode.clearnlp.morphology;

import com.googlecode.clearnlp.util.pair.Pair;
import java.util.ArrayList;
import java.util.List;
import jregex.MatchResult;
import jregex.Pattern;
import jregex.Replacer;
import jregex.Substitution;
import jregex.TextBuffer;

public class MPLib {
    public static final java.util.regex.Pattern PUNCT_CHAR = java.util.regex.Pattern.compile("\\p{Punct}");
    public static final java.util.regex.Pattern PUNCT_ONLY = java.util.regex.Pattern.compile("^\\p{Punct}+$");
    public static final java.util.regex.Pattern PUNCT_PERIOD = java.util.regex.Pattern.compile("^(\\.|\\?|\\!)+$");
    public static final Pattern PUNCT_REPEAT = new Pattern("\\.{2,}|\\!{2,}|\\?{2,}|\\-{2,}|\\*{2,}|\\={2,}|\\~{2,}|\\,{2,}");
    public static final Replacer PUNCT_REPEAT_REPLACE = PUNCT_REPEAT.replacer(new Substitution(){

        public void appendSubstitution(MatchResult match, TextBuffer dest) {
            char c = match.group(0).charAt(0);
            dest.append(c);
            dest.append(c);
        }
    });
    public static final java.util.regex.Pattern DIGIT_SPAN = java.util.regex.Pattern.compile("\\d+");
    public static final java.util.regex.Pattern DIGIT_ONLY = java.util.regex.Pattern.compile("^\\d+$");
    public static final java.util.regex.Pattern DIGIT_LIKE = java.util.regex.Pattern.compile("\\d%|\\$\\d|(^|\\d)\\.\\d|\\d,\\d|\\d:\\d|\\d-\\d|\\d\\/\\d");
    public static final java.util.regex.Pattern ALPHA_CHAR = java.util.regex.Pattern.compile("\\p{Alpha}");
    public static final java.util.regex.Pattern ALNUM_CHAR = java.util.regex.Pattern.compile("\\p{Alnum}");
    public static final java.util.regex.Pattern WHITE_SPAN = java.util.regex.Pattern.compile("\\s+");
    public static final Pattern URL_SPAN = new Pattern("((([A-Za-z]{3,9}:(?:\\/\\/)?)(?:[-;:&=\\+\\$,\\w]+@)?[A-Za-z0-9.-]+|(?:www.|[-;:&=\\+\\$,\\w]+@)[A-Za-z0-9.-]+)((?:\\/[\\+~%\\/.\\w-_]*)?\\??(?:[-\\+=&;%@.\\w_]*)#?(?:[.\\!\\/\\\\w]*))?|(\\w+\\.)+(com|edu|gov|int|mil|net|org|biz)$)");
    public static final java.util.regex.Pattern FILE_EXTS = java.util.regex.Pattern.compile("\\S+\\.(3gp|7z|ace|ai(f){0,2}|amr|asf|asp(x)?|asx|avi|bat|bin|bmp|bup|cab|cbr|cd(a|l|r)|chm|dat|divx|dll|dmg|doc|dss|dvf|dwg|eml|eps|exe|fl(a|v)|gif|gz|hqx|(s)?htm(l)?|ifo|indd|iso|jar|jsp|jp(e)?g|lnk|log|m4(a|b|p|v)|mcd|mdb|mid|mov|mp(2|3|4)|mp(e)?g|ms(i|wmm)|ogg|pdf|php|png|pps|ppt|ps(d|t)?|ptb|pub|qb(b|w)|qxd|ra(m|r)|rm(vb)?|rtf|se(a|s)|sit(x)?|sql|ss|swf|tgz|tif|torrent|ttf|txt|vcd|vob|wav|wm(a|v)|wp(d|s)|xls|xml|xtm|zip)$");
    protected static final List<Pair<java.util.regex.Pattern, String>> BRACKET_LIST = new ArrayList<Pair<java.util.regex.Pattern, String>>(){
        {
            this.add(new Pair<java.util.regex.Pattern, String>(java.util.regex.Pattern.compile("-LRB-"), "("));
            this.add(new Pair<java.util.regex.Pattern, String>(java.util.regex.Pattern.compile("-RRB-"), ")"));
            this.add(new Pair<java.util.regex.Pattern, String>(java.util.regex.Pattern.compile("-LSB-"), "["));
            this.add(new Pair<java.util.regex.Pattern, String>(java.util.regex.Pattern.compile("-RSB-"), "]"));
            this.add(new Pair<java.util.regex.Pattern, String>(java.util.regex.Pattern.compile("-LCB-"), "{"));
            this.add(new Pair<java.util.regex.Pattern, String>(java.util.regex.Pattern.compile("-RCB-"), "}"));
            this.trimToSize();
        }
    };

    public static boolean containsURL(String str) {
        return URL_SPAN.matcher(str).find();
    }

    public static String[] splitWhiteSpaces(String str) {
        return WHITE_SPAN.split(str);
    }

    public static String normalizeBasic(String form) {
        if (MPLib.containsURL(form)) {
            return "#url#";
        }
        form = MPLib.normalizeDigits(form);
        form = MPLib.normalizePunctuation(form);
        return form;
    }

    public static String normalizeDigits(String form) {
        form = DIGIT_LIKE.matcher(form).replaceAll("0");
        return DIGIT_SPAN.matcher(form).replaceAll("0");
    }

    public static String normalizePunctuation(String form) {
        return PUNCT_REPEAT_REPLACE.replace(form);
    }

    public static String revertBracket(String form) {
        for (Pair<java.util.regex.Pattern, String> p : BRACKET_LIST) {
            form = ((java.util.regex.Pattern)p.o1).matcher(form).replaceAll((String)p.o2);
        }
        return form;
    }

    public static boolean containsAnyPunctuation(String form) {
        return PUNCT_CHAR.matcher(form).find();
    }

    public static boolean containsOnlyPunctuation(String form) {
        return PUNCT_ONLY.matcher(form).find();
    }

    public static boolean containsAnySpecificPunctuation(String form, char ... punctuation) {
        int size = form.length();
        for (int i = 0; i < size; ++i) {
            for (char p : punctuation) {
                if (form.charAt(i) != p) continue;
                return true;
            }
        }
        return false;
    }

    public static boolean containsOnlyDigits(String form) {
        return DIGIT_ONLY.matcher(form).find();
    }

    public static boolean isPeriodLike(String form) {
        if (PUNCT_PERIOD.matcher(form).find()) {
            return true;
        }
        if (form.length() > 1 && form.charAt(0) == '/') {
            return PUNCT_PERIOD.matcher(form.substring(1)).find();
        }
        return false;
    }

    public static boolean isAlpha(String form) {
        return ALPHA_CHAR.matcher(form).find();
    }

    public static boolean isAlnum(String form) {
        return ALNUM_CHAR.matcher(form).find();
    }
}

