package ws.palladian.extraction.feature;

import com.aliasi.util.Strings;
import edu.stanford.nlp.process.PTBLexer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import ws.palladian.extraction.content.PageContentExtractorException;
import ws.palladian.extraction.content.PalladianContentExtractor;
import ws.palladian.extraction.token.Tokenizer;
import ws.palladian.helper.StopWatch;
import ws.palladian.helper.collection.CollectionHelper;
import ws.palladian.helper.collection.CountMap;
import ws.palladian.helper.io.FileHelper;
import ws.palladian.retrieval.DocumentRetriever;
import ws.palladian.retrieval.RetrieverCallback;

/* loaded from: input_file:lib/palladian.jar:ws/palladian/extraction/feature/InformativenessAssigner.class */
public class InformativenessAssigner {
    /** Logger shared by all instances of this class. */
    private static final Logger LOGGER = LoggerFactory.getLogger(InformativenessAssigner.class);
    /**
     * Raw token frequencies keyed by token. Filled by {@code initTokenFrequencyMap()}, replaced by
     * {@code loadFrequencyMap()} and written out by {@code saveFrequencyMap()}.
     */
    private HashMap<String, Double> tokenFrequencies;
    /**
     * Token frequencies divided by the largest raw frequency found among tokens longer than one character.
     * Populated by {@code normalizeFrequencyMap()} and read by {@code getInformativeness(String)}.
     */
    private Map<String, Double> normalizedTokenFrequencies;

    /* loaded from: input_file:lib/palladian.jar:ws/palladian/extraction/feature/InformativenessAssigner$SingletonHolder.class */
    /**
     * Lazy holder for the shared {@link InformativenessAssigner}. The instance is created when this nested class
     * is initialized, i.e. the first time {@link InformativenessAssigner#getInstance()} reads the field.
     */
    static class SingletonHolder {
        /** The shared instance; {@code final} so it cannot be replaced after class initialization. */
        static final InformativenessAssigner instance = new InformativenessAssigner();

        SingletonHolder() {
        }
    }

    /**
     * Sets up both frequency tables with fresh maps obtained from {@code CollectionHelper.newHashMap()}.
     * The constructor is private; code outside this class obtains an instance through {@link #getInstance()}.
     */
    private InformativenessAssigner() {
        normalizedTokenFrequencies = CollectionHelper.newHashMap();
        tokenFrequencies = CollectionHelper.newHashMap();
    }

    /**
     * Gives access to the shared assigner kept in {@code SingletonHolder}.
     *
     * @return the instance stored in {@code SingletonHolder.instance}
     */
    public static InformativenessAssigner getInstance() {
        final InformativenessAssigner shared = SingletonHolder.instance;
        return shared;
    }

    /**
     * Replaces the raw token frequencies with the map deserialized from
     * {@code data/temp/tokenFrequencyMap.gz} and recomputes the normalized frequencies.
     *
     * <p>If deserialization yields {@code null}, the current map is kept and a warning is logged, instead of
     * storing {@code null} and failing later while iterating the map.</p>
     */
    public void loadFrequencyMap() {
        final String path = "data/temp/tokenFrequencyMap.gz";

        // The file is expected to contain the map written by saveFrequencyMap(); the element types cannot be
        // checked at run time, hence the narrowly scoped suppression.
        @SuppressWarnings("unchecked")
        HashMap<String, Double> loaded = (HashMap<String, Double>) FileHelper.deserialize(path);
        if (loaded == null) {
            LOGGER.warn("could not load token frequency map from {}, keeping the current map", path);
            return;
        }

        this.tokenFrequencies = loaded;
        normalizeFrequencyMap();
    }

    /**
     * Serializes the raw token frequency map to {@code data/temp/tokenFrequencyMap.gz} via
     * {@code FileHelper.serialize}.
     */
    public void saveFrequencyMap() {
        final String target = "data/temp/tokenFrequencyMap.gz";
        final HashMap<String, Double> frequencies = this.tokenFrequencies;
        FileHelper.serialize(frequencies, target);
    }

    /**
     * Rebuilds {@code normalizedTokenFrequencies} from {@code tokenFrequencies}: every raw frequency is divided
     * by the largest raw frequency found among tokens longer than one character.
     *
     * <p>Single-character tokens are ignored when looking for the maximum but are still normalized, so their
     * normalized value can exceed {@code 1.0}.</p>
     *
     * <p>Entries from a previous normalization are discarded first. If no usable maximum exists (empty map, or
     * only single-character tokens), the normalized map is left empty rather than being filled with
     * {@code NaN}/{@code Infinity} from a division by zero.</p>
     */
    private void normalizeFrequencyMap() {
        // Without this, tokens from a previously loaded map would survive a reload.
        this.normalizedTokenFrequencies.clear();

        double maxFrequency = 0.0d;
        for (Map.Entry<String, Double> entry : this.tokenFrequencies.entrySet()) {
            double frequency = entry.getValue().doubleValue();
            if (entry.getKey().length() > 1 && frequency > maxFrequency) {
                maxFrequency = frequency;
            }
        }

        if (maxFrequency <= 0.0d) {
            LOGGER.warn("no positive token frequency found, normalized frequency map stays empty");
            return;
        }

        for (Map.Entry<String, Double> entry : this.tokenFrequencies.entrySet()) {
            double normalized = entry.getValue().doubleValue() / maxFrequency;
            this.normalizedTokenFrequencies.put(entry.getKey(), Double.valueOf(normalized));
        }
    }

    /**
     * Builds the raw token frequency map from texts fetched by {@code getTexts()}.
     *
     * <p>For a fixed number of rounds, a batch of texts is fetched and tokenized, and the tokens are added to a
     * running count. After each round every counted token's relative frequency (count divided by the number of
     * tokens seen so far) is stored in {@code tokenFrequencies}. The map is serialized every
     * {@code saveInterval} rounds and once more at the end; finally the sorted counts are written to
     * {@code data/temp/tfmap.txt}.</p>
     *
     * <p>The frequency is computed with floating-point division over the cumulative token total, so it is not
     * truncated to an integer, and a round that yields no tokens cannot cause a division by zero.</p>
     */
    public void initTokenFrequencyMap() {
        final int rounds = 2;
        // With the current number of rounds the periodic save never triggers; the final save below still runs.
        final int saveInterval = 10;

        CountMap<String> tokenCounts = CountMap.create();
        // Counts accumulate across rounds, so the denominator has to accumulate as well.
        int totalTokens = 0;

        for (int round = 0; round < rounds; round++) {
            List<String> texts = getTexts();
            for (String text : texts) {
                List<String> tokens = Tokenizer.tokenize(text);
                for (String token : tokens) {
                    tokenCounts.add(token);
                }
                totalTokens += tokens.size();
            }

            if (totalTokens > 0) {
                for (String token : tokenCounts.uniqueItems()) {
                    double relativeFrequency = tokenCounts.getCount(token) / (double) totalTokens;
                    this.tokenFrequencies.put(token, Double.valueOf(relativeFrequency));
                }
            }

            LOGGER.debug("added another set of " + texts.size() + " texts, number of tokens now " + this.tokenFrequencies.keySet().size());
            if ((round + 1) % saveInterval == 0) {
                LOGGER.debug("saving frequency map (i = " + round + PTBLexer.ptb3EllipsisStr);
                saveFrequencyMap();
            }
        }

        saveFrequencyMap();
        FileHelper.writeToFile("data/temp/tfmap.txt", CollectionHelper.getPrint(tokenCounts.getSortedMap().entrySet()));
    }

    /**
     * Downloads a small batch of "random page" URLs and returns the sentence text extracted from each
     * retrieved document.
     *
     * <p>Each of the three random-page services is requested twice; a random query parameter is appended to
     * every URL. Documents for which the content extractor rejects the document are logged and skipped instead
     * of contributing text from an extractor that has no document set.</p>
     *
     * @return the extracted texts, one entry per successfully processed document
     */
    private List<String> getTexts() {
        StopWatch stopWatch = new StopWatch();
        final List<String> texts = new ArrayList<String>();

        List<String> urls = new ArrayList<String>();
        for (int i = 0; i < 2; i++) {
            urls.add("http://en.wikipedia.org/wiki/Special:Random?a=" + Math.random());
            urls.add("http://random.yahoo.com/bin/ryl?a=" + Math.random());
            urls.add("http://www.randomwebsite.com/cgi-bin/random.pl?a=" + Math.random());
        }

        new DocumentRetriever().getWebDocuments(urls, new RetrieverCallback<Document>() {
            @Override
            public void onFinishRetrieval(Document document) {
                PalladianContentExtractor extractor = new PalladianContentExtractor();
                try {
                    extractor.setDocument(document);
                } catch (PageContentExtractorException e) {
                    // Report through the class logger (not stderr) and do not collect text for this document.
                    LOGGER.warn("could not extract content from retrieved document, skipping it", e);
                    return;
                }
                texts.add(extractor.getSentencesString());
            }
        });

        LOGGER.info("got " + texts.size() + " texts in " + stopWatch.getElapsedTimeString());
        return texts;
    }

    /**
     * Wraps every token of the given text in a {@code <token>} element whose HSL hue encodes the token's
     * informativeness.
     *
     * <p>The hue is {@code Math.round(255 * getInformativeness(token))}. Each element is followed by a single
     * space. Tokens are inserted as they come from the tokenizer, without HTML escaping.</p>
     *
     * @param str the text to tokenize and tag
     * @return the concatenated {@code <token>} markup for all tokens, in tokenizer order
     */
    public String tagText(String str) {
        StringBuilder sb = new StringBuilder();
        for (String token : Tokenizer.tokenize(str)) {
            long hue = Math.round(255.0d * getInformativeness(token));
            sb.append("<token style=\"color:hsl(").append(hue).append(",100%, 35%)\">");
            sb.append(token);
            sb.append("</token>");
            sb.append(Strings.SINGLE_SPACE_STRING);
        }
        return sb.toString();
    }

    /**
     * Looks up the normalized frequency stored for a token.
     *
     * @param str the token to look up
     * @return the value stored in the normalized frequency map, or {@code 1.0} when the token has no entry
     */
    public double getInformativeness(String str) {
        final Double known = this.normalizedTokenFrequencies.get(str);
        if (known == null) {
            return 1.0d;
        }
        return known.doubleValue();
    }

    /**
     * Keeps only the whitespace-separated words whose informativeness is strictly greater than the threshold.
     *
     * <p>The input is split on single whitespace characters; the kept words are joined with single spaces and
     * the result is trimmed. Words without a stored frequency have informativeness {@code 1.0} (see
     * {@link #getInformativeness(String)}).</p>
     *
     * @param str the text to filter
     * @param d   the exclusive lower bound for a word's informativeness
     * @return the filtered text
     */
    public String removeWordsWithLowInformativeness(String str, double d) {
        // A builder avoids re-copying the growing result for every kept word.
        StringBuilder kept = new StringBuilder();
        for (String word : str.split("\\s")) {
            if (getInformativeness(word) > d) {
                kept.append(word).append(Strings.SINGLE_SPACE_STRING);
            }
        }
        return kept.toString().trim();
    }

    /**
     * Embeds the given markup in a minimal HTML 4.01 Transitional page titled "Informativeness Tagged Text"
     * and writes the page via {@code FileHelper.writeToFile}.
     *
     * @param str  the markup placed inside the page body, inserted as-is
     * @param str2 the path handed to {@code FileHelper.writeToFile}
     */
    public void saveAsHTML(String str, String str2) {
        StringBuilder page = new StringBuilder()
                .append("<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01 Transitional//EN\">")
                .append("<html>")
                .append("<head>")
                .append("<title>Informativeness Tagged Text</title>")
                .append("</head>")
                .append("<body>")
                .append(str)
                .append("</body>")
                .append("</html>");
        FileHelper.writeToFile(str2, page);
    }

    /**
     * Command-line entry point. Creates its own assigner (it does not go through {@code getInstance()}),
     * rebuilds the token frequency map from freshly downloaded texts and then terminates the JVM.
     *
     * <p>The statements after {@code System.exit(0)} never run as written: loading the saved frequency map,
     * tagging a fixed sample text, writing it to {@code data/temp/taggedInformativeness.html} and logging the
     * elapsed time.</p>
     *
     * <p>NOTE(review): the sample text literal spans several physical lines in this file; presumably the
     * original source had escaped line breaks there - confirm before compiling.</p>
     *
     * @param strArr command-line arguments; not read
     */
    public static void main(String[] strArr) {
        StopWatch stopWatch = new StopWatch();
        InformativenessAssigner informativenessAssigner = new InformativenessAssigner();
        informativenessAssigner.initTokenFrequencyMap();
        // Terminates the JVM here; everything below is effectively disabled demo code.
        System.exit(0);
        informativenessAssigner.loadFrequencyMap();
        informativenessAssigner.saveAsHTML(informativenessAssigner.tagText("Superman is a fictional character, a comic book superhero appearing in publications by DC Comics, widely considered to be an American cultural icon. Created by American writer Jerry Siegel and Canadian-born American artist Joe Shuster in 1932 while both were living in Cleveland, Ohio, and sold to Detective Comics, Inc. (later DC Comics) in 1938, the character first appeared in Action Comics #1 (June 1938) and subsequently appeared in various radio serials, television programs, films, newspaper strips, and video games. With the success of his adventures, Superman helped to create the superhero genre and establish its primacy within the American comic book. The character's appearance is distinctive and iconic: a blue, red and yellow costume, complete with cape, with a stylized 'S' shield on his chest. This shield is now typically used across media to symbolize the character.<br><br><br>Dom Cobb (Leonardo DiCaprio) and his partner Arthur (Joseph Gordon-Levitt) perform an illegal corporate espionage by entering the subconscious minds of their targets, using two-level \"dream within a dream\" strategies to \"extract\" valuable information. Each of the \"extractors\" carries a \"totem\", a personalized small object whose behavior is unpredictable to anyone except its owner, to determine if they are within another person's dream. Cobb's totem is a spinning top which perpetually spins in the dream state. Cobb struggles with memories of his dead wife, Mal (Marion Cotillard), who manifests within his dreams and tries to sabotage his efforts. Cobb is approached by the wealthy Mr. Saito (Ken Watanabe), Cobb's last extraction target, asking them to perform the act of \"inception\", planting an idea within the person's subconscious mind. 
Saito wishes to break up the vast energy empire of his competitor, the ailing Maurice Fischer (Pete Postlethwaite), by suggesting this idea to his son Robert Fischer (Cillian Murphy) who will inherit the empire when his father dies. Should Cobb succeed, Saito promises to use his influence to clear Cobb of the murder charges for his wife's death, allowing Cobb to re-enter the United States and reunite with his children. Cobb assembles his team: Eames (Tom Hardy), an identity forger; Yusuf (Dileep Rao), a chemist who concocts the powerful sedative needed to stabilize the layers of the shared dream; and Ariadne (Ellen Page), a young student architect tasked with designing the labyrinth of the dream landscapes. Saito insists on joining the team as an observer and to assure the job is completed. While planning the inception, Ariadne learns of the guilt Cobb struggles with from Mal's suicide and his separation from his children when he fled the country as a fugitive. The job is set into motion when Maurice Fischer dies and his son accompanies his father's body from Sydney to Los Angeles. During the flight, Cobb sedates Fischer, and the team bring him into a three-level shared dream. At each stage, the member of the team who is \"creating\" the dream remains while the other team members fall asleep within the dream to travel further down into Fischer's subconscious. The dreamers will then ride a synchronized system of \"kicks\" (a car diving off a bridge, a falling elevator, and a collapsing building) back up the levels to wake up to reality. In the first level, Yusuf's dream of a rainy city, the team successfully abducts Fischer, but the team is attacked by Fischer's militarized subconscious projections, which have been trained to hunt and kill extractors. 
Saito is mortally wounded during the shoot-out, but due to the strength of Yusuf's sedative, dying in the dream will send them into limbo, a deep subconscious level where they may lose their grip on reality and be trapped indefinitely. Eames takes the appearance of Fischer's godfather Peter Browning (Tom Berenger) to suggest that he reconsider his opinion of his father's will. Yusuf remains on the first level driving a van through the streets, while the remaining characters enter Arthur's dream, taking place in a corporate hotel. Cobb turns Fischer against Browning and persuades him to join the team as Arthur runs point, and they descend to the third dream level, a snowy mountain fortress dreamed by Eames, which Fischer is told represents Browning's subconscious. Yusuf's evasive driving on the first level manifests as distorted gravity effects on the second and an avalanche on the third. Saito succumbs to his wounds, and Cobb's projection of Mal sabotages the plan by shooting Fischer dead.[11] Cobb and Ariadne elect to enter limbo to find Fischer and Saito. There, Cobb confronts his projection of Mal, who tries to convince him to stay with her and his kids in limbo. Cobb refuses and confesses that he was responsible for Mal's suicide: to help her escape from limbo during a shared dream experience, he inspired in her the idea that her world wasn't real. Once she had returned to reality, she became convinced that she was still dreaming and needed to die in order to wake up. Through his confession, Cobb attains catharsis and chooses to remain in limbo to search for Saito; Eames defibrillates Fischer to bring him back up to the third-level mountain fortress, where he enters a safe room and discovers and accepts the idea to split up his father's business empire. Leaving Cobb behind, the team members escape by riding the kicks back up the levels of the dream. 
Cobb eventually finds an elderly Saito who has been waiting in limbo for decades in dream time (just a few hours in real time), and the two help each other to remember their arrangement. The team awakens on the flight; Saito arranges for Cobb to get through U.S. customs, and he goes home to reunite with his children. Cobb uses his spinning top to test reality but is distracted by his children before he sees the result."), "data/temp/taggedInformativeness.html");
        LOGGER.info("process took " + stopWatch.getElapsedTimeString());
    }
}
