/*
 * Decompiled with CFR 0.152.
 */
package org.apache.nutch.crawl;

import com.fasterxml.jackson.core.JsonGenerationException;
import com.fasterxml.jackson.core.JsonGenerator;
import com.fasterxml.jackson.core.PrettyPrinter;
import com.fasterxml.jackson.core.util.MinimalPrettyPrinter;
import com.fasterxml.jackson.databind.JsonSerializer;
import com.fasterxml.jackson.databind.Module;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.ObjectWriter;
import com.fasterxml.jackson.databind.SerializerProvider;
import com.fasterxml.jackson.databind.module.SimpleModule;
import com.tdunning.math.stats.MergingDigest;
import com.tdunning.math.stats.TDigest;
import java.io.Closeable;
import java.io.DataOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.lang.invoke.MethodHandles;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Random;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.jexl3.JexlScript;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BooleanWritable;
import org.apache.hadoop.io.ByteWritable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.MapFile;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.VIntWritable;
import org.apache.hadoop.io.VLongWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.MapFileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.mapreduce.lib.partition.HashPartitioner;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.StringUtils;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.crawl.NutchWritable;
import org.apache.nutch.util.AbstractChecker;
import org.apache.nutch.util.JexlUtil;
import org.apache.nutch.util.NutchConfiguration;
import org.apache.nutch.util.NutchJob;
import org.apache.nutch.util.SegmentReaderUtil;
import org.apache.nutch.util.StringUtil;
import org.apache.nutch.util.TimingUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class CrawlDbReader
extends AbstractChecker
implements Closeable {
    /** Class-wide SLF4J logger, named after the class returned by the method-handle lookup. */
    private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
    /** MapFile readers opened by {@code openReaders}; {@code null} while no readers are open. */
    private MapFile.Reader[] readers = null;
    /** CrawlDb location; {@code run} assigns it from the first command-line argument. */
    protected String crawlDb;
    /** Modification time of {@code <crawlDb>/current} recorded when the readers were last (re)opened. */
    private long lastModified = 0L;

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    /**
     * Makes sure {@link #readers} refers to readers for the present state of
     * {@code <crawlDb>/current}.
     *
     * <p>The modification time of that directory is compared with the value
     * remembered from the previous call: if readers are open and the time is
     * unchanged nothing happens, otherwise any open readers are closed and a
     * new set is opened.
     *
     * @param crawlDb path of the CrawlDb
     * @param config  configuration used to resolve the file system and open the readers
     * @throws IOException if the directory status cannot be read or the readers cannot be opened
     */
    private void openReaders(String crawlDb, Configuration config) throws IOException {
        Path currentDir = new Path(crawlDb, "current");
        FileSystem fs = currentDir.getFileSystem(config);
        long modifiedAt = fs.getFileStatus(currentDir).getModificationTime();
        synchronized (this) {
            boolean alreadyOpen = readers != null;
            if (alreadyOpen && this.lastModified == modifiedAt) {
                // Readers are still current - keep using them.
                return;
            }
            if (alreadyOpen) {
                closeReaders();
            }
            this.lastModified = modifiedAt;
            readers = MapFileOutputFormat.getReaders(currentDir, config);
        }
    }

    /**
     * Closes every open MapFile reader and resets {@link #readers} to
     * {@code null}. Does nothing when no readers are open.
     *
     * <p>Closing is best-effort: a failure on one reader is logged at WARN
     * level and the remaining readers are still closed.
     */
    private void closeReaders() {
        if (this.readers == null) {
            return;
        }
        for (MapFile.Reader reader : this.readers) {
            try {
                reader.close();
            }
            catch (Exception e) {
                // Deliberately not rethrown: one bad reader must not prevent
                // the others from being released.
                LOG.warn("Failed to close CrawlDb reader, continuing", e);
            }
        }
        this.readers = null;
    }

    /**
     * Releases the MapFile readers held by this instance, if any, by
     * delegating to {@code closeReaders()}.
     */
    @Override
    public void close() {
        closeReaders();
    }

    /**
     * Runs the CrawlDb statistics job over {@code <crawlDb>/current} and merges
     * the records of all job output files into one sorted map.
     *
     * <p>The job writes to a temporary folder {@code <crawlDb>/stat_tmp<millis>}.
     * That folder is deleted, and the output readers are closed, whether or not
     * reading the job output succeeds.
     *
     * <p>The helper keys {@code "sc"}, {@code "fi"} and {@code "ft"} are only
     * used to derive the min/max entries ({@code "scn"}/{@code "scx"},
     * {@code "fin"}/{@code "fix"}, {@code "ftn"}/{@code "ftx"}) and are removed
     * from the returned map.
     *
     * @param crawlDb path of the CrawlDb
     * @param config  base configuration the job is created from
     * @param sort    value stored under {@code db.reader.stats.sort} in the job configuration
     * @return merged statistics keyed by statistic name
     * @throws IOException            if the job or reading its output fails
     * @throws InterruptedException   if waiting for the job is interrupted
     * @throws ClassNotFoundException if the job cannot resolve one of its classes
     * @throws RuntimeException       if the job completes unsuccessfully
     */
    private TreeMap<String, Writable> processStatJobHelper(String crawlDb, Configuration config, boolean sort) throws IOException, InterruptedException, ClassNotFoundException {
        Path tmpFolder = new Path(crawlDb, "stat_tmp" + System.currentTimeMillis());
        Job job = Job.getInstance(config, "Nutch CrawlDbReader: " + crawlDb);
        // From here on use the configuration held by the job.
        config = job.getConfiguration();
        config.setBoolean("db.reader.stats.sort", sort);
        FileInputFormat.addInputPath(job, new Path(crawlDb, "current"));
        job.setInputFormatClass(SequenceFileInputFormat.class);
        job.setJarByClass(CrawlDbReader.class);
        job.setMapperClass(CrawlDbStatMapper.class);
        job.setCombinerClass(CrawlDbStatReducer.class);
        job.setReducerClass(CrawlDbStatReducer.class);
        FileOutputFormat.setOutputPath(job, tmpFolder);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NutchWritable.class);
        config.setBoolean("mapreduce.fileoutputcommitter.marksuccessfuljobs", false);
        FileSystem fileSystem = tmpFolder.getFileSystem(config);
        try {
            boolean success = job.waitForCompletion(true);
            if (!success) {
                String message = NutchJob.getJobFailureLogMessage("CrawlDbReader", job);
                LOG.error(message);
                fileSystem.delete(tmpFolder, true);
                throw new RuntimeException(message);
            }
        }
        catch (IOException | ClassNotFoundException | InterruptedException e) {
            LOG.error(StringUtils.stringifyException(e));
            fileSystem.delete(tmpFolder, true);
            throw e;
        }

        TreeMap<String, Writable> stats = new TreeMap<String, Writable>();
        SequenceFile.Reader[] readers = null;
        try {
            readers = SegmentReaderUtil.getReaders(tmpFolder, config);
            Text key = new Text();
            NutchWritable value = new NutchWritable();
            for (SequenceFile.Reader reader : readers) {
                while (reader.next(key, value)) {
                    mergeStatValue(stats, key.toString(), value.get());
                }
            }
        }
        finally {
            // Always release the readers and remove the temporary job output,
            // also when reading failed part-way through.
            if (readers != null) {
                for (SequenceFile.Reader reader : readers) {
                    try {
                        if (reader != null) {
                            reader.close();
                        }
                    }
                    catch (IOException e) {
                        LOG.warn("Failed to close reader of CrawlDb statistics output", e);
                    }
                }
            }
            fileSystem.delete(tmpFolder, true);
        }
        stats.remove("sc");
        stats.remove("fi");
        stats.remove("ft");
        return stats;
    }

    /**
     * Folds one record of the statistics job output into {@code stats}.
     *
     * <p>The first value seen for a key is stored as is. Later values are
     * combined depending on the key: {@code "sc"} updates the float min/max
     * entries {@code "scn"}/{@code "scx"}; {@code "ft"} and {@code "fi"} update
     * the long min/max entries {@code <key>n}/{@code <key>x}; {@code "sct"} is
     * summed as float; {@code "scd"} merges serialized t-digests; every other
     * key is summed as long.
     *
     * @param stats    accumulated statistics, modified in place
     * @param k        key of the record
     * @param incoming value of the record
     */
    private static void mergeStatValue(TreeMap<String, Writable> stats, String k, Writable incoming) {
        Writable current = stats.get(k);
        if (current == null) {
            stats.put(k, incoming);
            return;
        }
        if (k.equals("sc")) {
            // Until a min/max entry exists, the first "sc" value seeds it.
            float min = stats.containsKey("scn") ? ((FloatWritable) stats.get("scn")).get() : ((FloatWritable) current).get();
            float max = stats.containsKey("scx") ? ((FloatWritable) stats.get("scx")).get() : ((FloatWritable) current).get();
            float fvalue = ((FloatWritable) incoming).get();
            if (min > fvalue) {
                min = fvalue;
            }
            if (max < fvalue) {
                max = fvalue;
            }
            stats.put("scn", new FloatWritable(min));
            stats.put("scx", new FloatWritable(max));
            return;
        }
        if (k.equals("ft") || k.equals("fi")) {
            String minKey = k + "n";
            String maxKey = k + "x";
            // Until a min/max entry exists, the first value for the key seeds it.
            long min = stats.containsKey(minKey) ? ((LongWritable) stats.get(minKey)).get() : ((LongWritable) current).get();
            long max = stats.containsKey(maxKey) ? ((LongWritable) stats.get(maxKey)).get() : ((LongWritable) current).get();
            long lvalue = ((LongWritable) incoming).get();
            if (min > lvalue) {
                min = lvalue;
            }
            if (max < lvalue) {
                max = lvalue;
            }
            stats.put(minKey, new LongWritable(min));
            stats.put(maxKey, new LongWritable(max));
            return;
        }
        if (k.equals("sct")) {
            FloatWritable sum = (FloatWritable) current;
            sum.set(sum.get() + ((FloatWritable) incoming).get());
            return;
        }
        if (k.equals("scd")) {
            MergingDigest incomingDigest = MergingDigest.fromBytes(ByteBuffer.wrap(((BytesWritable) incoming).getBytes()));
            MergingDigest merged;
            if (current instanceof BytesWritable) {
                merged = MergingDigest.fromBytes(ByteBuffer.wrap(((BytesWritable) current).getBytes()));
                merged.add((TDigest) incomingDigest);
            } else {
                merged = incomingDigest;
            }
            ByteBuffer mergedBytes = ByteBuffer.allocate(merged.smallByteSize());
            merged.asSmallBytes(mergedBytes);
            stats.put(k, new BytesWritable(mergedBytes.array()));
            return;
        }
        LongWritable total = (LongWritable) current;
        total.set(total.get() + ((LongWritable) incoming).get());
    }

    /**
     * Computes the CrawlDb statistics and writes them to the log at INFO level.
     *
     * <p>Score quantiles are reported for a built-in list of probabilities
     * unless {@code db.stats.score.quantiles} holds at least one entry. In that
     * case the configured entries are used (sorted ascending); entries that are
     * not numbers or lie outside {@code [0, 1]} are skipped with a warning.
     *
     * @param crawlDb path of the CrawlDb
     * @param config  configuration for the statistics job
     * @param sort    passed on to the statistics job (the command-line usage
     *                text describes it as listing status sorted by host)
     * @throws IOException            if the statistics job or reading its output fails
     * @throws InterruptedException   if waiting for the job is interrupted
     * @throws ClassNotFoundException if the job cannot resolve one of its classes
     */
    public void processStatJob(String crawlDb, Configuration config, boolean sort) throws IOException, InterruptedException, ClassNotFoundException {
        double[] quantiles = new double[]{0.01, 0.05, 0.1, 0.2, 0.25, 0.3, 0.4, 0.5, 0.6, 0.7, 0.75, 0.8, 0.9, 0.95, 0.99};
        // getStrings() yields null when the property is unset or holds no
        // entries (e.g. an empty string); keep the defaults in that case.
        String[] configured = config.getStrings("db.stats.score.quantiles");
        if (configured != null && configured.length > 0) {
            ArrayList<Double> qs = new ArrayList<Double>();
            for (String s : configured) {
                try {
                    double d = Double.parseDouble(s);
                    if (d >= 0.0 && d <= 1.0) {
                        qs.add(d);
                    } else {
                        LOG.warn("Skipping quantile {} not in range in db.stats.score.quantiles", s);
                    }
                }
                catch (NumberFormatException e) {
                    LOG.warn("Skipping bad floating point number {} in db.stats.score.quantiles: {}", s, e.getMessage());
                }
            }
            // Build and sort the array once, after all entries were parsed.
            quantiles = new double[qs.size()];
            int i = 0;
            for (Double q : qs) {
                quantiles[i++] = q;
            }
            Arrays.sort(quantiles);
        }

        LOG.info("CrawlDb statistics start: {}", crawlDb);
        TreeMap<String, Writable> stats = this.processStatJobHelper(crawlDb, config, sort);
        LOG.info("Statistics for CrawlDb: {}", crawlDb);
        LongWritable totalCnt = new LongWritable(0L);
        if (stats.containsKey("T")) {
            totalCnt = (LongWritable) stats.get("T");
            stats.remove("T");
        }
        LOG.info("TOTAL urls:\t{}", (Object) totalCnt.get());
        for (Map.Entry<String, Writable> entry : stats.entrySet()) {
            String k = entry.getKey();
            Writable val = entry.getValue();
            // Unpack the value once into the representation matching its type.
            long value = 0L;
            double fvalue = 0.0;
            byte[] bytesValue = null;
            if (val instanceof LongWritable) {
                value = ((LongWritable) val).get();
            } else if (val instanceof FloatWritable) {
                fvalue = ((FloatWritable) val).get();
            } else if (val instanceof BytesWritable) {
                bytesValue = ((BytesWritable) val).getBytes();
            }

            if (k.startsWith("status")) {
                // Key layout: "status <code>" or "status <code> <extra>".
                String[] st = k.split(" ");
                int code = Integer.parseInt(st[1]);
                if (st.length > 2) {
                    LOG.info("   {} :\t{}", (Object) st[2], (Object) val);
                } else {
                    LOG.info("{} {} ({}):\t{}", new Object[]{st[0], code, CrawlDatum.getStatusName((byte) code), val});
                }
                continue;
            }
            switch (k) {
            case "scn":
                LOG.info("min score:\t{}", (Object) fvalue);
                break;
            case "scx":
                LOG.info("max score:\t{}", (Object) fvalue);
                break;
            case "sct":
                LOG.info("avg score:\t{}", (Object) (fvalue / (double) totalCnt.get()));
                break;
            case "scNaN":
                LOG.info("score == NaN:\t{}", (Object) value);
                break;
            case "ftn":
                // The value is multiplied by 60000 to obtain epoch milliseconds.
                LOG.info("earliest fetch time:\t{}", (Object) new Date(60000L * value));
                break;
            case "ftx":
                LOG.info("latest fetch time:\t{}", (Object) new Date(60000L * value));
                break;
            case "ftt":
                LOG.info("avg of fetch times:\t{}", (Object) new Date(60000L * (value / totalCnt.get())));
                break;
            case "fin":
                LOG.info("shortest fetch interval:\t{}", (Object) TimingUtil.secondsToDaysHMS(value));
                break;
            case "fix":
                LOG.info("longest fetch interval:\t{}", (Object) TimingUtil.secondsToDaysHMS(value));
                break;
            case "fit":
                LOG.info("avg fetch interval:\t{}", (Object) TimingUtil.secondsToDaysHMS(value / totalCnt.get()));
                break;
            case "scd":
                MergingDigest tdigest = MergingDigest.fromBytes(ByteBuffer.wrap(bytesValue));
                for (double q : quantiles) {
                    LOG.info("score quantile {}:\t{}", (Object) q, (Object) tdigest.quantile(q));
                }
                break;
            default:
                LOG.info("{}:\t{}", (Object) k, (Object) val);
                break;
            }
        }
        LOG.info("CrawlDb statistics: done");
    }

    /**
     * Looks up the CrawlDb entry stored for a URL.
     *
     * <p>The readers are (re)opened if needed, then the entry is fetched
     * through {@code MapFileOutputFormat.getEntry} with a hash partitioner.
     *
     * @param crawlDb path of the CrawlDb
     * @param url     URL used as lookup key
     * @param config  configuration used to open the readers
     * @return whatever {@code MapFileOutputFormat.getEntry} returns, cast to
     *         {@code CrawlDatum}; callers in this class treat {@code null} as "not found"
     * @throws IOException if the readers cannot be opened or the lookup fails
     */
    public CrawlDatum get(String crawlDb, String url, Configuration config) throws IOException {
        Text lookupKey = new Text(url);
        CrawlDatum holder = new CrawlDatum();
        openReaders(crawlDb, config);
        Partitioner<Text, CrawlDatum> partitioner = new HashPartitioner<Text, CrawlDatum>();
        return (CrawlDatum) MapFileOutputFormat.getEntry(this.readers, partitioner, lookupKey, holder);
    }

    /**
     * Handles one input line by treating it as a URL and appending the
     * matching CrawlDb record to {@code output}.
     *
     * @param line   the URL to look up
     * @param output buffer receiving the textual result
     * @return always {@code 0}
     * @throws Exception if the job instance cannot be created or the lookup fails
     */
    @Override
    protected int process(String line, StringBuilder output) throws Exception {
        String jobName = "Nutch CrawlDbReader: process " + this.crawlDb;
        Configuration jobConf = Job.getInstance(getConf(), jobName).getConfiguration();
        readUrl(this.crawlDb, line, jobConf, output);
        return 0;
    }

    /**
     * Appends a short report for one URL to {@code output}: a {@code "URL: "}
     * line followed by the CrawlDb record, or by {@code "not found"} when the
     * lookup returns {@code null}.
     *
     * @param crawlDb path of the CrawlDb
     * @param url     URL to look up
     * @param config  configuration used for the lookup
     * @param output  buffer the report is appended to
     * @throws IOException if the lookup fails
     */
    public void readUrl(String crawlDb, String url, Configuration config, StringBuilder output) throws IOException {
        CrawlDatum datum = get(crawlDb, url, config);
        output.append("URL: ").append(url).append("\n");
        if (datum == null) {
            output.append("not found");
        } else {
            output.append(datum);
        }
        output.append("\n");
    }

    /**
     * Runs a job that dumps {@code <crawlDb>/current} into {@code output}.
     *
     * <p>The output format is chosen from {@code format}: {@code "csv"},
     * {@code "crawldb"} or {@code "json"}; any other value produces plain text
     * output. Each optional filter that is not {@code null} is stored in the
     * job configuration under the key of the same name ({@code status},
     * {@code regex}, {@code retry}, {@code expr}, {@code sample}).
     *
     * @param crawlDb path of the CrawlDb
     * @param output  output directory of the dump
     * @param config  base configuration the job is created from
     * @param format  output format name; must not be {@code null}
     * @param regex   optional value for the {@code regex} job setting
     * @param status  optional value for the {@code status} job setting
     * @param retry   optional value for the {@code retry} job setting
     * @param expr    optional value for the {@code expr} job setting
     * @param sample  optional value for the {@code sample} job setting
     * @throws IOException            if the job fails with an I/O error
     * @throws ClassNotFoundException if the job cannot resolve one of its classes
     * @throws InterruptedException   if waiting for the job is interrupted
     * @throws RuntimeException       if the job completes unsuccessfully
     */
    public void processDumpJob(String crawlDb, String output, Configuration config, String format, String regex, String status, Integer retry, String expr, Float sample) throws IOException, ClassNotFoundException, InterruptedException {
        LOG.info("CrawlDb dump: starting");
        LOG.info("CrawlDb db: {}", crawlDb);

        Path dumpDir = new Path(output);
        Job dumpJob = Job.getInstance(config, "Nutch CrawlDbReader: dump " + crawlDb);
        Configuration dumpConf = dumpJob.getConfiguration();

        // Input and output locations.
        FileInputFormat.addInputPath(dumpJob, new Path(crawlDb, "current"));
        dumpJob.setInputFormatClass(SequenceFileInputFormat.class);
        FileOutputFormat.setOutputPath(dumpJob, dumpDir);

        // Output format by name, plain text being the fallback.
        switch (format) {
        case "csv":
            dumpJob.setOutputFormatClass(CrawlDatumCsvOutputFormat.class);
            break;
        case "crawldb":
            dumpJob.setOutputFormatClass(MapFileOutputFormat.class);
            break;
        case "json":
            dumpJob.setOutputFormatClass(CrawlDatumJsonOutputFormat.class);
            break;
        default:
            dumpJob.setOutputFormatClass(TextOutputFormat.class);
            break;
        }

        // Optional filters are handed over through the job configuration.
        if (status != null) {
            dumpConf.set("status", status);
        }
        if (regex != null) {
            dumpConf.set("regex", regex);
        }
        if (retry != null) {
            dumpConf.setInt("retry", retry);
        }
        if (expr != null) {
            dumpConf.set("expr", expr);
            LOG.info("CrawlDb db: expr: {}", expr);
        }
        if (sample != null) {
            dumpConf.setFloat("sample", sample);
        }

        dumpJob.setMapperClass(CrawlDbDumpMapper.class);
        dumpJob.setOutputKeyClass(Text.class);
        dumpJob.setOutputValueClass(CrawlDatum.class);
        dumpJob.setJarByClass(CrawlDbReader.class);

        try {
            if (!dumpJob.waitForCompletion(true)) {
                String failure = NutchJob.getJobFailureLogMessage("CrawlDbReader", dumpJob);
                LOG.error(failure);
                throw new RuntimeException(failure);
            }
        }
        catch (IOException | ClassNotFoundException | InterruptedException ex) {
            LOG.error(StringUtils.stringifyException(ex));
            throw ex;
        }
        LOG.info("CrawlDb dump: done");
    }

    /**
     * Writes the top-scoring URLs of a CrawlDb to {@code output} using two
     * chained jobs.
     *
     * <p>The first job maps {@code <crawlDb>/current} with
     * {@code CrawlDbTopNMapper} into a temporary directory below
     * {@code mapreduce.cluster.temp.dir} (default {@code "."}). The second job
     * reads that directory and reduces it with {@code CrawlDbTopNReducer} in a
     * single reduce task, producing text output. The temporary directory is
     * deleted when either job fails and after the second job succeeds.
     *
     * @param crawlDb path of the CrawlDb
     * @param topN    stored as {@code db.reader.topn} for the second job
     * @param min     stored as {@code db.reader.topn.min} for the first job
     * @param output  output directory of the second job
     * @param config  base configuration both jobs are created from
     * @throws IOException            if a job fails with an I/O error
     * @throws ClassNotFoundException if a job cannot resolve one of its classes
     * @throws InterruptedException   if waiting for a job is interrupted
     * @throws RuntimeException       if a job completes unsuccessfully
     */
    public void processTopNJob(String crawlDb, long topN, float min, String output, Configuration config) throws IOException, ClassNotFoundException, InterruptedException {
        LOG.info("CrawlDb topN: starting (topN={}, min={})", topN, min);
        LOG.info("CrawlDb db: {}", crawlDb);

        Path resultDir = new Path(output);
        String tempRoot = config.get("mapreduce.cluster.temp.dir", ".");
        int randomSuffix = new Random().nextInt(Integer.MAX_VALUE);
        Path tempDir = new Path(tempRoot + "/readdb-topN-temp-" + randomSuffix);

        // Step 1: write the selected records to the temporary directory.
        Job prepareJob = Job.getInstance(config, "Nutch CrawlDbReader: topN prepare " + crawlDb);
        FileInputFormat.addInputPath(prepareJob, new Path(crawlDb, "current"));
        prepareJob.setInputFormatClass(SequenceFileInputFormat.class);
        prepareJob.setJarByClass(CrawlDbReader.class);
        prepareJob.setMapperClass(CrawlDbTopNMapper.class);
        prepareJob.setReducerClass(Reducer.class);
        FileOutputFormat.setOutputPath(prepareJob, tempDir);
        prepareJob.setOutputFormatClass(SequenceFileOutputFormat.class);
        prepareJob.setOutputKeyClass(FloatWritable.class);
        prepareJob.setOutputValueClass(Text.class);
        prepareJob.getConfiguration().setFloat("db.reader.topn.min", min);

        FileSystem tempFs = tempDir.getFileSystem(config);
        try {
            if (!prepareJob.waitForCompletion(true)) {
                String failure = NutchJob.getJobFailureLogMessage("CrawlDbReader", prepareJob);
                LOG.error(failure);
                tempFs.delete(tempDir, true);
                throw new RuntimeException(failure);
            }
        }
        catch (IOException | ClassNotFoundException | InterruptedException ex) {
            LOG.error(StringUtils.stringifyException(ex));
            tempFs.delete(tempDir, true);
            throw ex;
        }

        // Step 2: reduce the intermediate data to the final text output.
        LOG.info("CrawlDb topN: collecting topN scores.");
        Job collectJob = Job.getInstance(config, "Nutch CrawlDbReader: topN collect " + crawlDb);
        collectJob.getConfiguration().setLong("db.reader.topn", topN);
        FileInputFormat.addInputPath(collectJob, tempDir);
        collectJob.setInputFormatClass(SequenceFileInputFormat.class);
        collectJob.setMapperClass(Mapper.class);
        collectJob.setReducerClass(CrawlDbTopNReducer.class);
        collectJob.setJarByClass(CrawlDbReader.class);
        FileOutputFormat.setOutputPath(collectJob, resultDir);
        collectJob.setOutputFormatClass(TextOutputFormat.class);
        collectJob.setOutputKeyClass(FloatWritable.class);
        collectJob.setOutputValueClass(Text.class);
        collectJob.setNumReduceTasks(1);
        try {
            if (!collectJob.waitForCompletion(true)) {
                String failure = NutchJob.getJobFailureLogMessage("CrawlDbReader", collectJob);
                LOG.error(failure);
                tempFs.delete(tempDir, true);
                throw new RuntimeException(failure);
            }
        }
        catch (IOException | ClassNotFoundException | InterruptedException ex) {
            LOG.error(StringUtils.stringifyException(ex));
            tempFs.delete(tempDir, true);
            throw ex;
        }

        tempFs.delete(tempDir, true);
        LOG.info("CrawlDb topN: done");
    }

    /**
     * Command-line entry point: {@code <crawldb>} followed by one or more of
     * {@code -stats}, {@code -dump}, {@code -url}, {@code -topN}, or an option
     * understood by the superclass argument parser.
     *
     * <p>The statistics, dump, URL lookup and topN actions run on a separate
     * helper reader instance. That instance may open MapFile readers (for
     * {@code -url}), so it is closed before this method returns.
     *
     * <p>NOTE(review): option values are read with {@code args[++i]} without a
     * bounds check, so an option missing its value raises
     * {@code ArrayIndexOutOfBoundsException}.
     *
     * @param args command-line arguments; fewer than two prints the usage text
     * @return {@code -1} on a usage error or unknown argument; the result of
     *         {@code super.run()} when the last parsed argument was consumed by
     *         the superclass parser; {@code 0} otherwise
     * @throws Exception if one of the requested actions fails
     */
    public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException, Exception {
        CrawlDbReader dbr = new CrawlDbReader();
        try {
            if (args.length < 2) {
                System.err.println("Usage: CrawlDbReader <crawldb> (-stats | -dump <out_dir> | -topN <nnnn> <out_dir> [<min>] | -url <url> | -listen <port>)");
                System.err.println("\t<crawldb>\tdirectory name where crawldb is located");
                System.err.println("\t-stats [-sort] \tprint overall statistics to System.out");
                System.err.println("\t\t[-sort]\tlist status sorted by host");
                System.err.println("\t-dump <out_dir> [-format normal|csv|crawldb|json]\tdump the whole db to a text file in <out_dir>");
                System.err.println("\t\t[-format csv]\tdump in Csv format");
                System.err.println("\t\t[-format normal]\tdump in standard format (default option)");
                System.err.println("\t\t[-format crawldb]\tdump as CrawlDB");
                System.err.println("\t\t[-format json]\tdump in JSON Lines format");
                System.err.println("\t\t[-regex <expr>]\tfilter records with expression");
                System.err.println("\t\t[-retry <num>]\tminimum retry count");
                System.err.println("\t\t[-status <status>]\tfilter records by CrawlDatum status");
                System.err.println("\t\t[-expr <expr>]\tJexl expression to execute for this record");
                System.err.println("\t\t[-sample <fraction>]\tOnly process a random sample with this ratio");
                System.err.println("\t-url <url>\tprint information on <url> to System.out");
                System.err.println("\t-listen <port> [-keepClientCnxOpen]\tlisten on <port> for URLs and");
                System.err.println("\t\t\tsend information about <url> back");
                System.err.println("\t-topN <nnnn> <out_dir> [<min>]\tdump top <nnnn> urls sorted by score to <out_dir>");
                System.err.println("\t\t[<min>]\tskip records with scores below this value.");
                System.err.println("\t\t\tThis can significantly improve performance.");
                return -1;
            }
            String param = null;
            String crawlDb = args[0];
            this.crawlDb = crawlDb;
            int numConsumed = 0;
            Configuration config = this.getConf();
            for (int i = 1; i < args.length; ++i) {
                if (args[i].equals("-stats")) {
                    boolean toSort = false;
                    if (i < args.length - 1 && "-sort".equals(args[i + 1])) {
                        toSort = true;
                        ++i;
                    }
                    dbr.processStatJob(crawlDb, config, toSort);
                    continue;
                }
                if (args[i].equals("-dump")) {
                    param = args[++i];
                    String format = "normal";
                    String regex = null;
                    Integer retry = null;
                    String status = null;
                    String expr = null;
                    Float sample = null;
                    // Scan the remaining arguments for dump options; every
                    // recognised option/value pair also advances the outer index.
                    for (int j = i + 1; j < args.length; ++j) {
                        if (args[j].equals("-format")) {
                            format = args[++j];
                            i += 2;
                        }
                        if (args[j].equals("-regex")) {
                            regex = args[++j];
                            i += 2;
                        }
                        if (args[j].equals("-retry")) {
                            retry = Integer.parseInt(args[++j]);
                            i += 2;
                        }
                        if (args[j].equals("-status")) {
                            status = args[++j];
                            i += 2;
                        }
                        if (args[j].equals("-expr")) {
                            expr = args[++j];
                            i += 2;
                        }
                        if (args[j].equals("-sample")) {
                            sample = Float.parseFloat(args[++j]);
                            i += 2;
                        }
                    }
                    dbr.processDumpJob(crawlDb, param, config, format, regex, status, retry, expr, sample);
                    continue;
                }
                if (args[i].equals("-url")) {
                    param = args[++i];
                    StringBuilder output = new StringBuilder();
                    dbr.readUrl(crawlDb, param, config, output);
                    System.out.print(output);
                    continue;
                }
                if (args[i].equals("-topN")) {
                    param = args[++i];
                    long topN = Long.parseLong(param);
                    param = args[++i];
                    float min = 0.0f;
                    if (i < args.length - 1) {
                        min = Float.parseFloat(args[++i]);
                    }
                    dbr.processTopNJob(crawlDb, topN, min, param, config);
                    continue;
                }
                // Anything else is offered to the superclass parser.
                numConsumed = super.parseArgs(args, i);
                if (numConsumed > 0) {
                    i += numConsumed - 1;
                    continue;
                }
                System.err.println("\nError: wrong argument " + args[i]);
                return -1;
            }
            if (numConsumed > 0) {
                return super.run();
            }
            return 0;
        }
        finally {
            // Release any MapFile readers the helper instance opened.
            dbr.close();
        }
    }

    /**
     * Launches the reader through {@code ToolRunner} with a configuration from
     * {@code NutchConfiguration.create()} and exits the JVM with the tool's
     * return code.
     *
     * @param args command-line arguments handed to the tool
     * @throws Exception if the tool run fails
     */
    public static void main(String[] args) throws Exception {
        Configuration conf = NutchConfiguration.create();
        CrawlDbReader tool = new CrawlDbReader();
        int exitCode = ToolRunner.run(conf, tool, args);
        System.exit(exitCode);
    }

    public Object query(Map<String, String> args, Configuration conf, String type, String crawlId) throws Exception {
        HashMap<String, Object> results = new HashMap<String, Object>();
        String crawlDb = crawlId + "/crawldb";
        if (type.equalsIgnoreCase("stats")) {
            boolean sort = false;
            if (args.containsKey("sort") && args.get("sort").equalsIgnoreCase("true")) {
                sort = true;
            }
            TreeMap<String, Writable> stats = this.processStatJobHelper(crawlDb, NutchConfiguration.create(), sort);
            LongWritable totalCnt = (LongWritable)stats.get("T");
            stats.remove("T");
            results.put("totalUrls", String.valueOf(totalCnt.get()));
            HashMap<String, Map<String, Map<String, String>>> statusMap = new HashMap<String, Map<String, Map<String, String>>>();
            for (Map.Entry<String, Writable> entry : stats.entrySet()) {
                String k = entry.getKey();
                long val = 0L;
                double fval = 0.0;
                if (entry.getValue() instanceof LongWritable) {
                    val = ((LongWritable)entry.getValue()).get();
                } else if (entry.getValue() instanceof FloatWritable) {
                    fval = ((FloatWritable)entry.getValue()).get();
                } else if (entry.getValue() instanceof BytesWritable) continue;
                if (k.equals("scn")) {
                    results.put("minScore", String.valueOf(fval));
                    continue;
                }
                if (k.equals("scx")) {
                    results.put("maxScore", String.valueOf(fval));
                    continue;
                }
                if (k.equals("sct")) {
                    results.put("avgScore", String.valueOf(fval / (double)totalCnt.get()));
                    continue;
                }
                if (k.startsWith("status")) {
                    Map<String, Map<String, String>> individualStatusInfo;
                    String[] st = k.split(" ");
                    int code = Integer.parseInt(st[1]);
                    if (st.length > 2) {
                        Map<String, String> hostValues;
                        individualStatusInfo = (Map)statusMap.get(String.valueOf(code));
                        if (individualStatusInfo.containsKey("hostValues")) {
                            hostValues = (Map)individualStatusInfo.get("hostValues");
                        } else {
                            hostValues = new HashMap();
                            individualStatusInfo.put("hostValues", hostValues);
                        }
                        hostValues.put(st[2], String.valueOf(val));
                        continue;
                    }
                    individualStatusInfo = new HashMap<String, String>();
                    individualStatusInfo.put("statusValue", (Map<String, String>)((Object)CrawlDatum.getStatusName((byte)code)));
                    individualStatusInfo.put("count", (Map<String, String>)((Object)String.valueOf(val)));
                    statusMap.put(String.valueOf(code), individualStatusInfo);
                    continue;
                }
                results.put(k, String.valueOf(val));
            }
            results.put("status", statusMap);
            return results;
        }
        if (type.equalsIgnoreCase("dump")) {
            String output = args.get("out_dir");
            String format = "normal";
            String regex = null;
            Integer retry = null;
            String status = null;
            String expr = null;
            Float sample = null;
            if (args.containsKey("format")) {
                format = args.get("format");
            }
            if (args.containsKey("regex")) {
                regex = args.get("regex");
            }
            if (args.containsKey("retry")) {
                retry = Integer.parseInt(args.get("retry"));
            }
            if (args.containsKey("status")) {
                status = args.get("status");
            }
            if (args.containsKey("expr")) {
                expr = args.get("expr");
            }
            if (args.containsKey("sample")) {
                sample = Float.valueOf(Float.parseFloat(args.get("sample")));
            }
            this.processDumpJob(crawlDb, output, conf, format, regex, status, retry, expr, sample);
            File dumpFile = new File(output + "/part-00000");
            return dumpFile;
        }
        if (type.equalsIgnoreCase("topN")) {
            String output = args.get("out_dir");
            long topN = Long.parseLong(args.get("nnn"));
            float min = 0.0f;
            if (args.containsKey("min")) {
                min = Float.parseFloat(args.get("min"));
            }
            this.processTopNJob(crawlDb, topN, min, output, conf);
            File dumpFile = new File(output + "/part-00000");
            return dumpFile;
        }
        if (type.equalsIgnoreCase("url")) {
            String url = args.get("url");
            CrawlDatum res = this.get(crawlDb, url, conf);
            results.put("status", res.getStatus());
            results.put("fetchTime", new Date(res.getFetchTime()));
            results.put("modifiedTime", new Date(res.getModifiedTime()));
            results.put("retriesSinceFetch", res.getRetriesSinceFetch());
            results.put("retryInterval", res.getFetchInterval());
            results.put("score", Float.valueOf(res.getScore()));
            results.put("signature", StringUtil.toHexString(res.getSignature()));
            HashMap<String, String> metadata = new HashMap<String, String>();
            if (res.getMetaData() != null) {
                for (Map.Entry e : res.getMetaData().entrySet()) {
                    metadata.put(String.valueOf(e.getKey()), String.valueOf(e.getValue()));
                }
            }
            results.put("metadata", metadata);
            return results;
        }
        return results;
    }

    /**
     * Mapper that forwards a (URL, datum) record unchanged when it passes every
     * configured filter and drops it otherwise.
     *
     * <p>The filters are read in {@code setup} from the configuration keys
     * {@code regex}, {@code status}, {@code retry}, {@code expr} and
     * {@code sample}. A filter whose key is absent is inactive.
     */
    public static class CrawlDbDumpMapper
    extends Mapper<Text, CrawlDatum, Text, CrawlDatum> {
        /** Compiled {@code regex} setting; {@code null} means no URL pattern filter. */
        Pattern pattern = null;
        /** Matcher built for the most recent key that reached the pattern filter. */
        Matcher matcher = null;
        /** Status name a record must have (case-insensitive); {@code null} means any. */
        String status = null;
        /** Minimum retries-since-fetch a record must have; {@code -1} means any. */
        Integer retry = null;
        /** Parsed {@code expr} setting; {@code null} means no expression filter. */
        JexlScript expr = null;
        /** Threshold compared against {@code Math.random()}; values of 1 or more disable sampling. */
        float sample;

        /**
         * Loads all filter settings from the job configuration.
         *
         * @param context task context supplying the configuration
         */
        public void setup(Mapper.Context context) {
            Configuration jobConf = context.getConfiguration();

            String regex = jobConf.get("regex", null);
            if (regex != null) {
                this.pattern = Pattern.compile(regex);
            }

            this.status = jobConf.get("status", null);
            this.retry = jobConf.getInt("retry", -1);

            String expression = jobConf.get("expr", null);
            if (expression != null) {
                this.expr = JexlUtil.parseExpression(expression);
            }

            this.sample = jobConf.getFloat("sample", 1.0f);
        }

        /**
         * Applies the filters in the order sample, retry, status, regex, expression
         * and writes the record only if none of them rejects it.
         *
         * @param key     record key, used as the URL string for the regex and expression filters
         * @param value   datum stored for {@code key}
         * @param context sink for accepted records
         */
        public void map(Text key, CrawlDatum value, Mapper.Context context) throws IOException, InterruptedException {
            // Sampling: a random draw is only made when sampling is active.
            boolean samplingActive = this.sample < 1.0f;
            if (samplingActive && Math.random() > (double)this.sample) {
                return;
            }

            // Retry filter: records with fewer retries than requested are dropped.
            if (this.retry != -1) {
                if (value.getRetriesSinceFetch() < this.retry) {
                    return;
                }
            }

            // Status filter: compare by status name, ignoring case.
            if (this.status != null) {
                String statusName = CrawlDatum.getStatusName(value.getStatus());
                if (!this.status.equalsIgnoreCase(statusName)) {
                    return;
                }
            }

            // Regex filter: the whole key must match the pattern.
            if (this.pattern != null) {
                this.matcher = this.pattern.matcher(key.toString());
                if (!this.matcher.matches()) {
                    return;
                }
            }

            // Expression filter: delegated to the datum itself.
            if (this.expr != null) {
                boolean accepted = value.execute(this.expr, key.toString());
                if (!accepted) {
                    return;
                }
            }

            context.write(key, value);
        }
    }

    /**
     * Reducer that emits at most a fixed number of (score, URL) records per
     * reducer instance, writing each key with its sign flipped.
     *
     * <p>The per-instance limit is the configured {@code db.reader.topn} value
     * (default 100) divided by the number of reduce tasks, using integer division.
     * NOTE(review): the sign flip presumably undoes the negation applied by
     * {@code CrawlDbTopNMapper}; the job wiring is not visible here.
     */
    public static class CrawlDbTopNReducer
    extends Reducer<FloatWritable, Text, FloatWritable, Text> {
        /** Reused output key, so the incoming key object is never modified. */
        private final FloatWritable outKey = new FloatWritable();
        /** Maximum number of records this instance may emit. */
        private long topN;
        /** Number of records emitted so far by this instance. */
        private long count = 0L;

        /**
         * Writes the values of {@code key} with the negated key until the
         * per-instance limit is reached.
         *
         * @param key     incoming score key; read but never mutated
         * @param values  values grouped under {@code key}
         * @param context sink for the emitted records
         */
        public void reduce(FloatWritable key, Iterable<Text> values, Reducer.Context context) throws IOException, InterruptedException {
            if (this.count >= this.topN) {
                // Limit already reached: nothing more may be written.
                return;
            }
            for (Text value : values) {
                // Whether the framework refreshes `key` between values is not
                // visible here, so negating it in place could flip the sign back
                // and forth across values sharing a key. A separate output key
                // gives the right sign either way.
                this.outKey.set(-key.get());
                context.write(this.outKey, value);
                if (++this.count >= this.topN) {
                    return;
                }
            }
        }

        /**
         * Computes the per-instance limit from the job configuration.
         *
         * @param context task context supplying the configuration
         */
        public void setup(Reducer.Context context) {
            Configuration conf = context.getConfiguration();
            long requested = conf.getLong("db.reader.topn", 100L);
            // A missing or non-positive reducer count is treated as 1 so the
            // division below can neither fail on an unset value nor divide by zero.
            int reducers = Math.max(1, conf.getInt("mapreduce.job.reduces", 1));
            this.topN = requested / (long)reducers;
        }
    }

    /**
     * Mapper that turns each (URL, datum) record into a (negated score, URL)
     * record, skipping records whose score is below a configured minimum.
     *
     * <p>NOTE(review): the score is negated presumably so that an ascending key
     * sort yields the highest scores first; the sort configuration is not
     * visible here.
     */
    public static class CrawlDbTopNMapper
    extends Mapper<Text, CrawlDatum, FloatWritable, Text> {
        /**
         * Reused output key. Held per instance rather than statically so that
         * mapper instances living in the same JVM cannot overwrite each other's
         * pending key.
         */
        private final FloatWritable fw = new FloatWritable();
        /** Minimum score a record needs in order to be emitted. */
        private float min = 0.0f;

        /**
         * Reads the minimum score from {@code db.reader.topn.min} (default 0).
         *
         * @param context task context supplying the configuration
         */
        public void setup(Mapper.Context context) {
            Configuration conf = context.getConfiguration();
            this.min = conf.getFloat("db.reader.topn.min", 0.0f);
        }

        /**
         * Emits {@code (-score, key)} unless the score is below the minimum.
         *
         * @param key     record key, written as the output value
         * @param value   datum whose score becomes the (negated) output key
         * @param context sink for the emitted record
         */
        public void map(Text key, CrawlDatum value, Mapper.Context context) throws IOException, InterruptedException {
            float score = value.getScore();
            if (score < this.min) {
                return;
            }
            this.fw.set(-score);
            context.write(this.fw, key);
        }
    }

    /**
     * Reducer that aggregates the statistic records of one key. The aggregation
     * depends on the key:
     * <ul>
     * <li>{@code T}, {@code status*}, {@code retry*}, {@code ftt}, {@code fit},
     * {@code scNaN}: sum of the long values, written as one record;</li>
     * <li>{@code sc}: minimum and maximum of the float values, written as two
     * records (minimum first);</li>
     * <li>{@code ft}, {@code fi}: minimum and maximum of the long values, written
     * as two records (minimum first);</li>
     * <li>{@code sct}: sum of the float values, written as one record;</li>
     * <li>{@code scd}: t-digest merged from serialized digests and/or raw float
     * values, written as one serialized digest.</li>
     * </ul>
     * Records with any other key produce no output.
     */
    public static class CrawlDbStatReducer
    extends Reducer<Text, NutchWritable, Text, NutchWritable> {
        /** No per-task initialisation is required. */
        public void setup(Reducer.Context context) {
        }

        /**
         * Aggregates {@code values} according to the rules listed on the class.
         *
         * @param key     statistic name
         * @param values  partial values recorded under {@code key}
         * @param context sink for the aggregated record(s)
         */
        public void reduce(Text key, Iterable<NutchWritable> values, Reducer.Context context) throws IOException, InterruptedException {
            String k = key.toString();
            // "scNaN" is a plain counter emitted by CrawlDbStatMapper; it is summed
            // here like the other counters instead of being silently discarded.
            if (k.equals("T") || k.startsWith("status") || k.startsWith("retry") || k.equals("ftt") || k.equals("fit") || k.equals("scNaN")) {
                long sum = 0L;
                for (NutchWritable value : values) {
                    sum += ((LongWritable)value.get()).get();
                }
                context.write(key, new NutchWritable(new LongWritable(sum)));
            } else if (k.equals("sc")) {
                // Start from the infinities: Float.MIN_VALUE is the smallest
                // POSITIVE float and would win over any score that is <= 0.
                float min = Float.POSITIVE_INFINITY;
                float max = Float.NEGATIVE_INFINITY;
                for (NutchWritable nvalue : values) {
                    float value = ((FloatWritable)nvalue.get()).get();
                    if (value > max) {
                        max = value;
                    }
                    if (value < min) {
                        min = value;
                    }
                }
                context.write(key, new NutchWritable(new FloatWritable(min)));
                context.write(key, new NutchWritable(new FloatWritable(max)));
            } else if (k.equals("ft") || k.equals("fi")) {
                long min = Long.MAX_VALUE;
                long max = Long.MIN_VALUE;
                for (NutchWritable nvalue : values) {
                    long value = ((LongWritable)nvalue.get()).get();
                    if (value > max) {
                        max = value;
                    }
                    if (value < min) {
                        min = value;
                    }
                }
                context.write(key, new NutchWritable(new LongWritable(min)));
                context.write(key, new NutchWritable(new LongWritable(max)));
            } else if (k.equals("sct")) {
                // Accumulate in double and narrow once at the end, so that long
                // runs of small addends are not lost to float rounding.
                double total = 0.0;
                for (NutchWritable nvalue : values) {
                    total += ((FloatWritable)nvalue.get()).get();
                }
                context.write(key, new NutchWritable(new FloatWritable((float)total)));
            } else if (k.equals("scd")) {
                MergingDigest tdigest = CrawlDbStatReducer.mergeScoreDigests(values);
                if (tdigest == null) {
                    // No usable value (e.g. only NaN scores): nothing to serialize.
                    return;
                }
                ByteBuffer tdigestBytes = ByteBuffer.allocate(tdigest.smallByteSize());
                tdigest.asSmallBytes(tdigestBytes);
                context.write(key, new NutchWritable(new BytesWritable(tdigestBytes.array())));
            }
        }

        /**
         * Merges serialized digests and raw float scores into a single digest.
         * Values of other types and NaN scores are ignored.
         *
         * @param values wrapped {@code BytesWritable} digests and/or {@code FloatWritable} scores
         * @return the merged digest, or {@code null} if no value contributed
         */
        private static MergingDigest mergeScoreDigests(Iterable<NutchWritable> values) {
            MergingDigest merged = null;
            for (NutchWritable nvalue : values) {
                Writable value = nvalue.get();
                if (value instanceof BytesWritable) {
                    BytesWritable serialized = (BytesWritable)value;
                    // Only the first getLength() bytes of the backing array are data.
                    ByteBuffer buffer = ByteBuffer.wrap(serialized.getBytes(), 0, serialized.getLength());
                    MergingDigest partial = MergingDigest.fromBytes(buffer);
                    if (merged == null) {
                        merged = partial;
                    } else {
                        merged.add((TDigest)partial);
                    }
                } else if (value instanceof FloatWritable) {
                    float score = ((FloatWritable)value).get();
                    if (Float.isNaN(score)) {
                        continue;
                    }
                    if (merged == null) {
                        merged = (MergingDigest)TDigest.createMergingDigest(100.0);
                    }
                    merged.add((double)score);
                }
            }
            return merged;
        }
    }

    public static class CrawlDbStatMapper
    extends Mapper<Text, CrawlDatum, Text, NutchWritable> {
        NutchWritable COUNT_1 = new NutchWritable((Writable)new LongWritable(1L));
        private boolean sort = false;

        public void setup(Mapper.Context context) {
            Configuration conf = context.getConfiguration();
            this.sort = conf.getBoolean("db.reader.stats.sort", false);
        }

        public void map(Text key, CrawlDatum value, Mapper.Context context) throws IOException, InterruptedException {
            context.write((Object)new Text("T"), (Object)this.COUNT_1);
            context.write((Object)new Text("status " + value.getStatus()), (Object)this.COUNT_1);
            context.write((Object)new Text("retry " + value.getRetriesSinceFetch()), (Object)this.COUNT_1);
            if (Float.isNaN(value.getScore())) {
                context.write((Object)new Text("scNaN"), (Object)this.COUNT_1);
            } else {
                NutchWritable score = new NutchWritable((Writable)new FloatWritable(value.getScore()));
                context.write((Object)new Text("sc"), (Object)score);
                context.write((Object)new Text("sct"), (Object)score);
                context.write((Object)new Text("scd"), (Object)score);
            }
            NutchWritable fetchTime = new NutchWritable((Writable)new LongWritable(value.getFetchTime() / 60000L));
            context.write((Object)new Text("ft"), (Object)fetchTime);
            context.write((Object)new Text("ftt"), (Object)fetchTime);
            NutchWritable fetchInterval = new NutchWritable((Writable)new LongWritable((long)value.getFetchInterval()));
            context.write((Object)new Text("fi"), (Object)fetchInterval);
            context.write((Object)new Text("fit"), (Object)fetchInterval);
            if (this.sort) {
                try {
                    URL u = new URL(key.toString());
                    String host = u.getHost();
                    context.write((Object)new Text("status " + value.getStatus() + " " + host), (Object)this.COUNT_1);
                }
                catch (MalformedURLException e) {
                    LOG.error("Failed to get host from URL {}: {}", (Object)key.toString(), (Object)e.getMessage());
                }
            }
        }
    }

    /**
     * Output format that writes every (URL, datum) record as one JSON object
     * per line, optionally through a compression codec.
     */
    public static class CrawlDatumJsonOutputFormat
    extends FileOutputFormat<Text, CrawlDatum> {
        /**
         * Creates the task's work file and wraps it in a JSON line writer.
         *
         * <p>When output compression is enabled, the configured codec (gzip if none
         * is set) supplies both the file extension and the compressing stream.
         * The file is created with overwriting disabled.
         *
         * @param context task context supplying configuration and output location
         * @return a writer producing one JSON document per record
         * @throws IOException if the output file or stream cannot be created
         */
        public RecordWriter<Text, CrawlDatum> getRecordWriter(TaskAttemptContext context) throws IOException {
            Configuration conf = context.getConfiguration();
            boolean isCompressed = FileOutputFormat.getCompressOutput((JobContext)context);
            CompressionCodec codec = null;
            String extension = "";
            if (isCompressed) {
                Class codecClass = CrawlDatumJsonOutputFormat.getOutputCompressorClass((JobContext)context, GzipCodec.class);
                codec = (CompressionCodec)ReflectionUtils.newInstance((Class)codecClass, (Configuration)conf);
                extension = codec.getDefaultExtension();
            }
            Path file = this.getDefaultWorkFile(context, extension);
            FileSystem fs = file.getFileSystem(conf);
            FSDataOutputStream fileOut = fs.create(file, false);
            if (isCompressed) {
                return new LineRecordWriter(new DataOutputStream((OutputStream)codec.createOutputStream((OutputStream)fileOut)));
            }
            return new LineRecordWriter((DataOutputStream)fileOut);
        }

        /**
         * Jackson serializer mapping {@code Writable} values onto JSON scalars:
         * null, boolean and numeric writables become the matching JSON literal,
         * {@code BytesWritable} becomes a binary value, and anything else is
         * written as its {@code toString()} text.
         */
        public static class WritableSerializer
        extends JsonSerializer<Writable> {
            public void serialize(Writable obj, JsonGenerator jgen, SerializerProvider provider) throws IOException {
                if (obj instanceof NullWritable) {
                    jgen.writeNull();
                } else if (obj instanceof BooleanWritable) {
                    jgen.writeBoolean(((BooleanWritable)obj).get());
                } else if (obj instanceof IntWritable) {
                    jgen.writeNumber(((IntWritable)obj).get());
                } else if (obj instanceof VIntWritable) {
                    jgen.writeNumber(((VIntWritable)obj).get());
                } else if (obj instanceof LongWritable) {
                    jgen.writeNumber(((LongWritable)obj).get());
                } else if (obj instanceof VLongWritable) {
                    jgen.writeNumber(((VLongWritable)obj).get());
                } else if (obj instanceof ByteWritable) {
                    jgen.writeNumber((short)((ByteWritable)obj).get());
                } else if (obj instanceof FloatWritable) {
                    jgen.writeNumber(((FloatWritable)obj).get());
                } else if (obj instanceof DoubleWritable) {
                    jgen.writeNumber(((DoubleWritable)obj).get());
                } else if (obj instanceof BytesWritable) {
                    // getBytes() exposes the backing array, which may be longer than
                    // the stored data; emit only the first getLength() bytes.
                    BytesWritable bytes = (BytesWritable)obj;
                    jgen.writeBinary(bytes.getBytes(), 0, bytes.getLength());
                } else {
                    jgen.writeString(obj.toString());
                }
            }
        }

        /**
         * Record writer emitting one JSON object followed by a newline per record.
         * Non-ASCII characters are escaped, and fields are separated with
         * {@code ": "} and {@code ", "} via {@code JsonIndenter}.
         */
        protected static class LineRecordWriter
        extends RecordWriter<Text, CrawlDatum> {
            private final DataOutputStream out;
            private final ObjectMapper jsonMapper = new ObjectMapper();
            private final ObjectWriter jsonWriter;

            /**
             * @param out stream receiving the JSON lines; closed by {@code close}
             */
            public LineRecordWriter(DataOutputStream out) {
                this.out = out;
                this.jsonMapper.getFactory().configure(JsonGenerator.Feature.ESCAPE_NON_ASCII, true);
                SimpleModule module = new SimpleModule();
                module.addSerializer(Writable.class, (JsonSerializer)new WritableSerializer());
                this.jsonMapper.registerModule((Module)module);
                this.jsonWriter = this.jsonMapper.writer((PrettyPrinter)new JsonIndenter());
            }

            /**
             * Serializes one record. Field order in the output follows insertion
             * order below. A missing signature is written as the string
             * {@code "null"} and missing metadata as an empty string.
             *
             * @param key   record key, written as {@code url}
             * @param value datum supplying all remaining fields
             * @throws IOException if serialization or writing fails
             */
            public synchronized void write(Text key, CrawlDatum value) throws IOException {
                Map<String, Object> data = new LinkedHashMap<String, Object>();
                data.put("url", key.toString());
                data.put("statusCode", value.getStatus());
                data.put("statusName", CrawlDatum.getStatusName(value.getStatus()));
                data.put("fetchTime", new Date(value.getFetchTime()).toString());
                data.put("modifiedTime", new Date(value.getModifiedTime()).toString());
                data.put("retriesSinceFetch", value.getRetriesSinceFetch());
                data.put("retryIntervalSeconds", value.getFetchInterval());
                // 86400 seconds per day.
                data.put("retryIntervalDays", value.getFetchInterval() / 86400);
                data.put("score", Float.valueOf(value.getScore()));
                data.put("signature", value.getSignature() != null ? StringUtil.toHexString(value.getSignature()) : "null");

                if (value.getMetaData() != null) {
                    // Keys are flattened to strings; values stay as they are so the
                    // registered Writable serializer can choose their JSON form.
                    Map<String, Object> metaData = new LinkedHashMap<String, Object>();
                    for (Map.Entry<?, ?> e : value.getMetaData().entrySet()) {
                        metaData.put(e.getKey().toString(), e.getValue());
                    }
                    data.put("metadata", metaData);
                } else {
                    data.put("metadata", "");
                }

                this.out.write(this.jsonWriter.writeValueAsBytes(data));
                this.out.writeByte('\n');
            }

            /** Closes the underlying stream. */
            public synchronized void close(TaskAttemptContext context) throws IOException {
                this.out.close();
            }
        }
    }

    /**
     * Output format that writes every (URL, datum) record as one CSV line,
     * preceded by a header line, optionally through a compression codec.
     */
    public static class CrawlDatumCsvOutputFormat
    extends FileOutputFormat<Text, CrawlDatum> {
        /**
         * Creates the task's work file and wraps it in a CSV line writer.
         *
         * <p>When output compression is enabled, the configured codec (gzip if none
         * is set) supplies both the file extension and the compressing stream.
         * The file is created with overwriting disabled.
         *
         * @param context task context supplying configuration and output location
         * @return a writer producing one CSV line per record
         * @throws IOException if the output file or stream cannot be created
         */
        public RecordWriter<Text, CrawlDatum> getRecordWriter(TaskAttemptContext context) throws IOException {
            Configuration conf = context.getConfiguration();
            boolean isCompressed = FileOutputFormat.getCompressOutput((JobContext)context);
            CompressionCodec codec = null;
            String extension = "";
            if (isCompressed) {
                Class codecClass = CrawlDatumCsvOutputFormat.getOutputCompressorClass((JobContext)context, GzipCodec.class);
                codec = (CompressionCodec)ReflectionUtils.newInstance((Class)codecClass, (Configuration)conf);
                extension = codec.getDefaultExtension();
            }
            Path file = this.getDefaultWorkFile(context, extension);
            FileSystem fs = file.getFileSystem(conf);
            FSDataOutputStream fileOut = fs.create(file, false);
            if (isCompressed) {
                return new LineRecordWriter(new DataOutputStream((OutputStream)codec.createOutputStream((OutputStream)fileOut)));
            }
            return new LineRecordWriter((DataOutputStream)fileOut);
        }

        /**
         * Record writer emitting the header on construction and then one
         * comma-separated line per record. The URL, status name, signature and
         * metadata columns are enclosed in double quotes.
         */
        protected static class LineRecordWriter
        extends RecordWriter<Text, CrawlDatum> {
            private final DataOutputStream out;

            /**
             * Stores the stream and writes the header line. A failure to write
             * the header is logged and otherwise ignored.
             *
             * @param out stream receiving the CSV lines; closed by {@code close}
             */
            public LineRecordWriter(DataOutputStream out) {
                this.out = out;
                try {
                    out.writeBytes("Url,Status code,Status name,Fetch Time,Modified Time,Retries since fetch,Retry interval seconds,Retry interval days,Score,Signature,Metadata\n");
                }
                catch (IOException e) {
                    LOG.error("Failed to write header line", (Throwable)e);
                }
            }

            /**
             * Writes one record in the column order of the header. Metadata
             * entries are rendered as {@code key:value} each followed by
             * {@code |||}; a missing signature is written as {@code null}.
             *
             * @param key   record key, written as the URL column
             * @param value datum supplying all remaining columns
             * @throws IOException if writing fails
             */
            public synchronized void write(Text key, CrawlDatum value) throws IOException {
                this.writeQuoted(key.toString());
                this.out.writeByte(',');
                this.writeText(Integer.toString(value.getStatus()));
                this.out.writeByte(',');
                this.writeQuoted(CrawlDatum.getStatusName(value.getStatus()));
                this.out.writeByte(',');
                this.writeText(new Date(value.getFetchTime()).toString());
                this.out.writeByte(',');
                this.writeText(new Date(value.getModifiedTime()).toString());
                this.out.writeByte(',');
                this.writeText(Integer.toString(value.getRetriesSinceFetch()));
                this.out.writeByte(',');
                this.writeText(Float.toString(value.getFetchInterval()));
                this.out.writeByte(',');
                // 86400 seconds per day.
                this.writeText(Float.toString(value.getFetchInterval() / 86400));
                this.out.writeByte(',');
                this.writeText(Float.toString(value.getScore()));
                this.out.writeByte(',');
                this.writeQuoted(value.getSignature() != null ? StringUtil.toHexString(value.getSignature()) : "null");
                this.out.writeByte(',');

                StringBuilder metadata = new StringBuilder();
                if (value.getMetaData() != null) {
                    for (Map.Entry<?, ?> e : value.getMetaData().entrySet()) {
                        metadata.append(e.getKey().toString())
                            .append(':')
                            .append(e.getValue().toString())
                            .append("|||");
                    }
                }
                this.writeQuoted(metadata.toString());
                this.out.writeByte('\n');
            }

            /** Closes the underlying stream. */
            public synchronized void close(TaskAttemptContext context) throws IOException {
                this.out.close();
            }

            /**
             * Writes {@code text} as UTF-8. {@code DataOutputStream.writeBytes}
             * keeps only the low byte of each char and would corrupt any
             * non-ASCII character.
             */
            private void writeText(String text) throws IOException {
                this.out.write(text.getBytes(java.nio.charset.StandardCharsets.UTF_8));
            }

            /**
             * Writes {@code text} as a double-quoted CSV field, doubling any
             * embedded double quote so the field cannot end early.
             */
            private void writeQuoted(String text) throws IOException {
                this.out.writeByte('"');
                this.writeText(text.replace("\"", "\"\""));
                this.out.writeByte('"');
            }
        }
    }

    /**
     * Pretty printer that keeps the minimal single-line layout of its parent
     * but puts a space after the colon between a field name and its value and
     * after the comma between object entries.
     */
    public static class JsonIndenter
    extends MinimalPrettyPrinter {
        /** Text written between a field name and its value. */
        private static final String FIELD_VALUE_SEPARATOR = ": ";
        /** Text written between two entries of the same object. */
        private static final String ENTRY_SEPARATOR = ", ";

        /** Writes the name/value separator as raw output. */
        public void writeObjectFieldValueSeparator(JsonGenerator jg) throws IOException, JsonGenerationException {
            jg.writeRaw(FIELD_VALUE_SEPARATOR);
        }

        /** Writes the entry separator as raw output. */
        public void writeObjectEntrySeparator(JsonGenerator jg) throws IOException, JsonGenerationException {
            jg.writeRaw(ENTRY_SEPARATOR);
        }
    }
}

