Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- public class S3ATextInputFormat extends TextInputFormat {
- private static final PathFilter hiddenFileFilter = p -> {
- String name = p.getName();
- return !name.startsWith("_") && !name.startsWith(".");
- };
- private static class MultiPathFilter implements PathFilter {
- private List<PathFilter> filters;
- public MultiPathFilter(List<PathFilter> filters) {
- this.filters = filters;
- }
- public boolean accept(Path path) {
- for (PathFilter filter : filters) {
- if (!filter.accept(path)) {
- return false;
- }
- }
- return true;
- }
- }
- @Override
- protected List<FileStatus> listStatus(JobContext job) throws IOException {
- Path[] inputPaths = getInputPaths(job);
- Configuration conf = job.getConfiguration();
- List<PathFilter> filters = new ArrayList<PathFilter>();
- filters.add(hiddenFileFilter);
- PathFilter jobFilter = getInputPathFilter(job);
- if (jobFilter != null) {
- filters.add(jobFilter);
- }
- PathFilter inputFilter = new MultiPathFilter(filters);
- return Arrays
- .stream(inputPaths)
- .flatMap(
- inputPath -> {
- try {
- return inputPath.getFileSystem(conf).listFiles(inputPath, true)
- } catch (IOException e) {
- throw new RuntimeException(e);
- }
- }
- )
- .collect(Collectors.toList());
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement