Site Overlay

大数据学习01-完整代码

文章热度: 1 热度

WTF.java

package Jihe;

import java.io.*;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Cleans scraped job-posting records and derives salary statistics from them.
 *
 * <p>A record is a comma-separated line of eight fields: number, job name,
 * company, request, salary text, network, location and one unnamed field.
 */
public class WTF {

    /**
     * Salary range such as {@code 14.4-21.6万元}. The expression is kept exactly as
     * originally written; note that the unescaped {@code .} matches any character
     * and that {@code |} is a literal member of the character class.
     */
    private static final Pattern RANGE_SALARY =
            Pattern.compile("\\d+(.)?(\\d+)?[-|至]\\d+(.)?(\\d+)?(\\D+)?");

    /** Single salary figure followed by optional non-digit text; tried when no range is found. */
    private static final Pattern SINGLE_SALARY =
            Pattern.compile("\\d+(.)?(\\d+)?(\\D+)?");

    /** Number of comma-separated fields a record must have to be accepted. */
    private static final int EXPECTED_FIELD_COUNT = 8;

    /**
     * Totals above this value are divided by 12 once more.
     * NOTE(review): presumably such totals are treated as yearly amounts - confirm.
     */
    private static final float MONTHLY_CEILING = 100000;

    /**
     * Reads a file, drops everything before the first comma of each line,
     * de-duplicates the remainders and prefixes each unique remainder with a
     * fresh sequence number starting at 1.
     *
     * <p>Lines that contain no comma are skipped; previously they caused a
     * {@link StringIndexOutOfBoundsException}.
     *
     * @param filePath path of the file to read
     * @return the renumbered unique records, each still starting with its comma-separated fields
     * @throws IOException if the file cannot be opened or read
     */
    public Set<String> removeRecommmand(String filePath) throws IOException {
        Set<String> uniqueRecords = new HashSet<String>();
        // try-with-resources closes the reader even when readLine() fails.
        try (BufferedReader reader = new BufferedReader(new FileReader(filePath))) {
            String line;
            while ((line = reader.readLine()) != null) {
                int firstComma = line.indexOf(',');
                if (firstComma < 0) {
                    continue; // nothing to strip and not a record
                }
                // The comma itself is kept so a new number can simply be prepended.
                uniqueRecords.add(line.substring(firstComma));
            }
        }

        Set<String> numbered = new HashSet<String>();
        int count = 1;
        for (String record : uniqueRecords) {
            numbered.add(count + record);
            count++;
        }
        return numbered;
    }

    /**
     * Parses records into {@link Jobs} beans and computes an average salary for each.
     *
     * <p>Records that do not split into exactly eight fields are ignored, and so are
     * records whose salary text contains "面议" (they are not added to the result).
     * A record whose salary text contains no digits is kept with its average salary
     * left at the bean's default.
     *
     * @param jobs records as produced by {@link #removeRecommmand(String)}
     * @return the parsed jobs keyed by their record number
     */
    public Map<Integer,Jobs> insertData(Set<String> jobs) {
        Map<Integer,Jobs> jobsMap = new HashMap<>();
        for (String txt : jobs) {
            String[] item = txt.split(",");
            if (item.length != EXPECTED_FIELD_COUNT) {
                continue;
            }

            Jobs job = new Jobs();
            try {
                job.setNumber(Integer.parseInt(item[0]));
            } catch (NumberFormatException e) {
                // Fallback kept from the original implementation.
                job.setNumber(1);
            }
            job.setJobName(item[1]);
            job.setCompany(item[2]);
            job.setRequest(item[3]);
            job.setSalary(item[4]);
            job.setNetwork(item[5]);
            job.setLocation(item[6]);
            job.setUnknown(item[7]);

            // Compute the average salary.
            String salary = job.getSalary();
            if (salary.trim().contains("面议")) {
                // No figure to work with; such records are left out of the result.
                continue;
            }

            Matcher range = RANGE_SALARY.matcher(salary);
            if (range.find()) {
                job.setAvgSalary(averageSalary(salary, range.group(), false));
                System.out.println(job.getAvgSalary());
            } else {
                Matcher single = SINGLE_SALARY.matcher(salary);
                if (single.find()) {
                    job.setAvgSalary(averageSalary(salary, single.group(), true));
                    System.out.println(job.getAvgSalary());
                } else {
                    System.out.println(salary);
                    System.out.println(job.getNumber());
                }
            }
            jobsMap.put(job.getNumber(), job);
        }
        return jobsMap;
    }

    /**
     * Averages the numbers found in the matched part of a salary text and scales
     * the result according to the unit markers present in the full salary text.
     *
     * @param salary         the complete salary text, inspected for unit markers
     * @param matched        the part of {@code salary} selected by the salary pattern
     * @param allowDailyRate whether a "天" marker multiplies the total by 30
     * @return the scaled average, or 0 when no non-zero number could be parsed
     */
    private static float averageSalary(String salary, String matched, boolean allowDailyRate) {
        System.out.println(matched);
        // Everything that is not part of a number becomes a separator.
        String numbersOnly = matched.replaceAll("[^.|0-9]", ",");
        System.out.println(numbersOnly);

        float sum = 0;
        int count = 0;
        for (String token : numbersOnly.split(",")) {
            float value;
            try {
                value = Float.parseFloat(token);
            } catch (NumberFormatException ignored) {
                // Empty or malformed token: contributes nothing. The previous
                // implementation re-added the last parsed value here.
                continue;
            }
            System.out.print("  a=" + value);
            sum += value;
            if (value != 0) {
                count++;
            }
        }
        System.out.println("count" + count);

        if (salary.contains("年")) sum = sum / 12;
        if (salary.contains("万")) sum = sum * 10000;
        if (salary.contains("千")) sum = sum * 1000;
        if (salary.contains("K")) sum = sum * 1000;
        if (allowDailyRate && salary.contains("天")) sum = sum * 30;

        if (count == 0) {
            return 0; // avoids storing NaN from a division by zero
        }
        return sum <= MONTHLY_CEILING ? sum / count : sum / count / 12;
    }

    /**
     * Computes the mean of the average salaries of all jobs.
     *
     * @param jm jobs keyed by record number
     * @return the mean average salary, or 0 when {@code jm} is empty
     */
    public double culculateAverageSalaries(Map<Integer,Jobs> jm) {
        if (jm.isEmpty()) {
            return 0;
        }
        double total = 0;
        for (Jobs job : jm.values()) {
            total += job.getAvgSalary();
        }
        return total / jm.size();
    }

    /**
     * Computes the mean average salary per location.
     *
     * @param jm jobs keyed by record number
     * @return mean average salary keyed by location
     */
    public Map<String,Float> culculateAverageSalaries_City(Map<Integer,Jobs> jm) {
        Map<String, List<Float>> byCity = new HashMap<>();
        for (Jobs job : jm.values()) {
            addSalary(byCity, job.getLocation(), job.getAvgSalary());
        }
        return averagePerGroup(byCity);
    }

    /**
     * Computes the mean average salary per job name.
     *
     * @param jm jobs keyed by record number
     * @return mean average salary keyed by job name
     */
    public Map<String,Float> culculateAverageSalaries_job(Map<Integer,Jobs> jm) {
        Map<String, List<Float>> byJobName = new HashMap<>();
        for (Jobs job : jm.values()) {
            addSalary(byJobName, job.getJobName(), job.getAvgSalary());
        }
        return averagePerGroup(byJobName);
    }

    /**
     * Tells whether two {location, job name} pairs denote the same group.
     *
     * @param str1 first pair; elements 0 and 1 are compared
     * @param str2 second pair; elements 0 and 1 are compared
     * @return {@code true} when both elements are equal (null-safe)
     */
    public boolean isSameJob(String[] str1,String[] str2) {
        return Objects.equals(str1[0], str2[0]) && Objects.equals(str1[1], str2[1]);
    }

    /**
     * Computes the mean average salary per (location, job name) combination.
     *
     * <p>Grouping is done on a {@link List} key because arrays use identity-based
     * {@code hashCode}/{@code equals}; keyed by {@code String[]} directly, every
     * job ended up in its own group.
     *
     * @param jm jobs keyed by record number
     * @return mean average salary keyed by a two-element array {location, job name},
     *         one entry per distinct combination
     */
    public Map<String[],Float> culculateAverageSalaries_job_city(Map<Integer,Jobs> jm) {
        Map<List<String>, List<Float>> byCityAndJob = new HashMap<>();
        for (Jobs job : jm.values()) {
            List<String> key = Arrays.asList(job.getLocation(), job.getJobName());
            addSalary(byCityAndJob, key, job.getAvgSalary());
        }

        Map<String[],Float> averages = new HashMap<>();
        for (Map.Entry<List<String>, List<Float>> group : byCityAndJob.entrySet()) {
            String[] cityAndJob = group.getKey().toArray(new String[0]);
            averages.put(cityAndJob, meanOf(group.getValue()));
        }
        return averages;
    }

    /** Appends a salary to the list stored under the key, creating the list on first use. */
    private static <K> void addSalary(Map<K, List<Float>> groups, K key, float salary) {
        List<Float> bucket = groups.get(key);
        if (bucket == null) {
            bucket = new ArrayList<>();
            groups.put(key, bucket);
        }
        bucket.add(salary);
    }

    /** Replaces every group's salary list with its mean. */
    private static <K> Map<K, Float> averagePerGroup(Map<K, List<Float>> groups) {
        Map<K, Float> averages = new HashMap<>();
        for (Map.Entry<K, List<Float>> group : groups.entrySet()) {
            averages.put(group.getKey(), meanOf(group.getValue()));
        }
        return averages;
    }

    /** Mean of the values, accumulated as value/size per element as the original code did. */
    private static float meanOf(List<Float> values) {
        int size = values.size();
        float mean = 0;
        for (float value : values) {
            mean += value / size;
        }
        return mean;
    }

    /**
     * Writes every job as one comma-separated line: number, job name, company,
     * request, average salary, salary text, network, location, unnamed field.
     *
     * @param jobsMap  jobs keyed by record number
     * @param filename path of the file to create or overwrite
     * @throws IOException if the file cannot be opened or written
     */
    public void writeFile(Map<Integer,Jobs> jobsMap,String filename) throws IOException {
        // try-with-resources flushes and closes the writer even when a write fails.
        try (BufferedWriter writer = new BufferedWriter(new FileWriter(filename))) {
            for (Map.Entry<Integer, Jobs> entry : jobsMap.entrySet()) {
                Jobs job = entry.getValue();
                String line = entry.getKey() + "," +
                        job.getJobName() + "," +
                        job.getCompany() + "," +
                        job.getRequest() + "," +
                        job.getAvgSalary() + "," +
                        job.getSalary() + "," +
                        job.getNetwork() + "," +
                        job.getLocation() + "," +
                        job.getUnknown();
                writer.write(line);
                writer.newLine();
            }
        }
    }
}

Jobs.java – Bean类

package Jihe;

/**
 * Mutable bean describing one job posting.
 *
 * <p>Sample input record (fields in declaration order, without the average salary):
 * {@code 1,工程师,北京闪银奇异科技有限公司,1年以上,14.4-21.6万元,https://www.jobui.com/jobs?jobKw=ETL&cityKw=北京&n=1,北京,2}
 */
public class Jobs {

    /** Record number. */
    private int number;
    /** Job title. */
    private String jobName;
    /** Company name. */
    private String company;
    /** Fourth field of the record (e.g. {@code 1年以上} in the sample above). */
    private String request;
    /** Salary exactly as it appears in the record. */
    private String salary;
    /** Sixth field of the record (a URL in the sample above). */
    private String network;
    /** Location of the job. */
    private String location;
    /** Last field of the record; its meaning is not known. */
    private String unknown;
    /** Average salary derived from {@link #salary}. */
    private float avgSalary;

    /** @return the derived average salary */
    public float getAvgSalary() { return this.avgSalary; }

    /** @param avgSalary the derived average salary */
    public void setAvgSalary(float avgSalary) { this.avgSalary = avgSalary; }

    /** @return the record number */
    public int getNumber() { return this.number; }

    /** @param number the record number */
    public void setNumber(int number) { this.number = number; }

    /** @return the job title */
    public String getJobName() { return this.jobName; }

    /** @param jobName the job title */
    public void setJobName(String jobName) { this.jobName = jobName; }

    /** @return the company name */
    public String getCompany() { return this.company; }

    /** @param company the company name */
    public void setCompany(String company) { this.company = company; }

    /** @return the fourth field of the record */
    public String getRequest() { return this.request; }

    /** @param request the fourth field of the record */
    public void setRequest(String request) { this.request = request; }

    /** @return the raw salary text */
    public String getSalary() { return this.salary; }

    /** @param salary the raw salary text */
    public void setSalary(String salary) { this.salary = salary; }

    /** @return the sixth field of the record */
    public String getNetwork() { return this.network; }

    /** @param network the sixth field of the record */
    public void setNetwork(String network) { this.network = network; }

    /** @return the job location */
    public String getLocation() { return this.location; }

    /** @param location the job location */
    public void setLocation(String location) { this.location = location; }

    /** @return the last field of the record */
    public String getUnknown() { return this.unknown; }

    /** @param unknown the last field of the record */
    public void setUnknown(String unknown) { this.unknown = unknown; }
}

main.java – 主函数

package Jihe;


import com.sun.org.apache.xerces.internal.impl.xpath.regex.Match;

import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Command-line entry point: reads the raw salary file, cleans it with
 * {@link WTF} and writes the cleaned records to a second file.
 */
public class main {

    /** Input file used when no first argument is given. */
    private static final String DEFAULT_INPUT = "D:\\实习软件\\salaryall.txt";

    /** Output file used when no second argument is given. */
    private static final String DEFAULT_OUTPUT = "d:\\实习软件\\salaryall_clear.txt";

    /**
     * Runs the cleaning pipeline.
     *
     * @param args optional overrides: {@code args[0]} is the input path and
     *             {@code args[1]} the output path; the built-in defaults apply
     *             to whichever is missing
     * @throws IOException if the input cannot be read or the output cannot be written
     */
    public static void main(String[] args) throws IOException {
        String inputPath = args.length > 0 ? args[0] : DEFAULT_INPUT;
        String outputPath = args.length > 1 ? args[1] : DEFAULT_OUTPUT;

        WTF wtf = new WTF();
        Set<String> jobs = wtf.removeRecommmand(inputPath);
        Map<Integer,Jobs> jobsMap = wtf.insertData(jobs);

        wtf.writeFile(jobsMap, outputPath);
    }
}
0

说点什么

200
  Subscribe  
提醒