好得很程序员自学网

<tfoot draggable='sEl'></tfoot>

大数据排序的实现代码, 理论上支持几百亿没问题吧

大数据排序的实现代码, 理论上支持几百亿没问题吧

大数据排序的实现代码, 理论上支持几百亿没问题吧

先说说思路:

1, 把一个bigdata文件拆分成N个小文件,小文件容量小于当前机器的内存

2,对小文件进行排序处理

3,对小文件进行归并排序:代码中采用两两归并(1 and 1),每次把一个小文件归并进已排序的结果文件,生成新的有序文件,直到全部归并完成

简单说说我这里的归并算法(代码中的sortBySmallFile):假设文件A有n个元素,文件B有m个元素,且两个文件各自已经有序;每次比较两个文件当前读到的元素,把较小者写入新文件,直到两个文件都读完,总共只需顺序读取n+m个元素

OK,上代码

?

package   com.ben.file;

 

import   java.io.BufferedReader;

import   java.io.BufferedWriter;

import   java.io.File;

import   java.io.FileReader;

import   java.io.FileWriter;

import   java.io.IOException;

import   java.util.ArrayList;

import   java.util.Collections;

import   java.util.List;

import   java.util.Random;

 

/**

  *

  * @author Hejinbin

  * QQ 277803242

  * email qing878@gmail.com

  */

public   class   BigDataSort {

 

     public   final   static   String SMALL_FILE_PATH = " D://temp//BigData// " ;

     public   final   static   int   BIG_NUM_LINE = 1000000 ;

     public   final   static   String ORING_FILE_PATH = " D://temp//BigData//bigData.txt " ;

     public   final   static   int   SMALL_FILE_LINE = 100000 ; //1M for 1 small file

     private   File tempFiles[];

 

     public   BigDataSort() throws   IOException {

         createBigsortNums();

         beSmallFileAndSort();

         unitFileToSort();

     }

 

     private   void   createBigsortNums() throws   IOException {

         BufferedWriter writer = new   BufferedWriter( new   FileWriter(ORING_FILE_PATH));

         Random random = new   Random();

         for   ( int   i = 0 ; i < BIG_NUM_LINE; i++) {

             writer.write(String.valueOf(random.nextInt( 100000000 )));

             writer.newLine(); // add a new line . in order to show easy by file

         }

         writer.close();

     }

 

     private   void   beSmallFileAndSort() throws   IOException {

         BufferedReader bigDataFile = new   BufferedReader( new   FileReader(ORING_FILE_PATH));

         List<Integer> smallLine = null ;

         tempFiles = new   File[BIG_NUM_LINE / SMALL_FILE_LINE];

         for   ( int   i = 0 ; i < tempFiles.length; i++) {

             tempFiles[i] = new   File(SMALL_FILE_PATH + "sortTempFile"   + i + ".temp" );

             BufferedWriter smallWtite = new   BufferedWriter( new   FileWriter(tempFiles[i]));

             smallLine = new   ArrayList<Integer>();

             for   ( int   j = 0 ; j < SMALL_FILE_LINE; j++)

                 smallLine.add(Integer.parseInt(bigDataFile.readLine()));

             Collections.sort(smallLine);

             for   (Object num : smallLine.toArray())

                 smallWtite.write(num + "\n" );

             smallWtite.close();

         }

     }

 

     private   void   unitFileToSort() throws   IOException {

         File tempFile = null ;

         for ( int    i= 1 ;i<tempFiles.length;i++){

             tempFile=sortBySmallFile(tempFiles[ 0 ],tempFiles[i]);

             tempFiles[ 0 ].delete();

             tempFiles[ 0 ]=tempFile;

         }

         tempFile.renameTo( new   File(ORING_FILE_PATH+ "sortResult.txt" ));

     }

 

     public   static   File sortBySmallFile(File fromFile, File toFile) throws   IOException {

         BufferedReader fromRd = new   BufferedReader( new   FileReader(fromFile));

         BufferedReader toTempRd = new   BufferedReader( new   FileReader(toFile));

         File newSortFile = new   File(SMALL_FILE_PATH + fromFile.getName() + ".temp" );

         BufferedWriter newSortFileWt = new   BufferedWriter( new   FileWriter(newSortFile));

         int   index = - 1 ;

         int   toPoint = - 1 ;

         while   (fromRd.ready()) {

             index = Integer.parseInt(fromRd.readLine());

             if   (index < toPoint) {

                 newSortFileWt.write(String.valueOf(index));

                 newSortFileWt.newLine();

                 continue ;

             } else   {

                 if   (toPoint != - 1 ) {

                     newSortFileWt.write(String.valueOf(toPoint));

                     newSortFileWt.newLine();

                 }

             }

             while   (toTempRd.ready()) {

                 toPoint = Integer.parseInt(toTempRd.readLine());

                 if   (toPoint < index) {

                     newSortFileWt.write(String.valueOf(toPoint));

                     newSortFileWt.newLine();

                 } else   {

                     newSortFileWt.write(String.valueOf(index));

                     newSortFileWt.newLine();

                     break ;

                 }

             }

 

         }

         newSortFileWt.write(String.valueOf(index>toPoint?index:toPoint));

         newSortFileWt.newLine();

         newSortFileWt.close();

         fromRd.close();

         toTempRd.close();

         toFile.delete();

         return   newSortFile;

     }

 

     public   static   void   main(String[] args) throws   IOException {

         BigDataSort bigDataSort = new   BigDataSort();

     }

}

  注释还没写,过几天加上;代码中可能还有些许BUG,欢迎讨论。

 

 

分类:  JAVA CORE

作者: Leo_wl

    

出处: http://www.cnblogs.com/Leo_wl/

    

本文版权归作者和博客园共有,欢迎转载,但未经作者同意必须保留此段声明,且在文章页面明显位置给出原文连接,否则保留追究法律责任的权利。

版权信息

查看更多关于大数据排序的实现代码, 理论上支持几百亿没问题吧的详细内容...

  阅读:43次