Hadoop: NoSuchMethodException
This is the job, which joins two relations:
import org.apache.hadoop.mapred.*;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.util.*;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.contrib.utils.join.*;
import java.io.*;

public class TwoByTwo extends Configured implements Tool {

    public static class MapClass extends DataJoinMapperBase {

        protected Text generateInputTag(String inputFile) {
            String datasource = inputFile.split("\\.")[0];
            return new Text(datasource);
        }//end generateInputTag

        protected Text generateGroupKey(TaggedMapOutput aRecord) {
            String line = ((Text) aRecord.getData()).toString();
            //between two relations there may be more than one common attributes
            //so group key has to include all these common attributes. Common
            //attributes begin with an '_'( underscore).
            String[] tokens = line.split(",");
            String groupKey = "";
            for (String s : tokens) {
                if (s.charAt(0) == '_') {
                    groupKey = groupKey + s;
                }
            }
            return new Text(groupKey);
        }//end generateGroupKey

        protected TaggedMapOutput generateTaggedMapOutput(Object value) {
            TaggedWritable retv = new TaggedWritable((Text) value);
            retv.setTag(this.inputTag);
            return retv;
        }//end TaggedMapOutput
    }//end MapClass

    public static class Reduce extends DataJoinReducerBase {

        protected TaggedMapOutput combine(Object[] tags, Object[] values) {
            if (tags.length < 2) {
                return null;
            }
            String joinedStr = "";
            for (int i = 0; i < values.length; i++) {
                if (i > 0) {
                    joinedStr += ",";
                }
                TaggedWritable tw = (TaggedWritable) values[i];
                String line = ((Text) tw.getData()).toString();
                String[] tokens = line.split(",", 2);
                joinedStr += tokens[1];
            }
            TaggedWritable retv = new TaggedWritable(new Text(joinedStr));
            retv.setTag((Text) tags[0]);
            return retv;
        }//end TaggedMapOutput
    }//end Reduce

    public static class TaggedWritable extends TaggedMapOutput {

        private Writable data;

        public TaggedWritable(Writable data) {
            this.tag = new Text("");
            this.data = data;
        }//end TaggedWritable

        public Writable getData() {
            return data;
        }//end getData

        public void write(DataOutput out) throws IOException {
            this.tag.write(out);
            this.data.write(out);
        }//end write

        public void readFields(DataInput in) throws IOException {
            this.tag.readFields(in);
            this.data.readFields(in);
        }//end readFields
    }//end TaggedWritable

    public int run(String[] args) throws Exception {
        Configuration conf = getConf();
        JobConf job = new JobConf(conf, TwoByTwo.class);
        Path in = new Path("relations/");
        Path out = new Path("relout/");
        FileInputFormat.setInputPaths(job, in);
        FileOutputFormat.setOutputPath(job, out);
        job.setJobName("TwoByTwo");
        job.setMapperClass(MapClass.class);
        job.setReducerClass(Reduce.class);
        job.setInputFormat(TextInputFormat.class);
        job.setOutputFormat(TextOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(TaggedWritable.class);
        job.set("mapred.textoutputformat.separator", ",");
        JobClient.runJob(job);
        return 0;
    }//end run

    public static void main(String[] args) throws Exception {
        int res = ToolRunner.run(new Configuration(), new TwoByTwo(), args);
        System.exit(res);
    }//end main
}
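To make the tagging and grouping concrete (the sample record and file name below are hypothetical, not taken from the original post): for a line such as _k1,a,b read from a file named relation1.txt, generateInputTag returns the tag relation1 (the file name up to the first dot), and generateGroupKey returns _k1, since it concatenates every comma-separated token that starts with an underscore. Records from the two relations whose underscore-prefixed attributes all match therefore land in the same reduce group, where combine drops the first field of each tagged record and joins what remains with commas.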
When I run this job,
bin/hadoop jar /home/hduser/TwoByTwo.jar TwoByTwo -libjars /usr/local/hadoop/contrib/datajoin/hadoop-datajoin-1.0.3.jar
MapClass runs fine. After the Reduce phase has been running for a while, I get this NoSuchMethodException:
12/10/18 16:38:17 INFO mapred.JobClient: map 100% reduce 27%
12/10/18 16:38:19 INFO mapred.JobClient: Task Id : attempt_201210181416_0013_r_000000_0, Status : FAILED
java.lang.RuntimeException: java.lang.NoSuchMethodException: TwoByTwo$TaggedWritable.<init>()
    at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:115)
    at org.apache.hadoop.io.serializer.WritableSerialization$WritableDeserializer.deserialize(WritableSerialization.java:62)
    at org.apache.hadoop.io.serializer.WritableSerialization$WritableDeserializer.deserialize(WritableSerialization.java:40)
    at org.apache.hadoop.mapred.Task$ValuesIterator.readNextValue(Task.java:1271)
    at org.apache.hadoop.mapred.Task$ValuesIterator.next(Task.java:1211)
    at org.apache.hadoop.mapred.ReduceTask$ReduceValuesIterator.moveToNext(ReduceTask.java:249)
    at org.apache.hadoop.mapred.ReduceTask$ReduceValuesIterator.next(ReduceTask.java:245)
    at org.apache.hadoop.contrib.utils.join.DataJoinReducerBase.regroup(DataJoinReducerBase.java:106)
    at org.apache.hadoop.contrib.utils.join.DataJoinReducerBase.reduce(DataJoinReducerBase.java:129)
    at org.apache.hadoop.mapred.ReduceTask.runOldReducer(ReduceTask.java:519)
    at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:420)
    at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:415)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1121)
    at org.apache.hadoop.mapred.Child.main(Child.java:249)
Caused by: java.lang.NoSuchMethodException: TwoByTwo$TaggedWritable.<init>()
    at java.lang.Class.getConstructor0(Class.java:2721)
    at java.lang.Class.getDeclaredConstructor(Class.java:2002)
    at org.apache.hadoop.util.ReflectionUtils.newInstance(ReflectionUtils.java:109)
    ... 15 more
I have a problem with the nested class TaggedWritable. Why do I have a problem with this class on the reduce side but not on the map side? How can I resolve this error? Does the join constraint between the two relations play any role in the error? Thanks for any help.
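For what it's worth, the stack trace itself narrows the failure down: WritableSerialization's deserializer calls ReflectionUtils.newInstance, which throws NoSuchMethodException: TwoByTwo$TaggedWritable.<init>(), meaning it cannot find a no-argument constructor. On the map side the mapper builds TaggedWritable objects itself via new TaggedWritable(value), but on the reduce side Hadoop has to rebuild every shuffled value by reflectively creating an empty instance and then calling readFields on it, which is why the problem only surfaces in the reducer. Below is a minimal sketch, not taken from the original thread, of what a reflection-friendly TaggedWritable could look like; it assumes the wrapped value is always a Text, as it is in this job, and it reuses the imports already present in TwoByTwo:

    public static class TaggedWritable extends TaggedMapOutput {

        private Writable data;

        // No-argument constructor so that ReflectionUtils.newInstance can create
        // an empty instance during reduce-side deserialization. data is set to an
        // empty Text, assuming (as in this job) that only Text values are wrapped.
        public TaggedWritable() {
            this.tag = new Text("");
            this.data = new Text("");
        }

        public TaggedWritable(Writable data) {
            this.tag = new Text("");
            this.data = data;
        }

        public Writable getData() {
            return data;
        }

        public void write(DataOutput out) throws IOException {
            this.tag.write(out);
            this.data.write(out);
        }

        // readFields populates the pre-created tag and data objects, which is why
        // both must be non-null after the no-argument constructor has run.
        public void readFields(DataInput in) throws IOException {
            this.tag.readFields(in);
            this.data.readFields(in);
        }
    }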
Source: https://stackoverflow.com/questions/12956488