io.druid.segment.loading.SegmentLoadingException: Exception loading >> segment[GpuCoreStatus_2016-08-29T05:00:00.000Z_2016-08-29T06:00:00.000Z_2016-08-29T05:25:23.417Z] > > at >> io.druid.server.coordination.ZkCoordinator.loadSegment(ZkCoordinator.java:309) >> ~[druid-server-0.9.1.1.jar:0.9.1.1] > > at >> io.druid.server.coordination.ZkCoordinator.addSegment(ZkCoordinator.java:350) >> [druid-server-0.9.1.1.jar:0.9.1.1] > > at >> io.druid.server.coordination.SegmentChangeRequestLoad.go(SegmentChangeRequestLoad.java:44) >> [druid-server-0.9.1.1.jar:0.9.1.1] > > at >> io.druid.server.coordination.ZkCoordinator$1.childEvent(ZkCoordinator.java:152) >> [druid-server-0.9.1.1.jar:0.9.1.1] > > at >> org.apache.curator.framework.recipes.cache.PathChildrenCache$5.apply(PathChildrenCache.java:522) >> [curator-recipes-2.10.0.jar:?] > > at >> org.apache.curator.framework.recipes.cache.PathChildrenCache$5.apply(PathChildrenCache.java:516) >> [curator-recipes-2.10.0.jar:?] > > at >> org.apache.curator.framework.listen.ListenerContainer$1.run(ListenerContainer.java:93) >> [curator-framework-2.10.0.jar:?] > > at >> com.google.common.util.concurrent.MoreExecutors$SameThreadExecutorService.execute(MoreExecutors.java:297) >> [guava-16.0.1.jar:?] > > at >> org.apache.curator.framework.listen.ListenerContainer.forEach(ListenerContainer.java:85) >> [curator-framework-2.10.0.jar:?] > > at >> org.apache.curator.framework.recipes.cache.PathChildrenCache.callListeners(PathChildrenCache.java:514) >> [curator-recipes-2.10.0.jar:?] > > at >> org.apache.curator.framework.recipes.cache.EventOperation.invoke(EventOperation.java:35) >> [curator-recipes-2.10.0.jar:?] > > at >> org.apache.curator.framework.recipes.cache.PathChildrenCache$9.run(PathChildrenCache.java:772) >> [curator-recipes-2.10.0.jar:?] 
> > at >> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) >> [?:1.8.0_91] > > at java.util.concurrent.FutureTask.run(FutureTask.java:266) >> [?:1.8.0_91] > > at >> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) >> [?:1.8.0_91] > > at java.util.concurrent.FutureTask.run(FutureTask.java:266) >> [?:1.8.0_91] > > at >> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) >> [?:1.8.0_91] > > at >> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) >> [?:1.8.0_91] > > at java.lang.Thread.run(Thread.java:745) [?:1.8.0_91] > > Caused by: io.druid.segment.loading.SegmentLoadingException: >> /data/imply/druid/segment-cache/GpuCoreStatus/2016-08-29T05:00:00.000Z_2016-08-29T06:00:00.000Z/2016-08-29T05:25:23.417Z/0/index.drd >> (No such file or directory) > > at >> io.druid.segment.loading.MMappedQueryableIndexFactory.factorize(MMappedQueryableIndexFactory.java:52) >> ~[druid-server-0.9.1.1.jar:0.9.1.1] > > at >> io.druid.segment.loading.SegmentLoaderLocalCacheManager.getSegment(SegmentLoaderLocalCacheManager.java:96) >> ~[druid-server-0.9.1.1.jar:0.9.1.1] > > at >> io.druid.server.coordination.ServerManager.loadSegment(ServerManager.java:152) >> ~[druid-server-0.9.1.1.jar:0.9.1.1] > > at >> io.druid.server.coordination.ZkCoordinator.loadSegment(ZkCoordinator.java:305) >> ~[druid-server-0.9.1.1.jar:0.9.1.1] > > ... 
18 more > > Caused by: java.io.FileNotFoundException: >> /data/imply/druid/segment-cache/GpuCoreStatus/2016-08-29T05:00:00.000Z_2016-08-29T06:00:00.000Z/2016-08-29T05:25:23.417Z/0/index.drd >> (No such file or directory) > > at java.io.FileInputStream.open0(Native Method) ~[?:1.8.0_91] > > at java.io.FileInputStream.open(FileInputStream.java:195) > ~[?:1.8.0_91] > at java.io.FileInputStream.<init>(FileInputStream.java:138) > ~[?:1.8.0_91] > at > io.druid.segment.SegmentUtils.getVersionFromDir(SegmentUtils.java:43) > ~[druid-api-0.9.1.1.jar:0.9.1.1] > at io.druid.segment.IndexIO.loadIndex(IndexIO.java:211) > ~[druid-processing-0.9.1.1.jar:0.9.1.1] > at > io.druid.segment.loading.MMappedQueryableIndexFactory.factorize(MMappedQueryableIndexFactory.java:49) > ~[druid-server-0.9.1.1.jar:0.9.1.1] > at > io.druid.segment.loading.SegmentLoaderLocalCacheManager.getSegment(SegmentLoaderLocalCacheManager.java:96) > ~[druid-server-0.9.1.1.jar:0.9.1.1] > at > io.druid.server.coordination.ServerManager.loadSegment(ServerManager.java:152) > ~[druid-server-0.9.1.1.jar:0.9.1.1] > at > io.druid.server.coordination.ZkCoordinator.loadSegment(ZkCoordinator.java:305) > ~[druid-server-0.9.1.1.jar:0.9.1.1] > ... 
18 more > 2016-08-29T07:23:12,299 INFO [ZkCoordinator-0] > com.metamx.emitter.core.LoggingEmitter - Event > [{"feed":"alerts","timestamp":"2016-08-29T07:23:12.299Z","service":"druid/historical","host":" > 10.1.7.7:8083","severity":"component-failure","description":"Failed to > load segment for > dataSource","data":{"class":"io.druid.server.coordination.ZkCoordinator","exceptionType":"io.druid.segment.loading.SegmentLoadingException","exceptionMessage":"Exception > loading > segment[GpuCoreStatus_2016-08-29T05:00:00.000Z_2016-08-29T06:00:00.000Z_2016-08-29T05:25:23.417Z]","exceptionStackTrace":"io.druid.segment.loading.SegmentLoadingException: > Exception loading > segment[GpuCoreStatus_2016-08-29T05:00:00.000Z_2016-08-29T06:00:00.000Z_2016-08-29T05:25:23.417Z]\n\tat > io.druid.server.coordination.ZkCoordinator.loadSegment(ZkCoordinator.java:309)\n\tat > io.druid.server.coordination.ZkCoordinator.addSegment(ZkCoordinator.java:350)\n\tat > io.druid.server.coordination.SegmentChangeRequestLoad.go(SegmentChangeRequestLoad.java:44)\n\tat > io.druid.server.coordination.ZkCoordinator$1.childEvent(ZkCoordinator.java:152)\n\tat > org.apache.curator.framework.recipes.cache.PathChildrenCache$5.apply(PathChildrenCache.java:522)\n\tat > org.apache.curator.framework.recipes.cache.PathChildrenCache$5.apply(PathChildrenCache.java:516)\n\tat > org.apache.curator.framework.listen.ListenerContainer$1.run(ListenerContainer.java:93)\n\tat > com.google.common.util.concurrent.MoreExecutors$SameThreadExecutorService.execute(MoreExecutors.java:297)\n\tat > org.apache.curator.framework.listen.ListenerContainer.forEach(ListenerContainer.java:85)\n\tat > org.apache.curator.framework.recipes.cache.PathChildrenCache.callListeners(PathChildrenCache.java:514)\n\tat > org.apache.curator.framework.recipes.cache.EventOperation.invoke(EventOperation.java:35)\n\tat > org.apache.curator.framework.recipes.cache.PathChildrenCache$9.run(PathChildrenCache.java:772)\n\tat > 
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)\n\tat > java.util.concurrent.FutureTask.run(FutureTask.java:266)\n\tat > java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)\n\tat > java.util.concurrent.FutureTask.run(FutureTask.java:266)\n\tat > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)\n\tat > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)\n\tat > java.lang.Thread.run(Thread.java:745)\nCaused by: > io.druid.segment.loading.SegmentLoadingException: > /data/imply/druid/segment-cache/GpuCoreStatus/2016-08-29T05:00:00.000Z_2016-08-29T06:00:00.000Z/2016-08-29T05:25:23.417Z/0/index.drd > (No such file or directory)\n\tat > io.druid.segment.loading.MMappedQueryableIndexFactory.factorize(MMappedQueryableIndexFactory.java:52)\n\tat > io.druid.segment.loading.SegmentLoaderLocalCacheManager.getSegment(SegmentLoaderLocalCacheManager.java:96)\n\tat > io.druid.server.coordination.ServerManager.loadSegment(ServerManager.java:152)\n\tat > io.druid.server.coordination.ZkCoordinator.loadSegment(ZkCoordinator.java:305)\n\t... > 18 more\nCaused by: java.io.FileNotFoundException: > /data/imply/druid/segment-cache/GpuCoreStatus/2016-08-29T05:00:00.000Z_2016-08-29T06:00:00.000Z/2016-08-29T05:25:23.417Z/0/index.drd > (No such file or directory)\n\tat java.io.FileInputStream.open0(Native > Method)\n\tat java.io.FileInputStream.open(FileInputStream.java:195)\n\tat > java.io.FileInputStream.<init>(FileInputStream.java:138)\n\tat > io.druid.segment.SegmentUtils.getVersionFromDir(SegmentUtils.java:43)\n\tat > io.druid.segment.IndexIO.loadIndex(IndexIO.java:211)\n\tat > io.druid.segment.loading.MMappedQueryableIndexFactory.factorize(MMappedQueryableIndexFactory.java:49)\n\t... 
> 21 > more\n","segment":{"dataSource":"GpuCoreStatus","interval":"2016-08-29T05:00:00.000Z/2016-08-29T06:00:00.000Z","version":"2016-08-29T05:25:23.417Z","loadSpec":{"type":"local","path":"/data/imply/druid/segments/GpuCoreStatus/2016-08-29T05:00:00.000Z_2016-08-29T06:00:00.000Z/2016-08-29T05:25:23.417Z/0/index.zip"},"dimensions":"id,bar1MemoryUsageComputeMode,pciBus,driverModelVbiosVersion,driverModelMinorNumber,driverModelGpuUuid,pciDeviceId,pciDevice,pciBusId,productName,productBrand,persistenceMode,ip,pciBridgeChipPerformanceState,pciSubSystemId,pciDomain","metrics":"count,fbMemoryUsageUsed_sum,fbMemoryUsageUsed_max,fbMemoryUsageUsed_min,pciBridgeChipFanSpeed_sum,pciBridgeChipFanSpeed_max,pciBridgeChipFanSpeed_min,fbMemoryUsageTotal_sum,fbMemoryUsageTotal_max,fbMemoryUsageTotal_min,fbMemoryUsageFree_sum,fbMemoryUsageFree_max,fbMemoryUsageFree_min,temperatureGpuCurrentTemp_sum,temperatureGpuCurrentTemp_max,temperatureGpuCurrentTemp_min","shardSpec":{"type":"linear","partitionNum":0},"binaryVersion":9,"size":22832772,"identifier":"GpuCoreStatus_2016-08-29T05:00:00.000Z_2016-08-29T06:00:00.000Z_2016-08-29T05:25:23.417Z"}}}] > 2016-08-29T07:23:12,299 INFO [ZkCoordinator-0] > io.druid.server.coordination.ZkCoordinator - zNode[/druid/loadQueue/ > 10.1.7.7:8083/GpuCoreStatus_2016-08-29T05:00:00.000Z_2016-08-29T06:00:00.000Z_2016-08-29T05:25:23.417Z] > was removed > 2016-08-29T07:23:12,301 INFO [ZkCoordinator-Exec--0] > io.druid.server.coordination.ServerManager - Told to delete a queryable for > a dataSource[GpuCoreStatus] that doesn't exist. 
> 2016-08-29T07:23:12,301 WARN [ZkCoordinator-Exec--0] > io.druid.server.coordination.ZkCoordinator - Unable to delete > segmentInfoCacheFile[/data/imply/druid/segment-cache/info_dir/GpuCoreStatus_2016-08-29T05:00:00.000Z_2016-08-29T06:00:00.000Z_2016-08-29T05:25:23.417Z] > > do not have any other errors or exceptions in host2 and host3 On Fri, Aug 26, 2016 at 5:25 AM, Fangjin Yang <fan...@imply.io> wrote: > Hi Leon, can you describe you cluster a little bit, most notable, where is > Zookeeper running? > > > On Sunday, August 21, 2016 at 6:24:33 PM UTC-7, Leon wrote: >> >> Hi everyone, >> >> We have choose druid recently, it is very impressive, the required disk >> size is dramatically less than similar products. >> >> But now, we meet some issue after running a small cluster. >> >> we have three hosts: >> >> one for a historical and a middleManager >> one for a overlord and a coordinate and a tranquility >> one for a broker and a middleManager >> >> it runs well in first hours, but after that, the overlord output large >> mount of logs like: >> >> >>> 2016-08-22T01:03:08,902 INFO [Curator-LeaderSelector-0] >>>> io.druid.indexing.overlord.TaskLockbox - Task[index_realtime_NetInterfa >>>> ceStatus_2016-08-20T12:00:00.000Z_0_0] already present in >>>> TaskLock[index_realtime_NetInterfaceStatus] >>> >>> 2016-08-22T01:03:08,902 INFO [Curator-LeaderSelector-0] >>>> io.druid.indexing.overlord.TaskLockbox - Reacquired lock on >>>> interval[2016-08-20T12:00:00.000Z/2016-08-20T13:00:00.000Z] >>>> version[2016-08-20T12:36:19.493Z] for task: >>>> index_realtime_NetInterfaceStatus_2016-08-20T12:00:00.000Z_0_0 >>> >>> these logs output several time per *millisecond*. >> >> I found the recent exception or error output is: >> >>> 2016-08-22T01:03:06,388 INFO [Curator-LeaderSelector-0] >>>> io.druid.indexing.overlord.TaskMaster - Bowing out! 
>>> >>> 2016-08-22T01:03:06,388 ERROR [Curator-LeaderSelector-0] >>>> io.druid.indexing.overlord.TaskMaster - Failed to lead: >>>> {class=io.druid.indexing.overlord.TaskMaster, exceptionType=class >>>> java.lang.reflect.InvocationTargetException, exceptionMessage=null} >>> >>> java.lang.reflect.InvocationTargetException >>> >>> at sun.reflect.GeneratedMethodAccessor34.invoke(Unknown Source) >>>> ~[?:?] >>> >>> at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) >>>> ~[?:1.7.0_91] >>> >>> at java.lang.reflect.Method.invoke(Method.java:606) >>>> ~[?:1.7.0_91] >>> >>> at com.metamx.common.lifecycle.Lifecycle$AnnotationBasedHandler.start(Lifecycle.java:350) >>>> ~[java-util-0.27.9.jar:?] >>> >>> at com.metamx.common.lifecycle.Lifecycle.start(Lifecycle.java:259) >>>> ~[java-util-0.27.9.jar:?] >>> >>> at io.druid.indexing.overlord.TaskMaster$1.takeLeadership(TaskMaster.java:141) >>>> [druid-indexing-service-0.9.1.1.jar:0.9.1.1] >>> >>> at org.apache.curator.framework.recipes.leader.LeaderSelector$W >>>> rappedListener.takeLeadership(LeaderSelector.java:534) >>>> [curator-recipes-2.10.0.jar:?] >>> >>> at org.apache.curator.framework.recipes.leader.LeaderSelector.doWork(LeaderSelector.java:399) >>>> [curator-recipes-2.10.0.jar:?] >>> >>> at org.apache.curator.framework.recipes.leader.LeaderSelector.d >>>> oWorkLoop(LeaderSelector.java:441) [curator-recipes-2.10.0.jar:?] >>> >>> at org.apache.curator.framework.recipes.leader.LeaderSelector.a >>>> ccess$100(LeaderSelector.java:64) [curator-recipes-2.10.0.jar:?] >>> >>> at org.apache.curator.framework.recipes.leader.LeaderSelector$2.call(LeaderSelector.java:245) >>>> [curator-recipes-2.10.0.jar:?] >>> >>> at org.apache.curator.framework.recipes.leader.LeaderSelector$2.call(LeaderSelector.java:239) >>>> [curator-recipes-2.10.0.jar:?] >>> >>> at java.util.concurrent.FutureTask.run(FutureTask.java:262)

Google Groups | Leon | 4 months ago
  1. 0

    overlord has a large amount of logs

    Google Groups | 4 months ago | Leon
    io.druid.segment.loading.SegmentLoadingException: Exception loading >> segment[GpuCoreStatus_2016-08-29T05:00:00.000Z_2016-08-29T06:00:00.000Z_2016-08-29T05:25:23.417Z] > > at >> io.druid.server.coordination.ZkCoordinator.loadSegment(ZkCoordinator.java:309) >> ~[druid-server-0.9.1.1.jar:0.9.1.1] > > at >> io.druid.server.coordination.ZkCoordinator.addSegment(ZkCoordinator.java:350) >> [druid-server-0.9.1.1.jar:0.9.1.1] > > at >> io.druid.server.coordination.SegmentChangeRequestLoad.go(SegmentChangeRequestLoad.java:44) >> [druid-server-0.9.1.1.jar:0.9.1.1] > > at >> io.druid.server.coordination.ZkCoordinator$1.childEvent(ZkCoordinator.java:152) >> [druid-server-0.9.1.1.jar:0.9.1.1] > > at >> org.apache.curator.framework.recipes.cache.PathChildrenCache$5.apply(PathChildrenCache.java:522) >> [curator-recipes-2.10.0.jar:?] > > at >> org.apache.curator.framework.recipes.cache.PathChildrenCache$5.apply(PathChildrenCache.java:516) >> [curator-recipes-2.10.0.jar:?] > > at >> org.apache.curator.framework.listen.ListenerContainer$1.run(ListenerContainer.java:93) >> [curator-framework-2.10.0.jar:?] > > at >> com.google.common.util.concurrent.MoreExecutors$SameThreadExecutorService.execute(MoreExecutors.java:297) >> [guava-16.0.1.jar:?] > > at >> org.apache.curator.framework.listen.ListenerContainer.forEach(ListenerContainer.java:85) >> [curator-framework-2.10.0.jar:?] > > at >> org.apache.curator.framework.recipes.cache.PathChildrenCache.callListeners(PathChildrenCache.java:514) >> [curator-recipes-2.10.0.jar:?] > > at >> org.apache.curator.framework.recipes.cache.EventOperation.invoke(EventOperation.java:35) >> [curator-recipes-2.10.0.jar:?] > > at >> org.apache.curator.framework.recipes.cache.PathChildrenCache$9.run(PathChildrenCache.java:772) >> [curator-recipes-2.10.0.jar:?] 
> > at >> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) >> [?:1.8.0_91] > > at java.util.concurrent.FutureTask.run(FutureTask.java:266) >> [?:1.8.0_91] > > at >> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) >> [?:1.8.0_91] > > at java.util.concurrent.FutureTask.run(FutureTask.java:266) >> [?:1.8.0_91] > > at >> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) >> [?:1.8.0_91] > > at >> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) >> [?:1.8.0_91] > > at java.lang.Thread.run(Thread.java:745) [?:1.8.0_91] > > Caused by: io.druid.segment.loading.SegmentLoadingException: >> /data/imply/druid/segment-cache/GpuCoreStatus/2016-08-29T05:00:00.000Z_2016-08-29T06:00:00.000Z/2016-08-29T05:25:23.417Z/0/index.drd >> (No such file or directory) > > at >> io.druid.segment.loading.MMappedQueryableIndexFactory.factorize(MMappedQueryableIndexFactory.java:52) >> ~[druid-server-0.9.1.1.jar:0.9.1.1] > > at >> io.druid.segment.loading.SegmentLoaderLocalCacheManager.getSegment(SegmentLoaderLocalCacheManager.java:96) >> ~[druid-server-0.9.1.1.jar:0.9.1.1] > > at >> io.druid.server.coordination.ServerManager.loadSegment(ServerManager.java:152) >> ~[druid-server-0.9.1.1.jar:0.9.1.1] > > at >> io.druid.server.coordination.ZkCoordinator.loadSegment(ZkCoordinator.java:305) >> ~[druid-server-0.9.1.1.jar:0.9.1.1] > > ... 
18 more > > Caused by: java.io.FileNotFoundException: >> /data/imply/druid/segment-cache/GpuCoreStatus/2016-08-29T05:00:00.000Z_2016-08-29T06:00:00.000Z/2016-08-29T05:25:23.417Z/0/index.drd >> (No such file or directory) > > at java.io.FileInputStream.open0(Native Method) ~[?:1.8.0_91] > > at java.io.FileInputStream.open(FileInputStream.java:195) > ~[?:1.8.0_91] > at java.io.FileInputStream.<init>(FileInputStream.java:138) > ~[?:1.8.0_91] > at > io.druid.segment.SegmentUtils.getVersionFromDir(SegmentUtils.java:43) > ~[druid-api-0.9.1.1.jar:0.9.1.1] > at io.druid.segment.IndexIO.loadIndex(IndexIO.java:211) > ~[druid-processing-0.9.1.1.jar:0.9.1.1] > at > io.druid.segment.loading.MMappedQueryableIndexFactory.factorize(MMappedQueryableIndexFactory.java:49) > ~[druid-server-0.9.1.1.jar:0.9.1.1] > at > io.druid.segment.loading.SegmentLoaderLocalCacheManager.getSegment(SegmentLoaderLocalCacheManager.java:96) > ~[druid-server-0.9.1.1.jar:0.9.1.1] > at > io.druid.server.coordination.ServerManager.loadSegment(ServerManager.java:152) > ~[druid-server-0.9.1.1.jar:0.9.1.1] > at > io.druid.server.coordination.ZkCoordinator.loadSegment(ZkCoordinator.java:305) > ~[druid-server-0.9.1.1.jar:0.9.1.1] > ... 
18 more > 2016-08-29T07:23:12,299 INFO [ZkCoordinator-0] > com.metamx.emitter.core.LoggingEmitter - Event > [{"feed":"alerts","timestamp":"2016-08-29T07:23:12.299Z","service":"druid/historical","host":" > 10.1.7.7:8083","severity":"component-failure","description":"Failed to > load segment for > dataSource","data":{"class":"io.druid.server.coordination.ZkCoordinator","exceptionType":"io.druid.segment.loading.SegmentLoadingException","exceptionMessage":"Exception > loading > segment[GpuCoreStatus_2016-08-29T05:00:00.000Z_2016-08-29T06:00:00.000Z_2016-08-29T05:25:23.417Z]","exceptionStackTrace":"io.druid.segment.loading.SegmentLoadingException: > Exception loading > segment[GpuCoreStatus_2016-08-29T05:00:00.000Z_2016-08-29T06:00:00.000Z_2016-08-29T05:25:23.417Z]\n\tat > io.druid.server.coordination.ZkCoordinator.loadSegment(ZkCoordinator.java:309)\n\tat > io.druid.server.coordination.ZkCoordinator.addSegment(ZkCoordinator.java:350)\n\tat > io.druid.server.coordination.SegmentChangeRequestLoad.go(SegmentChangeRequestLoad.java:44)\n\tat > io.druid.server.coordination.ZkCoordinator$1.childEvent(ZkCoordinator.java:152)\n\tat > org.apache.curator.framework.recipes.cache.PathChildrenCache$5.apply(PathChildrenCache.java:522)\n\tat > org.apache.curator.framework.recipes.cache.PathChildrenCache$5.apply(PathChildrenCache.java:516)\n\tat > org.apache.curator.framework.listen.ListenerContainer$1.run(ListenerContainer.java:93)\n\tat > com.google.common.util.concurrent.MoreExecutors$SameThreadExecutorService.execute(MoreExecutors.java:297)\n\tat > org.apache.curator.framework.listen.ListenerContainer.forEach(ListenerContainer.java:85)\n\tat > org.apache.curator.framework.recipes.cache.PathChildrenCache.callListeners(PathChildrenCache.java:514)\n\tat > org.apache.curator.framework.recipes.cache.EventOperation.invoke(EventOperation.java:35)\n\tat > org.apache.curator.framework.recipes.cache.PathChildrenCache$9.run(PathChildrenCache.java:772)\n\tat > 
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)\n\tat > java.util.concurrent.FutureTask.run(FutureTask.java:266)\n\tat > java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)\n\tat > java.util.concurrent.FutureTask.run(FutureTask.java:266)\n\tat > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)\n\tat > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)\n\tat > java.lang.Thread.run(Thread.java:745)\nCaused by: > io.druid.segment.loading.SegmentLoadingException: > /data/imply/druid/segment-cache/GpuCoreStatus/2016-08-29T05:00:00.000Z_2016-08-29T06:00:00.000Z/2016-08-29T05:25:23.417Z/0/index.drd > (No such file or directory)\n\tat > io.druid.segment.loading.MMappedQueryableIndexFactory.factorize(MMappedQueryableIndexFactory.java:52)\n\tat > io.druid.segment.loading.SegmentLoaderLocalCacheManager.getSegment(SegmentLoaderLocalCacheManager.java:96)\n\tat > io.druid.server.coordination.ServerManager.loadSegment(ServerManager.java:152)\n\tat > io.druid.server.coordination.ZkCoordinator.loadSegment(ZkCoordinator.java:305)\n\t... > 18 more\nCaused by: java.io.FileNotFoundException: > /data/imply/druid/segment-cache/GpuCoreStatus/2016-08-29T05:00:00.000Z_2016-08-29T06:00:00.000Z/2016-08-29T05:25:23.417Z/0/index.drd > (No such file or directory)\n\tat java.io.FileInputStream.open0(Native > Method)\n\tat java.io.FileInputStream.open(FileInputStream.java:195)\n\tat > java.io.FileInputStream.<init>(FileInputStream.java:138)\n\tat > io.druid.segment.SegmentUtils.getVersionFromDir(SegmentUtils.java:43)\n\tat > io.druid.segment.IndexIO.loadIndex(IndexIO.java:211)\n\tat > io.druid.segment.loading.MMappedQueryableIndexFactory.factorize(MMappedQueryableIndexFactory.java:49)\n\t... 
> 21 > more\n","segment":{"dataSource":"GpuCoreStatus","interval":"2016-08-29T05:00:00.000Z/2016-08-29T06:00:00.000Z","version":"2016-08-29T05:25:23.417Z","loadSpec":{"type":"local","path":"/data/imply/druid/segments/GpuCoreStatus/2016-08-29T05:00:00.000Z_2016-08-29T06:00:00.000Z/2016-08-29T05:25:23.417Z/0/index.zip"},"dimensions":"id,bar1MemoryUsageComputeMode,pciBus,driverModelVbiosVersion,driverModelMinorNumber,driverModelGpuUuid,pciDeviceId,pciDevice,pciBusId,productName,productBrand,persistenceMode,ip,pciBridgeChipPerformanceState,pciSubSystemId,pciDomain","metrics":"count,fbMemoryUsageUsed_sum,fbMemoryUsageUsed_max,fbMemoryUsageUsed_min,pciBridgeChipFanSpeed_sum,pciBridgeChipFanSpeed_max,pciBridgeChipFanSpeed_min,fbMemoryUsageTotal_sum,fbMemoryUsageTotal_max,fbMemoryUsageTotal_min,fbMemoryUsageFree_sum,fbMemoryUsageFree_max,fbMemoryUsageFree_min,temperatureGpuCurrentTemp_sum,temperatureGpuCurrentTemp_max,temperatureGpuCurrentTemp_min","shardSpec":{"type":"linear","partitionNum":0},"binaryVersion":9,"size":22832772,"identifier":"GpuCoreStatus_2016-08-29T05:00:00.000Z_2016-08-29T06:00:00.000Z_2016-08-29T05:25:23.417Z"}}}] > 2016-08-29T07:23:12,299 INFO [ZkCoordinator-0] > io.druid.server.coordination.ZkCoordinator - zNode[/druid/loadQueue/ > 10.1.7.7:8083/GpuCoreStatus_2016-08-29T05:00:00.000Z_2016-08-29T06:00:00.000Z_2016-08-29T05:25:23.417Z] > was removed > 2016-08-29T07:23:12,301 INFO [ZkCoordinator-Exec--0] > io.druid.server.coordination.ServerManager - Told to delete a queryable for > a dataSource[GpuCoreStatus] that doesn't exist. 
> 2016-08-29T07:23:12,301 WARN [ZkCoordinator-Exec--0] > io.druid.server.coordination.ZkCoordinator - Unable to delete > segmentInfoCacheFile[/data/imply/druid/segment-cache/info_dir/GpuCoreStatus_2016-08-29T05:00:00.000Z_2016-08-29T06:00:00.000Z_2016-08-29T05:25:23.417Z] > > do not have any other errors or exceptions in host2 and host3 On Fri, Aug 26, 2016 at 5:25 AM, Fangjin Yang <fan...@imply.io> wrote: > Hi Leon, can you describe you cluster a little bit, most notable, where is > Zookeeper running? > > > On Sunday, August 21, 2016 at 6:24:33 PM UTC-7, Leon wrote: >> >> Hi everyone, >> >> We have choose druid recently, it is very impressive, the required disk >> size is dramatically less than similar products. >> >> But now, we meet some issue after running a small cluster. >> >> we have three hosts: >> >> one for a historical and a middleManager >> one for a overlord and a coordinate and a tranquility >> one for a broker and a middleManager >> >> it runs well in first hours, but after that, the overlord output large >> mount of logs like: >> >> >>> 2016-08-22T01:03:08,902 INFO [Curator-LeaderSelector-0] >>>> io.druid.indexing.overlord.TaskLockbox - Task[index_realtime_NetInterfa >>>> ceStatus_2016-08-20T12:00:00.000Z_0_0] already present in >>>> TaskLock[index_realtime_NetInterfaceStatus] >>> >>> 2016-08-22T01:03:08,902 INFO [Curator-LeaderSelector-0] >>>> io.druid.indexing.overlord.TaskLockbox - Reacquired lock on >>>> interval[2016-08-20T12:00:00.000Z/2016-08-20T13:00:00.000Z] >>>> version[2016-08-20T12:36:19.493Z] for task: >>>> index_realtime_NetInterfaceStatus_2016-08-20T12:00:00.000Z_0_0 >>> >>> these logs output several time per *millisecond*. >> >> I found the recent exception or error output is: >> >>> 2016-08-22T01:03:06,388 INFO [Curator-LeaderSelector-0] >>>> io.druid.indexing.overlord.TaskMaster - Bowing out! 
>>> >>> 2016-08-22T01:03:06,388 ERROR [Curator-LeaderSelector-0] >>>> io.druid.indexing.overlord.TaskMaster - Failed to lead: >>>> {class=io.druid.indexing.overlord.TaskMaster, exceptionType=class >>>> java.lang.reflect.InvocationTargetException, exceptionMessage=null} >>> >>> java.lang.reflect.InvocationTargetException >>> >>> at sun.reflect.GeneratedMethodAccessor34.invoke(Unknown Source) >>>> ~[?:?] >>> >>> at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) >>>> ~[?:1.7.0_91] >>> >>> at java.lang.reflect.Method.invoke(Method.java:606) >>>> ~[?:1.7.0_91] >>> >>> at com.metamx.common.lifecycle.Lifecycle$AnnotationBasedHandler.start(Lifecycle.java:350) >>>> ~[java-util-0.27.9.jar:?] >>> >>> at com.metamx.common.lifecycle.Lifecycle.start(Lifecycle.java:259) >>>> ~[java-util-0.27.9.jar:?] >>> >>> at io.druid.indexing.overlord.TaskMaster$1.takeLeadership(TaskMaster.java:141) >>>> [druid-indexing-service-0.9.1.1.jar:0.9.1.1] >>> >>> at org.apache.curator.framework.recipes.leader.LeaderSelector$W >>>> rappedListener.takeLeadership(LeaderSelector.java:534) >>>> [curator-recipes-2.10.0.jar:?] >>> >>> at org.apache.curator.framework.recipes.leader.LeaderSelector.doWork(LeaderSelector.java:399) >>>> [curator-recipes-2.10.0.jar:?] >>> >>> at org.apache.curator.framework.recipes.leader.LeaderSelector.d >>>> oWorkLoop(LeaderSelector.java:441) [curator-recipes-2.10.0.jar:?] >>> >>> at org.apache.curator.framework.recipes.leader.LeaderSelector.a >>>> ccess$100(LeaderSelector.java:64) [curator-recipes-2.10.0.jar:?] >>> >>> at org.apache.curator.framework.recipes.leader.LeaderSelector$2.call(LeaderSelector.java:245) >>>> [curator-recipes-2.10.0.jar:?] >>> >>> at org.apache.curator.framework.recipes.leader.LeaderSelector$2.call(LeaderSelector.java:239) >>>> [curator-recipes-2.10.0.jar:?] >>> >>> at java.util.concurrent.FutureTask.run(FutureTask.java:262)
  2. 0

    Urgently need help getting my historical nodes up

    Google Groups | 3 months ago | Ben Vogan
    io.druid.segment.loading.SegmentLoadingException: Exception loading >> segment[business_events_test_2016-10-24T15:00:00.000Z_2016-1 >> 0-24T16:00:00.000Z_2016-09-11T15:12:33.459Z] >> at io.druid.server.coordination.ZkCoordinator.loadSegment(ZkCoordinator.java:309) >> ~[druid-server-0.9.1.1.jar:0.9.1.1] >> at io.druid.server.coordination.ZkCoordinator.addSegment(ZkCoordinator.java:350) >> [druid-server-0.9.1.1.jar:0.9.1.1] >> at io.druid.server.coordination.SegmentChangeRequestLoad.go(SegmentChangeRequestLoad.java:44) >> [druid-server-0.9.1.1.jar:0.9.1.1] >> at io.druid.server.coordination.ZkCoordinator$1.childEvent(ZkCoordinator.java:152) >> [druid-server-0.9.1.1.jar:0.9.1.1] >> at org.apache.curator.framework.recipes.cache.PathChildrenCache >> $5.apply(PathChildrenCache.java:522) [curator-recipes-2.10.0.jar:?] >> at org.apache.curator.framework.recipes.cache.PathChildrenCache >> $5.apply(PathChildrenCache.java:516) [curator-recipes-2.10.0.jar:?] >> at org.apache.curator.framework.listen.ListenerContainer$1.run(ListenerContainer.java:93) >> [curator-framework-2.10.0.jar:?] >> at com.google.common.util.concurrent.MoreExecutors$SameThreadEx >> ecutorService.execute(MoreExecutors.java:297) [guava-16.0.1.jar:?] >> at org.apache.curator.framework.listen.ListenerContainer.forEach(ListenerContainer.java:85) >> [curator-framework-2.10.0.jar:?] >> at org.apache.curator.framework.recipes.cache.PathChildrenCache >> .callListeners(PathChildrenCache.java:514) [curator-recipes-2.10.0.jar:?] >> at org.apache.curator.framework.recipes.cache.EventOperation.invoke(EventOperation.java:35) >> [curator-recipes-2.10.0.jar:?] >> at org.apache.curator.framework.recipes.cache.PathChildrenCache >> $9.run(PathChildrenCache.java:772) [curator-recipes-2.10.0.jar:?] >> at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
  3. 0

    overlord has a large amount of logs

    Google Groups | 4 months ago | Leon
    io.druid.segment.loading.SegmentLoadingException: Exception loading >>> segment[GpuCoreStatus_2016-08-29T05:00:00.000Z_2016-08-29T06:00:00.000Z_2016-08-29T05:25:23.417Z] >> >> at >>> io.druid.server.coordination.ZkCoordinator.loadSegment(ZkCoordinator.java:309) >>> ~[druid-server-0.9.1.1.jar:0.9.1.1] >> >> at >>> io.druid.server.coordination.ZkCoordinator.addSegment(ZkCoordinator.java:350) >>> [druid-server-0.9.1.1.jar:0.9.1.1] >> >> at >>> io.druid.server.coordination.SegmentChangeRequestLoad.go(SegmentChangeRequestLoad.java:44) >>> [druid-server-0.9.1.1.jar:0.9.1.1] >> >> at >>> io.druid.server.coordination.ZkCoordinator$1.childEvent(ZkCoordinator.java:152) >>> [druid-server-0.9.1.1.jar:0.9.1.1] >> >> at >>> org.apache.curator.framework.recipes.cache.PathChildrenCache$5.apply(PathChildrenCache.java:522) >>> [curator-recipes-2.10.0.jar:?] >> >> at >>> org.apache.curator.framework.recipes.cache.PathChildrenCache$5.apply(PathChildrenCache.java:516) >>> [curator-recipes-2.10.0.jar:?] >> >> at >>> org.apache.curator.framework.listen.ListenerContainer$1.run(ListenerContainer.java:93) >>> [curator-framework-2.10.0.jar:?] >> >> at >>> com.google.common.util.concurrent.MoreExecutors$SameThreadExecutorService.execute(MoreExecutors.java:297) >>> [guava-16.0.1.jar:?] >> >> at >>> org.apache.curator.framework.listen.ListenerContainer.forEach(ListenerContainer.java:85) >>> [curator-framework-2.10.0.jar:?] >> >> at >>> org.apache.curator.framework.recipes.cache.PathChildrenCache.callListeners(PathChildrenCache.java:514) >>> [curator-recipes-2.10.0.jar:?] >> >> at >>> org.apache.curator.framework.recipes.cache.EventOperation.invoke(EventOperation.java:35) >>> [curator-recipes-2.10.0.jar:?] >> >> at >>> org.apache.curator.framework.recipes.cache.PathChildrenCache$9.run(PathChildrenCache.java:772) >>> [curator-recipes-2.10.0.jar:?] 
>> >> at >>> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) >>> [?:1.8.0_91] >> >> at java.util.concurrent.FutureTask.run(FutureTask.java:266)
  4. Speed up your debug routine!

    Automated exception search integrated into your IDE

  5. 0

    overlord has a large amount of logs

    Google Groups | 4 months ago | Leon
    io.druid.segment.loading.SegmentLoadingException: Exception loading >>> segment[PartitionStatus_2016-08-29T05:00:00.000Z_2016-08-29T06:00:00.000Z_2016-08-29T05:25:26.133Z] >> >> at >>> io.druid.server.coordination.ZkCoordinator.loadSegment(ZkCoordinator.java:309) >>> ~[druid-server-0.9.1.1.jar:0.9.1.1] >> >> at >>> io.druid.server.coordination.ZkCoordinator.addSegment(ZkCoordinator.java:350) >>> [druid-server-0.9.1.1.jar:0.9.1.1] >> >> at >>> io.druid.server.coordination.SegmentChangeRequestLoad.go(SegmentChangeRequestLoad.java:44) >>> [druid-server-0.9.1.1.jar:0.9.1.1] >> >> at >>> io.druid.server.coordination.ZkCoordinator$1.childEvent(ZkCoordinator.java:152) >>> [druid-server-0.9.1.1.jar:0.9.1.1] >> >> at >>> org.apache.curator.framework.recipes.cache.PathChildrenCache$5.apply(PathChildrenCache.java:522) >>> [curator-recipes-2.10.0.jar:?] >> >> at >>> org.apache.curator.framework.recipes.cache.PathChildrenCache$5.apply(PathChildrenCache.java:516) >>> [curator-recipes-2.10.0.jar:?] >> >> at >>> org.apache.curator.framework.listen.ListenerContainer$1.run(ListenerContainer.java:93) >>> [curator-framework-2.10.0.jar:?] >> >> at >>> com.google.common.util.concurrent.MoreExecutors$SameThreadExecutorService.execute(MoreExecutors.java:297) >>> [guava-16.0.1.jar:?] >> >> at >>> org.apache.curator.framework.listen.ListenerContainer.forEach(ListenerContainer.java:85) >>> [curator-framework-2.10.0.jar:?] >> >> at >>> org.apache.curator.framework.recipes.cache.PathChildrenCache.callListeners(PathChildrenCache.java:514) >>> [curator-recipes-2.10.0.jar:?] >> >> at >>> org.apache.curator.framework.recipes.cache.EventOperation.invoke(EventOperation.java:35) >>> [curator-recipes-2.10.0.jar:?] >> >> at >>> org.apache.curator.framework.recipes.cache.PathChildrenCache$9.run(PathChildrenCache.java:772) >>> [curator-recipes-2.10.0.jar:?] 
>> >> at >>> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) >>> [?:1.8.0_91] >> >> at java.util.concurrent.FutureTask.run(FutureTask.java:266)
  6. 0

    Urgently need help getting my historical nodes up

    Google Groups | 3 months ago | Ben Vogan
    io.druid.segment.loading.SegmentLoadingException: Exception loading segment[business_events_test_2016-10-24T15:00:00.000Z_2016-10-24T16:00:00.000Z_2016-09-11T15:12:33.459Z] at io.druid.server.coordination.ZkCoordinator.loadSegment(ZkCoordinator.java:309) ~[druid-server-0.9.1.1.jar:0.9.1.1] at io.druid.server.coordination.ZkCoordinator.addSegment(ZkCoordinator.java:350) [druid-server-0.9.1.1.jar:0.9.1.1] at io.druid.server.coordination.SegmentChangeRequestLoad.go(SegmentChangeRequestLoad.java:44) [druid-server-0.9.1.1.jar:0.9.1.1] at io.druid.server.coordination.ZkCoordinator$1.childEvent(ZkCoordinator.java:152) [druid-server-0.9.1.1.jar:0.9.1.1] at org.apache.curator.framework.recipes.cache.PathChildrenCache$5.apply(PathChildrenCache.java:522) [curator-recipes-2.10.0.jar:?] at org.apache.curator.framework.recipes.cache.PathChildrenCache$5.apply(PathChildrenCache.java:516) [curator-recipes-2.10.0.jar:?] at org.apache.curator.framework.listen.ListenerContainer$1.run(ListenerContainer.java:93) [curator-framework-2.10.0.jar:?] at com.google.common.util.concurrent.MoreExecutors$SameThreadExecutorService.execute(MoreExecutors.java:297) [guava-16.0.1.jar:?] at org.apache.curator.framework.listen.ListenerContainer.forEach(ListenerContainer.java:85) [curator-framework-2.10.0.jar:?] at org.apache.curator.framework.recipes.cache.PathChildrenCache.callListeners(PathChildrenCache.java:514) [curator-recipes-2.10.0.jar:?]

    Not finding the right solution?
    Take a tour to get the most out of Samebug.

    Tired of useless tips?

    Automated exception search integrated into your IDE

    Root Cause Analysis

    1. io.druid.segment.loading.SegmentLoadingException

      Exception loading >> segment[GpuCoreStatus_2016-08-29T05:00:00.000Z_2016-08-29T06:00:00.000Z_2016-08-29T05:25:23.417Z] > > at >> io.druid.server.coordination.ZkCoordinator.loadSegment(ZkCoordinator.java:309) >> ~[druid-server-0.9.1.1.jar:0.9.1.1] > > at >> io.druid.server.coordination.ZkCoordinator.addSegment(ZkCoordinator.java:350) >> [druid-server-0.9.1.1.jar:0.9.1.1] > > at >> io.druid.server.coordination.SegmentChangeRequestLoad.go(SegmentChangeRequestLoad.java:44) >> [druid-server-0.9.1.1.jar:0.9.1.1] > > at >> io.druid.server.coordination.ZkCoordinator$1.childEvent(ZkCoordinator.java:152) >> [druid-server-0.9.1.1.jar:0.9.1.1] > > at >> org.apache.curator.framework.recipes.cache.PathChildrenCache$5.apply(PathChildrenCache.java:522) >> [curator-recipes-2.10.0.jar:?] > > at >> org.apache.curator.framework.recipes.cache.PathChildrenCache$5.apply(PathChildrenCache.java:516) >> [curator-recipes-2.10.0.jar:?] > > at >> org.apache.curator.framework.listen.ListenerContainer$1.run(ListenerContainer.java:93) >> [curator-framework-2.10.0.jar:?] > > at >> com.google.common.util.concurrent.MoreExecutors$SameThreadExecutorService.execute(MoreExecutors.java:297) >> [guava-16.0.1.jar:?] > > at >> org.apache.curator.framework.listen.ListenerContainer.forEach(ListenerContainer.java:85) >> [curator-framework-2.10.0.jar:?] > > at >> org.apache.curator.framework.recipes.cache.PathChildrenCache.callListeners(PathChildrenCache.java:514) >> [curator-recipes-2.10.0.jar:?] > > at >> org.apache.curator.framework.recipes.cache.EventOperation.invoke(EventOperation.java:35) >> [curator-recipes-2.10.0.jar:?] > > at >> org.apache.curator.framework.recipes.cache.PathChildrenCache$9.run(PathChildrenCache.java:772) >> [curator-recipes-2.10.0.jar:?] 
> > at >> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) >> [?:1.8.0_91] > > at java.util.concurrent.FutureTask.run(FutureTask.java:266) >> [?:1.8.0_91] > > at >> java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) >> [?:1.8.0_91] > > at java.util.concurrent.FutureTask.run(FutureTask.java:266) >> [?:1.8.0_91] > > at >> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142) >> [?:1.8.0_91] > > at >> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617) >> [?:1.8.0_91] > > at java.lang.Thread.run(Thread.java:745) [?:1.8.0_91] > > Caused by: io.druid.segment.loading.SegmentLoadingException: >> /data/imply/druid/segment-cache/GpuCoreStatus/2016-08-29T05:00:00.000Z_2016-08-29T06:00:00.000Z/2016-08-29T05:25:23.417Z/0/index.drd >> (No such file or directory) > > at >> io.druid.segment.loading.MMappedQueryableIndexFactory.factorize(MMappedQueryableIndexFactory.java:52) >> ~[druid-server-0.9.1.1.jar:0.9.1.1] > > at >> io.druid.segment.loading.SegmentLoaderLocalCacheManager.getSegment(SegmentLoaderLocalCacheManager.java:96) >> ~[druid-server-0.9.1.1.jar:0.9.1.1] > > at >> io.druid.server.coordination.ServerManager.loadSegment(ServerManager.java:152) >> ~[druid-server-0.9.1.1.jar:0.9.1.1] > > at >> io.druid.server.coordination.ZkCoordinator.loadSegment(ZkCoordinator.java:305) >> ~[druid-server-0.9.1.1.jar:0.9.1.1] > > ... 
18 more > > Caused by: java.io.FileNotFoundException: >> /data/imply/druid/segment-cache/GpuCoreStatus/2016-08-29T05:00:00.000Z_2016-08-29T06:00:00.000Z/2016-08-29T05:25:23.417Z/0/index.drd >> (No such file or directory) > > at java.io.FileInputStream.open0(Native Method) ~[?:1.8.0_91] > > at java.io.FileInputStream.open(FileInputStream.java:195) > ~[?:1.8.0_91] > at java.io.FileInputStream.<init>(FileInputStream.java:138) > ~[?:1.8.0_91] > at > io.druid.segment.SegmentUtils.getVersionFromDir(SegmentUtils.java:43) > ~[druid-api-0.9.1.1.jar:0.9.1.1] > at io.druid.segment.IndexIO.loadIndex(IndexIO.java:211) > ~[druid-processing-0.9.1.1.jar:0.9.1.1] > at > io.druid.segment.loading.MMappedQueryableIndexFactory.factorize(MMappedQueryableIndexFactory.java:49) > ~[druid-server-0.9.1.1.jar:0.9.1.1] > at > io.druid.segment.loading.SegmentLoaderLocalCacheManager.getSegment(SegmentLoaderLocalCacheManager.java:96) > ~[druid-server-0.9.1.1.jar:0.9.1.1] > at > io.druid.server.coordination.ServerManager.loadSegment(ServerManager.java:152) > ~[druid-server-0.9.1.1.jar:0.9.1.1] > at > io.druid.server.coordination.ZkCoordinator.loadSegment(ZkCoordinator.java:305) > ~[druid-server-0.9.1.1.jar:0.9.1.1] > ... 
18 more > 2016-08-29T07:23:12,299 INFO [ZkCoordinator-0] > com.metamx.emitter.core.LoggingEmitter - Event > [{"feed":"alerts","timestamp":"2016-08-29T07:23:12.299Z","service":"druid/historical","host":" > 10.1.7.7:8083","severity":"component-failure","description":"Failed to > load segment for > dataSource","data":{"class":"io.druid.server.coordination.ZkCoordinator","exceptionType":"io.druid.segment.loading.SegmentLoadingException","exceptionMessage":"Exception > loading > segment[GpuCoreStatus_2016-08-29T05:00:00.000Z_2016-08-29T06:00:00.000Z_2016-08-29T05:25:23.417Z]","exceptionStackTrace":"io.druid.segment.loading.SegmentLoadingException: > Exception loading > segment[GpuCoreStatus_2016-08-29T05:00:00.000Z_2016-08-29T06:00:00.000Z_2016-08-29T05:25:23.417Z]\n\tat > io.druid.server.coordination.ZkCoordinator.loadSegment(ZkCoordinator.java:309)\n\tat > io.druid.server.coordination.ZkCoordinator.addSegment(ZkCoordinator.java:350)\n\tat > io.druid.server.coordination.SegmentChangeRequestLoad.go(SegmentChangeRequestLoad.java:44)\n\tat > io.druid.server.coordination.ZkCoordinator$1.childEvent(ZkCoordinator.java:152)\n\tat > org.apache.curator.framework.recipes.cache.PathChildrenCache$5.apply(PathChildrenCache.java:522)\n\tat > org.apache.curator.framework.recipes.cache.PathChildrenCache$5.apply(PathChildrenCache.java:516)\n\tat > org.apache.curator.framework.listen.ListenerContainer$1.run(ListenerContainer.java:93)\n\tat > com.google.common.util.concurrent.MoreExecutors$SameThreadExecutorService.execute(MoreExecutors.java:297)\n\tat > org.apache.curator.framework.listen.ListenerContainer.forEach(ListenerContainer.java:85)\n\tat > org.apache.curator.framework.recipes.cache.PathChildrenCache.callListeners(PathChildrenCache.java:514)\n\tat > org.apache.curator.framework.recipes.cache.EventOperation.invoke(EventOperation.java:35)\n\tat > org.apache.curator.framework.recipes.cache.PathChildrenCache$9.run(PathChildrenCache.java:772)\n\tat > 
java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)\n\tat > java.util.concurrent.FutureTask.run(FutureTask.java:266)\n\tat > java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)\n\tat > java.util.concurrent.FutureTask.run(FutureTask.java:266)\n\tat > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)\n\tat > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)\n\tat > java.lang.Thread.run(Thread.java:745)\nCaused by: > io.druid.segment.loading.SegmentLoadingException: > /data/imply/druid/segment-cache/GpuCoreStatus/2016-08-29T05:00:00.000Z_2016-08-29T06:00:00.000Z/2016-08-29T05:25:23.417Z/0/index.drd > (No such file or directory)\n\tat > io.druid.segment.loading.MMappedQueryableIndexFactory.factorize(MMappedQueryableIndexFactory.java:52)\n\tat > io.druid.segment.loading.SegmentLoaderLocalCacheManager.getSegment(SegmentLoaderLocalCacheManager.java:96)\n\tat > io.druid.server.coordination.ServerManager.loadSegment(ServerManager.java:152)\n\tat > io.druid.server.coordination.ZkCoordinator.loadSegment(ZkCoordinator.java:305)\n\t... > 18 more\nCaused by: java.io.FileNotFoundException: > /data/imply/druid/segment-cache/GpuCoreStatus/2016-08-29T05:00:00.000Z_2016-08-29T06:00:00.000Z/2016-08-29T05:25:23.417Z/0/index.drd > (No such file or directory)\n\tat java.io.FileInputStream.open0(Native > Method)\n\tat java.io.FileInputStream.open(FileInputStream.java:195)\n\tat > java.io.FileInputStream.<init>(FileInputStream.java:138)\n\tat > io.druid.segment.SegmentUtils.getVersionFromDir(SegmentUtils.java:43)\n\tat > io.druid.segment.IndexIO.loadIndex(IndexIO.java:211)\n\tat > io.druid.segment.loading.MMappedQueryableIndexFactory.factorize(MMappedQueryableIndexFactory.java:49)\n\t... 
> 21 > more\n","segment":{"dataSource":"GpuCoreStatus","interval":"2016-08-29T05:00:00.000Z/2016-08-29T06:00:00.000Z","version":"2016-08-29T05:25:23.417Z","loadSpec":{"type":"local","path":"/data/imply/druid/segments/GpuCoreStatus/2016-08-29T05:00:00.000Z_2016-08-29T06:00:00.000Z/2016-08-29T05:25:23.417Z/0/index.zip"},"dimensions":"id,bar1MemoryUsageComputeMode,pciBus,driverModelVbiosVersion,driverModelMinorNumber,driverModelGpuUuid,pciDeviceId,pciDevice,pciBusId,productName,productBrand,persistenceMode,ip,pciBridgeChipPerformanceState,pciSubSystemId,pciDomain","metrics":"count,fbMemoryUsageUsed_sum,fbMemoryUsageUsed_max,fbMemoryUsageUsed_min,pciBridgeChipFanSpeed_sum,pciBridgeChipFanSpeed_max,pciBridgeChipFanSpeed_min,fbMemoryUsageTotal_sum,fbMemoryUsageTotal_max,fbMemoryUsageTotal_min,fbMemoryUsageFree_sum,fbMemoryUsageFree_max,fbMemoryUsageFree_min,temperatureGpuCurrentTemp_sum,temperatureGpuCurrentTemp_max,temperatureGpuCurrentTemp_min","shardSpec":{"type":"linear","partitionNum":0},"binaryVersion":9,"size":22832772,"identifier":"GpuCoreStatus_2016-08-29T05:00:00.000Z_2016-08-29T06:00:00.000Z_2016-08-29T05:25:23.417Z"}}}] > 2016-08-29T07:23:12,299 INFO [ZkCoordinator-0] > io.druid.server.coordination.ZkCoordinator - zNode[/druid/loadQueue/ > 10.1.7.7:8083/GpuCoreStatus_2016-08-29T05:00:00.000Z_2016-08-29T06:00:00.000Z_2016-08-29T05:25:23.417Z] > was removed > 2016-08-29T07:23:12,301 INFO [ZkCoordinator-Exec--0] > io.druid.server.coordination.ServerManager - Told to delete a queryable for > a dataSource[GpuCoreStatus] that doesn't exist. 
> 2016-08-29T07:23:12,301 WARN [ZkCoordinator-Exec--0] > io.druid.server.coordination.ZkCoordinator - Unable to delete > segmentInfoCacheFile[/data/imply/druid/segment-cache/info_dir/GpuCoreStatus_2016-08-29T05:00:00.000Z_2016-08-29T06:00:00.000Z_2016-08-29T05:25:23.417Z] > > do not have any other errors or exceptions in host2 and host3 On Fri, Aug 26, 2016 at 5:25 AM, Fangjin Yang <fan...@imply.io> wrote: > Hi Leon, can you describe you cluster a little bit, most notable, where is > Zookeeper running? > > > On Sunday, August 21, 2016 at 6:24:33 PM UTC-7, Leon wrote: >> >> Hi everyone, >> >> We have choose druid recently, it is very impressive, the required disk >> size is dramatically less than similar products. >> >> But now, we meet some issue after running a small cluster. >> >> we have three hosts: >> >> one for a historical and a middleManager >> one for a overlord and a coordinate and a tranquility >> one for a broker and a middleManager >> >> it runs well in first hours, but after that, the overlord output large >> mount of logs like: >> >> >>> 2016-08-22T01:03:08,902 INFO [Curator-LeaderSelector-0] >>>> io.druid.indexing.overlord.TaskLockbox - Task[index_realtime_NetInterfa >>>> ceStatus_2016-08-20T12:00:00.000Z_0_0] already present in >>>> TaskLock[index_realtime_NetInterfaceStatus] >>> >>> 2016-08-22T01:03:08,902 INFO [Curator-LeaderSelector-0] >>>> io.druid.indexing.overlord.TaskLockbox - Reacquired lock on >>>> interval[2016-08-20T12:00:00.000Z/2016-08-20T13:00:00.000Z] >>>> version[2016-08-20T12:36:19.493Z] for task: >>>> index_realtime_NetInterfaceStatus_2016-08-20T12:00:00.000Z_0_0 >>> >>> these logs output several time per *millisecond*. >> >> I found the recent exception or error output is: >> >>> 2016-08-22T01:03:06,388 INFO [Curator-LeaderSelector-0] >>>> io.druid.indexing.overlord.TaskMaster - Bowing out! 
>>> >>> 2016-08-22T01:03:06,388 ERROR [Curator-LeaderSelector-0] >>>> io.druid.indexing.overlord.TaskMaster - Failed to lead: >>>> {class=io.druid.indexing.overlord.TaskMaster, exceptionType=class >>>> java.lang.reflect.InvocationTargetException, exceptionMessage=null} >>> >>> java.lang.reflect.InvocationTargetException >>> >>> at sun.reflect.GeneratedMethodAccessor34.invoke(Unknown Source) >>>> ~[?:?] >>> >>> at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) >>>> ~[?:1.7.0_91] >>> >>> at java.lang.reflect.Method.invoke(Method.java:606) >>>> ~[?:1.7.0_91] >>> >>> at com.metamx.common.lifecycle.Lifecycle$AnnotationBasedHandler.start(Lifecycle.java:350) >>>> ~[java-util-0.27.9.jar:?] >>> >>> at com.metamx.common.lifecycle.Lifecycle.start(Lifecycle.java:259) >>>> ~[java-util-0.27.9.jar:?] >>> >>> at io.druid.indexing.overlord.TaskMaster$1.takeLeadership(TaskMaster.java:141) >>>> [druid-indexing-service-0.9.1.1.jar:0.9.1.1] >>> >>> at org.apache.curator.framework.recipes.leader.LeaderSelector$W >>>> rappedListener.takeLeadership(LeaderSelector.java:534) >>>> [curator-recipes-2.10.0.jar:?] >>> >>> at org.apache.curator.framework.recipes.leader.LeaderSelector.doWork(LeaderSelector.java:399) >>>> [curator-recipes-2.10.0.jar:?] >>> >>> at org.apache.curator.framework.recipes.leader.LeaderSelector.d >>>> oWorkLoop(LeaderSelector.java:441) [curator-recipes-2.10.0.jar:?] >>> >>> at org.apache.curator.framework.recipes.leader.LeaderSelector.a >>>> ccess$100(LeaderSelector.java:64) [curator-recipes-2.10.0.jar:?] >>> >>> at org.apache.curator.framework.recipes.leader.LeaderSelector$2.call(LeaderSelector.java:245) >>>> [curator-recipes-2.10.0.jar:?] >>> >>> at org.apache.curator.framework.recipes.leader.LeaderSelector$2.call(LeaderSelector.java:239) >>>> [curator-recipes-2.10.0.jar:?] >>> >>> at java.util.concurrent.FutureTask.run(FutureTask.java:262)

      at java.util.concurrent.Executors$RunnableAdapter.call()
    2. Java RT
      Thread.run
      1. java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:471)
      2. java.util.concurrent.FutureTask.run(FutureTask.java:262)
      3. java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
      4. java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
      5. java.lang.Thread.run(Thread.java:745) [?:1.7.0_91]
      5 frames