package mr import ( "errors" "fmt" "log" "net" "net/http" "net/rpc" "os" "sync" "time" ) const machineDefaultLastSecond = 3 type Coordinator struct { // Your definitions here. mapTask []MapTask mapSize int reduceTask []ReduceTask reduceSize int taskQueue chan Task lock sync.Mutex machine map[int]*Machine machineId int running chan bool } func (c *Coordinator) popTask(machine *Machine) { if machine.Task.TaskType == TASK_NONE { return } DPrintf("machine %d pop task %d, task list is %v %v\n", machine.Id, machine.Task.TaskId, machine.Task.MapTask, machine.Task.ReduceTask) c.taskQueue <- machine.Task machine.Task.TaskType = TASK_NONE machine.State = MACHINE_IDLE } func (c *Coordinator) getMachine(id int) *Machine { machine, ok := c.machine[id] if !ok { DPrintf("machine not found at %d\n", id) return nil } return machine } // Your code here -- RPC handlers for the worker to call. func (c *Coordinator) GetTask(args *Machine, reply *Machine) error { c.lock.Lock() defer c.lock.Unlock() if c.Done() { reply.State = MACHINE_DONE return nil } machine := c.getMachine(args.Id) if machine == nil { reply.State = MACHINE_INIT return nil } if machine.Task.TaskType != TASK_NONE { return errors.New("task is not none") } c.lock.Unlock() task, ok := <-c.taskQueue c.lock.Lock() if !ok { reply.State = MACHINE_DONE return nil } machine.Task = task machine.State = MACHINE_READY DPrintf("machine %d get task %d, task list is %v %v\n", machine.Id, machine.Task.TaskId, machine.Task.MapTask, machine.Task.ReduceTask) *reply = *machine return nil } func (c *Coordinator) genReduceTask() { var mapId, reduceId int entries, err := os.ReadDir(".") if err != nil { log.Fatal("read dir error") } for _, entry := range entries { n, _ := fmt.Sscanf(entry.Name(), "mr-%d-%d", &mapId, &reduceId) if n == 2 && reduceId < c.reduceSize && mapId >= 0 { // log.Printf("reduce task %d find file %s", reply.ReduceTask.TaskId, entry.Name()) c.reduceTask[reduceId].Fnames = append(c.reduceTask[reduceId].Fnames, entry.Name()) } } } func (c *Coordinator) SubmitTask(args *Machine, reply *Machine) error { c.lock.Lock() defer c.lock.Unlock() if c.Done() { reply.State = MACHINE_DONE return nil } machine := c.getMachine(args.Id) if machine == nil { reply.State = MACHINE_INIT return nil } task := machine.Task switch task.TaskType { case TASK_MAP: // log.Printf("submit map task %d\n", task.TaskId) c.mapTask[task.TaskId].done = true for _, task := range c.mapTask { if !task.done { goto END } } c.genReduceTask() DPrintf("end map task\n") go func() { for i, task := range c.reduceTask { c.taskQueue <- Task{ MachineId: -1, TaskId: i, TaskType: TASK_REDUCE, ReduceTask: &task, MapTask: nil, } } }() // log.Printf("task queue is %+v %+v\n", c.reduceTask, c.taskQueue) case TASK_REDUCE: // log.Print("submit reduce task") c.reduceTask[task.TaskId].done = true for _, task := range c.reduceTask { if !task.done { goto END } } DPrintf("end reduce task\n") for _, m := range c.machine { delete(c.machine, m.Id) } close(c.taskQueue) <-c.running c.running <- false case TASK_NONE: log.Print("submit tasknone") } END: machine.State = MACHINE_IDLE machine.Task.TaskType = TASK_NONE *reply = *machine return nil } func (c *Coordinator) FlushTimeout(args int, reply *Empty) error { c.lock.Lock() defer c.lock.Unlock() if c.Done() { return nil } machine := c.getMachine(args) if machine == nil { return errors.New("flush machine not found") } machine.LastSecond = machineDefaultLastSecond return nil } func (c *Coordinator) UpdateState(args *Machine, reply *Machine) error { c.lock.Lock() defer c.lock.Unlock() if args.State == MACHINE_INIT { newId := c.machineId c.machineId += 1 machine := MakeMachine(newId, machineDefaultLastSecond) machine.State = MACHINE_IDLE *reply = *machine c.machine[newId] = machine // log.Printf("machine %d init map is %v\n", newId, c.machine) return nil } machine, ok := c.machine[args.Id] if !ok { *reply = *args reply.State = MACHINE_INIT return nil } machine.LastSecond = machineDefaultLastSecond *reply = *machine return nil } func (c *Coordinator) deamon() { for { time.Sleep(time.Second) c.lock.Lock() for _, m := range c.machine { if m.LastSecond > 0 { m.LastSecond -= 1 } if m.LastSecond == 0 { new_m := Machine{} new_m = *m go c.popTask(&new_m) DPrintf("delete machine %d\n", m.Id) delete(c.machine, m.Id) } } c.lock.Unlock() } } // an example RPC handler. // // the RPC argument and reply types are defined in rpc.go. func (c *Coordinator) Example(args *ExampleArgs, reply *ExampleReply) error { reply.Y = args.X + 1 return nil } // start a thread that listens for RPCs from worker.go func (c *Coordinator) server() { rpc.Register(c) rpc.HandleHTTP() //l, e := net.Listen("tcp", ":1234") sockname := coordinatorSock() os.Remove(sockname) l, e := net.Listen("unix", sockname) if e != nil { log.Fatal("listen error:", e) } go http.Serve(l, nil) } // main/mrcoordinator.go calls Done() periodically to find out // if the entire job has finished. func (c *Coordinator) Done() bool { ret := false // Your code here. state := <-c.running ret = !state c.running <- state return ret } // create a Coordinator. // main/mrcoordinator.go calls this function. // nReduce is the number of reduce tasks to use. func MakeCoordinator(files []string, nReduce int) *Coordinator { c := Coordinator{ mapTask: make([]MapTask, len(files)), mapSize: len(files), reduceTask: make([]ReduceTask, nReduce), reduceSize: nReduce, taskQueue: make(chan Task), machine: make(map[int]*Machine), machineId: 1, lock: sync.Mutex{}, running: make(chan bool, 1), } // Your code here. c.running <- true for i, file := range files { c.mapTask[i] = MapTask{ Fname: file, NReduce: nReduce, done: false, } } for i := range nReduce { c.reduceTask[i] = ReduceTask{ Fnames: make([]string, 0), done: false, } } go func() { for i, task := range c.mapTask { c.taskQueue <- Task{ MachineId: -1, TaskType: TASK_MAP, MapTask: &task, ReduceTask: nil, TaskId: i, } } }() go c.deamon() c.server() return &c }