1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
|
From vgoyal@redhat.com Wed Nov 20 09:51:43 2013
From: Vivek Goyal <vgoyal@redhat.com>
Date: Wed, 20 Nov 2013 12:50:49 -0500
Subject: [PATCH 4/6] kexec: A new system call, kexec_file_load, for in kernel kexec
To: linux-kernel@vger.kernel.org, kexec@lists.infradead.org
Cc: ebiederm@xmission.com, hpa@zytor.com, mjg59@srcf.ucam.org, greg@kroah.com, Vivek Goyal <vgoyal@redhat.com>
Message-ID: <1384969851-7251-5-git-send-email-vgoyal@redhat.com>
This patch implements the in kernel kexec functionality. It implements a
new system call kexec_file_load. I think parameter list of this system
call will change as I have not done the kernel image signature handling
yet. I have been told that I might have to pass the detached signature
and size as part of system call.
Previously segment list was prepared in user space. Now user space just
passes kernel fd, initrd fd and command line and kernel will create a
segment list internally.
This patch contains generic part of the code. Actual segment preparation
and loading is done by arch and image specific loader. Which comes in
next patch.
Signed-off-by: Vivek Goyal <vgoyal@redhat.com>
---
arch/x86/kernel/machine_kexec_64.c | 57 ++++
arch/x86/syscalls/syscall_64.tbl | 1
include/linux/kexec.h | 57 ++++
include/linux/syscalls.h | 3
include/uapi/linux/kexec.h | 4
kernel/kexec.c | 486 ++++++++++++++++++++++++++++++++++++-
kernel/sys_ni.c | 1
7 files changed, 607 insertions(+), 2 deletions(-)
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -22,6 +22,13 @@
#include <asm/mmu_context.h>
#include <asm/debugreg.h>
+/* arch dependent functionality related to kexec file based syscall */
+static struct kexec_file_type kexec_file_type[]={
+ {"", NULL, NULL, NULL, NULL},
+};
+
+static int nr_file_types = sizeof(kexec_file_type)/sizeof(kexec_file_type[0]);
+
static void free_transition_pgtable(struct kimage *image)
{
free_page((unsigned long)image->arch.pud);
@@ -200,7 +207,7 @@ void machine_kexec(struct kimage *image)
{
unsigned long page_list[PAGES_NR];
void *control_page;
- int save_ftrace_enabled;
+ int save_ftrace_enabled, idx;
#ifdef CONFIG_KEXEC_JUMP
if (image->preserve_context)
@@ -226,6 +233,11 @@ void machine_kexec(struct kimage *image)
#endif
}
+ /* Call image loader to prepare for entry */
+ idx = image->file_handler_idx;
+ if (kexec_file_type[idx].prep_entry)
+ kexec_file_type[idx].prep_entry(image);
+
control_page = page_address(image->control_code_page) + PAGE_SIZE;
memcpy(control_page, relocate_kernel, KEXEC_CONTROL_CODE_MAX_SIZE);
@@ -281,3 +293,46 @@ void arch_crash_save_vmcoreinfo(void)
#endif
}
+/* arch dependent functionality related to kexec file based syscall */
+
+int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
+ unsigned long buf_len)
+{
+ int i, ret = -ENOEXEC;
+
+ for (i = 0; i < nr_file_types; i++) {
+ if (!kexec_file_type[i].probe)
+ continue;
+
+ ret = kexec_file_type[i].probe(buf, buf_len);
+ if (!ret) {
+ image->file_handler_idx = i;
+ return ret;
+ }
+ }
+
+ return ret;
+}
+
+void *arch_kexec_kernel_image_load(struct kimage *image, char *kernel,
+ unsigned long kernel_len, char *initrd,
+ unsigned long initrd_len, char *cmdline,
+ unsigned long cmdline_len)
+{
+ int idx = image->file_handler_idx;
+
+ if (idx < 0)
+ return ERR_PTR(-ENOEXEC);
+
+ return kexec_file_type[idx].load(image, kernel, kernel_len, initrd,
+ initrd_len, cmdline, cmdline_len);
+}
+
+int arch_image_file_post_load_cleanup(struct kimage *image)
+{
+ int idx = image->file_handler_idx;
+
+ if (kexec_file_type[idx].cleanup)
+ return kexec_file_type[idx].cleanup(image);
+ return 0;
+}
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -320,6 +320,7 @@
311 64 process_vm_writev sys_process_vm_writev
312 common kcmp sys_kcmp
313 common finit_module sys_finit_module
+314 common kexec_file_load sys_kexec_file_load
#
# x32-specific system call numbers start at 512 to avoid cache impact
--- a/include/linux/kexec.h
+++ b/include/linux/kexec.h
@@ -110,13 +110,60 @@ struct kimage {
#define KEXEC_TYPE_DEFAULT 0
#define KEXEC_TYPE_CRASH 1
unsigned int preserve_context : 1;
+ /* If set, we are using file mode kexec syscall */
+ unsigned int file_mode : 1;
#ifdef ARCH_HAS_KIMAGE_ARCH
struct kimage_arch arch;
#endif
+
+ /* Additional Fields for file based kexec syscall */
+ void *kernel_buf;
+ unsigned long kernel_buf_len;
+
+ void *initrd_buf;
+ unsigned long initrd_buf_len;
+
+ char *cmdline_buf;
+ unsigned long cmdline_buf_len;
+
+ /* index of file handler in array */
+ int file_handler_idx;
+
+ /* Image loader handling the kernel can store a pointer here */
+ void * image_loader_data;
};
+/*
+ * Keeps a track of buffer parameters as provided by caller for requesting
+ * memory placement of buffer.
+ */
+struct kexec_buf {
+ struct kimage *image;
+ char *buffer;
+ unsigned long bufsz;
+ unsigned long memsz;
+ unsigned long buf_align;
+ unsigned long buf_min;
+ unsigned long buf_max;
+ int top_down; /* allocate from top of memory hole */
+};
+typedef int (kexec_probe_t)(const char *kernel_buf, unsigned long kernel_size);
+typedef void *(kexec_load_t)(struct kimage *image, char *kernel_buf,
+ unsigned long kernel_len, char *initrd,
+ unsigned long initrd_len, char *cmdline,
+ unsigned long cmdline_len);
+typedef int (kexec_prep_entry_t)(struct kimage *image);
+typedef int (kexec_cleanup_t)(struct kimage *image);
+
+struct kexec_file_type {
+ const char *name;
+ kexec_probe_t *probe;
+ kexec_load_t *load;
+ kexec_prep_entry_t *prep_entry;
+ kexec_cleanup_t *cleanup;
+};
/* kexec interface functions */
extern void machine_kexec(struct kimage *image);
@@ -127,6 +174,11 @@ extern asmlinkage long sys_kexec_load(un
struct kexec_segment __user *segments,
unsigned long flags);
extern int kernel_kexec(void);
+extern int kexec_add_buffer(struct kimage *image, char *buffer,
+ unsigned long bufsz, unsigned long memsz,
+ unsigned long buf_align, unsigned long buf_min,
+ unsigned long buf_max, int buf_end,
+ unsigned long *load_addr);
#ifdef CONFIG_COMPAT
extern asmlinkage long compat_sys_kexec_load(unsigned long entry,
unsigned long nr_segments,
@@ -135,6 +187,8 @@ extern asmlinkage long compat_sys_kexec_
#endif
extern struct page *kimage_alloc_control_pages(struct kimage *image,
unsigned int order);
+extern void kimage_set_start_addr(struct kimage *image, unsigned long start);
+
extern void crash_kexec(struct pt_regs *);
int kexec_should_crash(struct task_struct *);
void crash_save_cpu(struct pt_regs *regs, int cpu);
@@ -182,6 +236,9 @@ extern struct kimage *kexec_crash_image;
#define KEXEC_FLAGS (KEXEC_ON_CRASH | KEXEC_PRESERVE_CONTEXT)
#endif
+/* Listof defined/legal kexec file flags */
+#define KEXEC_FILE_FLAGS (KEXEC_FILE_UNLOAD | KEXEC_FILE_ON_CRASH)
+
#define VMCOREINFO_BYTES (4096)
#define VMCOREINFO_NOTE_NAME "VMCOREINFO"
#define VMCOREINFO_NOTE_NAME_BYTES ALIGN(sizeof(VMCOREINFO_NOTE_NAME), 4)
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -301,6 +301,9 @@ asmlinkage long sys_restart_syscall(void
asmlinkage long sys_kexec_load(unsigned long entry, unsigned long nr_segments,
struct kexec_segment __user *segments,
unsigned long flags);
+asmlinkage long sys_kexec_file_load(int kernel_fd, int initrd_fd,
+ const char __user * cmdline_ptr,
+ unsigned long cmdline_len, unsigned long flags);
asmlinkage long sys_exit(int error_code);
asmlinkage long sys_exit_group(int error_code);
--- a/include/uapi/linux/kexec.h
+++ b/include/uapi/linux/kexec.h
@@ -13,6 +13,10 @@
#define KEXEC_PRESERVE_CONTEXT 0x00000002
#define KEXEC_ARCH_MASK 0xffff0000
+/* Kexec file load interface flags */
+#define KEXEC_FILE_UNLOAD 0x00000001
+#define KEXEC_FILE_ON_CRASH 0x00000002
+
/* These values match the ELF architecture values.
* Unless there is a good reason that should continue to be the case.
*/
--- a/kernel/kexec.c
+++ b/kernel/kexec.c
@@ -120,6 +120,11 @@ static struct page *kimage_alloc_page(st
gfp_t gfp_mask,
unsigned long dest);
+void kimage_set_start_addr(struct kimage *image, unsigned long start)
+{
+ image->start = start;
+}
+
static int copy_user_segment_list(struct kimage *image,
unsigned long nr_segments,
struct kexec_segment __user *segments)
@@ -256,6 +261,225 @@ static struct kimage *do_kimage_alloc_in
static void kimage_free_page_list(struct list_head *list);
+static int copy_file_from_fd(int fd, void **buf, unsigned long *buf_len)
+{
+ struct fd f = fdget(fd);
+ int ret = 0;
+ struct kstat stat;
+ loff_t pos;
+ ssize_t bytes = 0;
+
+ if (!f.file)
+ return -EBADF;
+
+ ret = vfs_getattr(&f.file->f_path, &stat);
+ if (ret)
+ goto out;
+
+ if (stat.size > INT_MAX) {
+ ret = -EFBIG;
+ goto out;
+ }
+
+ /* Don't hand 0 to vmalloc, it whines. */
+ if (stat.size == 0) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ *buf = vmalloc(stat.size);
+ if (!*buf) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ pos = 0;
+ while (pos < stat.size) {
+ bytes = kernel_read(f.file, pos, (char *)(*buf) + pos,
+ stat.size - pos);
+ if (bytes < 0) {
+ vfree(*buf);
+ ret = bytes;
+ goto out;
+ }
+
+ if (bytes == 0)
+ break;
+ pos += bytes;
+ }
+
+ *buf_len = pos;
+
+out:
+ fdput(f);
+ return ret;
+}
+
+/* Architectures can provide this probe function */
+int __attribute__ ((weak))
+arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
+ unsigned long buf_len)
+{
+ return -ENOEXEC;
+}
+
+void * __attribute__ ((weak))
+arch_kexec_kernel_image_load(struct kimage *image, char *kernel,
+ unsigned long kernel_len, char *initrd,
+ unsigned long initrd_len, char *cmdline,
+ unsigned long cmdline_len)
+{
+ return ERR_PTR(-ENOEXEC);
+}
+
+void __attribute__ ((weak))
+arch_kimage_file_post_load_cleanup(struct kimage *image)
+{
+ return;
+}
+
+/*
+ * Free up tempory buffers allocated which are not needed after image has
+ * been loaded.
+ *
+ * Free up memory used by kernel, initrd, and comand line. This is temporary
+ * memory allocation which is not needed any more after these buffers have
+ * been loaded into separate segments and have been copied elsewhere
+ */
+static void kimage_file_post_load_cleanup(struct kimage *image)
+{
+ if (image->kernel_buf) {
+ vfree(image->kernel_buf);
+ image->kernel_buf = NULL;
+ }
+
+ if (image->initrd_buf) {
+ vfree(image->initrd_buf);
+ image->initrd_buf = NULL;
+ }
+
+ if (image->cmdline_buf) {
+ vfree(image->cmdline_buf);
+ image->cmdline_buf = NULL;
+ }
+
+ /* See if architcture has anything to cleanup post load */
+ arch_kimage_file_post_load_cleanup(image);
+}
+
+/*
+ * In file mode list of segments is prepared by kernel. Copy relevant
+ * data from user space, do error checking, prepare segment list
+ */
+static int kimage_file_prepare_segments(struct kimage *image, int kernel_fd,
+ int initrd_fd, const char __user *cmdline_ptr,
+ unsigned long cmdline_len)
+{
+ int ret = 0;
+ void *ldata;
+
+ ret = copy_file_from_fd(kernel_fd, &image->kernel_buf,
+ &image->kernel_buf_len);
+ if (ret)
+ goto out;
+
+ /* Call arch image probe handlers */
+ ret = arch_kexec_kernel_image_probe(image, image->kernel_buf,
+ image->kernel_buf_len);
+
+ if (ret)
+ goto out;
+
+ ret = copy_file_from_fd(initrd_fd, &image->initrd_buf,
+ &image->initrd_buf_len);
+ if (ret)
+ goto out;
+
+ image->cmdline_buf = vzalloc(cmdline_len);
+ if (!image->cmdline_buf)
+ goto out;
+
+ ret = copy_from_user(image->cmdline_buf, cmdline_ptr, cmdline_len);
+ if (ret) {
+ ret = -EFAULT;
+ goto out;
+ }
+
+ image->cmdline_buf_len = cmdline_len;
+
+ /* command line should be a string with last byte null */
+ if (image->cmdline_buf[cmdline_len - 1] != '\0') {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ /* Call arch image load handlers */
+ ldata = arch_kexec_kernel_image_load(image,
+ image->kernel_buf, image->kernel_buf_len,
+ image->initrd_buf, image->initrd_buf_len,
+ image->cmdline_buf, image->cmdline_buf_len);
+
+ if (IS_ERR(ldata)) {
+ ret = PTR_ERR(ldata);
+ goto out;
+ }
+
+ image->image_loader_data = ldata;
+out:
+ return ret;
+}
+
+static int kimage_file_normal_alloc(struct kimage **rimage, int kernel_fd,
+ int initrd_fd, const char __user *cmdline_ptr,
+ unsigned long cmdline_len)
+{
+ int result;
+ struct kimage *image;
+
+ /* Allocate and initialize a controlling structure */
+ image = do_kimage_alloc_init();
+ if (!image)
+ return -ENOMEM;
+
+ image->file_mode = 1;
+ image->file_handler_idx = -1;
+
+ result = kimage_file_prepare_segments(image, kernel_fd, initrd_fd,
+ cmdline_ptr, cmdline_len);
+ if (result)
+ goto out_free_image;
+
+ result = sanity_check_segment_list(image);
+ if (result)
+ goto out_free_post_load_bufs;
+
+ result = -ENOMEM;
+ image->control_code_page = kimage_alloc_control_pages(image,
+ get_order(KEXEC_CONTROL_PAGE_SIZE));
+ if (!image->control_code_page) {
+ printk(KERN_ERR "Could not allocate control_code_buffer\n");
+ goto out_free_post_load_bufs;
+ }
+
+ image->swap_page = kimage_alloc_control_pages(image, 0);
+ if (!image->swap_page) {
+ printk(KERN_ERR "Could not allocate swap buffer\n");
+ goto out_free_control_pages;
+ }
+
+ *rimage = image;
+ return 0;
+
+out_free_control_pages:
+ kimage_free_page_list(&image->control_pages);
+out_free_post_load_bufs:
+ kimage_file_post_load_cleanup(image);
+ kfree(image->image_loader_data);
+out_free_image:
+ kfree(image);
+ return result;
+}
+
static int kimage_normal_alloc(struct kimage **rimage, unsigned long entry,
unsigned long nr_segments,
struct kexec_segment __user *segments)
@@ -679,6 +903,14 @@ static void kimage_free(struct kimage *i
/* Free the kexec control pages... */
kimage_free_page_list(&image->control_pages);
+
+ kfree(image->image_loader_data);
+
+ /*
+ * Free up any temporary buffers allocated. This might hit if
+ * error occurred much later after buffer allocation.
+ */
+ kimage_file_post_load_cleanup(image);
kfree(image);
}
@@ -843,7 +1075,11 @@ static int kimage_load_normal_segment(st
PAGE_SIZE - (maddr & ~PAGE_MASK));
uchunk = min(ubytes, mchunk);
- result = copy_from_user(ptr, buf, uchunk);
+ /* For file based kexec, source pages are in kernel memory */
+ if (image->file_mode)
+ memcpy(ptr, buf, uchunk);
+ else
+ result = copy_from_user(ptr, buf, uchunk);
kunmap(page);
if (result) {
result = -EFAULT;
@@ -1093,6 +1329,72 @@ asmlinkage long compat_sys_kexec_load(un
}
#endif
+SYSCALL_DEFINE5(kexec_file_load, int, kernel_fd, int, initrd_fd, const char __user *, cmdline_ptr, unsigned long, cmdline_len, unsigned long, flags)
+{
+ int ret = 0, i;
+ struct kimage **dest_image, *image;
+
+ /* We only trust the superuser with rebooting the system. */
+ if (!capable(CAP_SYS_BOOT))
+ return -EPERM;
+
+ pr_debug("kexec_file_load: kernel_fd=%d initrd_fd=%d cmdline=0x%p"
+ " cmdline_len=%lu flags=0x%lx\n", kernel_fd, initrd_fd,
+ cmdline_ptr, cmdline_len, flags);
+
+ /* Make sure we have a legal set of flags */
+ if (flags != (flags & KEXEC_FILE_FLAGS))
+ return -EINVAL;
+
+ image = NULL;
+
+ if (!mutex_trylock(&kexec_mutex))
+ return -EBUSY;
+
+ dest_image = &kexec_image;
+ if (flags & KEXEC_FILE_ON_CRASH)
+ dest_image = &kexec_crash_image;
+
+ if (flags & KEXEC_FILE_UNLOAD)
+ goto exchange;
+
+ ret = kimage_file_normal_alloc(&image, kernel_fd, initrd_fd,
+ cmdline_ptr, cmdline_len);
+ if (ret)
+ goto out;
+
+ ret = machine_kexec_prepare(image);
+ if (ret)
+ goto out;
+
+ for (i = 0; i < image->nr_segments; i++) {
+ struct kexec_segment *ksegment;
+
+ ksegment = &image->segment[i];
+ pr_debug("Loading segment %d: buf=0x%p bufsz=0x%lx mem=0x%lx"
+ " memsz=0x%lx\n", i, ksegment->buf, ksegment->bufsz,
+ ksegment->mem, ksegment->memsz);
+ ret = kimage_load_segment(image, &image->segment[i]);
+ if (ret)
+ goto out;
+ pr_debug("Done loading segment %d\n", i);
+ }
+
+ kimage_terminate(image);
+
+ /*
+ * Free up any temporary buffers allocated which are not needed
+ * after image has been loaded
+ */
+ kimage_file_post_load_cleanup(image);
+exchange:
+ image = xchg(dest_image, image);
+out:
+ mutex_unlock(&kexec_mutex);
+ kimage_free(image);
+ return ret;
+}
+
void crash_kexec(struct pt_regs *regs)
{
/* Take the kexec_mutex here to prevent sys_kexec_load
@@ -1647,6 +1949,188 @@ static int __init crash_save_vmcoreinfo_
module_init(crash_save_vmcoreinfo_init)
+static int kexec_add_segment(struct kimage *image, char *buf,
+ unsigned long bufsz, unsigned long mem, unsigned long memsz)
+{
+ struct kexec_segment *ksegment;
+
+ ksegment = &image->segment[image->nr_segments];
+ ksegment->buf = buf;
+ ksegment->bufsz = bufsz;
+ ksegment->mem = mem;
+ ksegment->memsz = memsz;
+ image->nr_segments++;
+
+ return 0;
+}
+
+static int locate_mem_hole_top_down(unsigned long start, unsigned long end,
+ struct kexec_buf *kbuf)
+{
+ struct kimage *image = kbuf->image;
+ unsigned long temp_start, temp_end;
+
+ temp_end = min(end, kbuf->buf_max);
+ temp_start = temp_end - kbuf->memsz;
+
+ do {
+ /* align down start */
+ temp_start = temp_start & (~ (kbuf->buf_align - 1));
+
+ if (temp_start < start || temp_start < kbuf->buf_min)
+ return 0;
+
+ temp_end = temp_start + kbuf->memsz - 1;
+
+ /*
+ * Make sure this does not conflict with any of existing
+ * segments
+ */
+ if (kimage_is_destination_range(image, temp_start, temp_end)) {
+ temp_start = temp_start - PAGE_SIZE;
+ continue;
+ }
+
+ /* We found a suitable memory range */
+ break;
+ } while(1);
+
+ /* If we are here, we found a suitable memory range */
+ kexec_add_segment(image, kbuf->buffer, kbuf->bufsz, temp_start,
+ kbuf->memsz);
+
+ /* Stop navigating through remaining System RAM ranges */
+ return 1;
+}
+
+static int locate_mem_hole_bottom_up(unsigned long start, unsigned long end,
+ struct kexec_buf *kbuf)
+{
+ struct kimage *image = kbuf->image;
+ unsigned long temp_start, temp_end;
+
+ temp_start = max(start, kbuf->buf_min);
+
+ do {
+ temp_start = ALIGN(temp_start, kbuf->buf_align);
+ temp_end = temp_start + kbuf->memsz - 1;
+
+ if (temp_end > end || temp_end > kbuf->buf_max)
+ return 0;
+ /*
+ * Make sure this does not conflict with any of existing
+ * segments
+ */
+ if (kimage_is_destination_range(image, temp_start, temp_end)) {
+ temp_start = temp_start + PAGE_SIZE;
+ continue;
+ }
+
+ /* We found a suitable memory range */
+ break;
+ } while(1);
+
+ /* If we are here, we found a suitable memory range */
+ kexec_add_segment(image, kbuf->buffer, kbuf->bufsz, temp_start,
+ kbuf->memsz);
+
+ /* Stop navigating through remaining System RAM ranges */
+ return 1;
+}
+
+static int walk_ram_range_callback(u64 start, u64 end, void *arg)
+{
+ struct kexec_buf *kbuf = (struct kexec_buf *)arg;
+ unsigned long sz = end - start + 1;
+
+ /* Returning 0 will take to next memory range */
+ if (sz < kbuf->memsz)
+ return 0;
+
+ if (end < kbuf->buf_min || start > kbuf->buf_max)
+ return 0;
+
+ /*
+ * Allocate memory top down with-in ram range. Otherwise bottom up
+ * allocation.
+ */
+ if (kbuf->top_down)
+ return locate_mem_hole_top_down(start, end, kbuf);
+ else
+ return locate_mem_hole_bottom_up(start, end, kbuf);
+}
+
+/*
+ * Helper functions for placing a buffer in a kexec segment. This assumes
+ * that kexec_mutex is held.
+ */
+int kexec_add_buffer(struct kimage *image, char *buffer,
+ unsigned long bufsz, unsigned long memsz,
+ unsigned long buf_align, unsigned long buf_min,
+ unsigned long buf_max, int top_down, unsigned long *load_addr)
+{
+
+ unsigned long nr_segments = image->nr_segments, new_nr_segments;
+ struct kexec_segment *ksegment;
+ struct kexec_buf *kbuf;
+
+ /* Currently adding segment this way is allowed only in file mode */
+ if (!image->file_mode)
+ return -EINVAL;
+
+ if (nr_segments >= KEXEC_SEGMENT_MAX)
+ return -EINVAL;
+
+ /*
+ * Make sure we are not trying to add buffer after allocating
+ * control pages. All segments need to be placed first before
+ * any control pages are allocated. As control page allocation
+ * logic goes through list of segments to make sure there are
+ * no destination overlaps.
+ */
+ WARN_ONCE(!list_empty(&image->control_pages), "Adding kexec buffer"
+ " after allocating control pages\n");
+
+ kbuf = kzalloc(sizeof(struct kexec_buf), GFP_KERNEL);
+ if (!kbuf)
+ return -ENOMEM;
+
+ kbuf->image = image;
+ kbuf->buffer = buffer;
+ kbuf->bufsz = bufsz;
+ /* Align memsz to next page boundary */
+ kbuf->memsz = ALIGN(memsz, PAGE_SIZE);
+
+ /* Align to atleast page size boundary */
+ kbuf->buf_align = max(buf_align, PAGE_SIZE);
+ kbuf->buf_min = buf_min;
+ kbuf->buf_max = buf_max;
+ kbuf->top_down = top_down;
+
+ /* Walk the RAM ranges and allocate a suitable range for the buffer */
+ walk_system_ram_res(0, -1, kbuf, walk_ram_range_callback);
+
+ kbuf->image = NULL;
+ kfree(kbuf);
+
+ /*
+ * If range could be found successfully, it would have incremented
+ * the nr_segments value.
+ */
+ new_nr_segments = image->nr_segments;
+
+ /* A suitable memory range could not be found for buffer */
+ if (new_nr_segments == nr_segments)
+ return -EADDRNOTAVAIL;
+
+ /* Found a suitable memory range */
+
+ ksegment = &image->segment[new_nr_segments - 1];
+ *load_addr = ksegment->mem;
+ return 0;
+}
+
+
/*
* Move into place and start executing a preloaded standalone
* executable. If nothing was preloaded return an error.
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -25,6 +25,7 @@ cond_syscall(sys_swapon);
cond_syscall(sys_swapoff);
cond_syscall(sys_kexec_load);
cond_syscall(compat_sys_kexec_load);
+cond_syscall(sys_kexec_file_load);
cond_syscall(sys_init_module);
cond_syscall(sys_finit_module);
cond_syscall(sys_delete_module);
|