
# Port before_v0.60/data_extraction before_v0.60/examples before_v0.60/duplicates (#847)

This PR is part of porting all old scripts (#221) and covers the following
modules:

- `data_extraction`
- `examples` 
- `duplicates`

## 7 changed files:

### `data_extraction`
- `data_extraction/ultimate_extractor.nu`: removed; it has already been
ported to `modules/data_extraction/ultimate_extractor.nu`

### `duplicates`
- `duplicates/duplicates.nu` -> `modules/duplicates/mod.nu`
- `duplicates/example.nu` -> `modules/duplicates/example.nu`
- `duplicates/README.md` -> `modules/duplicates/README.md`: unchanged

### `examples`
- `examples/netstat.nu` -> `modules/examples/netstat.nu`
- `examples/date_in_local_timezones.nu` -> `modules/examples/date_in_local_timezones.nu`
- `before_v0.60/assets/core_team.nu`: removed. This table has been embedded
into `date_in_local_timezones.nu`.

`modules/duplicates/README.md`:

```md
# Duplicates Scripts
### Definition
These scripts show how `group-by` can be used to identify duplicate rows. The example applies the same idea to heuristically find duplicate files.
```
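To make the mechanism concrete, here is a minimal sketch of the same idea as a single pipeline (an illustration only, not part of the ported module; the sample table is made up):

```nu
# Group rows by the `name` column, keep only groups with more than one
# member, and flatten the surviving groups back into their original rows.
[{name: "John", city: "Oslo"} {name: "John", city: "Bergen"} {name: "Jane", city: "Oslo"}]
| group-by name
| transpose value rows
| where {|group| ($group.rows | length) > 1 }
| get rows
| flatten
# => the two "John" rows
```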

`modules/duplicates/example.nu`:

```nu
# duplicates example
use mod.nu *

let info = '[{"name": "John", "lastname": "Doe"}, {"name": "John", "lastname": "Roe"}, {"name": "Jane", "lastname": "Soe"}]'
print ($info | from json)
print ($info | from json | duplicates name)

# duplicates files example
echo A | save A.txt
echo A | save B.txt
# note: with `echo B | save B.txt` instead, `duplicates files` would report a
# false positive: B.txt would have the same size as A.txt but different content
echo ABC | save C.txt
print (ls)
print (duplicates files)
rm A.txt B.txt C.txt --permanent
```
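For reference, with the module as defined below, the `duplicates name` call should print the two `John` rows, and `duplicates files` should report `A.txt` and `B.txt` (plus any other same-size files that happen to exist in the working tree), since both contain the single byte `A`.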

`modules/duplicates/mod.nu`:

```nu
# duplicates returns the rows that contain duplicate values in the given column.
export def duplicates [
    column: string # Column to look for duplicates in
    --count(-c) # Display the number of times each duplicated value is repeated
] {
    group-by {get $column | into string} |
    transpose |
    insert count { $in.column1 | flatten | length } |
    where count > 1 |
    reject column0 |
    if not $count { reject count } else { each { $in } } |
    flatten |
    flatten
}

# duplicates files recursively finds duplicate files in the current working folder.
# It uses a heuristic based on duplicate files having the same size.
export def "duplicates files" [] {
    do -i {ls **/*} | duplicates size
}
```
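Because the size heuristic can report false positives (as the note in `example.nu` points out), one possible refinement is to confirm same-size candidates by content hash. The sketch below is not part of this PR: `duplicates files strict` is a hypothetical name, and it assumes the `duplicates` command above is in scope:

```nu
# Hash every regular file and report rows whose hashes collide; identical
# hashes, unlike identical sizes, all but guarantee identical content.
def "duplicates files strict" [] {
    do -i { ls **/* }
    | where type == file                                   # skip directories
    | insert sha256 {|f| open --raw $f.name | hash sha256 }
    | duplicates sha256
}
```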