
# Port before_v0.60/data_extraction before_v0.60/examples before_v0.60/duplicates (#847)

This PR is part of porting all old scripts (#221) and covers the following
modules:

- `data_extraction`
- `examples` 
- `duplicates`

## 7 changed files:

### `data_extraction`
- `data_extraction/ultimate_extractor.nu`: removed; it has already been
ported to `modules/data_extraction/ultimate_extractor.nu`

### `duplicates`
- `duplicates/duplicates.nu` -> `modules/duplicates/mod.nu`
- `duplicates/example.nu` -> `modules/duplicates/example.nu`
- `duplicates/README.md` -> `modules/duplicates/README.md`: unchanged

### `examples`
- `examples/netstat.nu` -> `modules/examples/netstat.nu`
- `examples/date_in_local_timezones.nu` -> `modules/examples/date_in_local_timezones.nu`
- `before_v0.60/assets/core_team.nu`: removed. This table has been embedded
into `date_in_local_timezones.nu`.

`modules/duplicates/README.md`:

```md
# Duplicates Scripts
### Definition
These scripts show how `group-by` can be used to identify duplicate rows. The example applies the same idea to heuristically find duplicate files.
```
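To make the mechanism concrete, here is a minimal sketch of the same idea as a single pipeline (an illustration only, not part of the ported module; the sample table is made up):

```nu
# Group rows by the `name` column, keep only groups with more than one
# member, and flatten the surviving groups back into their original rows.
[{name: "John", city: "Oslo"} {name: "John", city: "Bergen"} {name: "Jane", city: "Oslo"}]
| group-by name
| transpose value rows
| where {|group| ($group.rows | length) > 1 }
| get rows
| flatten
# => the two "John" rows
```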

`modules/duplicates/example.nu`:

```nu
# duplicates example
use mod.nu *

let info = '[{"name": "John", "lastname": "Doe"}, {"name": "John", "lastname": "Roe"}, {"name": "Jane", "lastname": "Soe"}]'
print ($info | from json)
print ($info | from json | duplicates name)

# duplicates files example
echo A | save A.txt
echo A | save B.txt
# note: with `echo B | save B.txt` instead, `duplicates files` would report a
# false positive: B.txt would have the same size as A.txt but different content
echo ABC | save C.txt
print (ls)
print (duplicates files)
rm A.txt B.txt C.txt --permanent
```
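For reference, with the module as defined below, the `duplicates name` call should print the two `John` rows, and `duplicates files` should report `A.txt` and `B.txt` (plus any other same-size files that happen to exist in the working tree), since both contain the single byte `A`.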

`modules/duplicates/mod.nu`:

```nu
# duplicates returns the rows that contain duplicate values in the given column.
export def duplicates [
    column: string # Column to look for duplicates in
    --count(-c) # Display the number of times each duplicated value is repeated
] {
    group-by {get $column | into string} |
    transpose |
    insert count { $in.column1 | flatten | length } |
    where count > 1 |
    reject column0 |
    if not $count { reject count } else { each { $in } } |
    flatten |
    flatten
}

# duplicates files recursively finds duplicate files in the current working folder.
# It uses a heuristic based on duplicate files having the same size.
export def "duplicates files" [] {
    do -i {ls **/*} | duplicates size
}
```
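Because the size heuristic can report false positives (as the note in `example.nu` points out), one possible refinement is to confirm same-size candidates by content hash. The sketch below is not part of this PR: `duplicates files strict` is a hypothetical name, and it assumes the `duplicates` command above is in scope:

```nu
# Hash every regular file and report rows whose hashes collide; identical
# hashes, unlike identical sizes, all but guarantee identical content.
def "duplicates files strict" [] {
    do -i { ls **/* }
    | where type == file                                   # skip directories
    | insert sha256 {|f| open --raw $f.name | hash sha256 }
    | duplicates sha256
}
```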